Revision 5f312940
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/gwr_reg.R | ||
---|---|---|
9 | 9 |
library(rgdal) |
10 | 10 |
library(spgwr) |
11 | 11 |
|
12 |
setwd("/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/") |
|
13 |
prop<-0 |
|
12 |
###Parameters and arguments |
|
13 |
|
|
14 |
infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp" |
|
15 |
path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/" |
|
16 |
setwd(path) |
|
17 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
|
18 |
infile2<-"dates_interpolation_03052012.txt" |
|
19 |
prop<-0.3 |
|
20 |
out_prefix<-"_03132012_0" |
|
14 | 21 |
|
15 | 22 |
###Reading the shapefile from the local directory |
16 | 23 |
ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M") |
... | ... | |
20 | 27 |
|
21 | 28 |
ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe |
22 | 29 |
ghcn$Eastness_w <- sin(ghcn$slope)*sin(ghcn$ASPECT) #adding variable to the dataframe. |
23 |
|
|
30 |
|
|
24 | 31 |
set.seed(100) |
25 | 32 |
|
26 |
ghcn.subsets <-subset(ghcn,ghcn$date=="20100101")
|
|
33 |
dates <-readLines(paste(path,"/",infile2, sep=""))
|
|
27 | 34 |
|
28 |
#summary(lm(y~x,data=df,weights=(df$wght1)^(3/4)) |
|
29 |
#ggwr.sel: find the bandwith from the data |
|
35 |
results <- matrix(1,length(dates),3) #This is a matrix containing the diagnostic measures from the GAM models. |
|
30 | 36 |
|
31 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
37 |
#Screening for bad values |
|
38 |
#tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC |
|
39 |
#tmax range: min max) |
|
40 |
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400) |
|
41 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
|
42 |
ghcn<-ghcn_test2 |
|
43 |
#lon range |
|
44 |
#lat range |
|
45 |
#ELEV_SRTM |
|
46 |
#Eastness |
|
47 |
#Northness |
|
32 | 48 |
|
33 |
n<- nrow(ghcn.subsets) |
|
34 |
ns<- n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
35 |
nv<- n-ns #create a sample for validation with prop of the rows |
|
36 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
37 |
ind.training <- sample(nrow(ghcn.subsets), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
38 |
ind.testing <- setdiff(1:nrow(ghcn.subsets), ind.training) |
|
39 |
data_s <- ghcn.subsets[ind.training, ] |
|
40 |
data_v <- ghcn.subsets[ind.testing, ] |
|
49 |
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data |
|
50 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d))) |
|
41 | 51 |
|
42 |
bwG <- gwr.sel(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC,data=data_s,gweight=gwr.Gauss, verbose = FALSE) |
|
43 |
gwrG<- gwr(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC, data=data_s, bandwidth=bwG, gweight=gwr.Gauss, hatmatrix=TRUE) |
|
52 |
#ghcn.subsets <-subset(ghcn,ghcn$date=="20100101") |
|
53 |
#summary(lm(y~x,data=df,weights=(df$wght1)^(3/4)) |
|
54 |
#ggwr.sel: find the bandwith from the data |
|
44 | 55 |
|
45 |
Res_fit<-gwrG$lm$residuals |
|
46 |
RMSE_f<-sqrt(sum(Res_fit^2)/ns) |
|
47 | 56 |
|
48 |
t<- data_s$tmax-gwrG$lm$fitted.values |
|
49 |
t2<-t-Res_fit #This should be zero |
|
50 | 57 |
|
58 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
59 |
for(i in 1:length(dates)){ # start of the for loop #1 |
|
60 |
|
|
61 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
|
62 |
|
|
63 |
n<-nrow(ghcn.subsets[[i]]) |
|
64 |
ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
65 |
nv<-n-ns #create a sample for validation with prop of the rows |
|
66 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
67 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
68 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) |
|
69 |
data_s <- ghcn.subsets[[i]][ind.training, ] |
|
70 |
data_v <- ghcn.subsets[[i]][ind.testing, ] |
|
71 |
bwG <- gwr.sel(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC,data=data_s,gweight=gwr.Gauss, verbose = FALSE) |
|
72 |
gwrG<- gwr(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC, data=data_s, bandwidth=bwG, gweight=gwr.Gauss, hatmatrix=TRUE) |
|
73 |
|
|
74 |
Res_fit<-gwrG$lm$residuals |
|
75 |
RMSE_f<-sqrt(sum(Res_fit^2)/ns) |
|
76 |
t<- data_s$tmax-gwrG$lm$fitted.values #Checking output |
|
77 |
t2<-t-Res_fit #This should be zero |
|
78 |
data_s$residuals <- Res_fit #adding field to the data |
|
79 |
|
|
80 |
#Saving the subset in a dataframe |
|
81 |
data_name<-paste("ghcn_v_",dates[[i]],sep="") |
|
82 |
assign(data_name,data_v) |
|
83 |
data_name<-paste("ghcn_s_",dates[[i]],sep="") |
|
84 |
assign(data_name,data_s) |
|
85 |
|
|
86 |
results[i,1]<- dates[i] #storing the interpolation dates in the first column |
|
87 |
results[i,2]<- ns #number of stations used in the training stage |
|
88 |
results[i,3]<- RMSE_f |
|
89 |
} |
|
90 |
|
|
91 |
## Plotting and saving diagnostic measures |
|
92 |
results_num <-results |
|
93 |
mode(results_num)<- "numeric" |
|
94 |
# Make it numeric first |
|
95 |
# Now turn it into a data.frame... |
|
96 |
|
|
97 |
results_table<-as.data.frame(results_num) |
|
98 |
colnames(results_table)<-c("dates","ns","RMSE_gwr1") |
|
99 |
|
|
100 |
write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep="")) |
|
101 |
|
|
102 |
# End of script########## |
|
103 |
|
|
104 |
# ############################### |
|
105 |
|
|
106 |
|
|
107 |
|
|
51 | 108 |
#Compare the coefficients and residuals using both 30 and 100% |
52 |
|
|
53 | 109 |
#coefficients are stored in gwrG$SDF$lon |
54 |
|
|
55 | 110 |
#write out a new shapefile (including .prj component) |
56 |
|
|
57 |
writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile") |
|
58 |
|
|
111 |
#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile") |
|
59 | 112 |
#ogrInfo(".", "ghcn_1507_s") #This will check the file... |
60 | 113 |
#plot(ghcn_1507, axes=TRUE, border="gray") |
61 | 114 |
|
Also available in: Unified diff
GWR in adding loop for 10 dates