Revision 5f312940

Added by Benoit Parmentier over 12 years ago

GWR: adding loop for 10 dates


climate/research/oregon/interpolation/gwr_reg.R
 library(rgdal)
 library(spgwr)
 
-setwd("/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/")
-prop<-0
+###Parameters and arguments
+
+infile1<-"ghcn_or_tmax_b_03032012_OR83M.shp"
+path<- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"
+setwd(path)
+#infile2<-"dates_interpolation_03012012.txt"  # list of 10 dates for the regression
+infile2<-"dates_interpolation_03052012.txt"
+prop<-0.3
+out_prefix<-"_03132012_0"
 
 ###Reading the shapefile from the local directory
 ghcn<-readOGR(".", "ghcn_or_tmax_b_03032012_OR83M")
...
 
 ghcn$Northness_w <- sin(ghcn$slope)*cos(ghcn$ASPECT) #Adding a variable to the dataframe
 ghcn$Eastness_w  <- sin(ghcn$slope)*sin(ghcn$ASPECT)  #adding variable to the dataframe.
-
+
 set.seed(100)
 
-ghcn.subsets <-subset(ghcn,ghcn$date=="20100101")
+dates <-readLines(paste(path,"/",infile2, sep=""))
 
-#summary(lm(y~x,data=df,weights=(df$wght1)^(3/4))
-#ggwr.sel: find the bandwith from the data
+results <- matrix(1,length(dates),3)            #This is a matrix containing the diagnostic measures from the GAM models.
 
-###Regression part 1: Creating a validation dataset by creating training and testing datasets
+#Screening for bad values
+#tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC
+#tmax range: min max)
+ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
+ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
+ghcn<-ghcn_test2
+#lon range
+#lat range
+#ELEV_SRTM
+#Eastness
+#Northness
 
-n<- nrow(ghcn.subsets)
-ns<- n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
-nv<- n-ns             #create a sample for validation with prop of the rows
-#ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
-ind.training <- sample(nrow(ghcn.subsets), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
-ind.testing <- setdiff(1:nrow(ghcn.subsets), ind.training)
-data_s <- ghcn.subsets[ind.training, ]
-data_v <- ghcn.subsets[ind.testing, ]
+#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==as.numeric(d)))#this creates a list of 10 subsets data
+ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))
 
-bwG <- gwr.sel(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC,data=data_s,gweight=gwr.Gauss, verbose = FALSE)
-gwrG<- gwr(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC, data=data_s, bandwidth=bwG, gweight=gwr.Gauss, hatmatrix=TRUE)
+#ghcn.subsets <-subset(ghcn,ghcn$date=="20100101")
+#summary(lm(y~x,data=df,weights=(df$wght1)^(3/4))
+#ggwr.sel: find the bandwith from the data
 
-Res_fit<-gwrG$lm$residuals
-RMSE_f<-sqrt(sum(Res_fit^2)/ns)
 
-t<- data_s$tmax-gwrG$lm$fitted.values
-t2<-t-Res_fit #This should be zero
 
+###Regression part 1: Creating a validation dataset by creating training and testing datasets
+for(i in 1:length(dates)){            # start of the for loop #1
+
+  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
+
+  n<-nrow(ghcn.subsets[[i]])
+  ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
+  nv<-n-ns             #create a sample for validation with prop of the rows
+  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
+  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
+  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
+  data_s <- ghcn.subsets[[i]][ind.training, ]
+  data_v <- ghcn.subsets[[i]][ind.testing, ]
+  bwG <- gwr.sel(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC,data=data_s,gweight=gwr.Gauss, verbose = FALSE)
+  gwrG<- gwr(tmax~ lon + lat + ELEV_SRTM + Eastness + Northness + DISTOC, data=data_s, bandwidth=bwG, gweight=gwr.Gauss, hatmatrix=TRUE)
+
+  Res_fit<-gwrG$lm$residuals
+  RMSE_f<-sqrt(sum(Res_fit^2)/ns)
+  t<- data_s$tmax-gwrG$lm$fitted.values #Checking output
+  t2<-t-Res_fit #This should be zero
+  data_s$residuals <- Res_fit #adding field to the data
+
+  #Saving the subset in a dataframe
+  data_name<-paste("ghcn_v_",dates[[i]],sep="")
+  assign(data_name,data_v)
+  data_name<-paste("ghcn_s_",dates[[i]],sep="")
+  assign(data_name,data_s)
+
+  results[i,1]<- dates[i]  #storing the interpolation dates in the first column
+  results[i,2]<- ns        #number of stations used in the training stage
+  results[i,3]<- RMSE_f
+  }
+
+## Plotting and saving diagnostic measures
+results_num <-results
+mode(results_num)<- "numeric"
+# Make it numeric first
+# Now turn it into a data.frame...
+
+results_table<-as.data.frame(results_num)
+colnames(results_table)<-c("dates","ns","RMSE_gwr1")
+
+write.csv(results_table, file= paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep=""))
+
+# End of script##########
+
+# ###############################
+
+
+
 #Compare the coefficients and residuals using both 30 and 100%
-
 #coefficients are stored in gwrG$SDF$lon
-
 #write out a new shapefile (including .prj component)
-
-writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile")
-
+#writeOGR(data_s,".", "ghcn_1507_s", driver ="ESRI Shapefile")
 #ogrInfo(".", "ghcn_1507_s") #This will check the file...
 #plot(ghcn_1507, axes=TRUE, border="gray")
 
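The new "## Plotting and saving diagnostic measures" block in this revision only writes the per-date table to disk; the plotting itself is not part of the commit. As a minimal, hypothetical sketch of that step, assuming the script above has already been run so that the results_GWR_Assessment file exists under the same path, the table could be read back and the fit RMSE inspected per date with base R:

# Hypothetical follow-up, not part of this revision: read back the diagnostic
# table written by write.csv() above and plot the fit RMSE for each date.
path <- "/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations/"
out_prefix <- "_03132012_0"
results_table <- read.csv(paste(path,"/","results_GWR_Assessment",out_prefix,".txt",sep=""), row.names=1)
# Columns as written by the script: "dates", "ns", "RMSE_gwr1"
# (tmax is screened to -150..400 above, i.e. apparently tenths of degrees C,
#  so RMSE_gwr1 would be in the same unit)
plot(seq_along(results_table$dates), results_table$RMSE_gwr1, type="b", xaxt="n",
     xlab="interpolation date", ylab="RMSE of GWR fit", main="GWR fit RMSE per date")
axis(1, at=seq_along(results_table$dates), labels=results_table$dates, las=2, cex.axis=0.7)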