Project

General

Profile

« Previous | Next » 

Revision 0f4426cb

Added by Benoit Parmentier over 11 years ago

GAM fusion function, modifications for first fusion prediction in Venezuela

View differences:

climate/research/oregon/interpolation/GAM_fusion_function_multisampling.R
3 3
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
4 4
  month<-strftime(date, "%m")          # current month of the date being processed
5 5
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
6
  
6
  proj_str<-proj4string(dst)
7 7
  #Adding layer LST to the raster stack
8 8

  
9 9
  pos<-match("LST",layerNames(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
......
16 16
  #r1[r1 < (min_val)]<-NA
17 17
  s_raster<-addLayer(s_raster,r1)            #Adding current month
18 18
  
19
  pos<-match("elev",layerNames(s_raster))
20
  layerNames(s_raster)[pos]<-"elev_1"
19 21
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
20
  #Problem here...
21
  mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
22
  ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = as.data.frame(mod_LST))            #Add the variable LST to the subset dataset
23
  dst$LST<-dst[[LST_month]]
24
  #n<-nrow(ghcn.subsets[[i]])
25
  #ns<-n-round(n*prop)   #Create a sample from the data frame with 70% of the rows
26
  #nv<-n-ns              #create a sample for validation with prop of the rows
27
  #ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
22
  data_day<-ghcn.subsets[[i]]
23
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
24
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
25
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
26
  
28 27
  ind.training<-sampling[[i]]
29
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
30
  data_s <- ghcn.subsets[[i]][ind.training, ]   #Training dataset currently used in the modeling
31
  data_v <- ghcn.subsets[[i]][ind.testing, ]    #Testing/validation dataset using input sampling
28
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
29
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
30
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
32 31
  
33 32
  ns<-nrow(data_s)
34 33
  nv<-nrow(data_v)
......
50 49
  #min_val<-(-15)     #Screening for extreme values
51 50
  #themolst[themolst < (min_val)]<-NA
52 51
  
53
  plot(themolst)
54
  
55 52
  ###########
56 53
  # STEP 2 - Weather station means across same days: Monthly mean calculation
57 54
  ###########
......
65 62
  #sta_lola=modst[,c("lon","lat")] #Extracting locations of stations for the current month..
66 63
  
67 64
  #proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
68
  #lookup<-function(r,lat,lon) {
69
  #  xy<-project(cbind(lon,lat),proj_str);
70
  #  cidx<-cellFromXY(r,xy);
71
  #  return(r[cidx])
72
  #}
65
  lookup<-function(r,lat,lon) {
66
    xy<-project(cbind(lon,lat),proj_str);
67
    cidx<-cellFromXY(r,xy);
68
    return(r[cidx])
69
  }
73 70
  #sta_tmax_from_lst=lookup(themolst,sta_lola$lat,sta_lola$lon) #Extracted values of LST for the stations
74 71
  sta_tmax_from_lst<-modst$LST
75 72
  #########
......
80 77
  #Added by Benoit
81 78
  modst$LSTD_bias<-sta_bias  #Adding bias to data frame modst containning the monthly average for 10 years
82 79
  
83
  bias_xy=project(as.matrix(sta_lola),proj_str)
80
  #bias_xy=project(as.matrix(sta_lola),proj_str)
84 81
  png(paste("LST_TMax_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i], out_prefix,".png", sep=""))
85 82
  plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax",main=paste("LST vs TMax for",datelabel,sep=" "))
86 83
  abline(0,1)
84
  nb_point<-paste("n=",length(modst$TMax),sep="")
85
  mean_bias<-paste("LST bias= ",format(mean(modst$LSTD_bias,na.rm=TRUE),digits=3),sep="")
86
  #Add the number of data points on the plot
87
  legend("topleft",legend=c(mean_bias,nb_point),bty="n")
87 88
  dev.off()
88 89
  
89 90
  #added by Benoit 
90 91
  #x<-ghcn.subsets[[i]]  #Holds both training and testing for instance 161 rows for Jan 1
91
  x<-data_v
92
  d<-data_s
93
  
92
  x<-as.data.frame(data_v)
93
  d<-as.data.frame(data_s)
94
  #x[x$value==-999.9]<-NA
95
  for (j in 1:nrow(x)){
96
    if (x$value[j]== -999.9){
97
      x$value[j]<-NA
98
    }
99
  }
100
  for (j in 1:nrow(d)){
101
    if (d$value[j]== -999.9){
102
      d$value[j]<-NA
103
    }
104
  }
105
  #x[x$value==-999.9]<-NA
106
  #d[d$value==-999.9]<-NA
94 107
  pos<-match("value",names(d)) #Find column with name "value"
95 108
  #names(d)[pos]<-c("dailyTmax")
96 109
  names(d)[pos]<-y_var_name
97 110
  names(x)[pos]<-y_var_name
98 111
  #names(x)[pos]<-c("dailyTmax")
99
  d$dailyTmax=(as.numeric(d$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage
100
  x$dailyTmax=(as.numeric(x$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage
101 112
  pos<-match("station",names(d)) #Find column with name "value"
102 113
  names(d)[pos]<-c("id")
103 114
  names(x)[pos]<-c("id")
......
131 142
  #fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige
132 143
  
133 144
  #Adding options to use only training stations : 07/11/2012
134
  bias_xy=project(as.matrix(sta_lola),proj_str)
145
  #bias_xy=project(as.matrix(sta_lola),proj_str)
135 146
  #bias_xy2=project(as.matrix(c(dmoday$lon,dmoday$lat),proj_str)
147
  bias_xy<-coordinates(modst)
136 148
  if(bias_val==1){
137 149
    sta_bias<-dmoday$LSTD_bias
138 150
    bias_xy<-cbind(dmoday$x_OR83M,dmoday$y_OR83M)
......
152 164
  
153 165
  daily_sta_lola=dmoday[,c("lon","lat")] #could be same as before but why assume merge does this - assume not
154 166
  daily_sta_xy=project(as.matrix(daily_sta_lola),proj_str)
167
  
155 168
  daily_delta=dmoday$dailyTmax-dmoday$TMax
156 169
  
157 170
  #fitdelta<-Tps(daily_sta_xy,daily_delta) #use TPS or krige
......
213 226
  ########
214 227
  # check: assessment of results: validation
215 228
  ########
216
  RMSE<-function(x,y) {return(mean((x-y)^2)^0.5)}
217
  MAE_fun<-function(x,y) {return(mean(abs(x-y)))}
229
  RMSE<-function(res) {return(((mean(res,na.rm=TRUE))^2)^0.5)}
230
  MAE_fun<-function(res) {return(mean(abs(res),na.rm=TRUE))}
218 231
  #ME_fun<-function(x,y){return(mean(abs(y)))}
219 232
  #FIT ASSESSMENT
220 233
  sta_pred_data_s=lookup(tmax_predicted,data_s$lat,data_s$lon)
221
  rmse_fit=RMSE(sta_pred_data_s,data_s$dailyTmax)
222
  mae_fit=MAE_fun(sta_pred_data_s,data_s$dailyTmax)
234
  
235
  rmse_fit=RMSE(sta_pred_data_s-data_s$dailyTmax)
236
  mae_fit=MAE_fun(sta_pred_data_s-data_s$dailyTmax)
223 237
    
224 238
  sta_pred=lookup(tmax_predicted,data_v$lat,data_v$lon)
225 239
  #sta_pred=lookup(tmax_predicted,daily_sta_lola$lat,daily_sta_lola$lon)
......
228 242
  #names(data_v)[pos]<-c("dailyTmax")
229 243
  tmax<-data_v$dailyTmax
230 244
  #data_v$dailyTmax<-tmax
231
  rmse=RMSE(sta_pred,tmax)
232
  mae<-MAE_fun(sta_pred,tmax)
245
  rmse=RMSE(sta_pred-tmax)
246
  mae<-MAE_fun(sta_pred-tmax)
233 247
  r2<-cor(sta_pred,tmax)^2              #R2, coef. of var
234
  me<-mean(sta_pred-tmax)
248
  me<-mean(sta_pred-tmax,na.rm=T)
235 249
   
236 250
  png(paste("Predicted_tmax_versus_observed_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],
237 251
            "_",sampling_dat$run_samp[i],out_prefix,".png", sep=""))
......
243 257
  
244 258
  ###BEFORE GAM prediction the data object must be transformed to SDF
245 259
  
246
  coords<- data_v[,c('x_OR83M','y_OR83M')]
260
  coords<- data_v[,c('x','y')]
247 261
  coordinates(data_v)<-coords
248
  proj4string(data_v)<-CRS  #Need to assign coordinates...
249
  coords<- data_s[,c('x_OR83M','y_OR83M')]
262
  proj4string(data_v)<-proj_str  #Need to assign coordinates...
263
  coords<- data_s[,c('x','y')]
250 264
  coordinates(data_s)<-coords
251
  proj4string(data_s)<-CRS  #Need to assign coordinates..
252
  coords<- modst[,c('x_OR83M','y_OR83M')]
253
  coordinates(modst)<-coords
254
  proj4string(modst)<-CRS  #Need to assign coordinates..
265
  proj4string(data_s)<-proj_str  #Need to assign coordinates..
266
  coords<- modst[,c('x','y')]
267
  #coordinates(modst)<-coords
268
  #proj4string(modst)<-proj_str  #Need to assign coordinates..
255 269
  
256 270
  ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging...
257 271
  nv<-nrow(data_v)
......
274 288
  
275 289
  list_formulas<-vector("list",nmodels)
276 290
  
277
  list_formulas[[1]] <- as.formula("y_var ~ s(ELEV_SRTM)", env=.GlobalEnv)
291
  list_formulas[[1]] <- as.formula("y_var ~ s(elev_1)", env=.GlobalEnv)
278 292
  list_formulas[[2]] <- as.formula("y_var ~ s(LST)", env=.GlobalEnv)
279
  list_formulas[[3]] <- as.formula("y_var ~ s(ELEV_SRTM,LST)", env=.GlobalEnv)
280
  list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(ELEV_SRTM)", env=.GlobalEnv)
281
  list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv)
282
  list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST)", env=.GlobalEnv)
283
  list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST) + s(LC1)", env=.GlobalEnv)
284
  list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST) + s(LC3)", env=.GlobalEnv)
285
  list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
293
  list_formulas[[3]] <- as.formula("y_var ~ s(elev_1,LST)", env=.GlobalEnv)
294
  list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(elev_1)", env=.GlobalEnv)
295
  list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,elev_1)", env=.GlobalEnv)
296
  list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", env=.GlobalEnv)
297
  list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", env=.GlobalEnv)
298
  list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
299
  list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
286 300
  
287 301
  #list_formulas[[1]] <- as.formula("y_var ~ s(ELEV_SRTM)", env=.GlobalEnv)
288 302
  #list_formulas[[2]] <- as.formula("y_var ~ s(lat,lon)", env=.GlobalEnv)

Also available in: Unified diff