Project

General

Profile

« Previous | Next » 

Revision ca9c0dff

Added by Benoit Parmentier almost 12 years ago

GAM CAI raster prediction OR tmax,corretion constant sampling and GAM climatology models

View differences:

climate/research/oregon/interpolation/GAM_CAI_analysis_raster_prediction_multisampling.R
6 6
#Method is assedsed using constant sampling with variation  of validation sample with different  #
7 7
#hold out proportions.                                                                           #
8 8
#AUTHOR: Benoit Parmentier                                                                       #
9
#DATE: 10/25/2012                                                                                #
9
#DATE: 10/30/2012                                                                                #
10 10
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491--                                  #
11 11
###################################################################################################
12 12

  
......
43 43
#GHCN Database for 1980-2010 for study area (OR) 
44 44
data3<-read.table(paste(path,"/","ghcn_data_TMAXy1980_2010_OR_0602012.txt",sep=""),sep=",", header=TRUE)
45 45

  
46
nmodels<-8   #number of models running
46
nmodels<-5   #number of models running
47 47
y_var_name<-"dailyTmax"
48 48
climgam=1                                                     #if 1, then GAM is run on the climatology rather than the daily deviation surface...
49 49
predval<-1
......
51 51

  
52 52
seed_number<- 100                                             #Seed number for random sampling, if seed_number<0, no seed number is used..
53 53
#out_prefix<-"_365d_GAM_CAI2_const_10222012_"                  #User defined output prefix
54
out_prefix<-"_365d_GAM_CAI2_all_lstd_10262012"                #User defined output prefix
54
#out_prefix<-"_365d_GAM_CAI2_const_all_lstd_10272012"                #User defined output prefix
55
out_prefix<-"_365d_GAM_CAI3_all_10302012"                #User defined output prefix
55 56

  
56 57
bias_val<-0            #if value 1 then daily training data is used in the bias surface rather than the all monthly stations (added on 07/11/2012)
57 58
bias_prediction<-1     #if value 1 then use GAM for the BIAS prediction otherwise GAM direct reprediction for y_var (daily tmax)
......
64 65
CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
65 66
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
66 67

  
67
source("GAM_CAI_function_multisampling_10252012.R")
68
#source("GAM_CAI_function_multisampling_10252012.R")
69
source("GAM_CAI_function_multisampling_10302012.R")
68 70

  
69 71
############ START OF THE SCRIPT ##################
70 72

  
......
139 141

  
140 142
LC_s<-stack(LC1,LC3,LC4,LC6)
141 143
layerNames(LC_s)<-c("LC1_forest","LC3_grass","LC4_crop","LC6_urban")
142
plot(LC_s)
144
#plot(LC_s)
143 145

  
144 146
pos<-match("CANHEIGHT",layerNames(s_raster)) #Find column with name "value"
145 147
CANHEIGHT<-raster(s_raster,layer=pos)             #Select layer from stack
146 148
s_raster<-dropLayer(s_raster,pos)
147 149
CANHEIGHT[is.na(CANHEIGHT)]<-0
150
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM"
151
ELEV_SRTM<-raster(s_raster,layer=pos)             #Select layer from stack on 10/30
152
s_raster<-dropLayer(s_raster,pos)
153
ELEV_SRTM[ELEV_SRTM <0]<-NA
148 154

  
149 155
xy<-coordinates(r1)  #get x and y projected coordinates...
150 156
xy_latlon<-project(xy, CRS, inv=TRUE) # find lat long for projected coordinats (or pixels...)
......
157 163
values(lon)<-xy_latlon[,1]
158 164
values(lat)<-xy_latlon[,2]
159 165

  
160
r<-stack(N,E,Nw,Ew,lon,lat,LC1,LC3,LC4,LC6, CANHEIGHT)
161
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat","LC1","LC3","LC4","LC6","CANHEIGHT")
166
r<-stack(N,E,Nw,Ew,lon,lat,LC1,LC3,LC4,LC6, CANHEIGHT,ELEV_SRTM)
167
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat","LC1","LC3","LC4","LC6","CANHEIGHT","ELEV_SRTM")
162 168
layerNames(r)<-rnames
163 169
s_raster<-addLayer(s_raster, r)
164 170

  
......
222 228
stations_val<-as.data.frame(stations_val)
223 229
dst_extract<-cbind(dst_month,stations_val)
224 230
dst<-dst_extract
231
#Now clean and screen monthly values
232
dst_all<-dst
233
dst<-subset(dst,dst$TMax>-15 & dst$TMax<40)
234
dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows
225 235

  
226 236
######### Preparing daily values for training and testing
227 237

  
......
263 273
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates
264 274
ghcn.subsets <-lapply(as.character(sampling_dat$date), function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates
265 275

  
266
sampling<-vector("list",length(ghcn.subsets))
276
if (seed_number>0) {
277
  set.seed(seed_number)                        #Using a seed number allow results based on random number to be compared...
278
}
267 279

  
280
sampling<-vector("list",length(ghcn.subsets))
281
sampling_station_id<-vector("list",length(ghcn.subsets))
268 282
for(i in 1:length(ghcn.subsets)){
269 283
  n<-nrow(ghcn.subsets[[i]])
270 284
  prop<-(sampling_dat$prop[i])/100
......
272 286
  nv<-n-ns              #create a sample for validation with prop of the rows
273 287
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
274 288
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
289
  #Find the corresponding 
290
  data_sampled<-ghcn.subsets[[i]][ind.training,] #selected the randomly sampled stations
291
  station_id.training<-data_sampled$station     #selected id for the randomly sampled stations (115)
292
  #Save the information
275 293
  sampling[[i]]<-ind.training
294
  sampling_station_id[[i]]<- station_id.training
276 295
}
277

  
296
## Use same samples across the year...
278 297
if (constant==1){
279 298
  sampled<-sampling[[1]]
299
  data_sampled<-ghcn.subsets[[1]][sampled,] #selected the randomly sampled stations
300
  station_sampled<-data_sampled$station     #selected id for the randomly sampled stations (115)
280 301
  list_const_sampling<-vector("list",sn)
302
  list_const_sampling_station_id<-vector("list",sn)
281 303
  for(i in 1:sn){
282
    list_const_sampling[[i]]<-sampled
304
    station_id.training<-intersect(station_sampled,ghcn.subsets[[i]]$station)
305
    ind.training<-match(station_id.training,ghcn.subsets[[i]]$station)
306
    list_const_sampling[[i]]<-ind.training
307
    list_const_sampling_station_id[[i]]<-station_id.training
283 308
  }
284
  sampling<-list_const_sampling  
309
  sampling<-list_const_sampling 
310
  sampling_station_id<-list_const_sampling_station_id
285 311
}
286 312

  
287 313
######## Prediction for the range of dates
......
330 356

  
331 357
# Save before plotting
332 358
#sampling_obj<-list(sampling_dat=sampling_dat,training=sampling)
333
sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, tb=tb_diagnostic)
359
#sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, tb=tb_diagnostic)
360
sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, training_id=sampling_station_id, tb=tb_diagnostic)
334 361

  
335 362
write.table(avg_tb, file= paste(path,"/","results2_fusion_Assessment_measure_avg_",out_prefix,".txt",sep=""), sep=",")
336 363
write.table(median_tb, file= paste(path,"/","results2_fusion_Assessment_measure_median_",out_prefix,".txt",sep=""), sep=",")
337 364
write.table(tb_diagnostic, file= paste(path,"/","results2_fusion_Assessment_measure",out_prefix,".txt",sep=""), sep=",")
338 365
write.table(tb, file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",")
339 366

  
340

  
341
# tb_RMSE<-subset(tb, metric=="RMSE")
342
# tb_MAE<-subset(tb,metric=="MAE")
343
# tb_ME<-subset(tb,metric=="ME")
344
# tb_R2<-subset(tb,metric=="R2")
345
# tb_RMSE_f<-subset(tb, metric=="RMSE_f")
346
# tb_MAE_f<-subset(tb,metric=="MAE_f")
347
# tb_R2_f<-subset(tb,metric=="R2_f")
348
# 
349
# tb_diagnostic1<-rbind(tb_RMSE,tb_MAE,tb_ME,tb_R2)
350
# #tb_diagnostic2<-rbind(tb_,tb_MAE,tb_ME,tb_R2)
351
# 
352
# mean_RMSE<-sapply(tb_RMSE[,4:(nmodels+4)],mean)
353
# mean_MAE<-sapply(tb_MAE[,4:(nmodels+4)],mean)
354
# mean_R2<-sapply(tb_R2[,4:(nmodels+4)],mean)
355
# mean_ME<-sapply(tb_ME[,4:(nmodels+4)],mean)
356
# mean_MAE_f<-sapply(tb_MAE[,4:(nmodels+4)],mean)
357
# mean_RMSE_f<-sapply(tb_RMSE_f[,4:(nmodels+4)],mean)
358
# 
359
# #Wrting metric results in textfile and model objects in .RData file
360
# write.table(tb_diagnostic1, file= paste(path,"/","results2_CAI_Assessment_measure1",out_prefix,".txt",sep=""), sep=",")
361
# write.table(tb, file= paste(path,"/","results2_CAI_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",")
362 367
save(sampling_obj, file= paste(path,"/","results2_CAI_sampling_obj",out_prefix,".RData",sep=""))
363 368
save(gam_CAI_mod,file= paste(path,"/","results2_CAI_Assessment_measure_all",out_prefix,".RData",sep=""))
364 369

  

Also available in: Unified diff