Project

General

Profile

« Previous | Next » 

Revision 0f602e87

Added by Benoit Parmentier over 11 years ago

GAM Fusion, raster prediction separation in three fuctions, Venezuela interpolation

View differences:

climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R
8 8
#2)Constant sampling: use the same sample over the runs
9 9
#3)over dates: run over for example 365 dates without mulitsampling
10 10
#4)use seed number: use seed if random samples must be repeatable
11
#5)GAM fusion: possibilty of running GAM+FUSION or GAM separately 
11
#5)GAM fusion: possibilty of running GAM+FUSION and other options added
12
#The interpolation is done first at the monthly time scale then delta surfaces are added.
12 13
#AUTHOR: Benoit Parmentier                                                                        
13
#DATE: 02/08/2013                                                                                 
14
#DATE: 02/13/2013                                                                                 
14 15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                   
15 16
###################################################################################################
16 17

  
......
27 28
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
28 29
library(reshape)
29 30
library(plotrix)
31
library(maptools)
30 32
### Parameters and argument
31 33

  
32 34
infile2<-"list_365_dates_04212012.txt"
......
44 46
y_var_name<-"dailyTmax"
45 47
predval<-1
46 48
seed_number<- 100  #if seed zero then no seed?                                                                 #Seed number for random sampling
47
out_prefix<-"_10d_GAM_fus5_all_lstd_02082013"                #User defined output prefix
49
out_prefix<-"_365d_GAM_fus5_all_lstd_02132013"                #User defined output prefix
48 50

  
49 51
bias_val<-0            #if value 1 then training data is used in the bias surface rather than the all monthly stations
50 52
bias_prediction<-1     #if value 1 then use GAM for the BIAS prediction otherwise GAM direct repdiction for y_var (daily tmax)
......
57 59
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
58 60
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
59 61

  
60
source(file.path(script_path,"GAM_fusion_function_multisampling_02082013.R"))
61

  
62
source(file.path(script_path,"GAM_fusion_function_multisampling_02132013.R"))
63
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02132013.R"))
64
                              
62 65
###################### START OF THE SCRIPT ########################
63 66

  
64 67
###Reading the daily station data and setting up for models' comparison
......
114 117

  
115 118
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame
116 119

  
117
######  Preparing tables for model assessment: specific diagnostic/metrics
118

  
119
#Model assessment: specific diagnostics/metrics
120
results_AIC<- matrix(1,1,nmodels+3)  
121
results_GCV<- matrix(1,1,nmodels+3)
122
results_DEV<- matrix(1,1,nmodels+3)
123
#results_RMSE_f<- matrix(1,length(models)+3)
124

  
125
#Model assessment: general diagnostic/metrics 
126
results_RMSE <- matrix(1,1,nmodels+4)
127
results_MAE <- matrix(1,1,nmodels+4)
128
results_ME <- matrix(1,1,nmodels+4)       #There are 8+1 models
129
results_R2 <- matrix(1,1,nmodels+4)       #Coef. of determination for the validation dataset
130

  
131
results_RMSE_f<- matrix(1,1,nmodels+4)    #RMSE fit, RMSE for the training dataset
132
results_MAE_f <- matrix(1,1,nmodels+4)
133

  
134 120
######### Preparing daily and monthly values for training and testing
135 121
                  
136 122
#Screening for daily bad values: value is tmax in this case
......
181 167
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates
182 168
ghcn.subsets <-lapply(as.character(sampling_dat$date), function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates
183 169

  
170
#Make this a function??
184 171
## adding choice of constant sample 
185 172
if (seed_number>0) {
186 173
  set.seed(seed_number)                        #Using a seed number allow results based on random number to be compared...
......
221 208

  
222 209
######## Prediction for the range of dates and sampling data
223 210

  
224
#gam_fus_mod<-mclapply(1:length(dates), runGAMFusion,mc.preschedule=FALSE,mc.cores = 8) #This is the end bracket from mclapply(...) statement
225
#gam_fus_mod_s<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
226
gam_fus_mod_s<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
227
#gam_fus_mod2<-mclapply(4:4, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
211
#First predict at the monthly time scale: climatology
228 212

  
229
save(gam_fus_mod_s,file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".RData",sep=""))
213
gamclim_fus_mod<-mclapply(1:12, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 4) #This is the end bracket from mclapply(...) statement
230 214

  
231
## Plotting and saving diagnostic measures
215
save(gamclim_fus_mod,file= paste("gamclim_fus_mod",out_prefix,".RData",sep=""))
232 216

  
233
tb<-gam_fus_mod_s[[1]][[3]][0,]  #empty data frame with metric table structure that can be used in rbinding...
234
tb_tmp<-gam_fus_mod_s #copy
217
#now get list of raster clim layers
235 218

  
236
for (i in 1:length(tb_tmp)){
237
  tmp<-tb_tmp[[i]][[3]]
238
  tb<-rbind(tb,tmp)
219
list_tmp<-vector("list",length(gamclim_fus_mod))
220
for (i in 1:length(gamclim_fus_mod)){
221
  tmp<-gamclim_fus_mod[[i]]$clim
222
  list_tmp[[i]]<-tmp
239 223
}
240
rm(tb_tmp)
241 224

  
242
for(i in 4:ncol(tb)){            # start of the for loop #1
243
  tb[,i]<-as.numeric(as.character(tb[,i]))  
244
}
225
#put together list of clim models per month...
226
rast_clim_yearlist<-list_tmp
227
#Second predict at the daily time scale: delta
228

  
229
#gam_fus_mod<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
230
gam_fus_mod<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
245 231

  
246
metrics<-as.character(unique(tb$metric))            #Name of accuracy metrics (RMSE,MAE etc.)
247
tb_metric_list<-vector("list",length(metrics))
232
save(gam_fus_mod,file= paste("gam_fus_mod",out_prefix,".RData",sep=""))
248 233

  
249
for(i in 1:length(metrics)){            # Reorganizing information in terms of metrics 
250
  metric_name<-paste("tb_",metrics[i],sep="")
251
  tb_metric<-subset(tb, metric==metrics[i])
252
  tb_metric<-cbind(tb_metric,sampling_dat[,2:3])
253
  assign(metric_name,tb_metric)
254
  tb_metric_list[[i]]<-tb_metric
234
#Add accuracy_metrics section/function
235
#now get list of raster daily prediction layers
236
#gam_fus_mod_tmp<-gam_fus_mod
237
#this should be change later once correction has been made
238
for (i in 1:length(gam_fus_mod)){
239
  obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v",
240
               "mod_kr_day")
241
  names(gam_fus_mod[[i]])<-obj_names
242
  names(gam_fus_mod[[i]][[y_var_name]])<-c("mod1","mod2","mod3","mod4","mod_kr")
255 243
}
256 244

  
257
tb_diagnostic<-do.call(rbind,tb_metric_list)
258
tb_diagnostic[["prop"]]<-as.factor(tb_diagnostic[["prop"]])
245
##
246
list_tmp<-vector("list",length(gam_fus_mod))
247
for (i in 1:length(gam_fus_mod)){
248
  tmp<-gam_fus_mod[[i]][[y_var_name]]  #y_var_name is the variable predicted (tmax or tmin)
249
  list_tmp[[i]]<-tmp
250
}
251
rast_day_yearlist<-list_tmp
252

  
253
#calculate_accuary_metrics<-function(i)
254
gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
255
#gam_fus_validation_mod<-mclapply(1:1, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
256

  
257
save(gam_fus_validation_mod,file= paste("gam_fus_validation_mod",out_prefix,".RData",sep=""))
259 258

  
260
mod_pat<-glob2rx("mod*")   
261
mod_var<-grep(mod_pat,names(tb_diagnostic),value=TRUE) # using grep with "value" extracts the matching names         
259
####This part concerns validation assessment and must be moved later...
260
## make this a function??
261
list_tmp<-vector("list",length(gam_fus_validation_mod))
262
for (i in 1:length(gam_fus_validation_mod)){
263
  tmp<-gam_fus_validation_mod[[i]]$metrics_v
264
  list_tmp[[i]]<-tmp
265
}
266
tb_diagnostic<-do.call(rbind,list_tmp) #long rownames
267
rownames(tb_diagnostic)<-NULL #remove row names
268

  
269
mod_names<-unique(tb_diagnostic$pred_mod) #models that have accuracy metrics
262 270

  
271
#now boxplots and mean per models
263 272
t<-melt(tb_diagnostic,
264
        measure=mod_var, 
265
        id=c("dates","metric","prop"),
273
        #measure=mod_var, 
274
        id=c("date","pred_mod","prop"),
266 275
        na.rm=F)
267
avg_tb<-cast(t,metric+prop~variable,mean)
268
median_tb<-cast(t,metric+prop~variable,median)
269
avg_tb[["prop"]]<-as.numeric(as.character(avg_tb[["prop"]]))
270
avg_RMSE<-subset(avg_tb,metric=="RMSE")
276
avg_tb<-cast(t,pred_mod~variable,mean)
277
tb<-tb_diagnostic
278
tb_mod_list<-vector("list",length(mod_names))
279
for(i in 1:length(mod_names)){            # Reorganizing information in terms of metrics 
280
  mod_name_tb<-paste("tb_",mod_names[i],sep="")
281
  tb_mod<-subset(tb, pred_mod==mod_names[i])
282
  assign(mod_name_tb,tb_mod)
283
  tb_mod_list[[i]]<-tb_mod
284
}
285
names(tb_mod_list)<-mod_names
271 286

  
272
sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, training_id=sampling_station_id, tb=tb_diagnostic)
287
mod_metrics<-do.call(cbind,tb_mod_list)
288
mod_pat<-glob2rx("*.rmse")   
289
mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names         
273 290

  
274
write.table(avg_tb, file= paste(path,"/","results2_fusion_Assessment_measure_avg_",out_prefix,".txt",sep=""), sep=",")
275
write.table(median_tb, file= paste(path,"/","results2_fusion_Assessment_measure_median_",out_prefix,".txt",sep=""), sep=",")
276
write.table(tb_diagnostic, file= paste(path,"/","results2_fusion_Assessment_measure",out_prefix,".txt",sep=""), sep=",")
277
write.table(tb, file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",")
291
boxplot(mod_metrics[[mod_var]])
292
test<-mod_metrics[mod_var]
293
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5)
278 294

  
279
save(sampling_obj, file= paste(path,"/","results2_fusion_sampling_obj",out_prefix,".RData",sep=""))
280
#save(gam_fus_mod_s,file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".RData",sep=""))
281
gam_fus_mod_obj<-list(gam_fus_mod=gam_fus_mod_s,sampling_obj=sampling_obj)
282
save(gam_fus_mod_obj,file= paste(path,"/","results_mod_obj_",out_prefix,".RData",sep=""))
295
####End of part to be changed...
283 296

  
284
#### END OF SCRIPT
297
#### END OF SCRIPT #######

Also available in: Unified diff