Revision 0f602e87
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R | ||
---|---|---|
8 | 8 |
#2)Constant sampling: use the same sample over the runs |
9 | 9 |
#3)over dates: run over for example 365 dates without mulitsampling |
10 | 10 |
#4)use seed number: use seed if random samples must be repeatable |
11 |
#5)GAM fusion: possibilty of running GAM+FUSION or GAM separately |
|
11 |
#5)GAM fusion: possibilty of running GAM+FUSION and other options added |
|
12 |
#The interpolation is done first at the monthly time scale then delta surfaces are added. |
|
12 | 13 |
#AUTHOR: Benoit Parmentier |
13 |
#DATE: 02/08/2013
|
|
14 |
#DATE: 02/13/2013
|
|
14 | 15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
15 | 16 |
################################################################################################### |
16 | 17 |
|
... | ... | |
27 | 28 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
28 | 29 |
library(reshape) |
29 | 30 |
library(plotrix) |
31 |
library(maptools) |
|
30 | 32 |
### Parameters and argument |
31 | 33 |
|
32 | 34 |
infile2<-"list_365_dates_04212012.txt" |
... | ... | |
44 | 46 |
y_var_name<-"dailyTmax" |
45 | 47 |
predval<-1 |
46 | 48 |
seed_number<- 100 #if seed zero then no seed? #Seed number for random sampling |
47 |
out_prefix<-"_10d_GAM_fus5_all_lstd_02082013" #User defined output prefix
|
|
49 |
out_prefix<-"_365d_GAM_fus5_all_lstd_02132013" #User defined output prefix
|
|
48 | 50 |
|
49 | 51 |
bias_val<-0 #if value 1 then training data is used in the bias surface rather than the all monthly stations |
50 | 52 |
bias_prediction<-1 #if value 1 then use GAM for the BIAS prediction otherwise GAM direct repdiction for y_var (daily tmax) |
... | ... | |
57 | 59 |
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
58 | 60 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
59 | 61 |
|
60 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02082013.R")) |
|
61 |
|
|
62 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02132013.R")) |
|
63 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02132013.R")) |
|
64 |
|
|
62 | 65 |
###################### START OF THE SCRIPT ######################## |
63 | 66 |
|
64 | 67 |
###Reading the daily station data and setting up for models' comparison |
... | ... | |
114 | 117 |
|
115 | 118 |
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
116 | 119 |
|
117 |
###### Preparing tables for model assessment: specific diagnostic/metrics |
|
118 |
|
|
119 |
#Model assessment: specific diagnostics/metrics |
|
120 |
results_AIC<- matrix(1,1,nmodels+3) |
|
121 |
results_GCV<- matrix(1,1,nmodels+3) |
|
122 |
results_DEV<- matrix(1,1,nmodels+3) |
|
123 |
#results_RMSE_f<- matrix(1,length(models)+3) |
|
124 |
|
|
125 |
#Model assessment: general diagnostic/metrics |
|
126 |
results_RMSE <- matrix(1,1,nmodels+4) |
|
127 |
results_MAE <- matrix(1,1,nmodels+4) |
|
128 |
results_ME <- matrix(1,1,nmodels+4) #There are 8+1 models |
|
129 |
results_R2 <- matrix(1,1,nmodels+4) #Coef. of determination for the validation dataset |
|
130 |
|
|
131 |
results_RMSE_f<- matrix(1,1,nmodels+4) #RMSE fit, RMSE for the training dataset |
|
132 |
results_MAE_f <- matrix(1,1,nmodels+4) |
|
133 |
|
|
134 | 120 |
######### Preparing daily and monthly values for training and testing |
135 | 121 |
|
136 | 122 |
#Screening for daily bad values: value is tmax in this case |
... | ... | |
181 | 167 |
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates |
182 | 168 |
ghcn.subsets <-lapply(as.character(sampling_dat$date), function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates |
183 | 169 |
|
170 |
#Make this a function?? |
|
184 | 171 |
## adding choice of constant sample |
185 | 172 |
if (seed_number>0) { |
186 | 173 |
set.seed(seed_number) #Using a seed number allow results based on random number to be compared... |
... | ... | |
221 | 208 |
|
222 | 209 |
######## Prediction for the range of dates and sampling data |
223 | 210 |
|
224 |
#gam_fus_mod<-mclapply(1:length(dates), runGAMFusion,mc.preschedule=FALSE,mc.cores = 8) #This is the end bracket from mclapply(...) statement |
|
225 |
#gam_fus_mod_s<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
226 |
gam_fus_mod_s<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
227 |
#gam_fus_mod2<-mclapply(4:4, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
211 |
#First predict at the monthly time scale: climatology |
|
228 | 212 |
|
229 |
save(gam_fus_mod_s,file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".RData",sep=""))
|
|
213 |
gamclim_fus_mod<-mclapply(1:12, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 4) #This is the end bracket from mclapply(...) statement
|
|
230 | 214 |
|
231 |
## Plotting and saving diagnostic measures
|
|
215 |
save(gamclim_fus_mod,file= paste("gamclim_fus_mod",out_prefix,".RData",sep=""))
|
|
232 | 216 |
|
233 |
tb<-gam_fus_mod_s[[1]][[3]][0,] #empty data frame with metric table structure that can be used in rbinding... |
|
234 |
tb_tmp<-gam_fus_mod_s #copy |
|
217 |
#now get list of raster clim layers |
|
235 | 218 |
|
236 |
for (i in 1:length(tb_tmp)){ |
|
237 |
tmp<-tb_tmp[[i]][[3]] |
|
238 |
tb<-rbind(tb,tmp) |
|
219 |
list_tmp<-vector("list",length(gamclim_fus_mod)) |
|
220 |
for (i in 1:length(gamclim_fus_mod)){ |
|
221 |
tmp<-gamclim_fus_mod[[i]]$clim |
|
222 |
list_tmp[[i]]<-tmp |
|
239 | 223 |
} |
240 |
rm(tb_tmp) |
|
241 | 224 |
|
242 |
for(i in 4:ncol(tb)){ # start of the for loop #1 |
|
243 |
tb[,i]<-as.numeric(as.character(tb[,i])) |
|
244 |
} |
|
225 |
#put together list of clim models per month... |
|
226 |
rast_clim_yearlist<-list_tmp |
|
227 |
#Second predict at the daily time scale: delta |
|
228 |
|
|
229 |
#gam_fus_mod<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
230 |
gam_fus_mod<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
245 | 231 |
|
246 |
metrics<-as.character(unique(tb$metric)) #Name of accuracy metrics (RMSE,MAE etc.) |
|
247 |
tb_metric_list<-vector("list",length(metrics)) |
|
232 |
save(gam_fus_mod,file= paste("gam_fus_mod",out_prefix,".RData",sep="")) |
|
248 | 233 |
|
249 |
for(i in 1:length(metrics)){ # Reorganizing information in terms of metrics |
|
250 |
metric_name<-paste("tb_",metrics[i],sep="") |
|
251 |
tb_metric<-subset(tb, metric==metrics[i]) |
|
252 |
tb_metric<-cbind(tb_metric,sampling_dat[,2:3]) |
|
253 |
assign(metric_name,tb_metric) |
|
254 |
tb_metric_list[[i]]<-tb_metric |
|
234 |
#Add accuracy_metrics section/function |
|
235 |
#now get list of raster daily prediction layers |
|
236 |
#gam_fus_mod_tmp<-gam_fus_mod |
|
237 |
#this should be change later once correction has been made |
|
238 |
for (i in 1:length(gam_fus_mod)){ |
|
239 |
obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v", |
|
240 |
"mod_kr_day") |
|
241 |
names(gam_fus_mod[[i]])<-obj_names |
|
242 |
names(gam_fus_mod[[i]][[y_var_name]])<-c("mod1","mod2","mod3","mod4","mod_kr") |
|
255 | 243 |
} |
256 | 244 |
|
257 |
tb_diagnostic<-do.call(rbind,tb_metric_list) |
|
258 |
tb_diagnostic[["prop"]]<-as.factor(tb_diagnostic[["prop"]]) |
|
245 |
## |
|
246 |
list_tmp<-vector("list",length(gam_fus_mod)) |
|
247 |
for (i in 1:length(gam_fus_mod)){ |
|
248 |
tmp<-gam_fus_mod[[i]][[y_var_name]] #y_var_name is the variable predicted (tmax or tmin) |
|
249 |
list_tmp[[i]]<-tmp |
|
250 |
} |
|
251 |
rast_day_yearlist<-list_tmp |
|
252 |
|
|
253 |
#calculate_accuary_metrics<-function(i) |
|
254 |
gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
255 |
#gam_fus_validation_mod<-mclapply(1:1, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
256 |
|
|
257 |
save(gam_fus_validation_mod,file= paste("gam_fus_validation_mod",out_prefix,".RData",sep="")) |
|
259 | 258 |
|
260 |
mod_pat<-glob2rx("mod*") |
|
261 |
mod_var<-grep(mod_pat,names(tb_diagnostic),value=TRUE) # using grep with "value" extracts the matching names |
|
259 |
####This part concerns validation assessment and must be moved later... |
|
260 |
## make this a function?? |
|
261 |
list_tmp<-vector("list",length(gam_fus_validation_mod)) |
|
262 |
for (i in 1:length(gam_fus_validation_mod)){ |
|
263 |
tmp<-gam_fus_validation_mod[[i]]$metrics_v |
|
264 |
list_tmp[[i]]<-tmp |
|
265 |
} |
|
266 |
tb_diagnostic<-do.call(rbind,list_tmp) #long rownames |
|
267 |
rownames(tb_diagnostic)<-NULL #remove row names |
|
268 |
|
|
269 |
mod_names<-unique(tb_diagnostic$pred_mod) #models that have accuracy metrics |
|
262 | 270 |
|
271 |
#now boxplots and mean per models |
|
263 | 272 |
t<-melt(tb_diagnostic, |
264 |
measure=mod_var, |
|
265 |
id=c("dates","metric","prop"),
|
|
273 |
#measure=mod_var,
|
|
274 |
id=c("date","pred_mod","prop"),
|
|
266 | 275 |
na.rm=F) |
267 |
avg_tb<-cast(t,metric+prop~variable,mean) |
|
268 |
median_tb<-cast(t,metric+prop~variable,median) |
|
269 |
avg_tb[["prop"]]<-as.numeric(as.character(avg_tb[["prop"]])) |
|
270 |
avg_RMSE<-subset(avg_tb,metric=="RMSE") |
|
276 |
avg_tb<-cast(t,pred_mod~variable,mean) |
|
277 |
tb<-tb_diagnostic |
|
278 |
tb_mod_list<-vector("list",length(mod_names)) |
|
279 |
for(i in 1:length(mod_names)){ # Reorganizing information in terms of metrics |
|
280 |
mod_name_tb<-paste("tb_",mod_names[i],sep="") |
|
281 |
tb_mod<-subset(tb, pred_mod==mod_names[i]) |
|
282 |
assign(mod_name_tb,tb_mod) |
|
283 |
tb_mod_list[[i]]<-tb_mod |
|
284 |
} |
|
285 |
names(tb_mod_list)<-mod_names |
|
271 | 286 |
|
272 |
sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, training_id=sampling_station_id, tb=tb_diagnostic) |
|
287 |
mod_metrics<-do.call(cbind,tb_mod_list) |
|
288 |
mod_pat<-glob2rx("*.rmse") |
|
289 |
mod_var<-grep(mod_pat,names(mod_metrics),value=TRUE) # using grep with "value" extracts the matching names |
|
273 | 290 |
|
274 |
write.table(avg_tb, file= paste(path,"/","results2_fusion_Assessment_measure_avg_",out_prefix,".txt",sep=""), sep=",") |
|
275 |
write.table(median_tb, file= paste(path,"/","results2_fusion_Assessment_measure_median_",out_prefix,".txt",sep=""), sep=",") |
|
276 |
write.table(tb_diagnostic, file= paste(path,"/","results2_fusion_Assessment_measure",out_prefix,".txt",sep=""), sep=",") |
|
277 |
write.table(tb, file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",") |
|
291 |
boxplot(mod_metrics[[mod_var]]) |
|
292 |
test<-mod_metrics[mod_var] |
|
293 |
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5) |
|
278 | 294 |
|
279 |
save(sampling_obj, file= paste(path,"/","results2_fusion_sampling_obj",out_prefix,".RData",sep="")) |
|
280 |
#save(gam_fus_mod_s,file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".RData",sep="")) |
|
281 |
gam_fus_mod_obj<-list(gam_fus_mod=gam_fus_mod_s,sampling_obj=sampling_obj) |
|
282 |
save(gam_fus_mod_obj,file= paste(path,"/","results_mod_obj_",out_prefix,".RData",sep="")) |
|
295 |
####End of part to be changed... |
|
283 | 296 |
|
284 |
#### END OF SCRIPT |
|
297 |
#### END OF SCRIPT ####### |
Also available in: Unified diff
GAM Fusion, raster prediction separation in three fuctions, Venezuela interpolation