Revision c1e1cc30
Added by Benoit Parmentier almost 12 years ago
climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R | ||
---|---|---|
11 | 11 |
#5)GAM fusion: possibilty of running GAM+FUSION and other options added |
12 | 12 |
#The interpolation is done first at the monthly time scale then delta surfaces are added. |
13 | 13 |
#AUTHOR: Benoit Parmentier |
14 |
#DATE: 02/13/2013
|
|
14 |
#DATE: 02/20/2013
|
|
15 | 15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
16 | 16 |
################################################################################################### |
17 | 17 |
|
... | ... | |
29 | 29 |
library(reshape) |
30 | 30 |
library(plotrix) |
31 | 31 |
library(maptools) |
32 |
|
|
32 | 33 |
### Parameters and argument |
33 | 34 |
|
34 | 35 |
infile2<-"list_365_dates_04212012.txt" |
... | ... | |
42 | 43 |
script_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/" |
43 | 44 |
setwd(in_path) |
44 | 45 |
|
45 |
nmodels<-9 #number of models running |
|
46 |
y_var_name<-"dailyTmax" |
|
47 |
predval<-1 |
|
48 |
seed_number<- 100 #if seed zero then no seed? #Seed number for random sampling |
|
49 |
out_prefix<-"_365d_GAM_fus5_all_lstd_02132013" #User defined output prefix |
|
50 |
|
|
51 |
bias_val<-0 #if value 1 then training data is used in the bias surface rather than the all monthly stations |
|
52 |
bias_prediction<-1 #if value 1 then use GAM for the BIAS prediction otherwise GAM direct repdiction for y_var (daily tmax) |
|
46 |
y_var_name<-"dailyTmax" |
|
47 |
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013" #User defined output prefix |
|
48 |
seed_number<- 100 #if seed zero then no seed? |
|
53 | 49 |
nb_sample<-1 #number of time random sampling must be repeated for every hold out proportion |
54 | 50 |
prop_min<-0.3 #if prop_min=prop_max and step=0 then predicitons are done for the number of dates... |
55 | 51 |
prop_max<-0.3 |
... | ... | |
59 | 55 |
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
60 | 56 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
61 | 57 |
|
62 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02132013.R")) |
|
63 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02132013.R")) |
|
58 |
#Models to run...this can be change for each run |
|
59 |
list_models<-c("y_var ~ s(elev_1)", |
|
60 |
"y_var ~ s(LST)", |
|
61 |
"y_var ~ s(elev_1,LST)", |
|
62 |
"y_var ~ s(lat) + s(lon)+ s(elev_1)", |
|
63 |
"y_var ~ s(lat,lon,elev_1)", |
|
64 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", |
|
65 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", |
|
66 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", |
|
67 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)") |
|
68 |
|
|
69 |
#Default name of LST avg to be matched |
|
70 |
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12") |
|
71 |
|
|
72 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02202013.R")) |
|
73 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02202013.R")) |
|
64 | 74 |
|
65 | 75 |
###################### START OF THE SCRIPT ######################## |
66 | 76 |
|
77 |
#Create log file to keep track of details such as processing times and parameters. |
|
78 |
|
|
79 |
log_fname<-paste("R_log_t",out_prefix, ".log",sep="") |
|
80 |
|
|
81 |
if (file.exists(log_fname)){ #Stop the script??? |
|
82 |
file.remove(log_fname) |
|
83 |
log_file<-file(log_fname,"w") |
|
84 |
} |
|
85 |
if (!file.exists(log_fname)){ |
|
86 |
log_file<-file(log_fname,"w") |
|
87 |
} |
|
88 |
|
|
89 |
time1<-proc.time() #Start stop watch |
|
90 |
writeLines(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""), |
|
91 |
con=log_file,sep="\n") |
|
92 |
writeLines("Starting script process time:",con=log_file,sep="\n") |
|
93 |
writeLines(as.character(time1),con=log_file,sep="\n") |
|
94 |
|
|
67 | 95 |
###Reading the daily station data and setting up for models' comparison |
68 | 96 |
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",infile_daily)) |
69 | 97 |
CRS_interp<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
... | ... | |
209 | 237 |
######## Prediction for the range of dates and sampling data |
210 | 238 |
|
211 | 239 |
#First predict at the monthly time scale: climatology |
212 |
|
|
213 |
gamclim_fus_mod<-mclapply(1:12, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 4) #This is the end bracket from mclapply(...) statement |
|
214 |
|
|
240 |
writeLines("Predictions at monthly scale:",con=log_file,sep="\n") |
|
241 |
t1<-proc.time() |
|
242 |
gamclim_fus_mod<-mclapply(1:12, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement |
|
243 |
#gamclim_fus_mod<-mclapply(1:12, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 4) #This is the end bracket from mclapply(...) statement |
|
215 | 244 |
save(gamclim_fus_mod,file= paste("gamclim_fus_mod",out_prefix,".RData",sep="")) |
245 |
t2<-proc.time()-t1 |
|
246 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
216 | 247 |
|
217 | 248 |
#now get list of raster clim layers |
218 | 249 |
|
... | ... | |
227 | 258 |
#Second predict at the daily time scale: delta |
228 | 259 |
|
229 | 260 |
#gam_fus_mod<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
261 |
writeLines("Predictions at the daily scale:",con=log_file,sep="\n") |
|
262 |
t1<-proc.time() |
|
230 | 263 |
gam_fus_mod<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
231 |
|
|
232 | 264 |
save(gam_fus_mod,file= paste("gam_fus_mod",out_prefix,".RData",sep="")) |
265 |
t2<-proc.time()-t1 |
|
266 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
233 | 267 |
|
234 | 268 |
#Add accuracy_metrics section/function |
235 | 269 |
#now get list of raster daily prediction layers |
236 | 270 |
#gam_fus_mod_tmp<-gam_fus_mod |
237 | 271 |
#this should be change later once correction has been made |
238 |
for (i in 1:length(gam_fus_mod)){ |
|
239 |
obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v", |
|
240 |
"mod_kr_day") |
|
241 |
names(gam_fus_mod[[i]])<-obj_names |
|
242 |
names(gam_fus_mod[[i]][[y_var_name]])<-c("mod1","mod2","mod3","mod4","mod_kr") |
|
243 |
} |
|
272 |
#for (i in 1:length(gam_fus_mod)){
|
|
273 |
# obj_names<-c(y_var_name,"clim","delta","data_s","sampling_dat","data_v",
|
|
274 |
# "mod_kr_day")
|
|
275 |
# names(gam_fus_mod[[i]])<-obj_names
|
|
276 |
# names(gam_fus_mod[[i]][[y_var_name]])<-c("mod1","mod2","mod3","mod4","mod_kr")
|
|
277 |
#}
|
|
244 | 278 |
|
245 | 279 |
## |
246 | 280 |
list_tmp<-vector("list",length(gam_fus_mod)) |
... | ... | |
248 | 282 |
tmp<-gam_fus_mod[[i]][[y_var_name]] #y_var_name is the variable predicted (tmax or tmin) |
249 | 283 |
list_tmp[[i]]<-tmp |
250 | 284 |
} |
251 |
rast_day_yearlist<-list_tmp |
|
285 |
rast_day_yearlist<-list_tmp #list of predicted images
|
|
252 | 286 |
|
287 |
writeLines("Validation step:",con=log_file,sep="\n") |
|
288 |
t1<-proc.time() |
|
253 | 289 |
#calculate_accuary_metrics<-function(i) |
254 | 290 |
gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
255 | 291 |
#gam_fus_validation_mod<-mclapply(1:1, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
256 |
|
|
257 | 292 |
save(gam_fus_validation_mod,file= paste("gam_fus_validation_mod",out_prefix,".RData",sep="")) |
293 |
t2<-proc.time()-t1 |
|
294 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
258 | 295 |
|
259 | 296 |
####This part concerns validation assessment and must be moved later... |
260 | 297 |
## make this a function?? |
... | ... | |
292 | 329 |
test<-mod_metrics[mod_var] |
293 | 330 |
boxplot(test,outline=FALSE,horizontal=FALSE,cex=0.5) |
294 | 331 |
|
332 |
#close log_file connection and add meta data |
|
333 |
writeLines("Finished script process time:",con=log_file,sep="\n") |
|
334 |
time2<-proc.time()-time1 |
|
335 |
writeLines(as.character(time2),con=log_file,sep="\n") |
|
336 |
#later on add all the paramters used in the script... |
|
337 |
writeLines(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""), |
|
338 |
con=log_file,sep="\n") |
|
339 |
writeLines("End of script",con=log_file,sep="\n") |
|
340 |
close(log_file) |
|
341 |
|
|
295 | 342 |
####End of part to be changed... |
296 | 343 |
|
297 | 344 |
#### END OF SCRIPT ####### |
Also available in: Unified diff
GAM fusion raster prediction Venezuela, adding log file to scrip track of script processing time