Revision ae46eb91
Added by Benoit Parmentier almost 12 years ago
climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R | ||
---|---|---|
15 | 15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568-- |
16 | 16 |
################################################################################################### |
17 | 17 |
|
18 |
###Loading R library and packages |
|
19 |
library(gtools) # loading some useful tools |
|
20 |
library(mgcv) # GAM package by Simon Wood |
|
21 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
22 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
23 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
24 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
25 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
26 |
library(raster) # Hijmans et al. package for raster processing |
|
27 |
library(rasterVis) |
|
28 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
29 |
library(reshape) |
|
30 |
library(plotrix) |
|
31 |
library(maptools) |
|
32 |
|
|
33 |
### Parameters and arguments |
|
34 |
|
|
35 |
## output param from previous script: Database_stations_covariates_processing_function |
|
36 |
#infile_monthly<-"monthly_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" |
|
37 |
#infile_daily<-"daily_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" |
|
38 |
#infile_locs<-"stations_venezuela_region_y2010_2010_VE_02082013.shp" |
|
39 |
infile_covariates<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
|
40 |
var<-"TMAX" |
|
41 |
#out_prefix<-"_365d_GAM_fus5_all_lstd_02202013" #User defined output prefix should be defined in master script |
|
42 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84: same as earlier |
|
43 |
|
|
44 |
infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script |
|
45 |
infile_daily<-list_outfiles$daily_covar_ghcn_data #outfile3 from database_covar script |
|
46 |
infile_locs<- list_outfiles$loc_stations_ghcn #outfile2? from database covar script |
|
47 |
rnames <-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC") |
|
48 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12") |
|
49 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
|
50 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
|
51 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
52 |
covar_names<-c(rnames,lc_names,lst_names) |
|
53 |
### |
|
54 |
#Input for sampling function... |
|
55 |
seed_number<- 100 #if seed zero then no seed? |
|
56 |
nb_sample<-1 #number of time random sampling must be repeated for every hold out proportion |
|
57 |
step<-0 |
|
58 |
constant<-0 #if value 1 then use the same samples as date one for the all set of dates |
|
59 |
prop_minmax<-c(0.3,0.3) #if prop_min=prop_max and step=0 then predicitons are done for the number of dates... |
|
60 |
infile_dates<-"list_365_dates_04212012.txt" |
|
61 |
|
|
62 |
#Models to run...this can be change for each run |
|
63 |
list_models<-c("y_var ~ s(elev_1)", |
|
64 |
"y_var ~ s(LST)", |
|
65 |
"y_var ~ s(elev_1,LST)", |
|
66 |
"y_var ~ s(lat) + s(lon)+ s(elev_1)", |
|
67 |
"y_var ~ s(lat,lon,elev_1)", |
|
68 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", |
|
69 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", |
|
70 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", |
|
71 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)") |
|
72 |
|
|
73 |
#Choose interpolation method... |
|
74 |
interpolation_method<-c("gam_fusion","gam_CAI") #other otpions to be added later |
|
75 |
|
|
76 |
#Default name of LST avg to be matched |
|
77 |
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12") |
|
78 |
|
|
79 |
in_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data" |
|
80 |
#Create on the fly output folder... |
|
81 |
out_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data" |
|
82 |
script_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/" |
|
83 |
setwd(in_path) |
|
84 |
|
|
85 |
|
|
86 |
#PARSING INPUTS/ARGUMENTS |
|
87 |
list_param_data_prep<-c(infile_monthly,infile_daily,infile_locs,infile_covariates,covar_names,var,out_prefix,CRS_locs_WGS84) |
|
88 |
list_param_raster_prediction<-c(list_param_data_prep, |
|
89 |
seed_number,nb_sample,step,constant,prop_minmax,infile_dates, |
|
90 |
list_models,lst_avg,in_path,out_path,script_path, |
|
91 |
interpolation_method) |
|
92 |
|
|
93 |
#9 parameters used in the data preparation stage and input in the current script |
|
94 |
list_param_data_prep<-list_param_raster_prediction$list_param_data_prep |
|
95 |
infile_monthly<-list_param_data_prep$infile_monthly |
|
96 |
infile_daily<-list_param_data_prep$infile_daily |
|
97 |
infile_locs<-list_param_data_prep$infile_locs |
|
98 |
infile_covariates<-list_param_data_prep$infile_covariates |
|
99 |
covar_names<- list_param_data_prep$covar_names |
|
100 |
var<-list_param_data_prep$var |
|
101 |
out_prefix<-list_param_data_prep$out_prefix |
|
102 |
CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84 |
|
103 |
|
|
104 |
#6 parameters for sampling function |
|
105 |
seed_number<-list_param_raster_prediction$seed_number |
|
106 |
nb_sample<-list_param_raster_prediction$nb_sample |
|
107 |
step<-list_param_raster_prediction$step |
|
108 |
constant<-list_param_raster_prediction$constant |
|
109 |
prop_minmax<-list_param_raster_prediction$prop_minmax |
|
110 |
infile_dates<-list_param_raster_prediction$infile_dates |
|
111 |
|
|
112 |
#6 additional parameters for monthly climatology and more |
|
113 |
list_models<-list_param_raster_prediction$list_models |
|
114 |
lst_avg<-list_param_raster_prediction$lst_avg |
|
115 |
in_path<-list_param_raster_prediction$in_path |
|
116 |
out_path<-list_param_raster_prediction$out_path |
|
117 |
script_path<-list_param_raster_prediction$script_path |
|
118 |
interpolation_method<-list_param_raster_prediction$interpolation_method |
|
119 |
|
|
120 |
source(file.path(script_path,"sampling_script_functions_03052013.R")) |
|
121 |
source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R")) |
|
122 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02262013.R")) |
|
123 |
|
|
124 |
|
|
125 |
###################### START OF THE SCRIPT ######################## |
|
126 |
|
|
127 |
|
|
128 |
if (var=="TMAX"){ |
|
129 |
y_var_name<-"dailyTmax" |
|
130 |
} |
|
131 |
if (var=="TMIN"){ |
|
132 |
y_var_name<-"dailyTmin" |
|
133 |
} |
|
134 |
|
|
135 |
################# CREATE LOG FILE ##################### |
|
136 |
|
|
137 |
#create log file to keep track of details such as processing times and parameters. |
|
138 |
|
|
139 |
log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="") |
|
140 |
|
|
141 |
if (file.exists(log_fname)){ #Stop the script??? |
|
142 |
file.remove(log_fname) |
|
143 |
log_file<-file(log_fname,"w") |
|
144 |
} |
|
145 |
if (!file.exists(log_fname)){ |
|
146 |
log_file<-file(log_fname,"w") |
|
147 |
} |
|
148 |
|
|
149 |
time1<-proc.time() #Start stop watch |
|
150 |
writeLines(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""), |
|
151 |
con=log_file,sep="\n") |
|
152 |
writeLines("Starting script process time:",con=log_file,sep="\n") |
|
153 |
writeLines(as.character(time1),con=log_file,sep="\n") |
|
154 |
|
|
155 |
############### READING INPUTS: DAILY STATION DATA AND OTEHR DATASETS ################# |
|
156 |
|
|
157 |
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_daily))) |
|
158 |
CRS_interp<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
|
159 |
stat_loc<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_locs))) |
|
160 |
dates <-readLines(file.path(in_path,infile_dates)) #dates to be predicted |
|
161 |
|
|
162 |
#Reading of covariate brick covariates can be changed... |
|
163 |
|
|
164 |
s_raster<-brick(infile_covariates) #read in the data brck |
|
165 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
166 |
pos<-match("elev",names(s_raster)) |
|
167 |
names(s_raster)[pos]<-"elev_1" |
|
168 |
|
|
169 |
#Screen for extreme values": this needs more thought, min and max val vary with regions |
|
170 |
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets) |
|
171 |
#r1[r1 < (min_val)]<-NA |
|
172 |
|
|
173 |
#Reading monthly data |
|
174 |
data3<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_monthly))) |
|
175 |
dst_all<-data3 |
|
176 |
dst<-data3 |
|
177 |
|
|
178 |
### TO DO -important ### |
|
179 |
#Cleaning/sceerniging functions for daily stations, monthly stations and covariates?? do this during the preparation stage!!!?? |
|
180 |
### |
|
181 |
|
|
182 |
########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ########### |
|
183 |
|
|
184 |
#Input for sampling function... |
|
185 |
|
|
186 |
#dates #list of dates for prediction |
|
187 |
#ghcn_name<-"ghcn" #infile daily data |
|
188 |
|
|
189 |
list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn) |
|
190 |
#list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn_name) |
|
191 |
names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn") |
|
192 |
|
|
193 |
#run function |
|
194 |
sampling_obj<-sampling_training_testing(list_param_sampling) |
|
195 |
|
|
196 |
|
|
197 |
########### PREDICT FOR MONTHLY SCALE ################## |
|
198 |
|
|
199 |
#First predict at the monthly time scale: climatology |
|
200 |
writeLines("Predictions at monthly scale:",con=log_file,sep="\n") |
|
201 |
t1<-proc.time() |
|
202 |
j=12 |
|
203 |
list_param_runClim_KGFusion<-list(j,s_raster,covar_names,list_models,dst,var,y_var_name, out_prefix) |
|
204 |
names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","list_models","dst","var","y_var_name","out_prefix") |
|
205 |
source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R")) |
|
206 |
|
|
207 |
gamclim_fus_mod<-mclapply(1:6, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement |
|
208 |
#gamclim_fus_mod<-mclapply(1:6, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement |
|
209 |
save(gamclim_fus_mod,file= paste("gamclim_fus_mod",out_prefix,".RData",sep="")) |
|
210 |
t2<-proc.time()-t1 |
|
211 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
212 |
|
|
213 |
#now get list of raster clim layers |
|
214 |
|
|
215 |
list_tmp<-vector("list",length(gamclim_fus_mod)) |
|
216 |
for (i in 1:length(gamclim_fus_mod)){ |
|
217 |
tmp<-gamclim_fus_mod[[i]]$clim |
|
218 |
list_tmp[[i]]<-tmp |
|
18 |
raster_prediction_gam_fusion<-function(list_param_raster_prediction){ |
|
19 |
|
|
20 |
##Function to predict temperature interpolation with 21 input parameters |
|
21 |
|
|
22 |
|
|
23 |
|
|
24 |
###Loading R library and packages |
|
25 |
|
|
26 |
library(gtools) # loading some useful tools |
|
27 |
library(mgcv) # GAM package by Simon Wood |
|
28 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
29 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
30 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
31 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
32 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
33 |
library(raster) # Hijmans et al. package for raster processing |
|
34 |
library(rasterVis) |
|
35 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
36 |
library(reshape) |
|
37 |
library(plotrix) |
|
38 |
library(maptools) |
|
39 |
|
|
40 |
### Parameters and arguments |
|
41 |
#PARSING INPUTS/ARGUMENTS |
|
42 |
#list_param_data_prep<-c(infile_monthly,infile_daily,infile_locs,infile_covariates,covar_names,var,out_prefix,CRS_locs_WGS84) |
|
43 |
#list_param_raster_prediction<-c(list_param_data_prep, |
|
44 |
# seed_number,nb_sample,step,constant,prop_minmax,infile_dates, |
|
45 |
# list_models,lst_avg,in_path,out_path,script_path, |
|
46 |
# interpolation_method) |
|
47 |
|
|
48 |
#9 parameters used in the data preparation stage and input in the current script |
|
49 |
list_param_data_prep<-list_param_raster_prediction$list_param_data_prep |
|
50 |
infile_monthly<-list_param_data_prep$infile_monthly |
|
51 |
infile_daily<-list_param_data_prep$infile_daily |
|
52 |
infile_locs<-list_param_data_prep$infile_locs |
|
53 |
infile_covariates<-list_param_data_prep$infile_covariates #raster covariate brick, tif file |
|
54 |
covar_names<- list_param_data_prep$covar_names #remove at a later stage... |
|
55 |
var<-list_param_data_prep$var |
|
56 |
out_prefix<-list_param_data_prep$out_prefix |
|
57 |
CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84 |
|
58 |
|
|
59 |
#6 parameters for sampling function |
|
60 |
seed_number<-list_param_raster_prediction$seed_number |
|
61 |
nb_sample<-list_param_raster_prediction$nb_sample |
|
62 |
step<-list_param_raster_prediction$step |
|
63 |
constant<-list_param_raster_prediction$constant |
|
64 |
prop_minmax<-list_param_raster_prediction$prop_minmax |
|
65 |
infile_dates<-list_param_raster_prediction$infile_dates |
|
66 |
|
|
67 |
#6 additional parameters for monthly climatology and more |
|
68 |
list_models<-list_param_raster_prediction$list_models |
|
69 |
lst_avg<-list_param_raster_prediction$lst_avg |
|
70 |
in_path<-list_param_raster_prediction$in_path |
|
71 |
out_path<-list_param_raster_prediction$out_path |
|
72 |
script_path<-list_param_raster_prediction$script_path |
|
73 |
interpolation_method<-list_param_raster_prediction$interpolation_method |
|
74 |
|
|
75 |
setwd(in_path) |
|
76 |
|
|
77 |
source(file.path(script_path,"sampling_script_functions_03122013.R")) |
|
78 |
source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R")) |
|
79 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_03122013.R")) |
|
80 |
|
|
81 |
|
|
82 |
###################### START OF THE SCRIPT ######################## |
|
83 |
|
|
84 |
|
|
85 |
if (var=="TMAX"){ |
|
86 |
y_var_name<-"dailyTmax" |
|
87 |
} |
|
88 |
if (var=="TMIN"){ |
|
89 |
y_var_name<-"dailyTmin" |
|
90 |
} |
|
91 |
|
|
92 |
################# CREATE LOG FILE ##################### |
|
93 |
|
|
94 |
#create log file to keep track of details such as processing times and parameters. |
|
95 |
|
|
96 |
log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="") |
|
97 |
|
|
98 |
if (file.exists(log_fname)){ #Stop the script??? |
|
99 |
file.remove(log_fname) |
|
100 |
log_file<-file(log_fname,"w") |
|
101 |
} |
|
102 |
if (!file.exists(log_fname)){ |
|
103 |
log_file<-file(log_fname,"w") |
|
104 |
} |
|
105 |
|
|
106 |
time1<-proc.time() #Start stop watch |
|
107 |
writeLines(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""), |
|
108 |
con=log_file,sep="\n") |
|
109 |
writeLines("Starting script process time:",con=log_file,sep="\n") |
|
110 |
writeLines(as.character(time1),con=log_file,sep="\n") |
|
111 |
|
|
112 |
############### READING INPUTS: DAILY STATION DATA AND OTEHR DATASETS ################# |
|
113 |
|
|
114 |
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_daily))) |
|
115 |
CRS_interp<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
|
116 |
stat_loc<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_locs))) |
|
117 |
dates <-readLines(file.path(in_path,infile_dates)) #dates to be predicted |
|
118 |
|
|
119 |
#Reading of covariate brick covariates can be changed... |
|
120 |
|
|
121 |
s_raster<-brick(infile_covariates) #read in the data brck |
|
122 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
123 |
pos<-match("elev",names(s_raster)) |
|
124 |
names(s_raster)[pos]<-"elev_1" |
|
125 |
|
|
126 |
#Screen for extreme values": this needs more thought, min and max val vary with regions |
|
127 |
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets) |
|
128 |
#r1[r1 < (min_val)]<-NA |
|
129 |
|
|
130 |
#Reading monthly data |
|
131 |
data3<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_monthly))) |
|
132 |
dst_all<-data3 |
|
133 |
dst<-data3 |
|
134 |
|
|
135 |
### TO DO -important ### |
|
136 |
#Cleaning/sceerniging functions for daily stations, monthly stations and covariates?? do this during the preparation stage!!!?? |
|
137 |
### |
|
138 |
|
|
139 |
########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ########### |
|
140 |
|
|
141 |
#Input for sampling function... |
|
142 |
|
|
143 |
#dates #list of dates for prediction |
|
144 |
#ghcn_name<-"ghcn" #infile daily data |
|
145 |
|
|
146 |
list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn) |
|
147 |
#list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn_name) |
|
148 |
names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn") |
|
149 |
|
|
150 |
#run function |
|
151 |
sampling_obj<-sampling_training_testing(list_param_sampling) |
|
152 |
|
|
153 |
########### PREDICT FOR MONTHLY SCALE ################## |
|
154 |
|
|
155 |
#First predict at the monthly time scale: climatology |
|
156 |
writeLines("Predictions at monthly scale:",con=log_file,sep="\n") |
|
157 |
t1<-proc.time() |
|
158 |
j=12 |
|
159 |
#browser() |
|
160 |
list_param_runClim_KGFusion<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix) |
|
161 |
names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix") |
|
162 |
#source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R")) |
|
163 |
gamclim_fus_mod<-mclapply(1:12, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement |
|
164 |
#gamclim_fus_mod<-mclapply(1:6, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement |
|
165 |
save(gamclim_fus_mod,file= paste("gamclim_fus_mod",out_prefix,".RData",sep="")) |
|
166 |
t2<-proc.time()-t1 |
|
167 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
168 |
|
|
169 |
#now get list of raster clim layers |
|
170 |
|
|
171 |
list_tmp<-vector("list",length(gamclim_fus_mod)) |
|
172 |
for (i in 1:length(gamclim_fus_mod)){ |
|
173 |
tmp<-gamclim_fus_mod[[i]]$clim |
|
174 |
list_tmp[[i]]<-tmp |
|
175 |
} |
|
176 |
|
|
177 |
################## PREDICT AT DAILY TIME SCALE ################# |
|
178 |
|
|
179 |
#put together list of clim models per month... |
|
180 |
#rast_clim_yearlist<-list_tmp |
|
181 |
clim_yearlist<-list_tmp |
|
182 |
#Second predict at the daily time scale: delta |
|
183 |
|
|
184 |
#gam_fus_mod<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
185 |
writeLines("Predictions at the daily scale:",con=log_file,sep="\n") |
|
186 |
t1<-proc.time() |
|
187 |
|
|
188 |
#input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn |
|
189 |
list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,dst,var,y_var_name, out_prefix) |
|
190 |
names(list_param_runGAMFusion)<-c("list_index","clim_yearlist","sampling_obj","dst","var","y_var_name","out_prefix") |
|
191 |
#test<-mclapply(1:18, runGAMFusion,list_param=list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) |
|
192 |
|
|
193 |
gam_fus_mod<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_runGAMFusion,runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
194 |
|
|
195 |
#gam_fus_mod<-mclapply(1:length(sampling_obj$ghcn_data_day),runGAMFusion,list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
196 |
#gam_fus_mod<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
197 |
save(gam_fus_mod,file= paste("gam_fus_mod",out_prefix,".RData",sep="")) |
|
198 |
t2<-proc.time()-t1 |
|
199 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
200 |
browser() |
|
201 |
############### NOW RUN VALIDATION ######################### |
|
202 |
|
|
203 |
list_tmp<-vector("list",length(gam_fus_mod)) |
|
204 |
for (i in 1:length(gam_fus_mod)){ |
|
205 |
tmp<-gam_fus_mod[[i]][[y_var_name]] #y_var_name is the variable predicted (dailyTmax or dailyTmin) |
|
206 |
list_tmp[[i]]<-tmp |
|
207 |
} |
|
208 |
rast_day_yearlist<-list_tmp #list of predicted images |
|
209 |
|
|
210 |
writeLines("Validation step:",con=log_file,sep="\n") |
|
211 |
t1<-proc.time() |
|
212 |
#calculate_accuary_metrics<-function(i) |
|
213 |
list_param_validation<-list(i,rast_day_yearlist,gam_fus_mod,y_var_name, out_prefix) |
|
214 |
names(list_param_validation)<-c("list_index","rast_day_year_list","gam_fus_mod","y_var_name","out_prefix") |
|
215 |
|
|
216 |
#gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
217 |
gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
218 |
|
|
219 |
#gam_fus_validation_mod<-mclapply(1:1, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
220 |
save(gam_fus_validation_mod,file= paste("gam_fus_validation_mod",out_prefix,".RData",sep="")) |
|
221 |
t2<-proc.time()-t1 |
|
222 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
223 |
|
|
224 |
#################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ########### |
|
225 |
|
|
226 |
##Create data.frame with valiation metrics for a full year |
|
227 |
tb_diagnostic_v<-extract_from_list_obj(gam_fus_validation_mod,"metrics_v") |
|
228 |
rownames(tb_diagnostic_v)<-NULL #remove row names |
|
229 |
|
|
230 |
#Call function to create plots of metrics for validation dataset |
|
231 |
metric_names<-c("rmse","mae","me","r","m50") |
|
232 |
summary_metrics<-boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix) |
|
233 |
names(summary_metrics)<-c("avg","median") |
|
234 |
##Write out information concerning accuracy and predictions |
|
235 |
outfile<-file.path(in_path,paste("assessment_measures_",out_prefix,".txt",sep="")) |
|
236 |
write.table(tb_diagnostic_v,file= outfile,row.names=FALSE,sep=",") |
|
237 |
write.table(summary_metrics[[1]], file= outfile, append=TRUE,sep=",") #write out avg |
|
238 |
write.table(summary_metrics[[2]], file= outfile, append=TRUE,sep=",") #write out median |
|
239 |
|
|
240 |
#################### CLOSE LOG FILE #################### |
|
241 |
|
|
242 |
#close log_file connection and add meta data |
|
243 |
writeLines("Finished script process time:",con=log_file,sep="\n") |
|
244 |
time2<-proc.time()-time1 |
|
245 |
writeLines(as.character(time2),con=log_file,sep="\n") |
|
246 |
#later on add all the paramters used in the script... |
|
247 |
writeLines(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""), |
|
248 |
con=log_file,sep="\n") |
|
249 |
writeLines("End of script",con=log_file,sep="\n") |
|
250 |
close(log_file) |
|
251 |
|
|
252 |
################### PREPARE RETURN OBJECT ############### |
|
253 |
#Will add more information to be returned |
|
254 |
|
|
255 |
raster_prediction_obj<-list(gamclim_fus_mod,gam_fus_mod,gam_fus_validation_mod,tb_diagnostic_v) |
|
256 |
names(raster_prediction_obj)<-c("gamclim_fus_mod","gam_fus_mod","gam_fus_validation_mod","tb_diagnostic_v") |
|
257 |
save(raster_prediction_obj,file= paste("raster_prediction_obj_",out_prefix,".RData",sep="")) |
|
258 |
|
|
259 |
return(raster_prediction_obj) |
|
219 | 260 |
} |
220 | 261 |
|
221 |
################## PREDICT AT DAILY TIME SCALE ################# |
|
222 |
|
|
223 |
#put together list of clim models per month... |
|
224 |
#rast_clim_yearlist<-list_tmp |
|
225 |
clim_yearlist<-list_tmp |
|
226 |
#Second predict at the daily time scale: delta |
|
227 |
|
|
228 |
#gam_fus_mod<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
229 |
writeLines("Predictions at the daily scale:",con=log_file,sep="\n") |
|
230 |
t1<-proc.time() |
|
231 |
|
|
232 |
#input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn |
|
233 |
list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,dst,var,y_var_name, out_prefix) |
|
234 |
names(list_param_runGAMFusion)<-c("list_index","clim_yearlist","sampling_obj","dst","var","y_var_name","out_prefix") |
|
235 |
#test<-mclapply(1:18, runGAMFusion,list_param=list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) |
|
236 |
|
|
237 |
gam_fus_mod<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_runGAMFusion,runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
238 |
|
|
239 |
#gam_fus_mod<-mclapply(1:length(sampling_obj$ghcn_data_day),runGAMFusion,list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
240 |
#gam_fus_mod<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
241 |
save(gam_fus_mod,file= paste("gam_fus_mod",out_prefix,".RData",sep="")) |
|
242 |
t2<-proc.time()-t1 |
|
243 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
244 |
|
|
245 |
############### NOW RUN VALIDATION ######################### |
|
246 |
|
|
247 |
list_tmp<-vector("list",length(gam_fus_mod)) |
|
248 |
for (i in 1:length(gam_fus_mod)){ |
|
249 |
tmp<-gam_fus_mod[[i]][[y_var_name]] #y_var_name is the variable predicted (tmax or tmin) |
|
250 |
list_tmp[[i]]<-tmp |
|
251 |
} |
|
252 |
rast_day_yearlist<-list_tmp #list of predicted images |
|
253 |
|
|
254 |
writeLines("Validation step:",con=log_file,sep="\n") |
|
255 |
t1<-proc.time() |
|
256 |
#calculate_accuary_metrics<-function(i) |
|
257 |
gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement |
|
258 |
#gam_fus_validation_mod<-mclapply(1:1, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement |
|
259 |
save(gam_fus_validation_mod,file= paste("gam_fus_validation_mod",out_prefix,".RData",sep="")) |
|
260 |
t2<-proc.time()-t1 |
|
261 |
writeLines(as.character(t2),con=log_file,sep="\n") |
|
262 |
|
|
263 |
#################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ########### |
|
264 |
|
|
265 |
##Create data.frame with valiation metrics for a full year |
|
266 |
tb_diagnostic_v<-extract_from_list_obj(gam_fus_validation_mod,"metrics_v") |
|
267 |
rownames(tb_diagnostic_v)<-NULL #remove row names |
|
268 |
|
|
269 |
#Call function to create plots of metrics for validation dataset |
|
270 |
metric_names<-c("rmse","mae","me","r","m50") |
|
271 |
summary_metrics<-boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix) |
|
272 |
names(summary_metrics)<-c("avg","median") |
|
273 |
##Write out information concerning accuracy and predictions |
|
274 |
outfile<-file.path(in_path,paste("assessment_measures_",out_prefix,".txt",sep="")) |
|
275 |
write.table(tb_diagnostic_v,file= outfile,row.names=FALSE,sep=",") |
|
276 |
write.table(summary_metrics[[1]], file= outfile, append=TRUE,sep=",") #write out avg |
|
277 |
write.table(summary_metrics[[2]], file= outfile, append=TRUE,sep=",") #write out median |
|
278 |
|
|
279 |
#################### CLOSE LOG FILE ############# |
|
280 |
|
|
281 |
#close log_file connection and add meta data |
|
282 |
writeLines("Finished script process time:",con=log_file,sep="\n") |
|
283 |
time2<-proc.time()-time1 |
|
284 |
writeLines(as.character(time2),con=log_file,sep="\n") |
|
285 |
#later on add all the paramters used in the script... |
|
286 |
writeLines(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""), |
|
287 |
con=log_file,sep="\n") |
|
288 |
writeLines("End of script",con=log_file,sep="\n") |
|
289 |
close(log_file) |
|
290 |
|
|
291 |
############################################################ |
|
292 |
######################## END OF SCRIPT ##################### |
|
262 |
#################################################################### |
|
263 |
######################## END OF SCRIPT/FUNCTION ##################### |
Also available in: Unified diff
GAM fusion raster prediction, script now a function with arguments parsing and call from master function