1
|
######################### Raster prediction GAM FUSION ####################################
|
2
|
############################ Interpolation of temperature for given processing region ##########################################
|
3
|
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.
|
4
|
#It requires the text file of stations and a shape file of the study area.
|
5
|
#Note that the projection for both GHCND and study area is lonlat WGS84.
|
6
|
#Options to run this program are:
|
7
|
#1) Multisampling: vary the porportions of hold out and use random samples for each run
|
8
|
#2)Constant sampling: use the same sample over the runs
|
9
|
#3)over dates: run over for example 365 dates without mulitsampling
|
10
|
#4)use seed number: use seed if random samples must be repeatable
|
11
|
#5)GAM fusion: possibilty of running GAM+FUSION or GAM+CAI and other options added
|
12
|
#The interpolation is done first at the monthly time scale then delta surfaces are added.
|
13
|
#AUTHOR: Benoit Parmentier
|
14
|
#DATE: 03/27/2013
|
15
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--
|
16
|
#
|
17
|
# TO DO:
|
18
|
# 1) modidy to make it general for any method i.e. make call to method e.g. gam_fus, gam_cai etc.
|
19
|
# 2) simplify and bundle validation steps, make it general--method_mod_validation
|
20
|
# 3) solve issues with log file recordings
|
21
|
# 4) output location folder on the fly???
|
22
|
|
23
|
###################################################################################################
|
24
|
|
25
|
raster_prediction_gam_fusion<-function(list_param_raster_prediction){
|
26
|
|
27
|
##Function to predict temperature interpolation with 21 input parameters
|
28
|
#9 parameters used in the data preparation stage and input in the current script
|
29
|
#1)list_param_data_prep: used in earlier code for the query from the database and extraction for raster brick
|
30
|
#2)infile_monthly:
|
31
|
#3)infile_daily
|
32
|
#4)infile_locs:
|
33
|
#5)infile_covariates: raster covariate brick, tif file
|
34
|
#6)covar_names: covar_names #remove at a later stage...
|
35
|
#7)var: variable being interpolated-TMIN or TMAX
|
36
|
#8)out_prefix
|
37
|
#9)CRS_locs_WGS84
|
38
|
#
|
39
|
#6 parameters for sampling function
|
40
|
#10)seed_number
|
41
|
#11)nb_sample
|
42
|
#12)step
|
43
|
#13)constant
|
44
|
#14)prop_minmax
|
45
|
#15)dates_selected
|
46
|
#
|
47
|
#6 additional parameters for monthly climatology and more
|
48
|
#16)list_models: model formulas in character vector
|
49
|
#17)lst_avg: LST climatology name in the brick of covariate--change later
|
50
|
#18)n_path
|
51
|
#19)out_path
|
52
|
#20)script_path: path to script
|
53
|
#21)interpolation_method: c("gam_fusion","gam_CAI") #other otpions to be added later
|
54
|
|
55
|
###Loading R library and packages
|
56
|
|
57
|
library(gtools) # loading some useful tools
|
58
|
library(mgcv) # GAM package by Simon Wood
|
59
|
library(sp) # Spatial pacakge with class definition by Bivand et al.
|
60
|
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al.
|
61
|
library(rgdal) # GDAL wrapper for R, spatial utilities
|
62
|
library(gstat) # Kriging and co-kriging by Pebesma et al.
|
63
|
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines
|
64
|
library(raster) # Hijmans et al. package for raster processing
|
65
|
library(rasterVis)
|
66
|
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing
|
67
|
library(reshape)
|
68
|
library(plotrix)
|
69
|
library(maptools)
|
70
|
library(gdata) #Nesssary to use cbindX
|
71
|
|
72
|
### Parameters and arguments
|
73
|
#PARSING INPUTS/ARGUMENTS
|
74
|
#
|
75
|
# names(list_param_raster_prediction)<-c("list_param_data_prep",
|
76
|
# "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
|
77
|
# "list_models","lst_avg","in_path","out_path","script_path",
|
78
|
# "interpolation_method")
|
79
|
|
80
|
#9 parameters used in the data preparation stage and input in the current script
|
81
|
list_param_data_prep<-list_param_raster_prediction$list_param_data_prep
|
82
|
infile_monthly<-list_param_data_prep$infile_monthly
|
83
|
infile_daily<-list_param_data_prep$infile_daily
|
84
|
infile_locs<-list_param_data_prep$infile_locs
|
85
|
infile_covariates<-list_param_data_prep$infile_covariates #raster covariate brick, tif file
|
86
|
covar_names<- list_param_data_prep$covar_names #remove at a later stage...
|
87
|
var<-list_param_data_prep$var
|
88
|
out_prefix<-list_param_data_prep$out_prefix
|
89
|
CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84
|
90
|
|
91
|
#6 parameters for sampling function
|
92
|
seed_number<-list_param_raster_prediction$seed_number
|
93
|
nb_sample<-list_param_raster_prediction$nb_sample
|
94
|
step<-list_param_raster_prediction$step
|
95
|
constant<-list_param_raster_prediction$constant
|
96
|
prop_minmax<-list_param_raster_prediction$prop_minmax
|
97
|
dates_selected<-list_param_raster_prediction$dates_selected
|
98
|
|
99
|
#6 additional parameters for monthly climatology and more
|
100
|
list_models<-list_param_raster_prediction$list_models
|
101
|
lst_avg<-list_param_raster_prediction$lst_avg
|
102
|
in_path<-list_param_raster_prediction$in_path
|
103
|
out_path<-list_param_raster_prediction$out_path
|
104
|
script_path<-list_param_raster_prediction$script_path
|
105
|
interpolation_method<-list_param_raster_prediction$interpolation_method
|
106
|
|
107
|
setwd(in_path)
|
108
|
|
109
|
#Sourcing in the master script to avoid confusion on the latest versions of scripts and functions!!!
|
110
|
|
111
|
#source(file.path(script_path,"sampling_script_functions_03122013.R"))
|
112
|
#source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R"))
|
113
|
#source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_03182013.R"))
|
114
|
|
115
|
|
116
|
###################### START OF THE SCRIPT ########################
|
117
|
|
118
|
|
119
|
if (var=="TMAX"){
|
120
|
y_var_name<-"dailyTmax"
|
121
|
}
|
122
|
if (var=="TMIN"){
|
123
|
y_var_name<-"dailyTmin"
|
124
|
}
|
125
|
|
126
|
################# CREATE LOG FILE #####################
|
127
|
|
128
|
#create log file to keep track of details such as processing times and parameters.
|
129
|
|
130
|
log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
|
131
|
|
132
|
if (file.exists(log_fname)){ #Stop the script???
|
133
|
file.remove(log_fname)
|
134
|
log_file<-file(log_fname,"w")
|
135
|
}
|
136
|
if (!file.exists(log_fname)){
|
137
|
log_file<-file(log_fname,"w")
|
138
|
}
|
139
|
|
140
|
time1<-proc.time() #Start stop watch
|
141
|
writeLines(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""),
|
142
|
con=log_file,sep="\n")
|
143
|
writeLines("Starting script process time:",con=log_file,sep="\n")
|
144
|
writeLines(as.character(time1),con=log_file,sep="\n")
|
145
|
|
146
|
############### READING INPUTS: DAILY STATION DATA AND OTEHR DATASETS #################
|
147
|
|
148
|
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_daily)))
|
149
|
CRS_interp<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.)
|
150
|
stat_loc<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_locs)))
|
151
|
#dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file
|
152
|
if (dates_selected==""){
|
153
|
dates<-as.character(sort(unique(ghcn$date))) #dates to be predicted
|
154
|
}
|
155
|
if (dates_selected!=""){
|
156
|
dates<-dates_selected #dates to be predicted
|
157
|
}
|
158
|
|
159
|
#Reading of covariate brick covariates can be changed...
|
160
|
|
161
|
s_raster<-brick(infile_covariates) #read in the data brck
|
162
|
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction
|
163
|
pos<-match("elev",names(s_raster))
|
164
|
names(s_raster)[pos]<-"elev_1"
|
165
|
|
166
|
#Screen for extreme values": this needs more thought, min and max val vary with regions
|
167
|
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
|
168
|
#r1[r1 < (min_val)]<-NA
|
169
|
|
170
|
#Reading monthly data
|
171
|
data3<-readOGR(dsn=in_path,layer=sub(".shp","",basename(infile_monthly)))
|
172
|
dst_all<-data3
|
173
|
dst<-data3
|
174
|
|
175
|
### TO DO -important ###
|
176
|
#Cleaning/sceerniging functions for daily stations, monthly stations and covariates?? do this during the preparation stage!!!??
|
177
|
###
|
178
|
|
179
|
########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########
|
180
|
|
181
|
#Input for sampling function...
|
182
|
|
183
|
#dates #list of dates for prediction
|
184
|
#ghcn_name<-"ghcn" #infile daily data
|
185
|
|
186
|
list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn)
|
187
|
#list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn_name)
|
188
|
names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn")
|
189
|
|
190
|
#run function, note that dates must be a character vector!!
|
191
|
sampling_obj<-sampling_training_testing(list_param_sampling)
|
192
|
|
193
|
########### PREDICT FOR MONTHLY SCALE ##################
|
194
|
|
195
|
#First predict at the monthly time scale: climatology
|
196
|
writeLines("Predictions at monthly scale:",con=log_file,sep="\n")
|
197
|
t1<-proc.time()
|
198
|
j=12
|
199
|
#browser()
|
200
|
#list_param_runClim_KGFusion<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix)
|
201
|
names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix")
|
202
|
#source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R"))
|
203
|
gamclim_fus_mod<-mclapply(1:12, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
|
204
|
#gamclim_fus_mod<-mclapply(1:6, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
|
205
|
|
206
|
#gamclim_fus_mod<-runClim_KGFusion(1,list_param=list_param_runClim_KGFusion) #This is the end bracket from mclapply(...) statement
|
207
|
save(gamclim_fus_mod,file= paste("gamclim_fus_mod_",y_var_name,out_prefix,".RData",sep=""))
|
208
|
t2<-proc.time()-t1
|
209
|
writeLines(as.character(t2),con=log_file,sep="\n")
|
210
|
|
211
|
#now get list of raster clim layers
|
212
|
|
213
|
list_tmp<-vector("list",length(gamclim_fus_mod))
|
214
|
for (i in 1:length(gamclim_fus_mod)){
|
215
|
tmp<-gamclim_fus_mod[[i]]$clim
|
216
|
list_tmp[[i]]<-tmp
|
217
|
}
|
218
|
|
219
|
################## PREDICT AT DAILY TIME SCALE #################
|
220
|
|
221
|
#put together list of clim models per month...
|
222
|
#rast_clim_yearlist<-list_tmp
|
223
|
clim_yearlist<-list_tmp
|
224
|
#Second predict at the daily time scale: delta
|
225
|
|
226
|
#gam_fus_mod<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
|
227
|
writeLines("Predictions at the daily scale:",con=log_file,sep="\n")
|
228
|
t1<-proc.time()
|
229
|
|
230
|
#input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
|
231
|
list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,dst,var,y_var_name, out_prefix)
|
232
|
names(list_param_runGAMFusion)<-c("list_index","clim_yearlist","sampling_obj","dst","var","y_var_name","out_prefix")
|
233
|
#test<-mclapply(1:18, runGAMFusion,list_param=list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9)
|
234
|
#test<-runGAMFusion(1,list_param=list_param_runGAMFusion)
|
235
|
|
236
|
#MAKE IT GENERAL: for any method: replace "gam_fus_mod" by "method_mod_obj"?
|
237
|
|
238
|
gam_fus_mod<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_runGAMFusion,runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
|
239
|
|
240
|
#gam_fus_mod<-mclapply(1:length(sampling_obj$ghcn_data_day),runGAMFusion,list_param_runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
|
241
|
#gam_fus_mod<-mclapply(1:length(ghcn.subsets), runGAMFusion,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
|
242
|
|
243
|
save(gam_fus_mod,file= paste("gam_fus_mod_",y_var_name,out_prefix,".RData",sep=""))
|
244
|
t2<-proc.time()-t1
|
245
|
writeLines(as.character(t2),con=log_file,sep="\n")
|
246
|
#browser()
|
247
|
|
248
|
############### NOW RUN VALIDATION #########################
|
249
|
#SIMPLIFY THIS PART: one call
|
250
|
|
251
|
list_tmp<-vector("list",length(gam_fus_mod))
|
252
|
for (i in 1:length(gam_fus_mod)){
|
253
|
tmp<-gam_fus_mod[[i]][[y_var_name]] #y_var_name is the variable predicted (dailyTmax or dailyTmin)
|
254
|
list_tmp[[i]]<-tmp
|
255
|
}
|
256
|
rast_day_yearlist<-list_tmp #list of predicted images
|
257
|
|
258
|
writeLines("Validation step:",con=log_file,sep="\n")
|
259
|
t1<-proc.time()
|
260
|
#calculate_accuary_metrics<-function(i)
|
261
|
list_param_validation<-list(i,rast_day_yearlist,gam_fus_mod,y_var_name, out_prefix)
|
262
|
names(list_param_validation)<-c("list_index","rast_day_year_list","method_mod_obj","y_var_name","out_prefix") #same names for any method
|
263
|
|
264
|
#gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
|
265
|
gam_fus_validation_mod<-mclapply(1:length(gam_fus_mod), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
|
266
|
|
267
|
#gam_fus_validation_mod<-mclapply(1:1, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
|
268
|
save(gam_fus_validation_mod,file= paste("gam_fus_validation_mod_",y_var_name,out_prefix,".RData",sep=""))
|
269
|
t2<-proc.time()-t1
|
270
|
writeLines(as.character(t2),con=log_file,sep="\n")
|
271
|
|
272
|
#################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########
|
273
|
|
274
|
##Create data.frame with valiation metrics for a full year
|
275
|
tb_diagnostic_v<-extract_from_list_obj(gam_fus_validation_mod,"metrics_v")
|
276
|
rownames(tb_diagnostic_v)<-NULL #remove row names
|
277
|
|
278
|
#Call function to create plots of metrics for validation dataset
|
279
|
metric_names<-c("rmse","mae","me","r","m50")
|
280
|
summary_metrics_v<-boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix)
|
281
|
names(summary_metrics_v)<-c("avg","median")
|
282
|
|
283
|
#################### CLOSE LOG FILE ####################
|
284
|
|
285
|
#close log_file connection and add meta data
|
286
|
writeLines("Finished script process time:",con=log_file,sep="\n")
|
287
|
time2<-proc.time()-time1
|
288
|
writeLines(as.character(time2),con=log_file,sep="\n")
|
289
|
#later on add all the paramters used in the script...
|
290
|
writeLines(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""),
|
291
|
con=log_file,sep="\n")
|
292
|
writeLines("End of script",con=log_file,sep="\n")
|
293
|
close(log_file)
|
294
|
|
295
|
################### PREPARE RETURN OBJECT ###############
|
296
|
#Will add more information to be returned
|
297
|
|
298
|
raster_prediction_obj<-list(gamclim_fus_mod,gam_fus_mod,gam_fus_validation_mod,tb_diagnostic_v,summary_metrics_v)
|
299
|
names(raster_prediction_obj)<-c("gamclim_fus_mod","gam_fus_mod","gam_fus_validation_mod","tb_diagnostic_v",
|
300
|
"summary_metrics_v")
|
301
|
save(raster_prediction_obj,file= paste("raster_prediction_obj_",y_var_name,out_prefix,".RData",sep=""))
|
302
|
|
303
|
return(raster_prediction_obj)
|
304
|
}
|
305
|
|
306
|
####################################################################
|
307
|
######################## END OF SCRIPT/FUNCTION #####################
|