######################### Raster prediction GAM FUSION ####################################
############################ Interpolation of temperature for given processing region ##########################################
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.
#It requires the text file of stations and a shape file of the study area.
#Note that the projection for both GHCND and study area is lonlat WGS84.
#Options to run this program are:
#1) Multisampling: vary the proportions of hold out and use random samples for each run
#2) Constant sampling: use the same sample over the runs
#3) Over dates: run over, for example, 365 dates without multisampling
#4) Use seed number: use seed if random samples must be repeatable
#5) GAM fusion: possibility of running GAM+FUSION or GAM+CAI and other options added
#The interpolation is done first at the monthly time scale then delta surfaces are added.
#AUTHOR: Benoit Parmentier
#DATE: 03/12/2013
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--
###################################################################################################

raster_prediction_gam_fusion <- function(list_param_raster_prediction) {

  ## Predict temperature surfaces: monthly climatology first, then daily
  ## predictions, then validation metrics for the daily predictions.
  ##
  ## Argument:
  ##   list_param_raster_prediction: named list with the elements
  ##     "list_param_data_prep" (itself a list providing infile_monthly,
  ##        infile_daily, infile_locs, infile_covariates, covar_names, var,
  ##        out_prefix),
  ##     "seed_number", "nb_sample", "step", "constant", "prop_minmax",
  ##     "dates_selected", "list_models", "lst_avg", "in_path", "script_path".
  ##     "out_path", "interpolation_method" and
  ##     list_param_data_prep$CRS_locs_WGS84 may be present but are not used
  ##     in this function body.
  ##
  ## Value:
  ##   A list named "gamclim_fus_mod", "gam_fus_mod", "gam_fus_validation_mod"
  ##   and "tb_diagnostic_v".
  ##
  ## Side effects:
  ##   - changes the working directory to in_path (not restored; callers may
  ##     rely on this, so it is kept as is),
  ##   - writes a log file, several .RData files and an assessment text file
  ##     (relative paths resolve against in_path because of the setwd call).

  ### Loading R library and packages

  library(gtools)    # loading some useful tools
  library(mgcv)      # GAM package by Simon Wood
  library(sp)        # Spatial package with class definition by Bivand et al.
  library(spdep)     # Spatial package with methods and spatial stat. by Bivand et al.
  library(rgdal)     # GDAL wrapper for R, spatial utilities
  library(gstat)     # Kriging and co-kriging by Pebesma et al.
  library(fields)    # NCAR Spatial Interpolation methods such as kriging, splines
  library(raster)    # Hijmans et al. package for raster processing
  library(rasterVis)
  library(parallel)  # Urbanek S. and Ripley B., package for multi cores & parallel processing
  library(reshape)
  library(plotrix)
  library(maptools)

  ### Local helpers

  # OGR layer name for a shapefile path: base name without the ".shp" suffix.
  # The pattern is escaped and anchored so only the extension is removed
  # (an unanchored ".shp" would also match e.g. "_shp" inside the name).
  layer_name <- function(path) {
    sub("\\.shp$", "", basename(path))
  }

  # mclapply returns an object of class "try-error" for a job that failed.
  # Stop with the indices of the failed jobs instead of failing later with
  # an unrelated subsetting error.
  stop_on_worker_error <- function(results, step_label) {
    failed <- which(vapply(results, inherits, logical(1), what = "try-error"))
    if (length(failed) > 0) {
      stop(step_label, " failed for index(es): ",
           paste(failed, collapse = ", "), call. = FALSE)
    }
    invisible(results)
  }

  ### Parameters and arguments
  # PARSING INPUTS/ARGUMENTS

  # Parameters coming from the data preparation stage
  list_param_data_prep <- list_param_raster_prediction$list_param_data_prep
  infile_monthly    <- list_param_data_prep$infile_monthly
  infile_daily      <- list_param_data_prep$infile_daily
  infile_locs       <- list_param_data_prep$infile_locs
  infile_covariates <- list_param_data_prep$infile_covariates # raster covariate brick, tif file
  covar_names       <- list_param_data_prep$covar_names       # remove at a later stage...
  var               <- list_param_data_prep$var
  out_prefix        <- list_param_data_prep$out_prefix

  # Parameters for the sampling function
  seed_number    <- list_param_raster_prediction$seed_number
  nb_sample      <- list_param_raster_prediction$nb_sample
  step           <- list_param_raster_prediction$step
  constant       <- list_param_raster_prediction$constant
  prop_minmax    <- list_param_raster_prediction$prop_minmax
  dates_selected <- list_param_raster_prediction$dates_selected

  # Additional parameters for monthly climatology and more
  list_models <- list_param_raster_prediction$list_models
  lst_avg     <- list_param_raster_prediction$lst_avg
  in_path     <- list_param_raster_prediction$in_path
  script_path <- list_param_raster_prediction$script_path

  # NOTE(review): the working directory is changed and not restored; every
  # relative output path below depends on it.
  setwd(in_path)

  source(file.path(script_path, "sampling_script_functions_03122013.R"))
  source(file.path(script_path, "GAM_fusion_function_multisampling_03122013.R"))
  source(file.path(script_path, "GAM_fusion_function_multisampling_validation_metrics_03122013.R"))

  ###################### START OF THE SCRIPT ########################

  # Name of the predicted variable; fail right away on an unsupported var
  # rather than later with "object 'y_var_name' not found".
  y_var_name <- switch(
    var,
    TMAX = "dailyTmax",
    TMIN = "dailyTmin",
    stop("var must be \"TMAX\" or \"TMIN\"", call. = FALSE)
  )

  ################# CREATE LOG FILE #####################

  # Log file to keep track of details such as processing times and parameters.
  # Opening in "w" mode truncates an existing file, so no prior removal is
  # needed; on.exit closes the connection even when a later step fails.
  log_fname <- paste0("R_log_raster_prediction", out_prefix, ".log")
  log_file <- file(log_fname, "w")
  on.exit(close(log_file), add = TRUE)
  log_line <- function(txt) {
    writeLines(txt, con = log_file, sep = "\n")
  }

  time1 <- proc.time() # Start stop watch
  log_line(paste0("Starting script at this local Date and Time: ",
                  as.character(Sys.time())))
  log_line("Starting script process time:")
  log_line(as.character(time1))

  ############### READING INPUTS: DAILY STATION DATA AND OTHER DATASETS #################

  ghcn <- readOGR(dsn = in_path, layer = layer_name(infile_daily))
  # NOTE(review): stat_loc is not used further down; the read is kept so a
  # missing station-location file still fails here as before.
  stat_loc <- readOGR(dsn = in_path, layer = layer_name(infile_locs))

  # Dates to be predicted: every date found in the daily data when
  # dates_selected is empty (NULL, zero-length or ""), otherwise the dates
  # supplied. The length-safe test is needed because dates_selected may be a
  # vector of several dates, which cannot be used directly as an if condition.
  use_all_dates <- length(dates_selected) == 0L ||
    identical(as.character(dates_selected), "")
  if (use_all_dates) {
    dates <- as.character(sort(unique(ghcn$date)))
  } else {
    dates <- dates_selected
  }

  # Reading of covariate brick; covariates can be changed...
  s_raster <- brick(infile_covariates)
  names(s_raster) <- covar_names # make sure layer names are included in the extraction
  pos <- match("elev", names(s_raster))
  if (!is.na(pos)) {
    names(s_raster)[pos] <- "elev_1"
  }

  # Screen for extreme values: this needs more thought, min and max val vary with regions
  # min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
  # r1[r1 < (min_val)]<-NA

  # Reading monthly data
  dst <- readOGR(dsn = in_path, layer = layer_name(infile_monthly))

  ### TO DO -important ###
  # Cleaning/screening functions for daily stations, monthly stations and
  # covariates?? do this during the preparation stage!!!??
  ###

  ########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########

  # Input for sampling function; note that dates must be a character vector!!
  list_param_sampling <- list(
    seed_number = seed_number,
    nb_sample   = nb_sample,
    step        = step,
    constant    = constant,
    prop_minmax = prop_minmax,
    dates       = dates,
    ghcn        = ghcn
  )
  sampling_obj <- sampling_training_testing(list_param_sampling)

  ########### PREDICT FOR MONTHLY SCALE ##################

  # First predict at the monthly time scale: climatology
  log_line("Predictions at monthly scale:")
  t1 <- proc.time()
  list_param_runClim_KGFusion <- list(
    list_index  = 12,
    covar_rast  = s_raster,
    covar_names = covar_names,
    lst_avg     = lst_avg,
    list_models = list_models,
    dst         = dst,
    var         = var,
    y_var_name  = y_var_name,
    out_prefix  = out_prefix
  )
  gamclim_fus_mod <- mclapply(1:12, runClim_KGFusion,
                              list_param = list_param_runClim_KGFusion,
                              mc.preschedule = FALSE, mc.cores = 6)
  save(gamclim_fus_mod, file = paste0("gamclim_fus_mod", out_prefix, ".RData"))
  t2 <- proc.time() - t1
  log_line(as.character(t2))
  # Checked after save() so the raw results remain available for debugging.
  stop_on_worker_error(gamclim_fus_mod, "Monthly climatology prediction")

  # Now get the list of raster clim layers, one entry per fitted month.
  # lapply keeps every position aligned with gamclim_fus_mod even when an
  # entry has no "clim" element.
  clim_yearlist <- lapply(gamclim_fus_mod, function(mod) mod$clim)

  ################## PREDICT AT DAILY TIME SCALE #################

  # Second predict at the daily time scale: delta
  log_line("Predictions at the daily scale:")
  t1 <- proc.time()

  # input a list: note that ghcn.subsets is not sampling_obj$data_day_ghcn
  list_param_runGAMFusion <- list(
    # Same value the leftover loop index had in the previous implementation.
    list_index    = length(gamclim_fus_mod),
    clim_yearlist = clim_yearlist,
    sampling_obj  = sampling_obj,
    dst           = dst,
    var           = var,
    y_var_name    = y_var_name,
    out_prefix    = out_prefix
  )
  gam_fus_mod <- mclapply(seq_along(sampling_obj$ghcn_data_day), runGAMFusion,
                          list_param = list_param_runGAMFusion,
                          mc.preschedule = FALSE, mc.cores = 9)
  save(gam_fus_mod, file = paste0("gam_fus_mod", out_prefix, ".RData"))
  t2 <- proc.time() - t1
  log_line(as.character(t2))
  stop_on_worker_error(gam_fus_mod, "Daily prediction")

  ############### NOW RUN VALIDATION #########################

  # List of predicted images; y_var_name is the variable predicted
  # (dailyTmax or dailyTmin).
  rast_day_yearlist <- lapply(gam_fus_mod, function(mod) mod[[y_var_name]])

  log_line("Validation step:")
  t1 <- proc.time()
  list_param_validation <- list(
    # Same value the leftover loop index had in the previous implementation.
    list_index         = length(gam_fus_mod),
    rast_day_year_list = rast_day_yearlist,
    method_mod_obj     = gam_fus_mod,
    y_var_name         = y_var_name,
    out_prefix         = out_prefix
  ) # same names for any method
  gam_fus_validation_mod <- mclapply(seq_along(gam_fus_mod),
                                     calculate_accuracy_metrics,
                                     list_param = list_param_validation,
                                     mc.preschedule = FALSE, mc.cores = 9)
  save(gam_fus_validation_mod,
       file = paste0("gam_fus_validation_mod", out_prefix, ".RData"))
  t2 <- proc.time() - t1
  log_line(as.character(t2))

  #################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########

  ## Create data.frame with validation metrics for a full year
  tb_diagnostic_v <- extract_from_list_obj(gam_fus_validation_mod, "metrics_v")
  rownames(tb_diagnostic_v) <- NULL # remove row names

  # Call function to create plots of metrics for validation dataset
  metric_names <- c("rmse", "mae", "me", "r", "m50")
  summary_metrics <- boxplot_from_tb(tb_diagnostic_v, metric_names, out_prefix)
  names(summary_metrics) <- c("avg", "median")

  ## Write out information concerning accuracy and predictions
  outfile <- file.path(in_path,
                       paste0("assessment_measures_", out_prefix, ".txt"))
  write.table(tb_diagnostic_v, file = outfile, row.names = FALSE, sep = ",")
  write.table(summary_metrics[[1]], file = outfile, append = TRUE, sep = ",") # write out avg
  write.table(summary_metrics[[2]], file = outfile, append = TRUE, sep = ",") # write out median

  #################### CLOSE LOG FILE ####################

  # Add closing meta data; the connection itself is closed by on.exit.
  log_line("Finished script process time:")
  time2 <- proc.time() - time1
  log_line(as.character(time2))
  # later on add all the parameters used in the script...
  log_line(paste0("Finished script at this local Date and Time: ",
                  as.character(Sys.time())))
  log_line("End of script")

  ################### PREPARE RETURN OBJECT ###############
  # Will add more information to be returned

  raster_prediction_obj <- list(
    gamclim_fus_mod        = gamclim_fus_mod,
    gam_fus_mod            = gam_fus_mod,
    gam_fus_validation_mod = gam_fus_validation_mod,
    tb_diagnostic_v        = tb_diagnostic_v
  )
  save(raster_prediction_obj,
       file = paste0("raster_prediction_obj_", out_prefix, ".RData"))

  return(raster_prediction_obj)
}

####################################################################
######################## END OF SCRIPT/FUNCTION #####################