1 |
1cd7ecf2
|
Benoit Parmentier
|
######################### Raster prediction GAM FUSION ####################################
|
2 |
|
|
############################ Interpolation of temperature for given processing region ##########################################
|
3 |
|
|
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.
|
4 |
|
|
#It requires the text file of stations and a shape file of the study area.
|
5 |
fb039a6b
|
Benoit Parmentier
|
#Note that the projection for both GHCND and study area is lonlat WGS84.
|
6 |
|
|
#Options to run this program are:
|
7 |
|
|
#1) Multisampling: vary the proportions of hold out and use random samples for each run
|
8 |
|
|
#2)Constant sampling: use the same sample over the runs
|
9 |
|
|
#3)over dates: run over for example 365 dates without multisampling
|
10 |
|
|
#4)use seed number: use seed if random samples must be repeatable
|
11 |
1508a57e
|
Benoit Parmentier
|
#5)GAM fusion: possibility of running GAM+FUSION or GAM+CAI and other options added
|
12 |
0f602e87
|
Benoit Parmentier
|
#The interpolation is done first at the monthly time scale then delta surfaces are added.
|
13 |
fb039a6b
|
Benoit Parmentier
|
#AUTHOR: Benoit Parmentier
|
14 |
1cd7ecf2
|
Benoit Parmentier
|
#DATE: 03/12/2013
|
15 |
1508a57e
|
Benoit Parmentier
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--
|
16 |
e7bf2d1b
|
Benoit Parmentier
|
###################################################################################################
|
17 |
|
|
|
18 |
ae46eb91
|
Benoit Parmentier
|
# Interpolate temperature with the GAM + fusion workflow.
#
# The function reads the daily and monthly station layers and the covariate
# brick, builds training/testing samples (sampling_training_testing), predicts
# at the monthly scale (runClim_KGFusion), predicts at the daily scale
# (runGAMFusion), computes validation metrics (calculate_accuracy_metrics) and
# writes a log file, intermediate .RData files and an assessment table.
#
# Argument:
#   list_param_raster_prediction: named list with the elements
#     list_param_data_prep: list holding infile_monthly, infile_daily,
#       infile_locs, infile_covariates, covar_names, var ("TMAX" or "TMIN")
#       and out_prefix (CRS_locs_WGS84 may be present but is not used here)
#     seed_number, nb_sample, step, constant, prop_minmax: passed unchanged to
#       sampling_training_testing()
#     dates_selected: "" to predict every date found in the daily station
#       layer, otherwise the vector of dates to predict
#     list_models, lst_avg: passed unchanged to runClim_KGFusion()
#     in_path: directory with the inputs; it becomes the working directory
#     script_path: directory holding the three sourced helper scripts
#     out_path, interpolation_method: accepted but not used in this function
#
# Value:
#   named list with gamclim_fus_mod, gam_fus_mod, gam_fus_validation_mod and
#   tb_diagnostic_v; the same list is also saved as
#   raster_prediction_obj_<out_prefix>.RData in the working directory.
raster_prediction_gam_fusion<-function(list_param_raster_prediction){

  ### Loading R library and packages

  library(gtools)     # loading some useful tools
  library(mgcv)       # GAM package by Simon Wood
  library(sp)         # Spatial package with class definition by Bivand et al.
  library(spdep)      # Spatial package with methods and spatial stat. by Bivand et al.
  library(rgdal)      # GDAL wrapper for R, spatial utilities
  library(gstat)      # Kriging and co-kriging by Pebesma et al.
  library(fields)     # NCAR Spatial Interpolation methods such as kriging, splines
  library(raster)     # Hijmans et al. package for raster processing
  library(rasterVis)
  library(parallel)   # Urbanek S. and Ripley B., package for multi cores & parallel processing
  library(reshape)
  library(plotrix)
  library(maptools)

  ### Parameters and arguments
  # PARSING INPUTS/ARGUMENTS

  # Parameters coming from the data preparation stage
  list_param_data_prep <- list_param_raster_prediction$list_param_data_prep
  infile_monthly    <- list_param_data_prep$infile_monthly
  infile_daily      <- list_param_data_prep$infile_daily
  infile_locs       <- list_param_data_prep$infile_locs
  infile_covariates <- list_param_data_prep$infile_covariates  # raster covariate brick, tif file
  covar_names       <- list_param_data_prep$covar_names        # remove at a later stage...
  var               <- list_param_data_prep$var
  out_prefix        <- list_param_data_prep$out_prefix

  # Parameters for the sampling function
  seed_number    <- list_param_raster_prediction$seed_number
  nb_sample      <- list_param_raster_prediction$nb_sample
  step           <- list_param_raster_prediction$step
  constant       <- list_param_raster_prediction$constant
  prop_minmax    <- list_param_raster_prediction$prop_minmax
  dates_selected <- list_param_raster_prediction$dates_selected

  # Additional parameters for the monthly climatology and paths
  list_models <- list_param_raster_prediction$list_models
  lst_avg     <- list_param_raster_prediction$lst_avg
  in_path     <- list_param_raster_prediction$in_path
  script_path <- list_param_raster_prediction$script_path

  # Number of worker processes handed to mclapply() for each stage
  nb_cores_monthly <- 6
  nb_cores_daily   <- 9

  # Strip a trailing ".shp" to get the OGR layer name. The pattern is escaped
  # and anchored so only a real extension is removed (".shp" as a bare regex
  # would match any character followed by "shp" anywhere in the name).
  layer_name <- function(path) {
    sub("\\.shp$", "", basename(path))
  }

  # The working directory is deliberately left set to in_path on return: the
  # log file and the .RData files below are written with relative names.
  setwd(in_path)

  source(file.path(script_path,"sampling_script_functions_03122013.R"))
  source(file.path(script_path,"GAM_fusion_function_multisampling_03122013.R"))
  source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_03122013.R"))

  ###################### START OF THE SCRIPT ########################

  # Name of the predicted variable; fail early on an unsupported value instead
  # of hitting an undefined y_var_name after all inputs have been read.
  y_var_name <- switch(as.character(var),
    TMAX = "dailyTmax",
    TMIN = "dailyTmin",
    stop("var must be \"TMAX\" or \"TMIN\"", call. = FALSE)
  )

  ################# CREATE LOG FILE #####################

  # Log file keeps track of details such as processing times and parameters.
  # Opening in "w" mode truncates a log left over from a previous run.
  log_fname <- paste0("R_log_raster_prediction", out_prefix, ".log")
  log_file <- file(log_fname, "w")
  # Close the connection however the function exits (normal return or error).
  on.exit(close(log_file), add = TRUE)

  write_log <- function(txt) {
    writeLines(txt, con = log_file, sep = "\n")
  }

  time1 <- proc.time()  # Start stop watch
  write_log(paste0("Starting script at this local Date and Time: ", as.character(Sys.time())))
  write_log("Starting script process time:")
  write_log(as.character(time1))

  ############### READING INPUTS: DAILY STATION DATA AND OTHER DATASETS #################

  ghcn <- readOGR(dsn = in_path, layer = layer_name(infile_daily))
  # Station locations are read but not used further in this function.
  stat_loc <- readOGR(dsn = in_path, layer = layer_name(infile_locs))

  # Dates to be predicted: every date present in the daily data when
  # dates_selected is "" (or empty), otherwise the supplied dates as given.
  # The test is length-safe: dates_selected may be a vector of several dates.
  use_all_dates <- length(dates_selected) == 0L ||
    identical(as.character(dates_selected), "")
  if (use_all_dates) {
    dates <- as.character(sort(unique(ghcn$date)))
  } else {
    dates <- dates_selected
  }

  # Reading of covariate brick; covariates can be changed...
  s_raster <- brick(infile_covariates)  # read in the data brick
  names(s_raster) <- covar_names        # Assigning names to the raster layers
  pos <- match("elev", names(s_raster))
  if (!is.na(pos)) {
    names(s_raster)[pos] <- "elev_1"
  }

  # Screening for extreme values needs more thought: min and max values vary
  # with regions.
  #min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
  #r1[r1 < (min_val)]<-NA

  # Reading monthly data
  dst <- readOGR(dsn = in_path, layer = layer_name(infile_monthly))

  ### TO DO -important ###
  # Cleaning/screening functions for daily stations, monthly stations and
  # covariates?? Do this during the preparation stage!!!??
  ###

  ########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########

  # Input for the sampling function; note that dates must be a character vector!!
  list_param_sampling <- list(
    seed_number = seed_number,
    nb_sample   = nb_sample,
    step        = step,
    constant    = constant,
    prop_minmax = prop_minmax,
    dates       = dates,
    ghcn        = ghcn
  )
  sampling_obj <- sampling_training_testing(list_param_sampling)

  ########### PREDICT FOR MONTHLY SCALE ##################

  # First predict at the monthly time scale: climatology
  write_log("Predictions at monthly scale:")
  t1 <- proc.time()
  # list_index is presumably a placeholder (mclapply passes the month index as
  # the first argument of runClim_KGFusion) -- TODO confirm in the helper.
  list_param_runClim_KGFusion <- list(
    list_index  = 12,
    covar_rast  = s_raster,
    covar_names = covar_names,
    lst_avg     = lst_avg,
    list_models = list_models,
    dst         = dst,
    var         = var,
    y_var_name  = y_var_name,
    out_prefix  = out_prefix
  )
  gamclim_fus_mod <- mclapply(1:12, runClim_KGFusion,
                              list_param = list_param_runClim_KGFusion,
                              mc.preschedule = FALSE,
                              mc.cores = nb_cores_monthly)
  save(gamclim_fus_mod, file = paste0("gamclim_fus_mod", out_prefix, ".RData"))
  t2 <- proc.time() - t1
  write_log(as.character(t2))

  # Now get the list of raster clim layers, one entry per monthly model.
  # lapply keeps one slot per model even when an entry is NULL, so positions
  # stay aligned with the month index.
  clim_yearlist <- lapply(gamclim_fus_mod, function(mod) mod$clim)

  ################## PREDICT AT DAILY TIME SCALE #################

  # Second predict at the daily time scale: delta
  write_log("Predictions at the daily scale:")
  t1 <- proc.time()

  # input a list: note that ghcn.subsets is not sampling_obj$data_day_ghcn
  # list_index keeps the value the original code passed (the last monthly
  # index); presumably a placeholder -- TODO confirm in runGAMFusion.
  list_param_runGAMFusion <- list(
    list_index    = length(gamclim_fus_mod),
    clim_yearlist = clim_yearlist,
    sampling_obj  = sampling_obj,
    dst           = dst,
    var           = var,
    y_var_name    = y_var_name,
    out_prefix    = out_prefix
  )
  gam_fus_mod <- mclapply(seq_along(sampling_obj$ghcn_data_day), runGAMFusion,
                          list_param = list_param_runGAMFusion,
                          mc.preschedule = FALSE,
                          mc.cores = nb_cores_daily)
  save(gam_fus_mod, file = paste0("gam_fus_mod", out_prefix, ".RData"))
  t2 <- proc.time() - t1
  write_log(as.character(t2))

  ############### NOW RUN VALIDATION #########################

  # List of predicted images; y_var_name is the variable predicted
  # (dailyTmax or dailyTmin).
  rast_day_yearlist <- lapply(gam_fus_mod, function(mod) mod[[y_var_name]])

  write_log("Validation step:")
  t1 <- proc.time()
  # Same element names for any method. list_index keeps the value the original
  # code passed (the last daily index); presumably a placeholder -- TODO confirm.
  list_param_validation <- list(
    list_index         = length(gam_fus_mod),
    rast_day_year_list = rast_day_yearlist,
    method_mod_obj     = gam_fus_mod,
    y_var_name         = y_var_name,
    out_prefix         = out_prefix
  )
  gam_fus_validation_mod <- mclapply(seq_along(gam_fus_mod), calculate_accuracy_metrics,
                                     list_param = list_param_validation,
                                     mc.preschedule = FALSE,
                                     mc.cores = nb_cores_daily)
  save(gam_fus_validation_mod, file = paste0("gam_fus_validation_mod", out_prefix, ".RData"))
  t2 <- proc.time() - t1
  write_log(as.character(t2))

  #################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########

  # Create data.frame with validation metrics for a full year
  tb_diagnostic_v <- extract_from_list_obj(gam_fus_validation_mod, "metrics_v")
  rownames(tb_diagnostic_v) <- NULL  # remove row names

  # Call function to create plots of metrics for validation dataset
  metric_names <- c("rmse", "mae", "me", "r", "m50")
  summary_metrics <- boxplot_from_tb(tb_diagnostic_v, metric_names, out_prefix)
  names(summary_metrics) <- c("avg", "median")

  # Write out information concerning accuracy and predictions.
  # NOTE(review): the table is written to in_path; out_path is not used.
  outfile <- file.path(in_path, paste0("assessment_measures_", out_prefix, ".txt"))
  write.table(tb_diagnostic_v, file = outfile, row.names = FALSE, sep = ",")
  write.table(summary_metrics[[1]], file = outfile, append = TRUE, sep = ",")  # write out avg
  write.table(summary_metrics[[2]], file = outfile, append = TRUE, sep = ",")  # write out median

  #################### CLOSE LOG FILE ####################

  # Add closing meta data; the connection itself is closed by on.exit().
  write_log("Finished script process time:")
  time2 <- proc.time() - time1
  write_log(as.character(time2))
  # later on add all the parameters used in the script...
  write_log(paste0("Finished script at this local Date and Time: ", as.character(Sys.time())))
  write_log("End of script")

  ################### PREPARE RETURN OBJECT ###############
  # Will add more information to be returned

  raster_prediction_obj <- list(
    gamclim_fus_mod        = gamclim_fus_mod,
    gam_fus_mod            = gam_fus_mod,
    gam_fus_validation_mod = gam_fus_validation_mod,
    tb_diagnostic_v        = tb_diagnostic_v
  )
  save(raster_prediction_obj, file = paste0("raster_prediction_obj_", out_prefix, ".RData"))

  return(raster_prediction_obj)
}
|
267 |
|
|
|
268 |
ae46eb91
|
Benoit Parmentier
|
####################################################################
|
269 |
|
|
######################## END OF SCRIPT/FUNCTION #####################
|