Revision e983f0e5

Added by Alberto Guzman almost 11 years ago

Adding from nas.


climate/research/world/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R
1
#########################    Raster prediction    ####################################
2
############################ Interpolation of temperature for given processing region ##########################################
3
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.                      
4
#It requires the text file of stations and a shape file of the study area.           
5
#Note that the projection for both GHCND and study area is lonlat WGS84.       
6
#Options to run this program are:
7
#1) Multisampling: vary the proportions of hold-out data and use random samples for each run
8
#2) Constant sampling: use the same sample across all runs
9
#3) Over dates: run over, for example, 365 dates without multisampling
10
#4) Use seed number: set a seed if random samples must be repeatable
11
#5) Possibility of running GAM+FUSION or GAM+CAI, with other options added
12
#The interpolation is done first at the monthly time scale, then daily delta surfaces are added.
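#For readers new to the approach, the two-step logic referred to above can be sketched as follows
#(object names here are illustrative only, not variables defined in this script):
#   clim_month <- predicted monthly climatology surface (GAM fusion or CAI, fitted per month)
#   delta_day  <- kriged surface of (daily station value - monthly station mean)
#   temp_day   <- clim_month + delta_day    #daily prediction, e.g. dailyTmax or dailyTmin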
13
#AUTHOR: Benoit Parmentier                                                                        
14
#DATE: 07/16/2013                                                                                 
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--     
16
#
17
# TO DO:
18
# 1) modify to make it general for any method i.e. make call to method e.g. gam_fus, gam_cai etc.
19
# 2) simplify and bundle validation steps, make it general--method_mod_validation
20
# 3) solve issues with log file recordings
21
# 4) output location folder on the fly???
22

  
23
###################################################################################################
24

  
25
raster_prediction_fun <-function(list_param_raster_prediction){
26

  
27
  ##Function to predict temperature via interpolation using 21 input parameters (a usage sketch follows the parameter list below)
28
  #9 parameters used in the data preparation stage and input in the current script
29
  #1)list_param_data_prep: used in earlier code for the query from the database and extraction for raster brick
30
  #2)infile_monthly:
31
  #3)infile_daily
32
  #4)infile_locs:
33
  #5)infile_covariates: raster covariate brick, tif file
34
  #6)covar_names: covar_names #remove at a later stage...
35
  #7)var: variable being interpolated-TMIN or TMAX
36
  #8)out_prefix
37
  #9)CRS_locs_WGS84
38
  #10)screen_data_training
39
  #
40
  #6 parameters for sampling function
41
  #10)seed_number
42
  #11)nb_sample
43
  #12)step
44
  #13)constant
45
  #14)prop_minmax
46
  #15)dates_selected
47
  #
48
  #6 additional parameters for monthly climatology and more
49
  #16)list_models: model formulas in character vector
50
  #17)lst_avg: LST climatology name in the brick of covariate--change later
51
  #18)n_path
52
  #19)out_path
53
  #20)script_path: path to script
54
  #21)interpolation_method: c("gam_fusion","gam_CAI") #other options to be added later
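  #Usage sketch (commented out; it mirrors the call pattern in master_script_12182013.R further down):
  # list_param_raster_prediction<-list(list_param_data_prep,screen_data_training,
  #                                    seed_number,nb_sample,step,constant,prop_minmax,dates_selected,
  #                                    list_models,lst_avg,out_path,script_path,interpolation_method)
  # names(list_param_raster_prediction)<-c("list_param_data_prep","screen_data_training",
  #                                    "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
  #                                    "list_models","lst_avg","out_path","script_path","interpolation_method")
  # raster_prediction_obj<-raster_prediction_fun(list_param_raster_prediction)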
55
  
56
  ###Loading R library and packages     
57
  
58
  library(gtools)                                         # loading some useful tools 
59
  library(mgcv)                                           # GAM package by Simon Wood
60
  library(sp)                                             # Spatial package with class definitions by Bivand et al.
61
  library(spdep)                               # Spatial package with methods and spatial stat. by Bivand et al.
62
  library(rgdal)                               # GDAL wrapper for R, spatial utilities
63
  library(gstat)                               # Kriging and co-kriging by Pebesma et al.
64
  library(fields)                             # NCAR Spatial Interpolation methods such as kriging, splines
65
  library(raster)                              # Hijmans et al. package for raster processing
66
  library(rasterVis)
67
  library(parallel)                            # Urbanek S. and Ripley B., package for multi-core & parallel processing
68
  library(reshape)
69
  library(plotrix)
70
  library(maptools)
71
  library(gdata) #Necessary to use cbindX
72
  library(automap)  #autokrige function
73
  library(spgwr)   #GWR method
74
  
75
  ### Parameters and arguments
76
  #PARSING INPUTS/ARGUMENTS
77
#   
78
#   names(list_param_raster_prediction)<-c("list_param_data_prep",
79
#                                          "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
80
#                                          "list_models","lst_avg","in_path","out_path","script_path",
81
#                                          "interpolation_method")
82
  
83
  #9 parameters used in the data preparation stage and input in the current script
84
  list_param_data_prep<-list_param_raster_prediction$list_param_data_prep
85
  infile_monthly<-list_param_data_prep$infile_monthly
86
  infile_daily<-list_param_data_prep$infile_daily
87
  infile_locs<-list_param_data_prep$infile_locs
88
  infile_covariates<-list_param_data_prep$infile_covariates #raster covariate brick, tif file
89
  covar_names<- list_param_data_prep$covar_names #remove at a later stage...
90
  var<-list_param_data_prep$var
91
  out_prefix<-list_param_data_prep$out_prefix
92
  CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84
93

  
94
  #6 parameters for sampling function
95
  seed_number<-list_param_raster_prediction$seed_number
96
  nb_sample<-list_param_raster_prediction$nb_sample
97
  step<-list_param_raster_prediction$step
98
  constant<-list_param_raster_prediction$constant
99
  prop_minmax<-list_param_raster_prediction$prop_minmax
100
  dates_selected<-list_param_raster_prediction$dates_selected
101
  
102
  #6 additional parameters for monthly climatology and more
103
  list_models<-list_param_raster_prediction$list_models
104
  lst_avg<-list_param_raster_prediction$lst_avg
105
  out_path<-list_param_raster_prediction$out_path
106
  script_path<-list_param_raster_prediction$script_path
107
  interpolation_method<-list_param_raster_prediction$interpolation_method
108
  screen_data_training <-list_param_raster_prediction$screen_data_training
109
  
110
  setwd(out_path)
111
  
112
  ###################### START OF THE SCRIPT ########################
113
   
114
  if (var=="TMAX"){
115
    y_var_name<-"dailyTmax"                                       
116
  }
117
  
118
  if (var=="TMIN"){
119
    y_var_name<-"dailyTmin"                                       
120
  }
121
  
122
  ################# CREATE LOG FILE #####################
123
  
124
  #create log file to keep track of details such as processing times and parameters.
125
  
126
  #log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
127
  log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
128
  #sink(log_fname) #create new log file
129
  file.create(file.path(out_path,log_fname)) #create new log file
130
  
131
  time1<-proc.time()    #Start stop watch
132
  
133
  cat(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""),
134
             file=log_fname,sep="\n")
135
  cat("Starting script process time:",file=log_fname,sep="\n",append=TRUE)
136
  cat(as.character(time1),file=log_fname,sep="\n",append=TRUE)    
137
  
138

  
139
  ############### READING INPUTS: DAILY STATION DATA AND OTHER DATASETS  #################
140
  #infile_daily = gsub("/data/project/layers/commons/","/nobackup/aguzman4/climate/interp/testdata/",infile_daily)
141
  ghcn<-readOGR(dsn=dirname(infile_daily),layer=sub(".shp","",basename(infile_daily)))
142
  CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
143
  stat_loc<-readOGR(dsn=dirname(infile_locs),layer=sub(".shp","",basename(infile_locs)))
144
  #dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file
145
  if (dates_selected==""){
146
    dates<-as.character(sort(unique(ghcn$date))) #dates to be predicted 
147
  }
148
  if (dates_selected!=""){
149
    dates<-dates_selected #dates to be predicted 
150
  }
151
  
152

  
153
  #Reading in covariate brick; can be changed...
154
  
155
  s_raster<-brick(infile_covariates)                   #read in the data brick
156
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
157
    
158
  #Reading monthly data
159
  #Getting this name from command line
160
  dst<-readOGR(dsn=(infile_monthly),layer=sub(".shp","",basename(infile_monthly)))
161
  ########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########
162
  #Input for sampling function...
163
  
164
  #dates #list of dates for prediction
165
  #ghcn_name<-"ghcn" #infile daily data 
166
  
167
  list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn)
168

  
169
  names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn")
170
  
171
  #run function, note that dates must be a character vector!!
172
  sampling_obj<-sampling_training_testing(list_param_sampling)
173
  
174
  ########### PREDICT FOR MONTHLY SCALE  ##################
175
  #First predict at the monthly time scale: climatology
176
  cat("Predictions at monthly scale:",file=log_fname,sep="\n", append=TRUE)
177
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
178
      file=log_fname,sep="\n", append=TRUE)
179
  t1<-proc.time()
180
  j=12
181

  
182
  if (interpolation_method=="gam_fusion"){
183
    list_param_runClim_KGFusion<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix,out_path)
184
    names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix","out_path")
185
    
186
    print("Monthly")
187
    clim_method_mod_obj<-mclapply(1:12, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
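    #Note: the named argument list_param is forwarded through "..." of mclapply to runClim_KGFusion(j,list_param),
    #so the 12 months are fitted in parallel, at most mc.cores of them at a time.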
188
    print("done")
189
   
190
    file2<-file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep=""))
191
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
192
    list_tmp<-vector("list",length(clim_method_mod_obj))
193
    for (i in 1:length(clim_method_mod_obj)){
194
      tmp<-clim_method_mod_obj[[i]]$clim
195
      list_tmp[[i]]<-tmp
196
    }
197
    clim_yearlist<-list_tmp
198
  }
199

  
200
  if (interpolation_method=="gam_CAI"){
201
    list_param_runClim_KGCAI<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix,out_path)
202
    names(list_param_runClim_KGCAI)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix","out_path")
203
    clim_method_mod_obj<-mclapply(1:12, list_param=list_param_runClim_KGCAI, runClim_KGCAI,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
204
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
205
    list_tmp<-vector("list",length(clim_method_mod_obj))
206
    for (i in 1:length(clim_method_mod_obj)){
207
      tmp<-clim_method_mod_obj[[i]]$clim
208
      list_tmp[[i]]<-tmp
209
    }
210
    clim_yearlist<-list_tmp
211
  }
212
  t2<-proc.time()-t1
213
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
214

  
215
  ################## PREDICT AT DAILY TIME SCALE #################
216
  #Predict at daily time scale from single time scale or multiple time scale methods: 2 methods available now
217
  
218
  #put together list of clim models per month...
219
  #rast_clim_yearlist<-list_tmp
220
  
221
  #Second predict at the daily time scale: delta
222
  
223
  #method_mod_obj<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
224
  print("daily")
225
  cat("Predictions at the daily scale:",file=log_fname,sep="\n", append=TRUE)
226
  t1<-proc.time()
227
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
228
      file=log_fname,sep="\n", append=TRUE)
229
  
230
  #TODO : Same call for all functions!!! Replace by one "if" for all multi time scale methods...
231
  if (interpolation_method=="gam_CAI" | interpolation_method=="gam_fusion"){
232
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
233
    i<-1
234
    list_param_run_prediction_daily_deviation <-list(i,clim_yearlist,sampling_obj,dst,var,y_var_name, interpolation_method,out_prefix,out_path)
235
    names(list_param_run_prediction_daily_deviation)<-c("list_index","clim_yearlist","sampling_obj","dst","var","y_var_name","interpolation_method","out_prefix","out_path")
236
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_daily_deviation,run_prediction_daily_deviation,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
237
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
238
   }
239
  #TODO : Same call for all functions!!! Replace by one "if" for all daily single time scale methods...
240
  if (interpolation_method=="gam_daily"){
241
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
242
    i<-1
243
    list_param_run_prediction_gam_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,screen_data_training,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
244
    names(list_param_run_prediction_gam_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","screen_data_training","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
245
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_gam_daily,runGAM_day_fun,mc.preschedule=FALSE,mc.cores = 12) #This is the end bracket from mclapply(...) statement
246
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
247
    
248
  }
249
  
250
  if (interpolation_method=="kriging_daily"){
251
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
252
    i<-1
253
    list_param_run_prediction_kriging_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
254
    names(list_param_run_prediction_kriging_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
255
    #test <- runKriging_day_fun(1,list_param_run_prediction_kriging_daily)
256
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
257
    #method_mod_obj<-mclapply(1:18,list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
258
    
259
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
260
    
261
  }
262
  
263
  if (interpolation_method=="gwr_daily"){
264
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
265
    i<-1
266
    list_param_run_prediction_gwr_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
267
    names(list_param_run_prediction_gwr_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
268
    #test <- run_interp_day_fun(1,list_param_run_prediction_gwr_daily)
269
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_gwr_daily,run_interp_day_fun,mc.preschedule=FALSE,mc.cores = 12) #This is the end bracket from mclapply(...) statement
270
    
271
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
272
    
273
  }
274
  t2<-proc.time()-t1
275
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
276
  #browser()
277
  
278
  ############### NOW RUN VALIDATION #########################
279
  #SIMPLIFY THIS PART: one call
280
  print("validation")
281
 
282
  list_tmp<-vector("list",length(method_mod_obj))
283
  for (i in 1:length(method_mod_obj)){
284
    tmp<-method_mod_obj[[i]][[y_var_name]]  #y_var_name is the variable predicted (dailyTmax or dailyTmin)
285
    list_tmp[[i]]<-tmp
286
  }
287
  rast_day_yearlist<-list_tmp #list of predicted images over full year... 
288

  
289
  cat("Validation step:",file=log_fname,sep="\n", append=TRUE)
290
  t1<-proc.time()
291
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
292
      file=log_fname,sep="\n", append=TRUE)
293
  
294
  list_param_validation<-list(i,rast_day_yearlist,method_mod_obj,y_var_name, out_prefix, out_path)
295
  names(list_param_validation)<-c("list_index","rast_day_year_list","method_mod_obj","y_var_name","out_prefix", "out_path") #same names for any method
296
  
297
  validation_mod_obj <-mclapply(1:length(method_mod_obj), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 12) 
298
  #test_val<-calculate_accuracy_metrics(1,list_param_validation)
299
  save(validation_mod_obj,file= file.path(out_path,paste(interpolation_method,"_validation_mod_obj_",y_var_name,out_prefix,".RData",sep="")))
300
  t2<-proc.time()-t1
301
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
302
  
303
  #################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########
304
  ##Create data.frame with validation metrics for a full year
305
  tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v")
306
  rownames(tb_diagnostic_v)<-NULL #remove row names
307
  
308
  #Call functions to create plots of metrics for validation dataset
309
  metric_names<-c("rmse","mae","me","r","m50")
310
  summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
311
  names(summary_metrics_v)<-c("avg","median")
312
  summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
313
  
314
  #################### CLOSE LOG FILE  ####################
315
  
316
  #close log_file connection and add meta data
317
  cat("Finished script process time:",file=log_fname,sep="\n", append=TRUE)
318
  time2<-proc.time()-time1
319
  cat(as.character(time2),file=log_fname,sep="\n", append=TRUE)
320
  #later on add all the parameters used in the script...
321
  cat(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""),
322
             file=log_fname,sep="\n", append=TRUE)
323
  cat("End of script",file=log_fname,sep="\n", append=TRUE)
324
  
325
  ################### PREPARE RETURN OBJECT ###############
326
  #Will add more information to be returned
327
  if (interpolation_method=="gam_CAI" | interpolation_method=="gam_fusion"){
328
    raster_prediction_obj<-list(clim_method_mod_obj,method_mod_obj,validation_mod_obj,tb_diagnostic_v,
329
                                summary_metrics_v,summary_month_metrics_v)
330
    names(raster_prediction_obj)<-c("clim_method_mod_obj","method_mod_obj","validation_mod_obj","tb_diagnostic_v",
331
                                    "summary_metrics_v","summary_month_metrics_v")  
332
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
333
    
334
  }
335
  
336
  #use %in% instead of "|" operator
337
  if (interpolation_method=="gam_daily" | interpolation_method=="kriging_daily" | interpolation_method=="gwr_daily"){
338
    raster_prediction_obj<-list(method_mod_obj,validation_mod_obj,tb_diagnostic_v,
339
                                summary_metrics_v,summary_month_metrics_v)
340
    names(raster_prediction_obj)<-c("method_mod_obj","validation_mod_obj","tb_diagnostic_v",
341
                                    "summary_metrics_v","summary_month_metrics_v")  
342
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
343
    
344
  }
345
  return(raster_prediction_obj)
346
}
347

  
348
####################################################################
349
######################## END OF SCRIPT/FUNCTION #####################
climate/research/world/interpolation/GAM_fusion_function_multisampling.R
1
##################  Functions for use in the raster prediction stage   #######################################
2
############################ Interpolation in a given tile/region ##########################################
3
#This script contains 5 functions used in the interpolation of temperature in the specified study/processing area:
4
# 1)predict_raster_model<-function(in_models,r_stack,out_filename)                                                             
5
# 2)fit_models<-function(list_formulas,data_training)           
6
# 3)runClim_KGCAI<-function(j,list_param) : function that performs the GAM CAI method
7
# 4)runClim_KGFusion<-function(j,list_param) function for monthly step (climatology) in the fusion method
8
# 5)runGAMFusion <- function(i,list_param) : daily step for fusion method, perform daily prediction
9
#
10
#AUTHOR: Benoit Parmentier                                                                       
11
#DATE: 05/07/2013                                                                                 
12
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--   
13

  
14
##Comments and TODO:
15
#This script is meant to be for general processing tile by tile or region by region.
16
# Note that the functions are called from GAM_fusion_analysis_raster_prediction_multisampling.R.
17
# This will be expanded to other methods.
18
##################################################################################################
19

  
20

  
21
predict_raster_model<-function(in_models,r_stack,out_filename){
22
  #This function performs predictions on a raster grid given input models.
23
  #Arguments: list of fitted models, raster stack of covariates
24
  #Output: spatial grid data frame of the subset of tiles
25
  list_rast_pred<-vector("list",length(in_models))
26
  for (i in 1:length(in_models)){
27
    mod <-in_models[[i]] #accessing GAM model object "i"
28
    raster_name<-out_filename[[i]]
29
    if (inherits(mod,"gam")) {           #change to c("gam","autoKrige")
30
      raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
31
      names(raster_pred)<-"y_pred"  
32
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
33
      list_rast_pred[[i]]<-raster_name
34
    }
35
  }
36
  if (inherits(mod,"try-error")) {
37
    print(paste("no gam model fitted:",mod[1],sep=" ")) #change message for any model type...
38
  }
39
  return(list_rast_pred)
40
}
41

  
42
fit_models<-function(list_formulas,data_training){
43
  #This function fits several models and returns the model objects.
44
  #Arguments: - list of formulas for GAM models
45
  #           - fitting data in a data.frame or SpatialPointDataFrame
46
  #Output: list of model objects 
47
  list_fitted_models<-vector("list",length(list_formulas))
48
  for (k in 1:length(list_formulas)){
49
    formula<-list_formulas[[k]]
50
    mod<- try(gam(formula, data=data_training,k=5)) #change to any model!!
51
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
52
    model_name<-paste("mod",k,sep="")
53
    assign(model_name,mod) 
54
    list_fitted_models[[k]]<-mod
55
  }
56
  return(list_fitted_models) 
57
}
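#How the two helpers above are chained in the runClim_* functions below (schematic, commented out;
#data_month, s_raster and list_out_filename are created inside those functions):
# list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv)          #character strings -> formula objects
# mod_list<-fit_models(list_formulas,data_month)                        #one fitted gam (or try-error) per formula
# rast_list<-predict_raster_model(mod_list,s_raster,list_out_filename)  #writes one .tif per successfully fitted model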
58

  
59
####
60
#TODO:
61
#Add log file and calculate time and sizes for processes-outputs
62
runClim_KGCAI <-function(j,list_param){
63

  
64
  #Make this a function with multiple argument that can be used by mcmapply??
65
  #Arguments: 
66
  #1)list_index: j 
67
  #2)covar_rast: covariates raster images used in the modeling
68
  #3)covar_names: names of input variables 
69
  #4)lst_avg: list of LST climatology names, may be removed later on
70
  #5)list_models: list input models for bias calculation
71
  #6)dst: data at the monthly time scale
72
  #7)var: TMAX or TMIN, variable being interpolated
73
  #8)y_var_name: output name, not used at this stage
74
  #9)out_prefix
75
  #10) out_path
76
  
77
  #The output is a list of four elements produced by the function:
78
  #1) clim: list of output names for raster climatologies 
79
  #2) data_month: monthly training data for bias surface modeling
80
  #3) mod: list of model objects fitted 
81
  #4) formulas: list of formulas used in bias modeling
82
    
83
  ### PARSING INPUT ARGUMENTS
84
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
85
    
86
  index<-list_param$j
87
  s_raster<-list_param$covar_rast
88
  covar_names<-list_param$covar_names
89
  lst_avg<-list_param$lst_avg
90
  list_models<-list_param$list_models
91
  dst<-list_param$dst #monthly station dataset
92
  var<-list_param$var
93
  y_var_name<-list_param$y_var_name
94
  out_prefix<-list_param$out_prefix
95
  out_path<-list_param$out_path
96
  
97
  #Model and response variable can be changed without affecting the script
98
  prop_month<-0 #proportion retained for validation
99
  run_samp<-1
100
  
101
  #### STEP 2: PREPARE DATA
102
    
103
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
104
  LST_name<-lst_avg[j] # name of LST month to be matched
105
  data_month$LST<-data_month[[LST_name]]
106
  
107
  #Create formulas object from list of characters...
108
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #multiple arguments passed to lapply!!  
109

  
110
  #TMax to model...
111
  if (var=="TMAX"){   
112
    data_month$y_var<-data_month$TMax #Adding TMax as the variable modeled
113
  }
114
  if (var=="TMIN"){   
115
    data_month$y_var<-data_month$TMin #Adding TMin as the variable modeled
116
  }
117
  #Fit gam models using data and list of formulas
118
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
119
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
120
  names(mod_list)<-cname
121
  #Adding layer LST to the raster stack  
122
  
123
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
124
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
125
  LST<-subset(s_raster,LST_name)
126
  names(LST)<-"LST"
127
  s_raster<-addLayer(s_raster,LST)            #Adding current month
128
  
129
  #Now generate file names for the predictions...
130
  list_out_filename<-vector("list",length(mod_list))
131
  names(list_out_filename)<-cname  
132
  
133
  for (k in 1:length(list_out_filename)){
134
    #j indicate which month is predicted
135
    data_name<-paste(var,"_clim_month_",j,"_",cname[k],"_",prop_month,
136
                     "_",run_samp,sep="")
137
    raster_name<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
138
    list_out_filename[[k]]<-raster_name
139
  }
140
  
141
  #now predict values for raster image...
142
  rast_clim_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
143
  names(rast_clim_list)<-cname
144
  #Some models will not be predicted because of the lack of training data...remove empty (NULL) elements from the list of models
145
  rast_clim_list<-rast_clim_list[!sapply(rast_clim_list,is.null)] #remove NULL elements in list
146
  
147
  #Adding Kriging for Climatology options
148
  
149
  clim_xy<-coordinates(data_month)
150
  fitclim<-Krig(clim_xy,data_month$y_var,theta=1e5) #use TPS or krige 
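  #Note: theta is the range parameter of the covariance model used by fields::Krig (correlation length in the units of clim_xy).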
151
  #fitclim<-Krig(clim_xy,data_month$TMax,theta=1e5) #use TPS or krige 
152
  mod_krtmp1<-fitclim
153
  model_name<-"mod_kr"
154
  
155
  clim_rast<-interpolate(LST,fitclim) #interpolation using function from raster package
156
  #Saving kriged surface in raster images
157
  #data_name<-paste("clim_month_",j,"_",model_name,"_",prop_month,
158
  #                 "_",run_samp,sep="")
159
  #raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
160
  #writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
161
  
162
  #now climatology layer
163
  #clim_rast<-LST-bias_rast
164
  data_name<-paste(var,"_clim_month_",j,"_",model_name,"_",prop_month,
165
                   "_",run_samp,sep="")
166
  raster_name_clim<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
167
  writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
168
  
169
  #Adding to current objects
170
  mod_list[[model_name]]<-mod_krtmp1
171
  #rast_bias_list[[model_name]]<-raster_name_bias
172
  rast_clim_list[[model_name]]<-raster_name_clim
173
  
174
  #Prepare object to return
175
  clim_obj<-list(rast_clim_list,data_month,mod_list,list_formulas)
176
  names(clim_obj)<-c("clim","data_month","mod","formulas")
177
  
178
  save(clim_obj,file= file.path(out_path,paste("clim_obj_CAI_month_",j,"_",var,"_",out_prefix,".RData",sep="")))
179
  
180
  return(clim_obj) 
181
}
182
#
183

  
184
runClim_KGFusion<-function(j,list_param){
185
  
186
  #Make this a function with multiple argument that can be used by mcmapply??
187
  #Arguments: 
188
  #1)list_index: j 
189
  #2)covar_rast: covariates raster images used in the modeling
190
  #3)covar_names: names of input variables 
191
  #4)lst_avg: list of LST climatology names, may be removed later on
192
  #5)list_models: list input models for bias calculation
193
  #6)dst: data at the monthly time scale
194
  #7)var: TMAX or TMIN, variable being interpolated
195
  #8)y_var_name: output name, not used at this stage
196
  #9)out_prefix
197
  #
198
  #The output is a list produced by the function; its main elements are:
199
  #1) clim: list of output names for raster climatologies 
200
  #2) data_month: monthly training data for bias surface modeling
201
  #3) mod: list of model objects fitted 
202
  #4) formulas: list of formulas used in bias modeling
203
  
204
  ### PARSING INPUT ARGUMENTS
205
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
206
 
207
  options(error=function()traceback(2))
208
 
209
  index<-list_param$j
210
  s_raster<-list_param$covar_rast
211
  covar_names<-list_param$covar_names
212
  lst_avg<-list_param$lst_avg
213
  list_models<-list_param$list_models
214
  dst<-list_param$dst #monthly station dataset
215
  var<-list_param$var
216
  y_var_name<-list_param$y_var_name
217
  out_prefix<-list_param$out_prefix
218
  out_path<-list_param$out_path
219

  
220
  #Model and response variable can be changed without affecting the script
221
  prop_month<-0 #proportion retained for validation
222
  run_samp<-1 #This option can be added later on if/when needed
223
  
224
  #### STEP 2: PREPARE DATA
225
  
226
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
227
  LST_name<-lst_avg[j] # name of LST month to be matched
228
  data_month$LST<-data_month[[LST_name]]
229
  
230
  #Adding layer LST to the raster stack 
231
  covar_rast<-s_raster
232
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
233

  
234
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
235
  LST<-subset(s_raster,LST_name)
236
  names(LST)<-"LST"
237
  s_raster<-addLayer(s_raster,LST)            #Adding current month
238
 
239
  #LST bias to model...
240
  if (var=="TMAX"){
241
    data_month$LSTD_bias<-data_month$LST-data_month$TMax
242
    data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
243
  }
244
  if (var=="TMIN"){
245
    data_month$LSTD_bias<-data_month$LST-data_month$TMin
246
    data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
247
  }
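  #Clarification: the quantity assigned to y_var above is the bias between the monthly LST climatology and the
  #monthly station mean (LSTD_bias = LST - TMax or LST - TMin). The models fitted in STEP 3 predict this bias
  #surface, and the monthly temperature climatology is recovered further down as clim = LST - predicted bias,
  #i.e. the LST surface is corrected toward the station observations.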
248
  
249
  #### STEP3:  NOW FIT AND PREDICT  MODEL
250

  
251
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #multiple arguments passed to lapply!!
252

  
253
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
254
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
255
  names(mod_list)<-cname
256

  
257
  #Now generate file names for the predictions...
258
  list_out_filename<-vector("list",length(mod_list))
259
  names(list_out_filename)<-cname  
260

  
261
  for (k in 1:length(list_out_filename)){
262
    #j indicate which month is predicted, var indicates TMIN or TMAX
263
    data_name<-paste(var,"_bias_LST_month_",j,"_",cname[k],"_",prop_month,
264
                     "_",run_samp,sep="")
265
    raster_name<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
266
    list_out_filename[[k]]<-raster_name
267
  }
268

  
269
  print("predict raster model")
270
  #now predict values for raster image...by providing fitted model list, raster brick and list of output file names
271
  rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
272
  print("done")
273
  names(rast_bias_list)<-cname
274
  #Some models will not be predicted...remove them
275
  rast_bias_list<-rast_bias_list[!sapply(rast_bias_list,is.null)] #remove NULL elements in list
276

  
277
  mod_rast<-stack(rast_bias_list)  #stack of bias raster images from models
278
  rast_clim_list<-vector("list",nlayers(mod_rast))
279
  names(rast_clim_list)<-names(rast_bias_list)
280
  for (k in 1:nlayers(mod_rast)){
281
    clim_fus_rast<-LST-subset(mod_rast,k)
282
    data_name<-paste(var,"_clim_LST_month_",j,"_",names(rast_clim_list)[k],"_",prop_month,
283
                     "_",run_samp,sep="")
284
    raster_name<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
285
    rast_clim_list[[k]]<-raster_name
286
    writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)  #Wri
287
  }
288
  
289
  #### STEP 4:Adding Kriging for Climatology options
290
  bias_xy<-coordinates(data_month)
291
  #fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige 
292
  fitbias<-try(Krig(bias_xy,data_month$LSTD_bias,theta=1e5)) #use TPS or krige 
293
 
294
  model_name<-"mod_kr"
295

  
296
  if (inherits(fitbias,"Krig")){
297
    
298
    #Saving kriged surface in raster images
299
    bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
300
    data_name<-paste(var,"_bias_LST_month_",j,"_",model_name,"_",prop_month,
301
                     "_",run_samp,sep="")
302
    raster_name_bias<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
303
    writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
304
    
305
    #now climatology layer
306
    clim_rast<-LST-bias_rast
307
    data_name<-paste(var,"_clim_LST_month_",j,"_",model_name,"_",prop_month,
308
                     "_",run_samp,sep="")
309
    raster_name_clim<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
310
    writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
311
    #Adding to current objects
312
    mod_list[[model_name]]<-fitbias
313
    rast_bias_list[[model_name]]<-raster_name_bias
314
    rast_clim_list[[model_name]]<-raster_name_clim
315
  }
316
  
317
  if (inherits(fitbias,"try-error")){
318
    print("Error with FitBias")
319
    #NEED TO DEAL WITH THIS!!!
320
    
321
    #Adding to current objects
322
    mod_list[[model_name]]<-NULL
323
    rast_bias_list[[model_name]]<-NULL
324
    rast_clim_list[[model_name]]<-NULL
325
  }
326

  
327
  
328
  #### STEP 5: Prepare object and return
329
  clim_obj<-list(rast_bias_list,rast_clim_list,data_month,mod_list,list_formulas)
330
  names(clim_obj)<-c("bias","clim","data_month","mod","formulas")
331
 
332
  save(clim_obj,file= file.path(out_path,paste("clim_obj_month_",j,"_",var,"_",out_prefix,".RData",sep="")))
333
  return(clim_obj)
334
}
335

  
336
## Run function for kriging...?
337

  
338
#runGAMFusion <- function(i,list_param) {            # loop over dates
339
run_prediction_daily_deviation <- function(i,list_param) {            # loop over dates
340
  #This function produces daily predictions using the monthly predicted climatology surfaces.
341
  #The output includes both the daily predictions and the daily deviations (deltas) from the monthly step.
342
  
343
  #### Change this to allow explicitly arguments...
344
  #Arguments: 
345
  #1)index: loop list index for individual run/fit
346
  #2)clim_year_list: list of climatology files for all models...(12*nb of models)
347
  #3)sampling_obj: contains, data per date/fit, sampling information
348
  #4)dst: data at the monthly time scale
349
  #5)var: variable predicted -TMAX or TMIN
350
  #6)y_var_name: name of the variable predicted - dailyTMax, dailyTMin
351
  #7)out_prefix
352
  #8)out_path
353
  #
354
  #The output is a list produced by the function with the following elements:
355
  #1) list_temp: y_var_name
356
  #2) rast_clim_list: list of files for temperature climatology predictions
357
  #3) delta: list of files for temperature delta predictions
358
  #4) data_s: training data
359
  #5) data_v: testing data
360
  #6) sampling_dat: sampling information for the current prediction (date,proportion of holdout and sample number)
361
  #7) mod_kr: kriging delta fit, field package model object
362
  
363
  ### PARSING INPUT ARGUMENTS
364
 
365
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
366
  rast_clim_yearlist<-list_param$clim_yearlist
367
  sampling_obj<-list_param$sampling_obj
368
  ghcn.subsets<-sampling_obj$ghcn_data_day
369
  sampling_dat <- sampling_obj$sampling_dat
370
  sampling <- sampling_obj$sampling_index
371
  var<-list_param$var
372
  y_var_name<-list_param$y_var_name
373
  out_prefix<-list_param$out_prefix
374
  dst<-list_param$dst #monthly station dataset
375
  out_path <-list_param$out_path
376
  
377
  ##########
378
  # STEP 1 - Read in information and get training and testing stations
379
  ############# 
380
  
381
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
382
  month<-strftime(date, "%m")          # current month of the date being processed
383
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
384
  proj_str<-proj4string(dst) #get the local projection information from monthly data
385

  
386
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
387
  data_day<-ghcn.subsets[[i]]
388
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
389
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
390
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
391

  
392
  ind.training<-sampling[[i]]
393
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
394
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
395
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
396
  
397
  ns<-nrow(data_s)
398
  nv<-nrow(data_v)
399
  #i=1
400
  date_proc<-sampling_dat$date[i]
401
  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
402
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
403
  day<-as.integer(strftime(date_proc, "%d"))
404
  year<-as.integer(strftime(date_proc, "%Y"))
405
  
406
  ##########
407
  # STEP 2 - JOIN DAILY AND MONTHLY STATION INFORMATION
408
  ##########
409

  
410
  modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
411

  
412
  if (var=="TMIN"){
413
    modst$LSTD_bias <- modst$LST-modst$TMin; #That is the difference between the monthly LST mean and monthly station mean
414
  }
415
  if (var=="TMAX"){
416
    modst$LSTD_bias <- modst$LST-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean    
417
  }
418
  #This may be unnecessary since LSTD_bias is already in dst?? check the info
419
  #Some loss of observations: LSTD_bias for January has only 56 out of 66 possible TMIN!!! We may need to look into this issue
420
  #to avoid some losses of station data...
421
  
422
  #Clean up this part: make this a function call
423
  x<-as.data.frame(data_v)
424
  d<-as.data.frame(data_s)
425
  for (j in 1:nrow(x)){
426
    if (x$value[j]== -999.9){
427
      x$value[j]<-NA
428
    }
429
  }
430
  for (j in 1:nrow(d)){
431
    if (d$value[j]== -999.9){
432
      d$value[j]<-NA
433
    }
434
  }
435
 
436
  d[1] <- NULL
437
  x[1] <- NULL
438
 
439
  pos<-match("value",names(d)) #Find column with name "value"
440
  #names(d)[pos]<-c("dailyTmax")
441
  names(d)[pos]<-y_var_name
442
  pos<-match("value",names(x)) #Find column with name "value"
443
  names(x)[pos]<-y_var_name
444
  pos<-match("station",names(d)) #Find column with station ID
445
  names(d)[pos]<-c("id")
446
  pos<-match("station",names(x)) #Find column with name station ID
447
  names(x)[pos]<-c("id")
448
  pos<-match("station",names(modst)) #Find column with name station ID
449
  names(modst)[pos]<-c("id")       #modst contains the average tmax per month for every stations...
450
  
451

  
452
  mergeTry<-try(dmoday <-merge(modst,d,by="id",suffixes=c("",".y2")))
453
  
454

  
455
  
456
  xmoday <-merge(modst,x,by="id",suffixes=c("",".y2"))
457
 
458
  mod_pat<-glob2rx("*.y2")   #remove duplicate columns that have ".y2" in their names
459
  var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
460
  dmoday<-dmoday[,-var_pat] #dropping relevant columns
461

  
462
  mod_pat<-glob2rx("*.y2")   
463
  var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
464
  xmoday<-xmoday[,-var_pat] #Removing duplicate columns
465
  data_v<-xmoday
466
  
467
  #dmoday contains the daily tmax values for training with TMax/TMin being the monthly station tmax/tmin mean
468
  #xmoday contains the daily tmax values for validation with TMax/TMin being the monthly station tmax/tmin mean
469

  
470
  ##########
471
  # STEP 3 - interpolate daily delta across space
472
  ##########
473
  #Change to take into account TMin and TMax
474
  if (var=="TMIN"){
475
    daily_delta<-dmoday$dailyTmin-dmoday$TMin #daily delta is the difference between the daily value and the monthly mean
476
  }
477
  if (var=="TMAX"){
478
    daily_delta<-dmoday$dailyTmax-dmoday$TMax
479
  }
480

  
481
  daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
482
  fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
483
  mod_krtmp2<-fitdelta
484
  model_name<-paste("mod_kr","day",sep="_")
485
  data_s<-dmoday #use the merged daily/monthly data as the training dataset
486
  data_s$daily_delta<-daily_delta
487
  
488
  #########
489
  # STEP 4 - Calculate daily predictions - T(day) = clim(month) + delta(day)
490
  #########
491

  
492
  rast_clim_list<-rast_clim_yearlist[[mo]]  #select relevant month
493
  rast_clim_month<-raster(rast_clim_list[[1]])
494
  
495
  daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the bias surface...
496
  
497
  #Saving kriged surface in raster images
498
  data_name<-paste("daily_delta_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
499
                   "_",sampling_dat$run_samp[i],sep="")
500
  raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
501
  writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
502
  
503
  #Now predict daily after having selected the relevant month
504
  temp_list<-vector("list",length(rast_clim_list))  
505
  for (j in 1:length(rast_clim_list)){
506
    rast_clim_month<-raster(rast_clim_list[[j]])
507
    temp_predicted<-rast_clim_month+daily_delta_rast
508
    
509
    data_name<-paste(y_var_name,"_predicted_",names(rast_clim_list)[j],"_",
510
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
511
                     "_",sampling_dat$run_samp[i],sep="")
512
    raster_name<-file.path(out_path,paste(interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
513
    writeRaster(temp_predicted, filename=raster_name,overwrite=TRUE) 
514
    temp_list[[j]]<-raster_name
515
  }
516
  
517
  ##########
518
  # STEP 5 - Prepare output object to return
519
  ##########
520
  
521
 
522
  mod_krtmp2<-fitdelta
523
  model_name<-paste("mod_kr","day",sep="_")
524
  names(temp_list)<-names(rast_clim_list)
525

  
526
  
527
  delta_obj<-list(temp_list,rast_clim_list,raster_name_delta,data_s,
528
                  data_v,sampling_dat[i,],mod_krtmp2)
529
  
530
  obj_names<-c(y_var_name,"clim","delta","data_s","data_v",
531
               "sampling_dat",model_name)
532
  names(delta_obj)<-obj_names 
533

  
534
  save(delta_obj,file= file.path(out_path,paste("delta_obj_",var,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
535
                                "_",sampling_dat$run_samp[i],out_prefix,".RData",sep="")))
536
  return(delta_obj)
537
  
538
}
539
 
climate/research/world/interpolation/master_script_12182013.R
1
##################    Master script for temperature predictions  #######################################
2
############################ TMIN AND TMAX predictions ##########################################
3
#                           
4
##This script produces interpolated surfaces of TMIN and TMAX for specified processing region(s) given sets 
5
#of inputs and parameters.
6
#STAGE 1: LST climatology downloading and/or calculation
7
#STAGE 2: Covariates preparation for study/processing area: calculation of covariates (spect,land cover,etc.) and reprojection
8
#STAGE 3: Data preparation: meteorological station database query and extraction of covariates values from raster brick
9
#STAGE 4: Raster prediction: run interpolation method (-- gam fusion, gam CAI, ...) and perform validation 
10
#STAGE 5: Output analyses: assessment of results for specific dates...
11
#
12
#AUTHOR: Benoit Parmentier                                                                       
13
#DATE: 07/18/2013                                                                                 
14

  
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK$568--   
16

  
17
## TODO:
18
# Modify code for stage 1 and call python script from R in parallel
19
# Add options to run only specific stage + additional out_suffix?
20
# Make master script a function?
21
# Add log file for master script,add function to collect inputs and outputs
22
##################################################################################################
23

  
24
###Loading R library and packages   
25
library(RPostgreSQL)
26
library(maps)
27
library(maptools)
28
library(parallel)
29
library(gtools)                              # loading some useful tools 
30
library(mgcv)                                # GAM package by Simon Wood
31
library(sp)                                  # Spatial package with class definitions by Bivand et al.
32
library(spdep)                               # Spatial package with methods and spatial stat. by Bivand et al.
33
library(rgdal)                               # GDAL wrapper for R, spatial utilities
34
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
35
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
36
library(raster)                              # Hijmans et al. package for raster processing
37
library(rasterVis)
38
library(spgwr)
39
library(reshape)
40
library(plotrix)
41

  
42
######## PARAMETERS FOR WORK FLOW #########################
43
### Need to add documentation ###
44

  
45
#Adding command line arguments to use mpiexec
46
args<-commandArgs(TRUE)
47
script_path<-"/nobackup/aguzman4/climateLayers/climateCode/environmental-layers/climate/research/oregon/interpolation"
48
dataHome<-"/nobackup/aguzman4/climateLayers/interp/testdata/"
49
script_path2<-"/nobackup/aguzman4/climateLayers/climateCode/environmental-layers/climate/research/world/interpolation"
50

  
51

  
52
#CALLED FROM MASTER SCRIPT:
53

  
54
modis_download_script <- file.path(script_path,"modis_download_05142013.py") # LST modis download python script
55
clim_script <- file.path(script_path,"climatology_05312013.py") # LST climatology python script
56
grass_setting_script <- file.path(script_path,"grass-setup.R") #Set up system shell environment for python+GRASS
57
#source(file.path(script_path,"download_and_produce_MODIS_LST_climatology_06112013.R"))
58
source(file.path(script_path,"covariates_production_temperatures.R"))
59
source(file.path(script_path,"Database_stations_covariates_processing_function.R"))
60
source(file.path(script_path2,"GAM_fusion_analysis_raster_prediction_multisampling.R"))
61
source(file.path(script_path,"results_interpolation_date_output_analyses.R"))
62
#source(file.path(script_path,"results_covariates_database_stations_output_analyses_04012013.R")) #to be completed
63

  
64
#FUNCTIONS CALLED FROM GAM ANALYSIS RASTER PREDICTION ARE FOUND IN...
65

  
66
source(file.path(script_path,"sampling_script_functions.R"))
67
source(file.path(script_path2,"GAM_fusion_function_multisampling.R")) #Include GAM_CAI
68
source(file.path(script_path,"interpolation_method_day_function_multisampling.R")) #Include GAM_day
69
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics.R"))
70

  
71
#stages_to_run<-c(1,2,3,4,5) #May decide on another strategy later on...
72
stages_to_run<-c(0,0,0,4,0) #Run only raster fitting, prediction and assessment (providing LST averages, covar brick and met stations)
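#Convention (per the stages_to_run[k]==k tests below): set element k to k to run stage k and to 0 to skip it;
#stages 1, 2, 3 and 5 are guarded by if() tests, while stage 4 (raster prediction) is always called in this version.
#e.g. stages_to_run<-c(0,2,3,4,0) would run covariate production, data preparation and the raster prediction only.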
73
#If stage 2 is skipped then use previous covar object
74
covar_obj_file<-args[6]
75
#If stage 3 is skipped then use previous met_stations object
76
met_stations_outfiles_obj_file<-args[7]
77

  
78
var<-"TMAX"    
79
out_prefix<-args[1] #User defined output prefix
80
out_suffix<-args[2] #Regional suffix
81
out_suffix_modis<-args[3] #pattern to find tiles produced previously
82

  
83
interpolation_method<-c("gam_fusion")#,"gam_CAI") #Testing mem usage
84

  
85
out_path<-"/nobackup/aguzman4/climateLayers/output/"
86
out_path<-paste(out_path,out_prefix,sep="")
87

  
88
if(!file.exists(out_path)){
89
   dir.create(out_path)
90
}
91
  
92
lc_path<-"/nobackup/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/lc-consensus-global"
93
infile_modis_grid<-"/nobackup/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/modis_grid/modis_sinusoidal_grid_world.shp" #modis grid tiling system, global
94
infile_elev<-"/nobackup/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/dem-cgiar-srtm-1km-tif/srtm_1km.tif"  #elevation at 1km, global extent to be replaced by the new fused product 
95
infile_canheight<-"/nobackup/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/treeheight-simard2011/Simard_Pinto_3DGlobalVeg_JGR.tif"         #Canopy height, global extent
96
infile_distoc <- "/nobackup/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/distance_to_coast/GMT_intermediate_coast_distance_01d_rev.tif" #distance to coast, global extent at 0.01 deg
97

  
98

  
99
#infile_reg_outline <- "/nobackup/aguzman4/climateLayers/subsetWGS84/shapefiles/shp_40.00_-115.00_1929.shp"
100
infile_reg_outline <- args[4]
101

  
102
ref_rast_name<-args[5]
103

  
104
buffer_dist<-0 #not in use yet, must change climatology step to make sure additional tiles are downloaded and LST averages
105
               #must also be calculated for neighbouring tiles.
106

  
107
list_tiles_modis <- c("h08v04,h09v04") #tiles for Oregon
108
  
109
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
110
CRS_interp <-CRS_locs_WGS84 
111

  
112

  
113
out_region_name<-""
114
 
115
#The names of covariates can be changed...these names should be output/input from covar script!!!
116
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev_s","slope","aspect","CANHGHT","DISTOC")
117
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12")
118
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
119
               "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
120
               "nobs_09","nobs_10","nobs_11","nobs_12")
121
covar_names<-c(rnames,lc_names,lst_names)
122
  
123
list_val_range <-c("lon,-180,180","lat,-90,90","N,-1,1","E,-1,1","N_w,-1,1","E_w,-1,1","elev_s,0,6000","slope,0,90",
124
                   "aspect,0,360","DISTOC,-0,10000000","CANHGHT,0,255","LC1,0,100","LC5,0,100","mm_01,-15,50",
125
                   "mm_02,-15,50","mm_03,-15,50","mm_04,-15,50","mm_05,-15,50","mm_06,-15,50","mm_07,-15,50",
126
                   "mm_08,-15,50","mm_09,-15,50","mm_10,-15,50","mm_11,-15,50","mm_12,-15,50")
127

  
128
############ STAGE 1: LST Climatology ###############
129
#print("Starting stage 1")
130

  
131
#Parameters,Inputs from R to Python??
132
start_year = "2001"
133
end_year = "2010"
134
hdfdir <- "/nobackup/aguzman4/climate/interp/testdata/data_workflow/inputs/MOD11A1_tiles" #path directory to MODIS data
135
download=0 #download MODIS product if 1
136
clim_calc=1 #calculate lst averages/climatology if 1
137

  
138
list_param_download_clim_LST_script <- list(list_tiles_modis,start_year,end_year,hdfdir,
139
                                            var,grass_setting_script,modis_download_script, clim_script,
140
                                            download,clim_calc,out_suffix_modis)
141
names(list_param_download_clim_LST_script)<-c("list_tiles_modis","start_year","end_year","hdfdir",
142
                                              "var","grass_setting_script","modis_download_script","clim_script",
143
                                              "download","clim_calc","out_suffix_modis")
144
no_tiles <- length(unlist(strsplit(list_tiles_modis,",")))  # transform string into separate element in char vector
145

  
146
if (stages_to_run[1]==1){
147
  clim_production_obj <-lapply(1:no_tiles, list_param=list_param_download_clim_LST_script, download_calculate_MODIS_LST_climatology)
148
}
149
#Collect LST climatology list as output???
150

  
151
############ STAGE 2: Covariate production ################
152

  
153
#list of 18 parameters
154
list_param_covar_production<-list(var,out_path,lc_path,infile_modis_grid,infile_elev,infile_canheight,
155
                                  infile_distoc,list_tiles_modis,infile_reg_outline,CRS_interp,CRS_locs_WGS84,out_region_name,
156
                                  buffer_dist,list_val_range,out_suffix,out_suffix_modis,ref_rast_name,hdfdir,covar_names) 
157

  
158
names(list_param_covar_production)<-c("var","out_path","lc_path","infile_modis_grid","infile_elev","infile_canheight",
159
                                      "infile_distoc","list_tiles_modis","infile_reg_outline","CRS_interp","CRS_locs_WGS84","out_region_name",
160
                                      "buffer_dist","list_val_range","out_suffix","out_suffix_modis","ref_rast_name","hdfdir","covar_names") 
161

  
162
## Modify to store infile_covar_brick in output folder!!!
163
if (stages_to_run[2]==2){
164
  covar_obj <- covariates_production_temperature(list_param_covar_production)
165
  infile_covariates <- covar_obj$infile_covariates
166
  infile_reg_outline <- covar_obj$infile_reg_outline
167
  covar_names<- covar_obj$covar_names
168
}else{
169
  covar_obj <-load_obj(covar_obj_file)
170
  infile_covariates <- covar_obj$infile_covariates
171
  #Region passed as input from command line
172
  #infile_reg_outline <- covar_obj$infile_reg_outline
173
  covar_names<- covar_obj$covar_names
174
}
175

  
176
#Note that if stages_to_run[2]!=2, then use values defined at the beginning of the script for infile_covariates and infile_reg_outline
177

  
178
############# STAGE 3: Data preparation ###############
179
#specific to this stage
180
db.name <- "ghcn" #       # name of the Postgres database
181
range_years<-c("2010","2011") #right bound not included in the range!!
182
range_years_clim<-c("2001","2011") #right bound not included in the range!!
183
infile_ghncd_data <-"/nobackup/aguzman4/climateLayers/stations/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
184
#qc_flags_stations<-c("0","S")    #flags allowed for screening after the query from the GHCND??
185
qc_flags_stations<-c("0")    #flags allowed for screening after the query from the GHCND??
186

  
187
#infile_covariates and infile_reg_outline defined in stage 2 or at the start of script...
188

  
189
#list of 12 parameters for input in the function...
190

  
191
list_param_prep<-list(db.name,var,range_years,range_years_clim,infile_reg_outline,infile_ghncd_data,infile_covariates,CRS_locs_WGS84,out_path,covar_names,qc_flags_stations,out_prefix)
192
cnames<-c("db.name","var","range_years","range_years_clim","infile_reg_outline","infile_ghncd_data","infile_covariates","CRS_locs_WGS84","out_path","covar_names","qc_flags_stations","out_prefix")
193
names(list_param_prep)<-cnames
194

  
195
##### RUN SCRIPT TO GET STATION DATA WITH COVARIATES #####
196

  
197
if (stages_to_run[3]==3){
198
  list_outfiles<-database_covariates_preparation(list_param_prep)
199
}else{
200
  list_outfiles <-load_obj(met_stations_outfiles_obj_file) 
201
}
202

  
203
############### STAGE 4: RASTER PREDICTION #################
204
#Collect parameters from the previous stage: data preparation stage
205

  
206
#3 parameters from output
207
infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script
208
infile_daily<-list_outfiles$daily_covar_ghcn_data  #outfile3 from database_covar script
209
infile_locs<- list_outfiles$loc_stations_ghcn #outfile2? from database covar script
210

  
211
list_param_data_prep <- list(infile_monthly,infile_daily,infile_locs,infile_covariates,covar_names,var,out_prefix,CRS_locs_WGS84)
212
names(list_param_data_prep) <- c("infile_monthly","infile_daily","infile_locs","infile_covariates","covar_names","var","out_prefix","CRS_locs_WGS84")
213

  
214
#Set additional parameters
215
#Input for sampling function...
216
seed_number<- 100  #if seed zero then no seed?     
217
nb_sample<-1           #number of times random sampling must be repeated for every hold-out proportion
218
step<-0         
219
constant<-0             #if value 1 then use the same samples as date one for the all set of dates
220
prop_minmax<-c(0.3,0.3)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
221
#dates_selected<-c("20100101","20100102","20100103","20100901") # Note that the dates set must have a specific format: yyyymmdd
222
dates_selected<-"" # if empty string then predict for the full year specified earlier
223
screen_data_training<-FALSE #screen training data for NA and use same input training for all models fitted
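#Hypothetical multisampling configuration, for contrast with the single 30% hold-out above (commented out;
#the exact behaviour is defined in sampling_training_testing(), but per the comments above the hold-out
#proportions presumably run from prop_minmax[1] to prop_minmax[2] in increments of "step", each drawn nb_sample times):
# seed_number<-100; nb_sample<-5; step<-0.1; constant<-0; prop_minmax<-c(0.1,0.5)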
224

  
225
#Models to run...this can be changed for each run
226
#LC1: Evergreen/deciduous needleleaf trees
227

  
228
#Combination for test run:
229
#list_models<-c("y_var ~ s(elev_s)",
230
#                "y_var ~ s(LST)",
231
#                "y_var ~ s(lat,lon)+ s(elev_s)",
232
#                "y_var ~ te(lat,lon,elev_s)",
233
#                "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST)", 
234
#                "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC1)") 
235

  
236
list_models<-c("y_var ~ s(lat,lon) + s(elev_s)",
237
               "y_var ~ s(lat,lon) + s(elev_s) + s(LST)")#,
238
               #"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC1)")
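#Note: these strings are converted with as.formula() and fitted with mgcv::gam() in the prediction functions,
#so standard mgcv syntax applies: s(elev_s) is a 1-D smooth, s(lat,lon) a 2-D smooth of the coordinates and
#te() a tensor-product smooth. Any covariate listed in covar_names can be added the same way, e.g.
#"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST)".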
239

  
240
#Default name of LST avg to be matched               
241
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")  
242

  
243
#Collect all parameters in a list
244
list_param_raster_prediction<-list(list_param_data_prep,screen_data_training,
245
                                seed_number,nb_sample,step,constant,prop_minmax,dates_selected,
246
                                list_models,lst_avg,out_path,script_path,
247
                                interpolation_method)
248
names(list_param_raster_prediction)<-c("list_param_data_prep","screen_data_training",
249
                                "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
250
                                "list_models","lst_avg","out_path","script_path",
251
                                "interpolation_method")
252

  
253
raster_prediction_obj <-raster_prediction_fun(list_param_raster_prediction)
254

  
255
############## STAGE 5: OUTPUT ANALYSES ##################
256
date_selected_results<-c("20100101") 
257

  
258
list_param_results_analyses<-list(out_path,script_path,raster_prediction_obj,interpolation_method,
259
                                  infile_covariates,covar_names,date_selected_results,var,out_prefix)
260
names(list_param_results_analyses)<-c("out_path","script_path","raster_prediction_obj","interpolation_method",
261
                     "infile_covariates","covar_names","date_selected_results","var","out_prefix")
262
#plots_assessment_by_date<-function(j,list_param){
263
if (stages_to_run[5]==5){
264
  #source(file.path(script_path,"results_interpolation_date_output_analyses_05062013.R"))
265
  #Use lapply or mclapply
266
  summary_v_day <-plots_assessment_by_date(1,list_param_results_analyses)
267
  #Call as function...
268
}
269
  
270
###############   END OF SCRIPT   ###################
271
#####################################################
272

  
273
# #LAND COVER INFORMATION
274
# LC1: Evergreen/deciduous needleleaf trees
275
# LC2: Evergreen broadleaf trees
276
# LC3: Deciduous broadleaf trees
277
# LC4: Mixed/other trees
278
# LC5: Shrubs
279
# LC6: Herbaceous vegetation
280
# LC7: Cultivated and managed vegetation
281
# LC8: Regularly flooded shrub/herbaceous vegetation
282
# LC9: Urban/built-up
283
# LC10: Snow/ice
284
# LC11: Barren lands/sparse vegetation
285
# LC12: Open water
286

  
