Project

General

Profile

« Previous | Next » 

Revision 0cc28573

Added by Benoit Parmentier almost 9 years ago

initial commit from stage 2_3 master script from Alberto

View differences:

climate/research/oregon/interpolation/master_script_stage_2_3.R
1
##################    Master script for temperature predictions  #######################################
2
############################ TMIN AND TMAX predictions ##########################################
3
#                           
4
##This script produces interpolated surfaces of TMIN and TMAX for specified processing region(s) given sets 
5
#of inputs and parameters.
6
#STAGE 1: LST climatology downloading and/or calculation
7
#STAGE 2: Covariates preparation for study/processing area: calculation of covariates (spect,land cover,etc.) and reprojection
8
#STAGE 3: Data preparation: meteorological station database query and extraction of covariates values from raster brick
9
#STAGE 4: Raster prediction: run interpolation method (-- gam fusion, gam CAI, ...) and perform validation 
10
#STAGE 5: Output analyses: assessment of results for specific dates...
11
#
12
#AUTHOR: Benoit Parmentier                                                                       
13
#DATE: 11/29/2013                                                                                 
14

  
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK#568--   
16

  
17
## TODO:
18
# Modify code for stage 1 and call python script from R in parallel
19
# Add options to run only specific stage + additional out_suffix?
20
# Make master script a function?
21
# Add log file for master script,add function to collect inputs and outputs
22
##################################################################################################
23

  
24
### Loading R libraries and packages
25
library(RPostgreSQL)
26
library(maps)
27
library(maptools)
28
library(parallel)
29
library(gtools)                              # loading some useful tools 
30
library(mgcv)                                # GAM package by Simon Wood
31
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
32
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
33
library(rgdal)                               # GDAL wrapper for R, spatial utilities
34
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
35
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
36
library(raster)                              # Hijmans et al. package for raster processing
37
library(rasterVis)
38
library(spgwr)
39
library(reshape)
40
library(plotrix)
41

  
42
######## PARAMETERS FOR WORK FLOW #########################
43
### Need to add documentation ###
44

  
45
# Command-line arguments; the script is meant to be launched through mpiexec.
# Positions read further down in this script:
#   args[1]  out_prefix        args[2]  out_suffix      args[3] out_suffix_modis
#   args[4]  out_path (base)   args[5]  infile_reg_outline
#   args[6]  ref_rast_name     args[7]  process_LST
#   args[8]  covar_obj_file    args[9]  met_stations_outfiles_obj_file
#   args[10] num_cores         args[11] max_mem
args <- commandArgs(TRUE)

# Hard-coded locations of the two code trees and of the input layers
script_path <- "/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/oregon/interpolation"
script_path2 <- "/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation"
dataHome <- "/nobackupp6/aguzman4/climateLayers/inputLayers/"

# CALLED FROM MASTER SCRIPT:
# Paths of the helper scripts passed to stage 1 (only the paths are built here)
modis_download_script <- file.path(script_path, "modis_download.py")  # LST modis download python script
clim_script <- file.path(script_path, "climatology.py")               # LST climatology python script
grass_setting_script <- file.path(script_path, "grass-setup.R")       # Set up system shell environment for python+GRASS

# Source the function files in a fixed order. The first four are called from
# this master script; the next four hold the functions called from the GAM
# analysis raster prediction; the last one handles sub sampling of stations.
# Currently disabled:
#   file.path(script_path, "download_and_produce_MODIS_LST_climatology_06112013.R")
#   file.path(script_path, "results_covariates_database_stations_output_analyses_04012013.R")  # to be completed
invisible(lapply(
  c(
    file.path(script_path2, "covariates_production_temperature.R"),
    file.path(script_path2, "Database_stations_covariates_processing_function.R"),
    file.path(script_path2, "GAM_fusion_analysis_raster_prediction_multisampling.R"),
    file.path(script_path, "results_interpolation_date_output_analyses.R"),
    file.path(script_path, "sampling_script_functions.R"),
    file.path(script_path2, "GAM_fusion_function_multisampling.R"),                 # Includes Fusion and CAI methods
    file.path(script_path2, "interpolation_method_day_function_multisampling.R"),   # Includes GAM_day
    file.path(script_path, "GAM_fusion_function_multisampling_validation_metrics.R"),
    file.path(script_path2, "subsampling_data_func.R")
  ),
  source
))

# Stage switches: element i is compared against i further down, so stage i
# runs when stages_to_run[i] == i and is skipped when the element is 0.
# stages_to_run <- c(0, 2, 3, 4, 5)  # run only raster fitting, prediction and assessment
#                                    # (providing lst averages, covar brick and met stations)
# stages_to_run <- c(0, 2, 3, 0, 0)
stages_to_run <- c(0, 2, 3, 0, 0)
76

  
77
# Objects saved by a previous run, reloaded when the matching stage is skipped
covar_obj_file <- args[8]                  # used if stage 2 is skipped
met_stations_outfiles_obj_file <- args[9]  # used if stage 3 is skipped

var <- "TMAX"                # variable being interpolated
out_prefix <- args[1]
out_suffix <- args[2]        # regional suffix
out_suffix_modis <- args[3]  # pattern to find tiles produced previously

# interpolation_method <- c("gam_fusion")  # other options to be added later
interpolation_method <- c("gam_CAI")

# Output directory: the base path from the command line with out_prefix
# appended directly (no separator is inserted between the two parts).
out_path <- args[4]  # e.g. "/nobackup/aguzman4/climateLayers/output/"
out_path <- paste0(out_path, out_prefix)

# Create the output directory, including missing parent directories.
# showWarnings = FALSE keeps concurrent launches quiet when another process
# creates the directory between the existence test and dir.create().
# TODO (from original): decide what to do when the directory already exists.
if (!file.exists(out_path)) {
  dir.create(out_path, recursive = TRUE, showWarnings = FALSE)
}
# Fail here rather than later, when a stage first tries to write its outputs.
if (!file.exists(out_path)) {
  stop("Could not create output directory: ", out_path, call. = FALSE)
}
100
  
101
# Global input layers; every path is dataHome with the relative part appended
lc_path <- paste0(dataHome, "lc-consensus-global_combined")
infile_modis_grid <- paste0(dataHome, "/modis_grid/modis_sinusoidal_grid_world.shp")  # modis grid tiling system, global
infile_elev <- paste0(dataHome, "/GMTED2010/elevation_md_GMTED2010_md.tif")           # elevation at 1km, global extent, to be replaced by the new fused product
infile_canheight <- paste0(dataHome, "treeheight-simard2011/Simard_Pinto_3DGlobalVeg_JGR.tif")  # canopy height, global extent
infile_distoc <- paste0(dataHome, "distance_to_coast/GMT_intermediate_coast_distance_01d_rev.tif")  # distance to coast, global extent at 0.01 deg

infile_reg_outline <- args[5]  # input region outline defined by polygon
ref_rast_name <- args[6]

# Not in use yet: the climatology step must first be changed so that additional
# tiles are downloaded and LST averages are also calculated for neighbouring tiles.
buffer_dist <- 0

list_tiles_modis <- ""

# Projection used for the interpolation (MODIS sinusoidal)
CRS_interp <- "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
# CRS_interp <- "+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"

CRS_locs_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")  # Station coords WGS84

out_region_name <- ""

# Covariate names. These can be changed, but they should eventually be an
# output/input of the covariate script rather than being set here.
rnames <- c("x", "y", "lon", "lat", "N", "E", "N_w", "E_w",
            "elev_s", "slope", "aspect", "CANHGHT", "DISTOC")
lc_names <- paste0("LC", 1:12)                       # "LC1" ... "LC12"
lst_names <- c(sprintf("mm_%02d", 1:12),             # "mm_01" ... "mm_12"
               sprintf("nobs_%02d", 1:12))           # "nobs_01" ... "nobs_12"
covar_names <- c(rnames, lc_names, lst_names)

# Valid range per covariate, encoded as "name,min,max".
# An earlier, narrower setting (kept for reference) used "elev_s,0,6000" and
# "mm_XX,-15,50" for the twelve monthly LST layers; all other entries matched.
list_val_range <- c(
  "lon,-180,180", "lat,-90,90",
  "N,-1,1", "E,-1,1", "N_w,-1,1", "E_w,-1,1",
  "elev_s,-400,9000", "slope,0,90", "aspect,0,360",
  "DISTOC,-0,10000000", "CANHGHT,0,255",
  "LC1,0,100", "LC5,0,100",
  sprintf("mm_%02d,-60,70", 1:12)
)
139

  
140
############ STAGE 1: LST Climatology ###############

# Parameters/inputs handed from R to the Python scripts
start_year <- "2001"
end_year <- "2010"
hdfdir <- ""    # path directory to MODIS data
download <- 0   # download MODIS product if 1
clim_calc <- 1  # calculate lst averages/climatology if 1

# Named parameter list passed to the download/climatology function
list_param_download_clim_LST_script <- list(
  list_tiles_modis = list_tiles_modis,
  start_year = start_year,
  end_year = end_year,
  hdfdir = hdfdir,
  var = var,
  grass_setting_script = grass_setting_script,
  modis_download_script = modis_download_script,
  clim_script = clim_script,
  download = download,
  clim_calc = clim_calc,
  out_suffix_modis = out_suffix_modis
)

# Split the comma-separated tile string into elements and count them;
# an empty string yields zero tiles.
no_tiles <- length(unlist(strsplit(list_tiles_modis, ",")))

if (stages_to_run[1] == 1) {
  # The file defining this function is not sourced by this script at present
  # (its source() line is commented out), so fail with an explicit message.
  if (!exists("download_calculate_MODIS_LST_climatology", mode = "function")) {
    stop("Stage 1 requested but download_calculate_MODIS_LST_climatology() is not defined; ",
         "source the MODIS LST climatology script first.", call. = FALSE)
  }
  # seq_len() gives an empty index when no_tiles is 0, whereas 1:no_tiles
  # would iterate over c(1, 0) and call the function for tiles that do not exist.
  # Parallel alternative kept from the original:
  # clim_production_obj <- mclapply(1:2, list_param = list_param_download_clim_LST_script,
  #                                 download_calculate_MODIS_LST_climatology,
  #                                 mc.preschedule = FALSE, mc.cores = 2)
  clim_production_obj <- lapply(seq_len(no_tiles),
                                download_calculate_MODIS_LST_climatology,
                                list_param = list_param_download_clim_LST_script)
}
# Collect LST climatology list as output???
163

  
164
############ STAGE 2: Covariate production ################

# Set from the command line; FALSE if tiles are already in wgs84 grid.
# Note that the value arrives as a character string.
process_LST <- args[7]

# Named list of the 20 parameters expected by the covariate production function
list_param_covar_production <- list(
  var = var,
  out_path = out_path,
  lc_path = lc_path,
  infile_modis_grid = infile_modis_grid,
  infile_elev = infile_elev,
  infile_canheight = infile_canheight,
  infile_distoc = infile_distoc,
  list_tiles_modis = list_tiles_modis,
  infile_reg_outline = infile_reg_outline,
  CRS_interp = CRS_interp,
  CRS_locs_WGS84 = CRS_locs_WGS84,
  out_region_name = out_region_name,
  buffer_dist = buffer_dist,
  list_val_range = list_val_range,
  out_suffix = out_suffix,
  out_suffix_modis = out_suffix_modis,
  ref_rast_name = ref_rast_name,
  hdfdir = hdfdir,
  covar_names = covar_names,
  process_LST = process_LST
)

## Modify to store infile_covar_brick in output folder!!!
# Either produce the covariates now or reload the object saved by an earlier run
covar_obj <- if (stages_to_run[2] == 2) {
  covariates_production_temperature(list_param_covar_production)
} else {
  load_obj(covar_obj_file)
}

# In both cases the values stored in covar_obj replace the ones set earlier
# in the script for the region outline and the covariate names.
infile_covariates <- covar_obj$infile_covariates
infile_reg_outline <- covar_obj$infile_reg_outline
covar_names <- covar_obj$covar_names
191

  
192
############# STAGE 3: Data preparation ###############

# Settings specific to this stage
# db.name <- "ghcn_subset"         # name of the Postgres database
db.name <- "ghcn_gssd_90to01.db"   # name of the Postgres database
range_years <- c("1992", "1993")       # right bound not included in the range!!
range_years_clim <- c("1990", "2001")  # right bound not included in the range!!
infile_ghncd_data <- paste0(dataHome, "ghcn/v2.92-upd-2012052822/ghcnd_gssd_station_list.txt")  # text file of station locations from GHCND
qc_flags_stations <- c("0", "S")  # flags allowed for screening after the query from the GHCND??
# qc_flags_stations <- c("0")

# infile_covariates and infile_reg_outline come from the stage 2 object
# (freshly produced or reloaded).

# Sub-sampling parameters (monthly)
sub_sampling <- TRUE           # if TRUE then monthly stations data are resampled
sub_sample_rnd <- TRUE         # if TRUE use random sampling in addition to spatial sub-sampling
target_range_nb <- c(600, 700) # number of stations desired as min and max, convergence to min for now
dist_range <- c(0, 0.04165)    # distance range for pruning, usually (0,5) in km or 0,0.009*5 for degree
step_dist <- 0.00833

# Sub-sampling parameters (daily)
sub_sampling_day <- TRUE
target_range_daily_nb <- c(600, 700)  # number of stations desired as min and max, convergence to min for now

# Parameter list (19 entries) for the data preparation function; cnames holds
# the matching element names, in the same order.
list_param_prep <- list(db.name, var, range_years, range_years_clim, infile_reg_outline,
                        infile_ghncd_data, infile_covariates, CRS_locs_WGS84, out_path,
                        covar_names, qc_flags_stations, out_prefix,
                        sub_sampling, sub_sample_rnd, target_range_nb, dist_range,
                        step_dist, target_range_daily_nb, sub_sampling_day)
cnames <- c("db.name", "var", "range_years", "range_years_clim", "infile_reg_outline",
            "infile_ghncd_data", "infile_covariates", "CRS_locs_WGS84", "out_path",
            "covar_names", "qc_flags_stations", "out_prefix",
            "sub_sampling", "sub_sample_rnd", "target_range_nb", "dist_range",
            "step_dist", "target_range_daily_nb", "sub_sampling_day")
names(list_param_prep) <- cnames

##### RUN SCRIPT TO GET STATION DATA WITH COVARIATES #####

if (stages_to_run[3] == 3) {
  list_outfiles <- database_covariates_preparation(list_param_prep)
  print("Done 3")
}

# Nothing left to do when neither stage 4 nor stage 5 is requested.
if (!((stages_to_run[4] == 4) || (stages_to_run[5] == 5))) {
  quit()
}

# Reload the station outputs of a previous run only when stage 3 was skipped.
# Previously the object file was loaded unconditionally, overwriting the
# list_outfiles that stage 3 had just produced.
if (stages_to_run[3] != 3) {
  list_outfiles <- load_obj(met_stations_outfiles_obj_file)
}
239

  
240
############### STAGE 4: RASTER PREDICTION #################

# --- Inputs collected from the data preparation stage ---
# names(outfiles_obj) <- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data")
infile_monthly <- list_outfiles$monthly_covar_ghcn_data  # outfile4 from database_covar script
infile_daily <- list_outfiles$daily_covar_ghcn_data      # outfile3 from database_covar script
infile_locs <- list_outfiles$loc_stations_ghcn           # outfile2? from database covar script

list_param_data_prep <- list(
  infile_monthly = infile_monthly,
  infile_daily = infile_daily,
  infile_locs = infile_locs,
  infile_covariates = infile_covariates,
  covar_names = covar_names,
  var = var,
  out_prefix = out_prefix,
  CRS_locs_WGS84 = CRS_locs_WGS84
)

# --- Daily sampling settings (inputs of the sampling function; need reorganizing) ---
seed_number <- 100          # if seed zero then no seed?
nb_sample <- 1              # number of times random sampling is repeated for every hold-out proportion
# step <- 0.1
step <- 0
constant <- 0               # if value 1 then use the same samples as date one for the whole set of dates
prop_minmax <- c(0.3, 0.3)  # if prop_min=prop_max and step=0 then predictions are done for the number of dates...
# prop_minmax <- c(0.1, 0.7)

# --- Monthly sampling settings ---
seed_number_month <- 100
nb_sample_month <- 1            # number of times random sampling is repeated for every hold-out proportion
step_month <- 0
# step_month <- 0.1
constant_month <- 0             # if value 1 then use the same samples as date one for the whole set of dates
prop_minmax_month <- c(0, 0)    # if prop_min=prop_max and step=0 then predictions are done for the number of dates...

# --- Dates to predict (format yyyymmdd) ---
# dates_selected <- c("20100101","20100102","20100103","20100901")
# dates_selected <- c("20100101","20100102","20100301","20100302","20100501","20100502","20100701","20100702","20100901","20100902","20101101","20101102")
# dates_selected <- 2  # if integer then predict for every n-th date in the year specified earlier
dates_selected <- ""   # if empty string then predict for the full year specified earlier

screen_data_training <- FALSE  # screen training data for NA and use same input training for all models fitted
use_clim_image <- TRUE         # use predicted image as a base...rather than average Tmin at the station for delta
join_daily <- FALSE            # join monthly and daily station before calculating delta

# --- Models to run (can be changed for each run) ---
# LC1: Evergreen/deciduous needleleaf trees
# list_models <- c("y_var ~ lat*lon + elev_s + N_w*E_w",
#                  "y_var ~ lat*lon + elev_s + DISTOC",
#                  "y_var ~ lat*lon + elev_s + LST",
#                  "y_var ~ lat*lon + elev_s + LST + I(LST*LC1)")
# list_models <- c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=4)",
#                  "y_var ~ s(lat,lon,k=4) + s(elev_s,k=4) + s(LST,k=4)") #,
#                  "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + ti(LST,LC1) + s(LC1)")
# list_models <- c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3)",
#                  "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)")
list_models <- c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)")

# Second set of models, disabled here
# list_models2 <- c("y_var ~ s(lat,lon) + s(DISTOC)")
list_models2 <- NULL
interp_method2 <- NULL  # other options are "gwr" and "kriging"

# Default names of the LST averages to be matched: "mm_01" ... "mm_12"
lst_avg <- sprintf("mm_%02d", 1:12)

# Taken from the command line as character strings
num_cores <- args[10]  # number of cores, added for doing global runs
max_mem <- args[11]    # max number of cells to read in memory
# rasterOptions(maxmemory=1e+07,timer=TRUE)

# --- Gather everything into one named list for the prediction function ---
list_param_raster_prediction <- list(
  list_param_data_prep = list_param_data_prep,
  screen_data_training = screen_data_training,
  seed_number = seed_number,
  nb_sample = nb_sample,
  step = step,
  constant = constant,
  prop_minmax = prop_minmax,
  dates_selected = dates_selected,
  seed_number_month = seed_number_month,
  nb_sample_month = nb_sample_month,
  step_month = step_month,
  constant_month = constant_month,
  prop_minmax_month = prop_minmax_month,
  list_models = list_models,
  list_models2 = list_models2,
  interp_method2 = interp_method2,
  lst_avg = lst_avg,
  out_path = out_path,
  script_path = script_path,
  use_clim_image = use_clim_image,
  join_daily = join_daily,
  interpolation_method = interpolation_method,
  num_cores = num_cores,
  max_mem = max_mem
)

# debug(raster_prediction_fun)
# debug(debug_fun_test)
# debug_fun_test(list_param_raster_prediction)

if (stages_to_run[4] == 4) {
  raster_prediction_obj <- raster_prediction_fun(list_param_raster_prediction)
}
334

  
335
############## STAGE 5: OUTPUT ANALYSES ##################

# When stage 4 was skipped but stage 5 is requested, reload the prediction
# object written by a previous run.
if ((stages_to_run[4] == 0) && (stages_to_run[5] == 5)) {
  # Names of the daily and monthly response variables for the chosen variable;
  # stop early on an unsupported value instead of failing on an undefined name.
  y_var_name <- switch(var,
    TMAX = "dailyTmax",
    TMIN = "dailyTmin",
    stop("Unsupported value for var: ", var, call. = FALSE)
  )
  y_var_month <- switch(var,
    TMAX = "TMax",
    TMIN = "TMin"
  )

  # The original statement had the identifier y_var_name broken across two
  # lines ("y_var" / "_name"), which does not parse; it is rejoined here.
  raster_prediction_obj <- load_obj(file.path(
    out_path,
    paste("raster_prediction_obj_", interpolation_method, "_",
          y_var_name, out_prefix, ".RData", sep = "")
  ))
}

# Parameters for the per-date assessment plots
date_selected_results <- c("20100101")
list_param_results_analyses <- list(
  out_path = out_path,
  script_path = script_path,
  raster_prediction_obj = raster_prediction_obj,
  interpolation_method = interpolation_method,
  covar_obj = covar_obj,
  date_selected_results = date_selected_results,
  var = var,
  out_prefix = out_prefix
)

# plots_assessment_by_date <- function(j, list_param) {
if (stages_to_run[5] == 5) {
  # source(file.path(script_path, "results_interpolation_date_output_analyses_08052013.R"))
  # Only the first selected date is processed; use lapply or mclapply for more.
  summary_v_day <- plots_assessment_by_date(1, list_param_results_analyses)
  # Call as function...
}
363
  
364
###############   END OF SCRIPT   ###################
365
#####################################################
366

  
367
# #LAND COVER INFORMATION
368
# LC1: Evergreen/deciduous needleleaf trees
369
# LC2: Evergreen broadleaf trees
370
# LC3: Deciduous broadleaf trees
371
# LC4: Mixed/other trees
372
# LC5: Shrubs
373
# LC6: Herbaceous vegetation
374
# LC7: Cultivated and managed vegetation
375
# LC8: Regularly flooded shrub/herbaceous vegetation
376
# LC9: Urban/built-up
377
# LC10: Snow/ice
378
# LC11: Barren lands/sparse vegetation
379
# LC12: Open water

Also available in: Unified diff