Project

General

Profile

« Previous | Next » 

Revision faee7682

Added by Benoit Parmentier almost 9 years ago

initial commit stage 6 assessement of run automated

View differences:

climate/research/oregon/interpolation/master_script_stage_6.R
1
##################    Master script for temperature predictions  #######################################
2
############################ TMIN AND TMAX predictions ##########################################
3
#                           
4
##This script produces intperpolated surface of TMIN and TMAX for specified processing region(s) given sets 
5
#of inputs and parameters.
6
#STAGE 1: LST climatology downloading and/or calculation
7
#STAGE 2: Covariates preparation for study/processing area: calculation of covariates (spect,land cover,etc.) and reprojection
8
#STAGE 3: Data preparation: meteorological station database query and extraction of covariates values from raster brick
9
#STAGE 4: Raster prediction: run interpolation method (-- gam fusion, gam CAI, ...) and perform validation 
10
#STAGE 5: Output analyses: assessment of results for specific dates...
11
#
12
#AUTHOR: Benoit Parmentier                                                                       
13
#DATE: 11/29/2013                                                                                 
14

  
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK$568--   
16

  
17
## TODO:
18
# Modify code for stage 1 and call python script from R in parallel
19
# Add options to run only specific stage + additional out_suffix?
20
# Make master script a function?
21
# Add log file for master script,add function to collect inputs and outputs
22
##################################################################################################
23

  
24
###Loading R library and packages  ou 
25
library(RPostgreSQL)
26
library(maps)
27
library(maptools)
28
library(parallel)
29
library(gtools)                              # loading some useful tools 
30
library(mgcv)                                # GAM package by Simon Wood
31
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
32
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
33
library(rgdal)                               # GDAL wrapper for R, spatial utilities
34
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
35
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
36
library(raster)                              # Hijmans et al. package for raster processing
37
library(rasterVis)
38
library(spgwr)
39
library(reshape)
40
library(plotrix)
41

  
42
######## PARAMETERS FOR WORK FLOW #########################
43
### Need to add documentation ###
44

  
45
#Adding command line arguments to use mpiexec
46
args<-commandArgs(TRUE)
47
script_path<-"/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/oregon/interpolation"
48
dataHome<-"/nobackupp6/aguzman4/climateLayers/interp/testdata/"
49
script_path2<-"/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation"
50

  
51
#CALLED FROM MASTER SCRIPT:
52

  
53
modis_download_script <- file.path(script_path,"modis_download.py") # LST modis download python script
54
clim_script <- file.path(script_path,"climatology.py") # LST climatology python script
55
grass_setting_script <- file.path(script_path,"grass-setup.R") #Set up system shell environment for python+GRASS
56
#source(file.path(script_path,"download_and_produce_MODIS_LST_climatology_06112013.R"))
57
source(file.path(script_path2,"covariates_production_temperature.R"))
58
source(file.path(script_path2,"Database_stations_covariates_processing_function.R"))
59
source(file.path(script_path2,"GAM_fusion_analysis_raster_prediction_multisampling.R"))
60
source(file.path(script_path,"results_interpolation_date_output_analyses.R"))
61
#source(file.path(script_path,"results_covariates_database_stations_output_analyses_04012013.R")) #to be completed
62

  
63
#FUNCTIONS CALLED FROM GAM ANALYSIS RASTER PREDICTION ARE FOUND IN...
64

  
65
source(file.path(script_path,"sampling_script_functions.R"))
66
source(file.path(script_path2,"GAM_fusion_function_multisampling.R")) #Includes Fusion and CAI methods
67
source(file.path(script_path2,"interpolation_method_day_function_multisampling.R")) #Include GAM_day
68
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics.R"))
69

  
70
#stages_to_run<-c(0,2,3,4,5) #MRun only raster fitting, prediction and assessemnt (providing lst averages, covar brick and met stations)
71
#stages_to_run<-c(0,2,3,0,0)
72
stages_to_run<-c(0,0,0,4,5)
73

  
74
#If stage 2 is skipped then use previous covar object
75
covar_obj_file<-args[8]
76

  
77
#If stage 3 is skipped then use previous met_stations object
78
met_stations_outfiles_obj_file<-args[9]
79

  
80
var<-"TMAX" # variable being interpolated
81
out_prefix<-args[1]
82
out_suffix<-args[2]                             #Regional suffix
83
out_suffix_modis<-args[3]                       #pattern to find tiles produced previously     
84

  
85
#interpolation_method<-c("gam_fusion") #other otpions to be added later
86
interpolation_method<-c("gam_CAI")
87

  
88
out_path <- args[4] #"/nobackup/aguzman4/climateLayers/output/"
89

  
90
out_path <-paste(out_path,out_prefix,sep="")
91

  
92
if (!file.exists(out_path)){
93
  dir.create(out_path)
94
  #} else{
95
  #  out_path <-paste(out_path..)
96
}
97
  
98
lc_path<-"/nobackupp6/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/lc-consensus-global"
99
infile_modis_grid<-"/nobackupp6/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/modis_grid/modis_sinusoidal_grid_world.shp" #modis grid tiling system, global
100
infile_elev<-"/nobackupp6/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/dem-cgiar-srtm-1km-tif/srtm_1km.tif"  #elevation at 1km, global extent to be replaced by the new fused product 
101
infile_canheight<-"/nobackupp6/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/treeheight-simard2011/Simard_Pinto_3DGlobalVeg_JGR.tif"         #Canopy height, global extent
102
infile_distoc <- "/nobackupp6/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/distance_to_coast/GMT_intermediate_coast_distance_01d_rev.tif" #distance to coast, global extent at 0.01 deg
103

  
104
infile_reg_outline <- args[5]  #input region outline defined by polygon
105

  
106
ref_rast_name<- args[6]
107
buffer_dist<-0 #not in use yet, must change climatology step to make sure additional tiles are downloaded and LST averages
108
               #must also be calculated for neighbouring tiles.
109

  
110
list_tiles_modis <- "" 
111
  
112
CRS_interp<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs";
113
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
114

  
115
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
116

  
117
out_region_name<-""
118
  
119
#The names of covariates can be changed...these names should be output/input from covar script!!!
120
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev_s","slope","aspect","CANHGHT","DISTOC")
121
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12")
122
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
123
               "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
124
               "nobs_09","nobs_10","nobs_11","nobs_12")
125
covar_names<-c(rnames,lc_names,lst_names)
126
  
127
#list_val_range <-c("lon,-180,180","lat,-90,90","N,-1,1","E,-1,1","N_w,-1,1","E_w,-1,1","elev_s,0,6000","slope,0,90",
128
#                   "aspect,0,360","DISTOC,-0,10000000","CANHGHT,0,255","LC1,0,100","LC5,0,100","mm_01,-15,50",
129
#                   "mm_02,-15,50","mm_03,-15,50","mm_04,-15,50","mm_05,-15,50","mm_06,-15,50","mm_07,-15,50",
130
#                   "mm_08,-15,50","mm_09,-15,50","mm_10,-15,50","mm_11,-15,50","mm_12,-15,50")
131

  
132
list_val_range <-c("lon,-180,180","lat,-90,90","N,-1,1","E,-1,1","N_w,-1,1","E_w,-1,1","elev_s,0,9000","slope,0,90",
133
                   "aspect,0,360","DISTOC,-0,10000000","CANHGHT,0,255","LC1,0,100","LC5,0,100","mm_01,-60,70",
134
                   "mm_02,-60,70","mm_03,-60,70","mm_04,-60,70","mm_05,-60,70","mm_06,-60,70","mm_07,-60,70",
135
                   "mm_08,-60,70","mm_09,-60,70","mm_10,-60,70","mm_11,-60,70","mm_12,-60,70")
136

  
137
############ STAGE 1: LST Climatology ###############
138

  
139
#Parameters,Inputs from R to Python??
140
start_year = "2001"
141
end_year = "2010"
142
hdfdir <- "" #path directory to MODIS data
143
download=0 #download MODIS product if 1
144
clim_calc=1 #calculate lst averages/climatology if 1
145

  
146
list_param_download_clim_LST_script <- list(list_tiles_modis,start_year,end_year,hdfdir,
147
                                            var,grass_setting_script,modis_download_script, clim_script,
148
                                            download,clim_calc,out_suffix_modis)
149
names(list_param_download_clim_LST_script)<-c("list_tiles_modis","start_year","end_year","hdfdir",
150
                                              "var","grass_setting_script","modis_download_script","clim_script",
151
                                              "download","clim_calc","out_suffix_modis")
152
no_tiles <- length(unlist(strsplit(list_tiles_modis,",")))  # transform string into separate element in char vector
153

  
154
if (stages_to_run[1]==1){
155
  #clim_production_obj <-mclapply(1:2, list_param=list_param_download_clim_LST_script, download_calculate_MODIS_LST_climatology,mc.preschedule=FALSE,mc.cores = 2) #This is the end bracket from mclapply(...) statement
156
  clim_production_obj <-lapply(1:no_tiles, list_param=list_param_download_clim_LST_script, download_calculate_MODIS_LST_climatology) #,mc.preschedule=FALSE,mc.cores = 2) #This is the end bracket from mclapply(...) statement
157
  
158
}
159
#Collect LST climatology list as output???
160

  
161
############ STAGE 2: Covariate production ################
162
#If tiles are already in wgs84 grid
163
process_LST<-args[7] #FALSE
164

  
165
#list of 18 parameters
166
list_param_covar_production<-list(var,out_path,lc_path,infile_modis_grid,infile_elev,infile_canheight,
167
                                  infile_distoc,list_tiles_modis,infile_reg_outline,CRS_interp,CRS_locs_WGS84,out_region_name,
168
                                  buffer_dist,list_val_range,out_suffix,out_suffix_modis,ref_rast_name,hdfdir,covar_names,process_LST) 
169

  
170
names(list_param_covar_production)<-c("var","out_path","lc_path","infile_modis_grid","infile_elev","infile_canheight",
171
                                      "infile_distoc","list_tiles_modis","infile_reg_outline","CRS_interp","CRS_locs_WGS84","out_region_name",
172
                                   "buffer_dist","list_val_range","out_suffix","out_suffix_modis","ref_rast_name","hdfdir","covar_names","process_LST") 
173

  
174
## Modify to store infile_covar_brick in output folder!!!
175
if (stages_to_run[2]==2){
176
  covar_obj <- covariates_production_temperature(list_param_covar_production)
177
  infile_covariates <- covar_obj$infile_covariates
178
  infile_reg_outline <- covar_obj$infile_reg_outline
179
  covar_names<- covar_obj$covar_names
180
}else{
181
  covar_obj <-load_obj(covar_obj_file)
182
  infile_covariates <- covar_obj$infile_covariates
183
  infile_reg_outline <- covar_obj$infile_reg_outline
184
  covar_names<- covar_obj$covar_names
185
}  
186

  
187
#Note that if stages_to_run[2]!=2, then use values defined at the beginning of the script for infile_covariates and infile_reg_outline
188

  
189
############# STAGE 3: Data preparation ###############
190

  
191
#specific to this stage
192
#db.name <- "ghcn_subset"       # name of the Postgres database
193
db.name <- "ghcnd_gssd_subset"       # name of the Postgres database
194
range_years<-c("2010","2011") #right bound not included in the range!!
195
range_years_clim<-c("2000","2011") #right bound not included in the range!!
196
infile_ghncd_data <-"/nobackupp6/aguzman4/climateLayers/interp/testdata/data_workflow/inputs/ghcn/v2.92-upd-2012052822/ghcnd_gssd_station_list.txt"                              #This is the textfile of station locations from GHCND
197
qc_flags_stations<-c("0","S")    #flags allowed for screening after the query from the GHCND??
198
#qc_flags_stations<-c("0")    #flags allowed for screening after the query from the GHCND??
199

  
200
#infile_covariates and infile_reg_outline defined in stage 2 or at the start of script...
201

  
202
#Add subsampling parameters
203
sub_sampling <- TRUE  #if TRUE then monthly stations data are resampled
204
sub_sample_rnd <- TRUE #if  TRUE use random sampling  in addition to spatial  sub-sampling
205
target_range_nb <- c(1000,2000) # number of stations desired as min and max, convergence to  min  for  now
206
dist_range <- c(0,0.04165) #distance range  for pruning,  usually (0,5) in km or 0,0.009*5 for  degreee
207
step_dist <- 0.00833
208

  
209
#daily
210
target_range_daily_nb <- c(600,700) # number of stations desired as min and max, convergence to  min  for  now
211

  
212
#list of 12 parameters for input in the function...
213

  
214
#list_param_prep<-list(db.name,var,range_years,range_years_clim,infile_reg_outline,infile_ghncd_data,infile_covariates,CRS_locs_WGS84,out_path,covar_names,qc_flags_stations,out_prefix)
215
#cnames<-c("db.name","var","range_years","range_years_clim","infile_reg_outline","infile_ghncd_data","infile_covariates","CRS_locs_WGS84","out_path","covar_names","qc_flags_stations","out_prefix")
216

  
217
list_param_prep<-list(db.name,var,range_years,range_years_clim,infile_reg_outline,infile_ghncd_data,infile_covariates,CRS_locs_WGS84,out_path,covar_names,qc_flags_stations,out_prefix,
218
 sub_sampling,sub_sample_rnd,target_range_nb,dist_range,step_dist,target_range_daily_nb)
219
cnames<-c("db.name","var","range_years","range_years_clim","infile_reg_outline","infile_ghncd_data","infile_covariates","CRS_locs_WGS84","out_path","covar_names",
220
 "qc_flags_stations","out_prefix","sub_sampling","sub_sample_rnd","target_range_nb","dist_range","step_dist","target_range_daily_nb")
221

  
222
names(list_param_prep)<-cnames
223

  
224
##### RUN SCRIPT TO GET STATION DATA WITH COVARIATES #####
225

  
226
if (stages_to_run[3]==3){
227
  list_outfiles<-database_covariates_preparation(list_param_prep)
228
}
229
if ((stages_to_run[4]==4) | (stages_to_run[5]==5)){
230
    list_outfiles <-load_obj(met_stations_outfiles_obj_file)
231
}else{
232
    quit()
233
  }
234

  
235
############### STAGE 4: RASTER PREDICTION #################
236

  
237
#Prepare parameters for for raster prediction... 
238

  
239
#Collect parameters from the previous stage: data preparation stage
240

  
241
#3 parameters from output
242
infile_monthly<-list_outfiles$monthly_covar_ghcn_data #outile4 from database_covar script
243
infile_daily<-list_outfiles$daily_covar_ghcn_data  #outfile3 from database_covar script
244
infile_locs<- list_outfiles$loc_stations_ghcn #outfile2? from database covar script
245

  
246
#names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data")
247

  
248
list_param_data_prep <- list(infile_monthly,infile_daily,infile_locs,infile_covariates,covar_names,var,out_prefix,CRS_locs_WGS84)
249
names(list_param_data_prep) <- c("infile_monthly","infile_daily","infile_locs","infile_covariates","covar_names","var","out_prefix","CRS_locs_WGS84")
250

  
251
#Set additional parameters
252
#Input for sampling function...need to reorganize inputs!!!
253
seed_number<- 100  #if seed zero then no seed?     
254

  
255
nb_sample<-1           #number of time random sampling must be repeated for every hold out proportion
256
#step<- 0.1         
257
step<- 0         
258
constant<-0             #if value 1 then use the same samples as date one for the all set of dates
259
prop_minmax<-c(0.3,0.3)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
260
#prop_minmax<-c(0.1,0.7)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
261

  
262
seed_number_month <- 100
263
nb_sample_month <-1           #number of time random sampling must be repeated for every hold out proportion
264
step_month <-0         
265
#step_month <-0.1
266
constant_month <- 0             #if value 1 then use the same samples as date one for the all set of dates
267
prop_minmax_month <-c(0,0)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
268

  
269
#dates_selected<-c("20100101","20100102","20100103","20100901") # Note that the dates set must have a specific format: yyymmdd
270
#dates_selected<-c("20100101","20100102","20100301","20100302","20100501","20100502","20100701","20100702","20100901","20100902","20101101","20101102")
271
dates_selected<-"" # if empty string then predict for the full year specified earlier
272
#dates_selected <- 2 # if integer then predict for the evert n dat in the year specified earlier
273

  
274
screen_data_training<- FALSE #screen training data for NA and use same input training for all models fitted
275
use_clim_image <- TRUE # use predicted image as a base...rather than average Tmin at the station for delta
276
join_daily <- FALSE # join monthly and daily station before calucating delta
277

  
278
#Models to run...this can be changed for each run
279
#LC1: Evergreen/deciduous needleleaf trees
280

  
281
#list_models<-c("y_var ~ lat*lon + elev_s + N_w*E_w",
282
#               "y_var ~ lat*lon + elev_s + DISTOC",
283
#               "y_var ~ lat*lon + elev_s + LST",
284
#               "y_var ~ lat*lon + elev_s + LST + I(LST*LC1)")
285

  
286

  
287
#list_models2<-c("y_var ~ s(lat,lon) + s(DISTOC)")
288
list_models2 <- NULL
289
interp_method2 <- NULL #other options are "gwr" and "kriging"
290

  
291
#list_models<-c("y_var ~ s(lat,lon,k=4) + s(elev_s,k=4)",
292
#               "y_var ~ s(lat,lon,k=4) + s(elev_s,k=4) + s(LST,k=4)") #,
293
               #"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + ti(LST,LC1) + s(LC1)")
294

  
295
#list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3)",
296
#               "y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)")
297

  
298
list_models<-c("y_var ~ s(lat,lon,k=5) + s(elev_s,k=3) + s(LST,k=3)")
299

  
300
#Default name of LST avg to be matched               
301
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")  
302

  
303
#Add num_cores for doing global runs
304
num_cores<-args[10]
305

  
306
#max number of cells to read in memory
307
max_mem<-args[11]
308
#rasterOptions(maxmemory=1e+07,timer=TRUE)
309
out_path_daily <- file.path(out_path,range_years[1]) #<-c("1992","1993") #right bound not included in the range!!
310
out_path_clim <- file.path(out_path,paste("clim_",range_years_clim[1],"_",range_years_clim[2],sep="")) #<-c("1990","2001") #right bound not included in the range!!
311

  
312
#Collect all parameters in a list
313
list_param_raster_prediction<-list(list_param_data_prep,screen_data_training,
314
                                seed_number,nb_sample,step,constant,prop_minmax,dates_selected,
315
                                seed_number_month,nb_sample_month,step_month,constant_month,prop_minmax_month,
316
                                list_models,list_models2,interp_method2,lst_avg,out_path_daily,out_path_clim,script_path,use_clim_image,join_daily,
317
                                interpolation_method,num_cores,max_mem)
318
names(list_param_raster_prediction)<-c("list_param_data_prep","screen_data_training",
319
                                "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
320
                                "seed_number_month","nb_sample_month","step_month","constant_month","prop_minmax_month",
321
                                "list_models","list_models2","interp_method2","lst_avg","out_path","out_path_clim","script_path","use_clim_image","join_daily",
322
                                "interpolation_method","num_cores","max_mem")
323

  
324
#debug(raster_prediction_fun)
325
#debug(debug_fun_test)
326
#debug_fun_test(list_param_raster_prediction)
327

  
328
if (stages_to_run[4]==4){
329
  raster_prediction_obj <- raster_prediction_fun(list_param_raster_prediction)
330
}
331

  
332
############## STAGE 5: OUTPUT ANALYSES ##################
333
if ((stages_to_run[4]==0)&(stages_to_run[5]==5)){
334
   #Load from previous
335
   if (var=="TMAX"){
336
      y_var_name<-"dailyTmax"
337
      y_var_month<-"TMax"
338
   }
339
   if (var=="TMIN"){
340
     y_var_name<-"dailyTmin"
341
     y_var_month <-"TMin"
342
   }
343

  
344
   raster_prediction_obj<-load_obj(file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var
345
_name,out_prefix,".RData",sep="")))
346
}
347

  
348
date_selected_results<-c("20100101") 
349
list_param_results_analyses<-list(out_path,script_path,raster_prediction_obj,interpolation_method,
350
                                  covar_obj,date_selected_results,var,out_prefix)
351
names(list_param_results_analyses)<-c("out_path","script_path","raster_prediction_obj","interpolation_method",
352
                     "covar_obj","date_selected_results","var","out_prefix")
353
#plots_assessment_by_date<-function(j,list_param){
354
if (stages_to_run[5]==5){
355
  #source(file.path(script_path,"results_interpolation_date_output_analyses_08052013.R"))
356
  #Use lapply or mclapply
357
  summary_v_day <-plots_assessment_by_date(1,list_param_results_analyses)
358
  #Call as function...
359
}
360
  
361
###############   END OF SCRIPT   ###################
362
#####################################################
363

  
364
# #LAND COVER INFORMATION
365
# LC1: Evergreen/deciduous needleleaf trees
366
# LC2: Evergreen broadleaf trees
367
# LC3: Deciduous broadleaf trees
368
# LC4: Mixed/other trees
369
# LC5: Shrubs
370
# LC6: Herbaceous vegetation
371
# LC7: Cultivated and managed vegetation
372
# LC8: Regularly flooded shrub/herbaceous vegetation
373
# LC9: Urban/built-up
374
# LC10: Snow/ice
375
# LC11: Barren lands/sparse vegetation
376
# LC12: Open water
377

  

Also available in: Unified diff