Revision 304b6722
Added by Benoit Parmentier over 11 years ago
climate/research/oregon/interpolation/master_script_temp.R | ||
---|---|---|
10 | 10 |
#STAGE 5: Output analyses: assessment of results for specific dates... |
11 | 11 |
# |
12 | 12 |
#AUTHOR: Benoit Parmentier |
13 |
#DATE: 07/15/2013
|
|
13 |
#DATE: 07/17/2013
|
|
14 | 14 |
|
15 | 15 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK$568-- |
16 | 16 |
|
... | ... | |
42 | 42 |
######## PARAMETERS FOR WORK FLOW ######################### |
43 | 43 |
### Need to add documentation ### |
44 | 44 |
|
45 |
script_path<-"/home/parmentier/Data/IPLANT_project/env_layers_scripts/" |
|
45 |
#/data/project/layers/commons/data_workflow : this directory contains the input data and scripts |
|
46 |
|
|
47 |
script_path<-"/data/project/layers/commons/data_workflow/env_layers_scripts/" |
|
46 | 48 |
|
47 | 49 |
##SCRIPT USED FOR THE PREDICTIONS: Source or list all scripts here to avoid confusion on versions being run!!!! |
48 | 50 |
|
49 |
#source(file.path(script_path,"master_script_temp_07092013.R")) #Master script can be run directly...
|
|
51 |
#source(file.path(script_path,"master_script_temp_07162013.R")) #Master script can be run directly...
|
|
50 | 52 |
|
51 | 53 |
#CALLED FROM MASTER SCRIPT: |
52 | 54 |
|
... | ... | |
54 | 56 |
clim_script <- file.path(script_path,"climatology_05312013.py") # LST climatology python script |
55 | 57 |
grass_setting_script <- file.path(script_path,"grass-setup.R") #Set up system shell environment for python+GRASS |
56 | 58 |
#source(file.path(script_path,"download_and_produce_MODIS_LST_climatology_06112013.R")) |
57 |
source(file.path(script_path,"covariates_production_temperatures_07052013.R"))
|
|
59 |
source(file.path(script_path,"covariates_production_temperatures_07172013.R"))
|
|
58 | 60 |
source(file.path(script_path,"Database_stations_covariates_processing_function_06112013.R")) |
59 |
source(file.path(script_path,"GAM_fusion_analysis_raster_prediction_multisampling_07142013.R"))
|
|
61 |
source(file.path(script_path,"GAM_fusion_analysis_raster_prediction_multisampling_07172013.R"))
|
|
60 | 62 |
source(file.path(script_path,"results_interpolation_date_output_analyses_06112013.R")) |
61 | 63 |
#source(file.path(script_path,"results_covariates_database_stations_output_analyses_04012013.R")) #to be completed |
62 | 64 |
|
... | ... | |
68 | 70 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_05062013.R")) |
69 | 71 |
|
70 | 72 |
#stages_to_run<-c(1,2,3,4,5) #May decide on antoher strategy later on... |
71 |
stages_to_run<-c(0,2,3,4,5) #May decide on antoher strategy later on... |
|
73 |
#stages_to_run<-c(0,2,3,4,5) #May decide on antoher strategy later on... |
|
74 |
stages_to_run<-c(0,2,3,4,5) #MRun only raster fitting, prediction and assessemnt (providing lst averages, covar brick and met stations) |
|
75 |
|
|
72 | 76 |
|
73 | 77 |
var<-"TMAX" # variable being interpolated |
74 |
out_prefix<-"_365d_gam_day_lst_comb4_07152013" #User defined output prefix
|
|
75 |
out_suffix<-"_OR_07152013"
|
|
76 |
out_suffix_modis <-"_05302013" #use tiles produce previously
|
|
78 |
out_prefix<-"_365d_gam_fus_lst_test_run_07172013" #User defined output prefix
|
|
79 |
out_suffix<-"_OR_07172013" #Regional suffix
|
|
80 |
out_suffix_modis <-"_05302013" #pattern to find tiles produced previously
|
|
77 | 81 |
|
78 | 82 |
#interpolation_method<-c("gam_fusion","gam_CAI","gam_daily") #other otpions to be added later |
79 | 83 |
#interpolation_method<-c("gam_CAI") #other otpions to be added later |
80 |
#interpolation_method<-c("gam_fusion") #other otpions to be added later
|
|
81 |
interpolation_method<-c("gam_daily") #other otpions to be added later |
|
84 |
interpolation_method<-c("gam_fusion") #other otpions to be added later |
|
85 |
#interpolation_method<-c("gam_daily") #other otpions to be added later
|
|
82 | 86 |
#interpolation_method<-c("kriging_daily") #other otpions to be added later |
83 | 87 |
#interpolation_method<-c("gwr_daily") #other otpions to be added later |
84 | 88 |
|
85 | 89 |
#out_path <- paste("/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data", |
86 | 90 |
# out_prefix,"/",sep="") |
87 |
out_path<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/output_data"
|
|
91 |
out_path<-"/data/project/layers/commons/data_workflow/output_data"
|
|
88 | 92 |
out_path <-paste(out_path,out_prefix,sep="") |
89 | 93 |
|
90 | 94 |
if (!file.exists(out_path)){ |
... | ... | |
93 | 97 |
# out_path <-paste(out_path..) |
94 | 98 |
} |
95 | 99 |
|
96 |
lc_path<-"/home/layers/data/land-cover/lc-consensus-global" |
|
97 |
infile_modis_grid<-"/data/project/layers/commons/modis/modis_sinusoidal/modis_sinusoidal_grid_world.shp" #modis grid tiling system, global |
|
98 |
infile_elev<-"/home/layers/data/terrain/dem-cgiar-srtm-1km-tif/srtm_1km.tif" #elevation at 1km, global extent to be replaced by the new fused product |
|
99 |
infile_canheight<-"/home/layers/data/land-cover/treeheight-simard2011/Simard_Pinto_3DGlobalVeg_JGR.tif" #Canopy height, global extent |
|
100 |
infile_distoc <- "/data/project/layers/commons/distance_to_coast/GMT_intermediate_coast_distance_01d_rev.tif" #distance to coast, global extent at 0.01 deg |
|
101 |
#infile_covariates<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script and used in stage 3 and stage 4 |
|
102 |
#infile_reg_outline<- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/outline_venezuela_region__VE_01292013.shp" |
|
103 |
#infile_covariates<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/covariates__venezuela_region_TMIN__VE_03192013.tif" #covariates stack for TMIN |
|
100 |
lc_path<-"/data/project/layers/commons/data_workflow/inputs/lc-consensus-global" |
|
101 |
infile_modis_grid<-"/data/project/layers/commons/data_workflow/inputs/modis_grid/modis_sinusoidal_grid_world.shp" #modis grid tiling system, global |
|
102 |
infile_elev<-"/data/project/layers/commons/data_workflow/inputs/dem-cgiar-srtm-1km-tif/srtm_1km.tif" #elevation at 1km, global extent to be replaced by the new fused product |
|
103 |
infile_canheight<-"/data/project/layers/commons/data_workflow/inputs/treeheight-simard2011/Simard_Pinto_3DGlobalVeg_JGR.tif" #Canopy height, global extent |
|
104 |
infile_distoc <- "/data/project/layers/commons/data_workflow/inputs/distance_to_coast/GMT_intermediate_coast_distance_01d_rev.tif" #distance to coast, global extent at 0.01 deg |
|
104 | 105 |
#infile_covariates<- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/covariates_Oregon_region_TMAX__OR_04052013.tif" #Oregon covar TMAX from earlier codes...for continuity |
105 | 106 |
#infile_reg_outline="" #input region outline defined by polygon: none for Venezuela |
106 | 107 |
#This is the shape file of outline of the study area #It is an input/output of the covariate script |
107 |
infile_reg_outline <- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon
|
|
108 |
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon
|
|
108 | 109 |
#ref_rast_name<-"" #local raster name defining resolution, exent, local projection--. set on the fly?? |
109 | 110 |
#this may be redundant with infile_reg_outline |
110 |
ref_rast_name<-"/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_03142013/mean_day244_rescaled.rst" #local raster name defining resolution, exent: oregon
|
|
111 |
ref_rast_name<-"/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/mean_day244_rescaled.rst" #local raster name defining resolution, exent: oregon
|
|
111 | 112 |
buffer_dist<-0 #not in use yet, must change climatology step to make sure additional tiles are downloaded and LST averages |
112 | 113 |
#must also be calculated for neighbouring tiles. |
113 | 114 |
|
115 |
#If stage 2 is skipped then use previous covar object |
|
116 |
covar_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07162013/covar_obj__365d_gam_fus_lst_test_run_07162013.RData" |
|
114 | 117 |
#covar_names see stage 2 |
115 |
|
|
118 |
#If stage 3 is skipped then use previous met_stations object |
|
119 |
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07162013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07162013.RData" |
|
116 | 120 |
#list_tiles_modis <- c("h11v08,h11v07,h12v07,h12v08,h10v07,h10v08") #tile for Venezuela and surrounding area |
117 | 121 |
list_tiles_modis <- c("h08v04,h09v04") #tiles for Oregon |
118 | 122 |
|
119 | 123 |
#CRS_interp<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"; |
120 | 124 |
CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
121 |
#CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
122 | 125 |
|
123 | 126 |
#"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +datum=NAD83 +units=m +no_defs +ellps=GRS80" |
124 | 127 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
... | ... | |
128 | 131 |
#The names of covariates can be changed...these names should be output/input from covar script!!! |
129 | 132 |
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev_s","slope","aspect","CANHGHT","DISTOC") |
130 | 133 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12") |
131 |
#lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10") #use older version for continuity check to be changed |
|
132 | 134 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
133 | 135 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
134 | 136 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
... | ... | |
142 | 144 |
############ STAGE 1: LST Climatology ############### |
143 | 145 |
|
144 | 146 |
#Parameters,Inputs from R to Python?? |
145 |
#list_tiles_modis <- c("h08v04,h09v04") #tiles for Oregon #defined above... |
|
146 | 147 |
start_year = "2001" |
147 | 148 |
end_year = "2010" |
148 |
#hdfdir = '/home/layers/commons/modis/MOD11A1_tiles' #destination file where hdf files are stored locally after download. |
|
149 |
hdfdir <- "/data/project/layers/commons/modis/MOD11A1_tiles" |
|
150 |
download=0 |
|
151 |
clim_calc=1 |
|
149 |
hdfdir <- "/data/project/layers/commons/data_workflow/inputs/MOD11A1_tiles" #path directory to MODIS data |
|
150 |
download=0 #download MODIS product if 1 |
|
151 |
clim_calc=1 #calculate lst averages/climatology if 1 |
|
152 | 152 |
|
153 | 153 |
list_param_download_clim_LST_script <- list(list_tiles_modis,start_year,end_year,hdfdir, |
154 | 154 |
var,grass_setting_script,modis_download_script, clim_script, |
... | ... | |
182 | 182 |
infile_covariates <- covar_obj$infile_covariates |
183 | 183 |
infile_reg_outline <- covar_obj$infile_reg_outline |
184 | 184 |
covar_names<- covar_obj$covar_names |
185 |
}else{ |
|
186 |
covar_obj <-load_obj(covar_obj_file) |
|
187 |
infile_covariates <- covar_obj$infile_covariates |
|
188 |
infile_reg_outline <- covar_obj$infile_reg_outline |
|
189 |
covar_names<- covar_obj$covar_names |
|
190 |
|
|
185 | 191 |
} |
186 | 192 |
|
187 | 193 |
#Note that if stages_to_run[2]!=2, then use values defined at the beginning of the script for infile_covariates and infile_reg_outline |
188 | 194 |
|
189 | 195 |
############# STAGE 3: Data preparation ############### |
190 | 196 |
|
191 |
|
|
192 | 197 |
#specific to this stage |
193 | 198 |
db.name <- "ghcn" # name of the Postgres database |
194 | 199 |
range_years<-c("2010","2011") #right bound not included in the range!! |
195 | 200 |
range_years_clim<-c("2000","2011") #right bound not included in the range!! |
196 |
infile_ghncd_data <-"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND
|
|
201 |
infile_ghncd_data <-"/data/project/layers/commons/data_workflow/inputs/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND
|
|
197 | 202 |
#qc_flags_stations<-c("0","S") #flags allowed for screening after the query from the GHCND?? |
198 | 203 |
qc_flags_stations<-c("0") #flags allowed for screening after the query from the GHCND?? |
199 | 204 |
|
... | ... | |
207 | 212 |
|
208 | 213 |
##### RUN SCRIPT TO GET STATION DATA WITH COVARIATES ##### |
209 | 214 |
|
210 |
list_outfiles<-database_covariates_preparation(list_param_prep) |
|
215 |
if (stages_to_run[3]==3){ |
|
216 |
list_outfiles<-database_covariates_preparation(list_param_prep) |
|
217 |
}else{ |
|
218 |
list_outfiles <-load_obj(met_stations_outfiles_obj_file) |
|
219 |
} |
|
211 | 220 |
|
212 | 221 |
############### STAGE 4: RASTER PREDICTION ################# |
213 | 222 |
|
... | ... | |
234 | 243 |
prop_minmax<-c(0.3,0.3) #if prop_min=prop_max and step=0 then predicitons are done for the number of dates... |
235 | 244 |
#dates_selected<-c("20100101","20100102","20100103","20100901") # Note that the dates set must have a specific format: yyymmdd |
236 | 245 |
dates_selected<-"" # if empty string then predict for the full year specified earlier |
237 |
screen_data_training<-FALSE |
|
246 |
screen_data_training<-FALSE #screen training data for NA and use same input training for all models fitted
|
|
238 | 247 |
|
239 |
#Models to run...this can be change for each run |
|
248 |
#Models to run...this can be changed for each run
|
|
240 | 249 |
#LC1: Evergreen/deciduous needleleaf trees |
241 | 250 |
|
242 |
#Combination 4: for paper baseline=s(lat,lon) |
|
243 |
list_models<-c("y_var ~ s(lat,lon)", |
|
244 |
"y_var ~ s(lat,lon) + s(elev_s)", |
|
245 |
"y_var ~ s(lat,lon) + s(N_w)", |
|
246 |
"y_var ~ s(lat,lon) + s(E_w)", |
|
247 |
"y_var ~ s(lat,lon) + s(LST)", |
|
248 |
"y_var ~ s(lat,lon) + s(DISTOC)", |
|
249 |
"y_var ~ s(lat,lon) + s(LC1)", |
|
250 |
"y_var ~ s(lat,lon) + s(CANHGHT)") |
|
251 |
# "y_var ~ s(lat,lon) + s(LST) + ti(LST,LC1)", |
|
252 |
# "y_var ~ s(lat,lon) + s(LST) + ti(LST,CANHGHT)") |
|
253 |
|
|
254 |
#Combination 3: for paper baseline=s(lat,lon)+s(elev) |
|
255 |
#list_models<-c("y_var ~ s(lat,lon) + s(elev_s)", |
|
256 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(N_w)", |
|
257 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(E_w)", |
|
258 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(LST)", |
|
259 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(DISTOC)", |
|
260 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(LC1)", |
|
261 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(CANHGHT)", |
|
262 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(LST) + ti(LST,LC1)", |
|
263 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(LST) + ti(LST,CANHGHT)") |
|
264 |
|
|
265 |
# list_models<-c("y_var ~ s(elev_s)", |
|
266 |
# "y_var ~ s(LST)", |
|
267 |
# "y_var ~ s(elev_s,LST)", |
|
268 |
# "y_var ~ s(lat) + s(lon)+ s(elev_s)", |
|
269 |
# "y_var ~ s(lat,lon,elev_s)", |
|
270 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST)", |
|
271 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC6)", |
|
272 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + ti(LC6,LST)", |
|
273 |
# "y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(DISTOC)") |
|
274 |
|
|
275 |
#testing combination 3, part 1 for GWR day |
|
276 |
#list_models<-c("y_var ~ lat*lon + elev_s", |
|
277 |
# "y_var ~ lat*lon + elev_s + N_w", |
|
278 |
# "y_var ~ lat*lon + elev_s + E_w") |
|
279 |
|
|
280 |
#testing combination 3, part 2 for GWR day |
|
281 |
#list_models<-c("y_var ~ lat*lon + elev_s + LST", |
|
282 |
# "y_var ~ lat*lon + elev_s + DISTOC", |
|
283 |
# "y_var ~ lat*lon + elev_s + LC1") |
|
284 |
|
|
285 |
#list_models<-c("y_var ~ lat*lon + elev_s", |
|
286 |
# "y_var ~ lat*lon + elev_s + N_w", |
|
287 |
# "y_var ~ lat*lon + elev_s + E_w", |
|
288 |
# "y_var ~ lat*lon + elev_s + LST", |
|
289 |
# "y_var ~ lat*lon + elev_s + DISTOC", |
|
290 |
# "y_var ~ lat*lon + elev_s + LC1", |
|
291 |
# "y_var ~ lat*lon + elev_s + CANHGHT", |
|
292 |
# "y_var ~ lat*lon + elev_s + LST + I(LST*LC1)", |
|
293 |
# "y_var ~ lat*lon + elev_s + LST + I(LST*CANHGHT)") |
|
251 |
#Combination for test run: |
|
294 | 252 |
|
253 |
list_models<-c("y_var ~ s(elev_s)", |
|
254 |
"y_var ~ s(LST)", |
|
255 |
"y_var ~ s(lat,lon)+ s(elev_s)", |
|
256 |
"y_var ~ te(lat,lon,elev_s)", |
|
257 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST)", |
|
258 |
"y_var ~ s(lat,lon) + s(elev_s) + s(N_w,E_w) + s(LST) + s(LC1)") |
|
295 | 259 |
|
296 | 260 |
#Default name of LST avg to be matched |
297 | 261 |
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12") |
Also available in: Unified diff
master script, preparing data for Oregon for the test run