Revision c447e6b8
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/master_script_stage_7.R | ||
---|---|---|
14 | 14 |
|
15 | 15 |
#AUTHOR: Benoit Parmentier |
16 | 16 |
#CREATED ON: 01/01/2016 |
17 |
#MODIFIED ON: 04/08/2016
|
|
17 |
#MODIFIED ON: 04/11/2016
|
|
18 | 18 |
#PROJECT: NCEAS IPLANT: Environment and Organisms |
19 | 19 |
|
20 | 20 |
#First source these files: |
... | ... | |
29 | 29 |
# |
30 | 30 |
################################################################################################## |
31 | 31 |
|
32 |
### PARAMETERS DEFINED IN THE SCRIPT |
|
33 |
#There are 21 parameters, 1 constant and 8 arguments (drawn from the parameters) for the Rscript call. |
|
34 |
#The arguments are passed directly from Rscript: |
|
35 |
#var <- args[1] # variable being interpolated #param 1, arg 1 |
|
36 |
#in_dir1 <- args[2] # This is the output directory containing global prediction e.g./nobackupp6/aguzman4/climateLayers/out/ param 5, arg 2 |
|
37 |
#region_name <- args[3] # region e.g. "reg4" param 6, arg 3 |
|
38 |
#out_prefix <- args[4] # this is used in creating an output directory,include region name? param 7, arg 4 |
|
39 |
#out_dir <- args[5] # output parent dir, can be home dir or other, param 8, arg 5) |
|
40 |
#create_out_dir_param <- args[6] # if TRUE create a output from "output"+out_prefix param 9, arg 6 |
|
41 |
#list_year_predicted <- args[7] # enter as list but currently runs on the first element of the list, param 10, arg 7 |
|
42 |
#num_cores <- args[8] #number of cores used # param 13, arg 8 |
|
43 |
#max_mem <- args[9] # maximum memory, param 21 |
|
44 |
|
|
32 | 45 |
###Loading R libraries and packages |
33 | 46 |
library(RPostgreSQL) |
34 | 47 |
library(maps) |
... | ... | |
60 | 73 |
|
61 | 74 |
#script_path <- "/home/parmentier/Data/IPLANT_project/env_layers_scripts" |
62 | 75 |
script_path <- "/nobackupp8/bparmen1/env_layers_scripts" #path to script |
63 |
function_mosaicing_functions <- "global_run_scalingup_mosaicing_function_04102016.R" #PARAM12
|
|
64 |
function_mosaicing <-"global_run_scalingup_mosaicing_04082016.R"
|
|
76 |
function_mosaicing_functions <- "global_run_scalingup_mosaicing_function_04112016.R" #PARAM12
|
|
77 |
function_mosaicing <-"global_run_scalingup_mosaicing_04102016.R"
|
|
65 | 78 |
source(file.path(script_path,function_mosaicing)) #source all functions used in this script |
66 | 79 |
source(file.path(script_path,function_mosaicing_functions)) #source all functions used in this script |
67 | 80 |
|
... | ... | |
85 | 98 |
|
86 | 99 |
#Data is on ATLAS or NASA NEX |
87 | 100 |
|
101 |
### PARAMETERS DEFINED IN THE SCRIPT |
|
102 |
#There are 21 parameters, 1 constant and 8 arguments (drawn from the parameters) for the Rscript call. |
|
103 |
#The arguments are passed directly from Rscript: |
|
104 |
#var <- args[1] # variable being interpolated #param 1, arg 1 |
|
105 |
#in_dir <- args[2] # This is the output directory containing global prediction e.g./nobackupp6/aguzman4/climateLayers/out/ param 5, arg 2 |
|
106 |
#region_name <- args[3] # region e.g. "reg4" param 6, arg 3 |
|
107 |
#out_suffix <- args[4] # formerly out_prefix, this is used in creating an output directory, it is suggested to use "reg4" or same as region_name |
|
108 |
#out_dir <- args[5] # output parent dir, can be home dir or other, param 8, arg 5 |
|
109 |
#create_out_dir_param <- args[6] # if TRUE create a output from "output"+out_prefix param 9, arg 6 |
|
110 |
#year_predicted <- args[7] # enter as list but currently runs on the first element of the list, param 10, arg 7 |
|
111 |
#num_cores <- args[8] #number of cores used # param 13, arg 8 |
|
112 |
#max_mem <- args[9] # maximum memory, param 21 |
|
113 |
#mosaicing_method <- args[10] #PARAM5 |
|
114 |
#metric_name <- args[11] #"rmse" #RMSE, MAE etc. #PARAM 8 |
|
115 |
#day_to_mosaic_range <- args[12] #c("19910101","19910103") #if null run all year |
|
116 |
#infile_mask <- args[13] # "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
117 |
#df_assessment_files_name <- args[14] #"/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991/df_assessment_files_reg4_1991_reg4_1991.txt" # data.frame with all files used in assessment, PARAM 21 |
|
118 |
#algorithm <- args[15] #"python" #PARAM 28 #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
119 |
#layers_option <- args[16] #c("var_pred") #options are: |
|
120 |
#res_training, res_testing,ac_training, ac_testing, var_pred |
|
121 |
#tmp_files <- args[17] #FALSE |
|
122 |
|
|
123 |
#mosaicing_method <- c("unweighted","use_edge_weights") #PARAM5 |
|
124 |
#metric_name <- "rmse" #RMSE, MAE etc. #PARAM 8 |
|
125 |
#day_to_mosaic_range <- c("19910101","19910103") #if null run all year |
|
126 |
#infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
127 |
#df_assessment_files_name <- "/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991/df_assessment_files_reg4_1991_reg4_1991.txt" # data.frame with all files used in assessment, PARAM 21 |
|
128 |
#algorithm <- "python" #PARAM 28 #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
129 |
#layers_option <- c("var_pred") #options are: |
|
130 |
#res_training, res_testing,ac_training, ac_testing, var_pred |
|
131 |
#tmp_files <- FALSE |
|
132 |
|
|
88 | 133 |
var <- args[1] # variable being interpolated #param 1, arg 1 |
89 | 134 |
var<-"TMAX" # variable being interpolated #param 1, arg 1 |
90 | 135 |
|
... | ... | |
101 | 146 |
#PARAM 2 |
102 | 147 |
#in_dir <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015" #NCEAS |
103 | 148 |
#in_dir <- "/nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_12072015" #NEX |
104 |
in_dir <- "/nobackupp6/aguzman4/climateLayers/out/" |
|
105 | 149 |
|
150 |
in_dir <- "/nobackupp6/aguzman4/climateLayers/out/" |
|
151 |
in_dir <- args[2] |
|
106 | 152 |
interpolation_method <- c("gam_CAI") #PARAM3 |
107 | 153 |
|
154 |
|
|
155 |
#var <- args[1] # variable being interpolated #param 1, arg 1 |
|
156 |
#in_dir <- args[2] # This is the output directory containing global prediction e.g./nobackupp6/aguzman4/climateLayers/out/ param 5, arg 2 |
|
157 |
#region_name <- args[3] # region e.g. "reg4" param 6, arg 3 |
|
158 |
#out_suffix <- args[4] # formerly out_prefix, this is used in creating an output directory, it is suggested to use "reg4" or same as region_name |
|
159 |
#out_dir <- args[5] # output parent dir, can be home dir or other, param 8, arg 5 |
|
160 |
#create_out_dir_param <- args[6] # if TRUE create a output from "output"+out_prefix param 9, arg 6 |
|
161 |
#year_predicted <- args[7] # enter as list but currently runs on the first element of the list, param 10, arg 7 |
|
162 |
#num_cores <- args[8] #number of cores used # param 13, arg 8 |
|
163 |
#max_mem <- args[9] # maximum memory, param 21 |
|
164 |
#mosaicing_method <- args[10] #PARAM5 |
|
165 |
#metric_name <- args[11] #"rmse" #RMSE, MAE etc. #PARAM 8 |
|
166 |
#day_to_mosaic_range <- args[12] #c("19910101","19910103") #if null run all year |
|
167 |
#infile_mask <- args[13] # "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
168 |
#df_assessment_files_name <- args[14] #"/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991/df_assessment_files_reg4_1991_reg4_1991.txt" # data.frame with all files used in assessment, PARAM 21 |
|
169 |
#algorithm <- args[15] #"python" #PARAM 28 #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
170 |
#layers_option <- args[16] #c("var_pred") #options are: |
|
171 |
#res_training, res_testing,ac_training, ac_testing, var_pred |
|
172 |
#tmp_files <- args[17] #FALSE |
|
173 |
|
|
174 |
region_name <- args[3] |
|
108 | 175 |
region_name <- "reg4" #PARAM 4 #reg4 South America, Africa reg5,Europe reg2, North America reg1, Asia reg3 |
109 |
mosaicing_method <- c("unweighted","use_edge_weights") #PARAM5 |
|
176 |
|
|
110 | 177 |
#out_suffix <- paste(region_name,"_","run10_1500x4500_global_analyses_pred_1991_04052016",sep="") #PARAM 6 |
111 | 178 |
#out_suffix_str <- "run10_1500x4500_global_analyses_pred_1991_04052016" #PARAM 7 |
112 | 179 |
|
180 |
out_suffix <- args[4] |
|
113 | 181 |
out_suffix <- region_name #PARAM 6 |
114 | 182 |
out_suffix_str <- region_name #PARAM 7 |
115 |
|
|
116 |
metric_name <- "rmse" #RMSE, MAE etc. #PARAM 8 |
|
117 |
pred_mod_name <- "mod1" #PARAM 9 |
|
118 |
var_pred <- "res_mod1" #used in residuals mapping #PARAM 10 |
|
119 |
|
|
120 | 183 |
#out_dir <- in_dir #PARAM 11 |
184 |
out_dir <- args[5] |
|
121 | 185 |
out_dir <- "/nobackupp8/bparmen1/climateLayers/out/reg4" #PARAM 11, use this location for now |
186 |
create_out_dir_param <- args[6] |
|
122 | 187 |
create_out_dir_param <- TRUE #PARAM 12 |
123 | 188 |
|
189 |
year_predicted <- args[7] |
|
190 |
year_predicted <- 1991 #PARAM 31 |
|
191 |
|
|
192 |
num_cores <- args[8] |
|
193 |
num_cores <- 6 #PARAM 17 |
|
194 |
|
|
195 |
#max number of cells to read in memory |
|
196 |
max_mem<-args[9] |
|
197 |
|
|
198 |
mosaicing_method <- c("unweighted","use_edge_weights") #PARAM5 |
|
199 |
mosaicing_method <- args[10] |
|
200 |
|
|
201 |
metric_name <- args[11] |
|
202 |
metric_name <- "rmse" #RMSE, MAE etc. #PARAM 8 |
|
124 | 203 |
#if daily mosaics NULL then mosaics all days of the year #PARAM 13 |
125 | 204 |
#day_to_mosaic <- c("19910101","19910102","19910103") #,"19920104","19920105") #PARAM9, two dates note in /tiles for now on NEX |
126 | 205 |
day_to_mosaic_range <- c("19910101","19910103") #if null run all year |
206 |
day_to_mosaic_range <- c("19910101","19910101") #if null run all year |
|
207 |
day_to_mosaic_range <- args[12] |
|
208 |
|
|
209 |
###Separate folder for masks by regions, should be listed as just the dir!!... #PARAM 20 |
|
210 |
infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
211 |
infile_mask <- args[13] # region mask raster path from Rscript argument 13 (name was misspelled "inflie_mask", so the argument never reached infile_mask)
|
212 |
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
213 |
## All of this is interesting so use df_assessment!! |
|
214 |
|
|
215 |
#path_assessment <- "/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991" |
|
216 |
#path_assessment <- file.path(in_dir,region_name,"assessment",paste("output_",region_name,year_processed,sep="")) |
|
217 |
df_assessment_files_name <- args[14] |
|
218 |
df_assessment_files_name <- "/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991/df_assessment_files_reg4_1991_reg4_1991.txt" # data.frame with all files used in assessmnet, PARAM 21 |
|
219 |
algorithm <- args[15] |
|
220 |
algorithm <- "python" #PARAM 28 #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
221 |
#algorithm <- "R" #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
222 |
|
|
223 |
layers_option <- c("var_pred") #options are: |
|
224 |
#res_training, res_testing,ac_training, ac_testing, var_pred |
|
225 |
tmp_files <- FALSE |
|
226 |
|
|
227 |
pred_mod_name <- "mod1" #PARAM 9 |
|
228 |
var_pred <- "res_mod1" #used in residuals mapping #PARAM 10 |
|
229 |
|
|
230 |
|
|
127 | 231 |
#CRS_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 #CONSTANT1 |
128 | 232 |
#CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
129 | 233 |
#proj_str<- CRS_WGS84 #PARAM 8 #check this parameter |
... | ... | |
132 | 236 |
NA_value <- -9999 #PARAM 15 |
133 | 237 |
NA_flag_val <- NA_value #PARAM 16 |
134 | 238 |
|
135 |
num_cores <- 6 #PARAM 17 |
|
136 | 239 |
#region_names <- c("reg23","reg4") #selected region names, ##PARAM 18 |
137 | 240 |
use_autokrige <- F #PARAM 19 |
138 | 241 |
proj_str <- CRS_locs_WGS84 |
139 | 242 |
|
140 |
###Separate folder for masks by regions, should be listed as just the dir!!... #PARAM 20 |
|
141 |
infile_mask <- "/nobackupp8/bparmen1/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
142 |
#infile_mask <- "/data/project/layers/commons/NEX_data/regions_input_files/r_mask_reg4.tif" |
|
143 |
## All of this is interesting so use df_assessment!! |
|
144 |
|
|
145 |
year_processed <- 1991 #PARAM 31 |
|
146 |
#path_assessment <- "/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991" |
|
147 |
#path_assessment <- file.path(in_dir,region_name,"assessment",paste("output_",region_name,year_processed,sep="")) |
|
148 |
df_assessment_files_name <- "/nobackupp6/aguzman4/climateLayers/out/reg4/assessment/output_reg4_1991/df_assessment_files_reg4_1991_reg4_1991.txt" # data.frame with all files used in assessmnet, PARAM 21 |
|
149 |
|
|
150 | 243 |
#in_dir can be on NEX or Atlas |
151 | 244 |
|
152 | 245 |
#python script and gdal on NEX NASA: |
... | ... | |
156 | 249 |
#mosaic_python <- "/data/project/layers/commons/NEX_data/sharedCode" #PARAM 26 |
157 | 250 |
#python_bin <- "/usr/bin" #PARAM 27 |
158 | 251 |
|
159 |
algorithm <- "python" #PARAM 28 #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
160 |
#algorithm <- "R" #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann |
|
161 | 252 |
match_extent <- "FALSE" #PARAM 29 #try without matching!!! |
162 | 253 |
|
163 | 254 |
#for residuals... |
164 | 255 |
list_models <- NULL #PARAM 30 |
165 | 256 |
#list_models <- paste(var_pred,"~","1",sep=" ") #if null then this is the default... |
166 | 257 |
|
167 |
#max number of cells to read in memory |
|
168 |
max_mem<-args[11] |
|
169 |
#in_dir_tiles <- file.path(in_dir,"tiles") #this is valid both for Atlas and NEX |
|
170 |
layers_option <- c("var_pred") #options are: |
|
171 |
#res_training, res_testing,ac_training, ac_testing, var_pred |
|
172 |
tmp_files <- FALSE |
|
173 | 258 |
|
174 | 259 |
#rasterOptions(maxmemory=1e+07,timer=TRUE) |
175 | 260 |
list_param_run_mosaicing_prediction <- list(in_dir,y_var_name,interpolation_method,region_name, |
176 | 261 |
mosaicing_method,out_suffix,out_suffix_str,metric_name,pred_mod_name,var_pred, |
177 |
create_out_dir_param,day_to_mosaic_range,proj_str,file_format,NA_value,num_cores, |
|
262 |
create_out_dir_param,day_to_mosaic_range,year_predicted,proj_str,file_format,NA_value,num_cores,
|
|
178 | 263 |
use_autokrige,infile_mask,df_assessment_files_name,mosaic_python, |
179 | 264 |
python_bin,algorithm,match_extent,list_models,layers_option,tmp_files) |
180 | 265 |
param_names <- c("in_dir","y_var_name","interpolation_method","region_name", |
181 | 266 |
"mosaicing_method","out_suffix","out_suffix_str","metric_name","pred_mod_name","var_pred", |
182 |
"create_out_dir_param","day_to_mosaic_range","proj_str","file_format","NA_value","num_cores", |
|
267 |
"create_out_dir_param","day_to_mosaic_range","year_predicted","proj_str","file_format","NA_value","num_cores",
|
|
183 | 268 |
"use_autokrige","infile_mask","df_assessment_files_name","mosaic_python", |
184 | 269 |
"python_bin","algorithm","match_extent","list_models","layers_option","tmp_files") |
185 | 270 |
names(list_param_run_mosaicing_prediction) <- param_names |
... | ... | |
198 | 283 |
#list_param_run_mosaicing_prediction |
199 | 284 |
} |
200 | 285 |
|
286 |
#runs in 42 minutes for 3 dates but note that beyond date 1, the process is about 11 minutes or so. |
|
287 |
|
|
201 | 288 |
############### END OF SCRIPT ################### |
202 | 289 |
##################################################### |
203 | 290 |
|
Also available in: Unified diff
turning script into shell callable code for stage 7 mosaicing