Project

General

Profile

« Previous | Next » 

Revision 33517b7d

Added by Benoit Parmentier about 11 years ago

first modifications for hold out at monthly timescale, master script and monthly sampling function

View differences:

climate/research/oregon/interpolation/master_script_temp.R
10 10
#STAGE 5: Output analyses: assessment of results for specific dates...
11 11
#
12 12
#AUTHOR: Benoit Parmentier                                                                       
13
#DATE: 08/13/2013                                                                                 
13
#DATE: 08/25/2013                                                                                 
14 14

  
15 15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK#568--   
16 16

  
......
58 58
#source(file.path(script_path,"download_and_produce_MODIS_LST_climatology_06112013.R"))
59 59
source(file.path(script_path,"covariates_production_temperatures_08052013.R"))
60 60
source(file.path(script_path,"Database_stations_covariates_processing_function_06112013.R"))
61
source(file.path(script_path,"GAM_fusion_analysis_raster_prediction_multisampling_08062013.R"))
61
source(file.path(script_path,"GAM_fusion_analysis_raster_prediction_multisampling_08252013.R"))
62 62
source(file.path(script_path,"results_interpolation_date_output_analyses_08052013.R"))
63 63
#source(file.path(script_path,"results_covariates_database_stations_output_analyses_04012013.R")) #to be completed
64 64

  
65 65
#FUNCTIONS CALLED FROM GAM ANALYSIS RASTER PREDICTION ARE FOUND IN...
66 66

  
67
source(file.path(script_path,"sampling_script_functions_03122013.R"))
68
source(file.path(script_path,"GAM_fusion_function_multisampling_07302013.R")) #Include GAM_CAI
67
source(file.path(script_path,"sampling_script_functions_08252013.R"))
68
source(file.path(script_path,"GAM_fusion_function_multisampling_08252013.R")) #Include GAM_CAI
69 69
source(file.path(script_path,"interpolation_method_day_function_multisampling_07052013.R")) #Include GAM_day
70 70
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_08062013.R"))
71 71

  
......
74 74
stages_to_run<-c(0,2,3,4,5) #Run only raster fitting, prediction and assessment (providing lst averages, covar brick and met stations)
75 75
#If stage 2 is skipped then use previous covar object
76 76
covar_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/covar_obj__365d_gam_fus_lst_test_run_07172013.RData"
77
#covar_obj_file<-"covar_obj__365d_kriging_daily_mults10_lst_comb3_08062013.RData"
77
#covar_obj_file<-"covar_obj__365d_gam_CAI_lst_comb3_08252013.RData"
78 78
#If stage 3 is skipped then use previous met_stations object
79 79
met_stations_outfiles_obj_file<-"/data/project/layers/commons/data_workflow/output_data_365d_gam_fus_lst_test_run_07172013/met_stations_outfiles_obj_gam_fusion__365d_gam_fus_lst_test_run_07172013.RData"
80
#met_stations_outfiles_obj_file<-"met_stations_outfiles_obj_kriging_daily__365d_kriging_daily_mults10_lst_comb3_08062013.RData"
80
#met_stations_outfiles_obj_file<-"met_stations_outfiles_obj_gam_CAI__365d_gam_CAI_lst_comb3_08252013.RData"
81 81

  
82 82
var<-"TMAX" # variable being interpolated
83
out_prefix<-"_365d_gam_day_lst_comb4_08152013"                #User defined output prefix
84
out_suffix<-"_OR_08152013"                                       #Regional suffix
83
out_prefix<-"_365d_gam_CAI_lst_comb3_08252013"                #User defined output prefix
84
out_suffix<-"_OR_08252013"                                       #Regional suffix
85 85
out_suffix_modis <-"_05302013"                       #pattern to find tiles produced previously     
86 86

  
87 87
#interpolation_method<-c("gam_fusion","gam_CAI","gam_daily") #other options to be added later
88
#interpolation_method<-c("gam_CAI") #other options to be added later
88
interpolation_method<-c("gam_CAI") #other options to be added later
89 89
#interpolation_method<-c("gam_fusion") #other options to be added later
90 90
#interpolation_method<-c("kriging_fusion") #other options to be added later
91 91
#interpolation_method<-c("gwr_fusion") #other options to be added later
92 92
#interpolation_method<-c("gwr_CAI") #other options to be added later
93 93
#interpolation_method<-c("kriging_CAI") 
94 94

  
95
interpolation_method<-c("gam_daily") #other options to be added later
95
#interpolation_method<-c("gam_daily") #other options to be added later
96 96
#interpolation_method<-c("kriging_daily") #other options to be added later
97 97
#interpolation_method<-c("gwr_daily") #other options to be added later
98 98

  
......
241 241
names(list_param_data_prep) <- c("infile_monthly","infile_daily","infile_locs","infile_covariates","covar_names","var","out_prefix","CRS_locs_WGS84")
242 242

  
243 243
#Set additional parameters
244
#Input for sampling function...
244
#Input for sampling function...need to reorganize inputs!!!
245 245
seed_number<- 100  #if seed zero then no seed?     
246

  
246 247
nb_sample<-1           #number of time random sampling must be repeated for every hold out proportion
247 248
step<-0         
248 249
constant<-0             #if value 1 then use the same samples as date one for the all set of dates
249 250
prop_minmax<-c(0.3,0.3)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
251

  
252
seed_number_month <- 100
253
nb_sample_month <-1           #number of time random sampling must be repeated for every hold out proportion
254
step_month <-0.1         
255
constant_month <-0             #if value 1 then use the same samples as date one for the all set of dates
256
prop_minmax_month <-c(0.2,0.3)  #if prop_min=prop_max and step=0 then predictions are done for the number of dates...
257

  
250 258
#dates_selected<-c("20100101","20100102","20100103","20100901") # Note that the dates set must have a specific format: yyyymmdd
251 259
#dates_selected<-c("20100101","20100102","20100301","20100302","20100501","20100502","20100701","20100702","20100901","20100902","20101101","20101102")
252 260
dates_selected<-"" # if empty string then predict for the full year specified earlier
......
256 264
#LC1: Evergreen/deciduous needleleaf trees
257 265

  
258 266
#Combination 3: for paper baseline=s(lat,lon)+s(elev)
259
# list_models<-c("y_var ~ s(lat,lon) + s(elev_s)",
260
#                "y_var ~ s(lat,lon) + s(elev_s) + s(N_w)",
261
#                "y_var ~ s(lat,lon) + s(elev_s) + s(E_w)",
262
#                "y_var ~ s(lat,lon) + s(elev_s) + s(LST)",
263
#                "y_var ~ s(lat,lon) + s(elev_s) + s(DISTOC)",
264
#                "y_var ~ s(lat,lon) + s(elev_s) + s(LC1)",
265
#                "y_var ~ s(lat,lon) + s(elev_s) + s(CANHGHT)",
266
#                "y_var ~ s(lat,lon) + s(elev_s) + s(LST) + ti(LST,LC1)",
267
#                "y_var ~ s(lat,lon) + s(elev_s) + s(LST) + ti(LST,CANHGHT)")
267
list_models<-c("y_var ~ s(lat,lon) + s(elev_s)",
268
                "y_var ~ s(lat,lon) + s(elev_s) + s(N_w)",
269
                "y_var ~ s(lat,lon) + s(elev_s) + s(E_w)",
270
                "y_var ~ s(lat,lon) + s(elev_s) + s(LST)",
271
                "y_var ~ s(lat,lon) + s(elev_s) + s(DISTOC)",
272
                "y_var ~ s(lat,lon) + s(elev_s) + s(LC1)",
273
                "y_var ~ s(lat,lon) + s(elev_s) + s(CANHGHT)",
274
                "y_var ~ s(lat,lon) + s(elev_s) + s(LST) + ti(LST,LC1)",
275
                "y_var ~ s(lat,lon) + s(elev_s) + s(LST) + ti(LST,CANHGHT)")
268 276

  
269 277
#Combination 4: for paper baseline=s(lat,lon)
270
list_models<-c("y_var ~ s(lat,lon)",
271
               "y_var ~ s(lat,lon) + s(elev_s)",
272
               "y_var ~ s(lat,lon) + s(N_w)",
273
               "y_var ~ s(lat,lon) + s(E_w)",
274
               "y_var ~ s(lat,lon) + s(LST)",
275
               "y_var ~ s(lat,lon) + s(DISTOC)",
276
               "y_var ~ s(lat,lon) + s(LC1)",
277
               "y_var ~ s(lat,lon) + s(CANHGHT)",
278
               "y_var ~ s(lat,lon) + s(LST) + ti(LST,LC1)",
279
               "y_var ~ s(lat,lon) + s(LST) + ti(LST,CANHGHT)")
278
# list_models<-c("y_var ~ s(lat,lon)",
279
#                "y_var ~ s(lat,lon) + s(elev_s)",
280
#                "y_var ~ s(lat,lon) + s(N_w)",
281
#                "y_var ~ s(lat,lon) + s(E_w)",
282
#                "y_var ~ s(lat,lon) + s(LST)",
283
#                "y_var ~ s(lat,lon) + s(DISTOC)",
284
#                "y_var ~ s(lat,lon) + s(LC1)",
285
#                "y_var ~ s(lat,lon) + s(CANHGHT)",
286
#                "y_var ~ s(lat,lon) + s(LST) + ti(LST,LC1)",
287
#                "y_var ~ s(lat,lon) + s(LST) + ti(LST,CANHGHT)")
280 288

  
281 289
#list_models<-c("y_var ~ lat*lon + elev_s")
282 290

  
......
298 306
#Collect all parameters in a list
299 307
list_param_raster_prediction<-list(list_param_data_prep,screen_data_training,
300 308
                                seed_number,nb_sample,step,constant,prop_minmax,dates_selected,
309
                                seed_number_month,nb_sample_month,step_month,constant_month,prop_minmax_month,
301 310
                                list_models,lst_avg,out_path,script_path,
302 311
                                interpolation_method)
303 312
names(list_param_raster_prediction)<-c("list_param_data_prep","screen_data_training",
304 313
                                "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
314
                                "seed_number_month","nb_sample_month","step_month","constant_month","prop_minmax_month",
305 315
                                "list_models","lst_avg","out_path","script_path",
306 316
                                "interpolation_method")
307

  
317
#debug(raster_prediction_fun)
308 318
raster_prediction_obj <-raster_prediction_fun(list_param_raster_prediction)
309 319

  
310 320
############## STAGE 5: OUTPUT ANALYSES ##################
climate/research/oregon/interpolation/sampling_script_functions.R
1 1
sampling_training_testing<-function(list_param_sampling){
2 2
  
3
  #This function creates testing and training list for input sation data based on alist of dates.            
3
  #This function creates testing and training list for input station data based on a list of dates. 
4
  #This function works for monthly time scale if dates are provided as mid-months or other forms of monthly records.
4 5
  #It requires 6 inputs:                                           
5 6
  # 1) seed_number: allow comparison across runs, if seed zero then no seed number is used
6 7
  # 2) nb_sample: number of times random sampling must be repeated for every hold out proportion 
......
15 16
  # 1) sampling_dat: sampling information for every run by date and sampling combination
16 17
  # 2) sampling_index: list of indexes for training and testing for every date
17 18
  # 3) sampling_stat_id: list of station ID for training and testing for every date
18
  # 4) ghcn_data_day: ghcn subsets by date
19
  # 4) ghcn_data: ghcn subsets by date, can be monthly or daily with multiple sampling
19 20
  
20 21
  #AUTHOR: Benoit Parmentier                                                                       
21
  #DATE: 03/13/2013                                                                                 
22
  #DATE: 08/25/2013                                                                                 
22 23
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363, TASK#558--     
23 24
  #Comments and TODO
24 25
  #
......
33 34
  prop_minmax<-list_param_sampling$prop_minmax
34 35
  dates<-list_param_sampling$dates
35 36
  #ghcn_name<-list_param_sampling$ghcn_name
36
  ghcn<-list_param_sampling$ghcn
37
  ghcn<-list_param_sampling$ghcn #can be daily or monthly!!
37 38
  #ghcn<-get(ghcn_name) 
38 39
  
39 40
  ### BEGIN FUNCTION ####
......
46 47
  dates_list<-vector("list",nel) #list of one row data.frame
47 48
  prop_min<-prop_minmax[1]
48 49
  prop_max<-prop_minmax[2]
49
  
50
      
50 51
  prop_range<-(seq(from=prop_min,to=prop_max,by=step))*100     #range of proportion to run
51 52
  sn<-length(dates)*nb_sample*length(prop_range)               #Number of samples to run
52 53
  
......
110 111
  }
111 112
  
112 113
  sampling_obj<-list(sampling_dat,sampling,sampling_station_id,ghcn.subsets)
113
  names(sampling_obj)<- c("sampling_dat","sampling_index","sampling_stat_id","ghcn_data_day")
114
  names(sampling_obj)<- c("sampling_dat","sampling_index","sampling_stat_id","ghcn_data")
114 115
  
115 116
  return(sampling_obj)
116 117
  

Also available in: Unified diff