Project

General

Profile

« Previous | Next » 

Revision 9944e99d

Added by Benoit Parmentier about 9 years ago

testing accuracy function in the main script with data_s and data_v, training and testing

View differences:

climate/research/oregon/interpolation/global_run_scalingup_mosaicing.R
5 5
#Analyses, figures, tables and data are also produced in the script.
6 6
#AUTHOR: Benoit Parmentier 
7 7
#CREATED ON: 04/14/2015  
8
#MODIFIED ON: 12/12/2015            
8
#MODIFIED ON: 12/16/2015            
9 9
#Version: 5
10 10
#PROJECT: Environmental Layers project     
11 11
#COMMENTS: analyses run for reg4 1992 for test of mosaicing using 1500x4500km and other tiles
......
34 34
library(sp)                                  # Spatial pacakge with class definition by Bivand et al.
35 35
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
36 36
library(rgdal)                               # GDAL wrapper for R, spatial utilities
37
#library(gstat)                               # Kriging and co-kriging by Pebesma et al.
37
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
38 38
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
39 39
library(raster)                              # Hijmans et al. package for raster processing
40 40
library(gdata)                               # various tools with xls reading, cbindX
......
46 46
library(plotrix)                             # Additional plotting functions
47 47
library(plyr)                                # Various tools including rbind.fill
48 48
#library(spgwr)                               # GWR method
49
#library(automap)                             # Kriging automatic fitting of variogram using gstat
50
#library(rgeos)                               # Geometric, topologic library of functions
49
library(automap)                             # Kriging automatic fitting of variogram using gstat
50
library(rgeos)                               # Geometric, topologic library of functions
51 51
#RPostgreSQL                                 # Interface R and Postgres, not used in this script
52 52
library(gridExtra)
53 53
#Additional libraries not used in workflow
......
58 58

  
59 59
#### FUNCTION USED IN SCRIPT
60 60

  
61
function_mosaicing <-"global_run_scalingup_mosaicing_function_12122015.R"
61
function_mosaicing <-"global_run_scalingup_mosaicing_function_12162015b.R"
62 62

  
63
#in_dir_script <-"/home/parmentier/Data/IPLANT_project/env_layers_scripts" #NCEAS UCSB
64
in_dir_script <- "/nobackupp8/bparmen1/env_layers_scripts" #NASA NEX
63
in_dir_script <-"/home/parmentier/Data/IPLANT_project/env_layers_scripts" #NCEAS UCSB
64
#in_dir_script <- "/nobackupp8/bparmen1/env_layers_scripts" #NASA NEX
65 65
source(file.path(in_dir_script,function_mosaicing))
66 66

  
67
load_obj <- function(f)
68
{
69
  env <- new.env()
70
  nm <- load(f, env)[1]
71
  env[[nm]]
72
}
73

  
74
create_dir_fun <- function(out_dir,out_suffix){
75
  if(!is.null(out_suffix)){
76
    out_name <- paste("output_",out_suffix,sep="")
77
    out_dir <- file.path(out_dir,out_name)
78
  }
79
  #create if does not exists
80
  if(!file.exists(out_dir)){
81
    dir.create(out_dir)
82
  }
83
  return(out_dir)
84
}
85

  
67 86
############################################
68 87
#### Parameters and constants  
69 88

  
......
85 104
region_name <- "reg4" #PARAM 4 #reg4 South America, Africa reg5,Europe reg2, North America reg1, Asia reg3
86 105
mosaicing_method <- c("unweighted","use_edge_weights") #PARAM5
87 106
out_suffix <- paste(region_name,"_","run10_1500x4500_global_analyses_pred_1992_12072015",sep="") #PARAM 6
88
out_suffix_str <- "run10_1500x4500_global_analyses_pred_1992_12072015"
89
metric_name <- "rmse" #RMSE, MAE etc.
90
pred_mod_name <- "mod1"
107
out_suffix_str <- "run10_1500x4500_global_analyses_pred_1992_12072015" #PARAM 7
108
metric_name <- "rmse" #RMSE, MAE etc. #PARAM 8
109
pred_mod_name <- "mod1" #PARAM 9
110
var_pred <- "res_mod1" #used in residuals mapping #PARAM 10
91 111

  
92
#PARAM3
93
out_dir <- in_dir #PARAM 7
94
create_out_dir_param <- FALSE #PARAM 8
112
out_dir <- in_dir #PARAM 11
113
create_out_dir_param <- FALSE #PARAM 12
95 114

  
96
#if daily mosaics NULL then mosaicas all days of the year
115
#if daily mosaics NULL then mosaicas all days of the year #PARAM 13
97 116
day_to_mosaic <- c("19920101","19920102","19920103") #,"19920104","19920105") #PARAM9, two dates note in /tiles for now on NEX
98 117

  
99 118
#CRS_WGS84 <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 #CONSTANT1
100 119
#CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
101 120
#proj_str<- CRS_WGS84 #PARAM 8 #check this parameter
102 121
 
103
file_format <- ".tif" #PARAM 10
104
NA_value <- -9999 #PARAM 11
105
NA_flag_val <- NA_value
122
file_format <- ".tif" #PARAM 14
123
NA_value <- -9999 #PARAM 15
124
NA_flag_val <- NA_value #PARAM 16
106 125
     
107
num_cores <- 6 #PARAM 12                  
108
region_names <- c("reg23","reg4") #selected region names, #PARAM2 
126
num_cores <- 6 #PARAM 17                 
127
region_names <- c("reg23","reg4") #selected region names, ##PARAM 18 
128
use_autokrige <- F #PARAM 19
109 129

  
110
###Make a separate folder for masks by regions...
130
###Make a separate folder for masks by regions... #PARAM 20
111 131
#infile_mask <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_10052015/r_mask_reg4.tif"
112 132
infile_mask <- "/nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015/r_mask_reg4.tif"
113 133

  
114 134
#tb_accuracy_name <- file.path(in_dir,paste("tb_diagnostic_v_NA","_",out_suffix_str,".txt",sep=""))
115
#tb_accuracy_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_10052015/tb_diagnostic_v_NA_run10_1500x4500_global_analyses_pred_1992_10052015.txt"
116 135
#tb_accuracy_name <- "/nobackupp8/bparmen1/output_run10_1500x4500_global_analyses_pred_1992_10052015/tb_diagnostic_v_NA_run10_1500x4500_global_analyses_pred_1992_10052015.txt"
117
tb_accuracy_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015"
118
data_month_s_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/data_month_s_NAM_run10_1500x4500_global_analyses_pred_1992_12072015.txt"
119
df_tile_processed_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/df_tile_processed_run10_1500x4500_global_analyses_pred_1992_12072015.txt"
136
tb_accuracy_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/tb_diagnostic_v_NA_run10_1500x4500_global_analyses_pred_1992_12072015.txt" #PARAM 21
137
data_month_s_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/data_month_s_NAM_run10_1500x4500_global_analyses_pred_1992_12072015.txt" #PARAM 22
138
data_day_v_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/data_day_v_NAM_run10_1500x4500_global_analyses_pred_1992_12072015.txt" #PARAM 23
139
data_day_s_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/data_day_s_NAM_run10_1500x4500_global_analyses_pred_1992_12072015.txt" ##PARAM 24
140
df_tile_processed_name <- "/data/project/layers/commons/NEX_data/output_run10_1500x4500_global_analyses_pred_1992_12072015/df_tile_processed_run10_1500x4500_global_analyses_pred_1992_12072015.txt" ##PARAM 25
141

  
120 142
#python script and gdal on NEX NASA:
121 143
#mosaic_python <- "/nobackupp6/aguzman4/climateLayers/sharedCode/"
122 144
#python_bin <- "/nobackupp6/aguzman4/climateLayers/sharedModules2/bin"
123 145
#python script and gdal on Atlas NCEAS
124
mosaic_python <- "/data/project/layers/commons/NEX_data/sharedCode"
125
python_bin <- "/usr/bin"
146
mosaic_python <- "/data/project/layers/commons/NEX_data/sharedCode" #PARAM 26
147
python_bin <- "/usr/bin" #PARAM 27
126 148

  
127
algorithm <- "python" #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann
149
algorithm <- "python" #PARAM 28 #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann
128 150
#algorithm <- "R" #if R use mosaic function for R, if python use modified gdalmerge script from Alberto Guzmann
129 151
 
130
match_extent <- "FALSE" #try without matching!!!
152
match_extent <- "FALSE" #PARAM 29 #try without matching!!!
131 153

  
132 154
########################## START SCRIPT ##############################
133 155

  
......
146 168
setwd(out_dir)
147 169

  
148 170
# accuracy table by tiles
149
tb <- read.table(file=tb_accuracy_name,sep=",")
171
tb <- read.table(tb_accuracy_name,sep=",")
150 172
# textfiles of stations by month
151 173
data_month_s <- read.table(file.path(data_month_s_name),sep=",")
174
data_day_s <- read.table(file.path(data_day_s_name),sep=",") #daily testing/validation stations by dates and tiles
175
data_day_v <- read.table(file.path(data_day_v_name),sep=",") #daily training stations by dates and tiles
176

  
152 177
df_tile_processed <- read.table( df_tile_processed_name,sep=",")
153 178

  
154 179
#list all files to mosaic for a list of day
......
175 200
out_dir_str <- out_dir
176 201
out_suffix_str <- out_suffix
177 202

  
178
#Improve by adding multicores option
203
#Improved by adding multicores option
179 204
num_cores_tmp <- 6
180 205
list_param_accuracy_metric_raster <- list(lf,tb,metric_name,pred_mod_name,y_var_name,interpolation_method,
181 206
                    days_to_process,num_cores_tmp,NA_flag_val,file_format,out_dir_str,out_suffix_str) 
......
184 209
list_raster_created_obj <- lapply(1:length(day_to_mosaic),FUN=create_accuracy_metric_raster,
185 210
                                  list_param=list_param_accuracy_metric_raster)
186 211

  
187
#interpolation_method_day_function_multisampling.R
188
## Create accuracy surface by kriging
189
data_tmp <- subset(data_month_s,month==1)
190
data_tmp$tile_id <- as.character(data_tmp$tile_id)
191
var_pred <- "res_mod1"
192
list_models<- c("res_mod1 ~ 1")
193
df_tile_processed
194
#list_models<-c("y_var ~ lat*lon + elev_s + N_w*E_w",
195
#               "y_var ~ lat*lon + elev_s + DISTOC",
196
#               "y_var ~ lat*lon + elev_s + LST",
197
#               "y_var ~ lat*lon + elev_s + LST + I(LST*LC1)")
198

  
199
##By regions
200
create_accuracy_metric_raster_kriging <- function(data_df,df_tile_processed,var_pred,lf_ref_rast,out_dir,out_suffix){
201
  #This function generates kriged values for a giving set of formula and data input.
202
  #Inputs:
203
  #lf: list of raster files
204
  #tb: data.frame table #fitting or validation table with all days
205
  #metric_name: accuracy metric selected to be mapped, RMSE, MAE etc.
206
  #pred_mod_name: model selected, such as mod1, mod_kr etc.
207
  #y_var_name: variable being modeled e.g."dailyTmax", dailyTmin, precip  
208
  #interpolation_method: names of the interpolation/modeling method
209
  #date_processed: day being processed , e.g. 19920101
210
  #num_cores : number of cores used in the parallelization
211
  #NA_flag_val: value used as flag in the raster 
212
  #file_format: e.g. tif., .rst
213
  #out_dir_str: output directory
214
  #out_suffix_str: output suffix
215
  #Outputs:
216
  #raster list of weights and product of wegihts and inuts
217
  #TODO: 
218

  
219
  # - improve efficiency
220
  #
221
  #promote to spdf
222
  list_formulas <- lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
223

  
224
  #data_training <- data_df 
225
  coordinates(data_df)<-cbind(data_df$x,data_df$y)
226
  #proj4string(data_s)<-proj_str
227
  #coordinates(data_v)<-cbind(data_v$x,data_v$y)
228
  #proj4string(data_v)<-proj_str
229
  lf_day_tiles <- lf_mosaic[[i]] #i index for time...
230
  
231
  #Now match the correct tiles with data used in kriging...
232
  #match the correct tile!!! df_tile_processed
233
  #pattern_str <- as.character(unique(df_tile_processed$tile_coord))
234
  list_tile_coord <- as.character(df_tile_processed$tile_coord)
235
  pattern_str <- glob2rx(paste("*",list_tile_coord,"*","*.tif",sep=""))
236
  keywords_str <- pattern_str
237
  tmp_str2 <-unlist(lapply(keywords_str,grep,lf_day_tiles,value=TRUE))
238
  df_raster_pred_tiles_tmp <- data.frame(files =tmp_str2, tile_coord=list_tile_coord)
239
  df_raster_pred_tiles <- merge(df_raster_pred_tiles_tmp,df_tile_processed,by="tile_coord")
240
  df_raster_pred_tiles$path_NEX <- as.character(df_raster_pred_tiles$path_NEX)
241
  df_raster_pred_tiles$reg <- basename(dirname(df_raster_pred_tiles$path_NEX))
242
  df_raster_pred_tiles$files <- as.character(df_raster_pred_tiles$files)
243
  
244
  j <- 1 #loops across tiles from set of files
245
  
246
  r_stack <- df_raster_pred_tiles$files[j]
247
  tile_selected <- as.character(df_raster_pred_tiles$tile_id[j])
248
  data_training <- subset(data_df,tile_id==tile_selected) #df_raster_pred_tiles$files
249
  #r_stack <- stack(lf_day[j])
250
  out_filename <- "test.tif"
251
  #debug(predict_auto_krige_raster_model)
252
  test <- predict_auto_krige_raster_model(list_formulas,r_stack,data_training,out_filename)
253
  #identify residuals
254
  
255
  #call kriging function
256
  
257
  #write output
258
}
259

  
260 212
#debug(create_accuracy_metric_raster)
261 213
#list_raster_created_obj <- lapply(1:1,FUN=create_accuracy_metric_raster,
262 214
#                                  list_param=list_param_accuracy_metric_raster)
......
275 227
#r1_ac <- raster(lf_accuracy_raster[[1]][1]) 
276 228
#plot(r1_ac)
277 229

  
230
####################################
231
### Now create accuracy surfaces from residuals...
232

  
233
## Create accuracy surface by kriging
234

  
235
num_cores_tmp <- 6
236
lf_day_tiles  <- lf_mosaic #list of raster files by dates
237
data_df <- data_day_v # data.frame table/spdf containing stations with residuals and variable
238
#df_tile_processed  #tiles processed during assessment usually by region
239
#var_pred  #variable being modeled
240
list_models <- paste(var_pred,"~","1",sep=" ")
241
#use_autokrige #if TRUE use automap/gstat package
242
#y_var_name  #"dailyTmax" #PARAM2
243
#interpolation_method #c("gam_CAI") #PARAM3
244
#date_processed #can be a monthly layer
245
#num_cores #number of cores used
246
#NA_flag_val 
247
#file_format 
248
out_dir_str <- out_dir
249
out_suffix_str <- out_suffix 
250
days_to_process <- day_to_mosaic
251

  
252
#data_tmp <- subset(data_month_s,month==1)
253
#var_pred <- "res_mod1"
254
#list_models<- c("res_mod1 ~ 1")
255

  
256

  
257
##By regions
258
i<-1 #loop by days/date to process!!
259
 
260
list_param_create_accuracy_residuals_raster <- list(lf_day_tiles,data_df,df_tile_processed,
261
                    var_pred,list_models,use_autokrige,y_var_name,interpolation_method,
262
                    days_to_process,num_cores_tmp,NA_flag_val,file_format,out_dir_str,
263
                    out_suffix_str) 
264
names(list_param_create_accuracy_residuals_raster) <- c("lf_day_tiles","data_df","df_tile_processed",
265
                    "var_pred","list_models","use_autokrige","y_var_name","interpolation_method",
266
                    "days_to_process","num_cores_tmp","NA_flag_val","file_format","out_dir_str",
267
                    "out_suffix_str") 
268

  
269
#list_create_accuracy_residuals_raster_obj <- lapply(1:length(day_to_mosaic),FUN=create_accuracy_residuals_raster_obj,
270
#                                  list_param=list_param_create_accuracy_residuals_raster_obj)
271

  
272
debug(create_accuracy_residuals_raster)
273
list_create_accuracy_residuals_raster_obj <- lapply(1:1,FUN=create_accuracy_residuals_raster,
274
                                  list_param=list_param_create_accuracy_residuals_raster)
275

  
276
#create_accuracy_residuals_raster_obj <- create_accuracy_metric_raster(1, list_param_create_accuracy_residuals_raster_obj)
277

  
278 278
#################################
279 279
#### Second mosaic tiles for the variable and accuracy metrid
280 280

  

Also available in: Unified diff