Project

General

Profile

« Previous | Next » 

Revision e87ce694

Added by Alberto Guzman almost 11 years ago

Added new version of master_script, merged files from sandbox.

View differences:

climate/research/world/interpolation/Database_stations_covariates_processing_function_01062014.R
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3

  
4

  
5
database_covariates_preparation<-function(list_param_prep){
6
  #This function performs queries on the Postgres ghcnd database for stations matching the             
7
  #interpolation area. It requires 11 inputs:                                           
8
  # 1)  db.name :  Postgres database name containing the meteorological stations
9
  # 2)  var: the variable of interest - "TMAX","TMIN" or "PRCP" 
10
  # 3)  range_years: range of records used in the daily interpolation, note that upper bound year is not included               
11
  # 4)  range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included
12
  # 5)  infile_reg_outline: region outline as a shape file - used in the interpolation  stage too                              
13
  # 6)  infile_ghncd_data: ghcnd stations locations as a textfile name with lat-long fields                                                                                   
14
  # 7)  infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection                                                                                           
15
  # 8)  CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations
16
  # 9)  in_path: input path for covariates data and other files, this is also the output?
17
  # 10) out_path: output path created in master script
18
  # 11) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick)
19
  # 12) qc_flags_stations: flag used to screen out at this stage only two values...
20
  # 13) out_prefix: output suffix added to output names--it is the same in the interpolation script
21
  #
22
  #The output is a list of four shapefile names produced by the function:
23
  #1) loc_stations: locations of stations as shapefile in EPSG 4326
24
  #2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
25
  #3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag
26
  #4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
27
  #5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
28
  #6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
29
  
30
  #AUTHOR: Benoit Parmentier                                                                       
31
  #DATE: 05/21/2013                                                                                 
32
  #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--     
33
  #Comments and TODO
34
  #-Add buffer option...
35
  #-Add screening for value predicted: var
36
  ##################################################################################################
37
  
38
  ###Loading R library and packages: should it be read in before???   
39
  
40
  library(RPostgreSQL)
41
  library(sp)                                           # Spatial pacakge with class definition by Bivand et al.
42
  library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
43
  library(rgdal)                                          # GDAL wrapper for R, spatial utilities
44
  library(rgeos)
45
  library(rgdal)
46
  library(raster)
47
  library(rasterVis)
48
  library(maps)
49
  library(maptools)
50
  
51
  ### Functions used in the script
52
  
53
  format_s <-function(s_ID){
54
    #Format station ID in a vector format/tuple that is used in a psql query.
55
    # Argument 1: vector of station ID
56
    # Return: character of station ID
57
    tx2<-s_ID
58
    tx2<-as.character(tx2)
59
    stat_list<-tx2
60
    temp<-shQuote(stat_list)
61
    t<-paste(temp, collapse= " ")
62
    t1<-gsub(" ", ",",t)
63
    sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
64
    return(sf_ID)
65
  }
66
  
67
  #parsing input arguments
68
  
69
  db.name <- list_param_prep$db.name             #name of the Postgres database
70
  var <- list_param_prep$var                     #name of the variables to keep: TMIN, TMAX or PRCP
71
  year_start <-list_param_prep$range_years[1] #"2010"               #starting year for the query (included)
72
  year_end <-list_param_prep$range_years[2] #"2011"                 #end year for the query (excluded)
73
  year_start_clim <-list_param_prep$range_years_clim[1] #right bound not included in the range!! starting year for monthly query to calculate clime
74
  year_end_clim <-list_param_prep$range_years_clim[2] #right bound not included in the range!! starting year for monthly query to calculate clime
75
  infile_reg_outline<- list_param_prep$infile_reg_outline  #This is the shape file of outline of the study area                                                      #It is an input/output of the covariate script
76
  infile_ghncd_data<-list_param_prep$infile_ghncd_data      #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt"                              #This is the textfile of station locations from GHCND
77
  infile_covariates<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
78
  CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier
79
  in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
80
  out_path <- list_param_prep$out_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/"
81
  covar_names<-list_param_prep$covar_names # names should be written in the tif file!!!
82
  qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND??
83
  out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013"                #User defined output prefix
84
  
85
  ## working directory is the same for input and output for this function  
86
  #setwd(in_path) 
87
  setwd(out_path)
88
  ##### STEP 1: Select station in the study area
89
    
90
  filename<-sub(".shp","",fixed=TRUE,infile_reg_outline)             #Removing the extension from file.
91
  interp_area <- readOGR(dsn=dirname(filename),basename(filename))
92
  CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
93
  
94
  #Read in GHCND database station locations
95
  dat_stat <- read.fwf(infile_ghncd_data, 
96
                       widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
97
  colnames(dat_stat)<-c("STAT_ID","latitude","longitude","elev","state","name","GSNF","HCNF","WMOID")
98
  coords<- dat_stat[,c('longitude','latitude')]
99
  coordinates(dat_stat)<-coords
100
  proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection
101
  #proj4string(dat_stat)<-CRS_interp
102
  interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84)         # Project from WGS84 to new coord. system
103
  
104
  # Spatial query to find relevant stations
105

  
106
  inside <- !is.na(over(dat_stat, as(interp_area_WGS84, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
107
  stat_reg<-dat_stat[inside,]              #Selecting stations contained in the current interpolation area
108
  
109
  ####
110
  ##TODO: Add buffer option? 
111
  ####
112
  
113
  #### STEP 2: Connecting to the database and query for relevant data 
114

  
115
  drv <- dbDriver("PostgreSQL")
116
  db <- dbConnect(drv, dbname=db.name)
117
 
118
  time1<-proc.time()    #Start stop watch
119
  list_s<-format_s(stat_reg$STAT_ID)
120
  data2<-dbGetQuery(db, paste("SELECT *
121
                              FROM ghcn
122
                              WHERE element=",shQuote(var),
123
                              "AND year>=",year_start,
124
                              "AND year<",year_end,
125
                              "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
126
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
127
  time_minutes<-time_duration[3]/60
128
  print(time_minutes)
129
  dbDisconnect(db)
130
 
131
  data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID")
132

  
133
  #Transform the subset data frame in a spatial data frame and reproject
134
  data_reg<-data_table                               #Make a copy of the data frame
135
  coords<- data_reg[c('longitude','latitude')]              #Define coordinates in a data frame: clean up here!!
136
  coordinates(data_reg)<-coords                      #Assign coordinates to the data frame
137
  proj4string(data_reg)<-CRS_locs_WGS84                #Assign coordinates reference system in PROJ4 format
138
  #data_reg<-spTransform(data_reg,CRS(CRS_interp))     #Project from WGS84 to new coord. system
139
  
140
  data_d <-data_reg  #data_d: daily data containing the query without screening
141
  #data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!!
142
  #Transform the query to be depending on the number of flags
143

  
144
  data_reg <-subset(data_d, mflag %in% qc_flags_stations) #screening using flags
145
  #data_reg2 <-subset(data_d,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) #screening using flags
146

  
147
  ##################################################################
148
  ### STEP 3: Save results and outuput in textfile and a shape file
149
  #browser()
150
  #Save shape files of the locations of meteorological stations in the study area
151
  #outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep=""))
152
  outfile1<-file.path(out_path,paste("stations","_",out_prefix,".shp",sep=""))
153
  writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE)
154
  #writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE)
155
  #Also save as rds
156
  outfile1_rds<-sub(".shp",".rds",outfile1)
157
  saveRDS(stat_reg,outfile1)
158

  
159
  outfile2<-file.path(out_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
160
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
161
  writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE)
162
  outfile2_rds<-sub(".shp",".rds",outfile2)
163
  saveRDS(data_d,outfile2_rds) 
164

  
165
  outfile3<-file.path(out_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep=""))         #Name of the file
166
  #writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
167
  writeOGR(data_reg,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE)
168
  outfile3_rds<-sub(".shp",".rds",outfile3)
169
  saveRDS(data_reg,outfile3_rds)
170

  
171
  ###################################################################
172
  ### STEP 4: Extract values at stations from covariates stack of raster images
173
  #Eventually this step may be skipped if the covariates information is stored in the database...
174

  
175
  #s_raster<-stack(file.path(in_path,infile_covariates))                   #read in the data stack
176
  s_raster<-brick(infile_covariates)                   #read in the data stack
177
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
178
  stat_val<- extract(s_raster, data_reg)        #Extracting values from the raster stack for every point location in coords data frame.
179
  #stat_val_test<- extract(s_raster, data_reg,def=TRUE)
180
  
181
  #create a shape file and data_frame with names
182
  data_RST<-as.data.frame(stat_val)                                            #This creates a data frame with the values extracted
183
  data_RST_SDF<-cbind(data_reg,data_RST)
184
  coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe
185
  CRS_reg<-proj4string(data_reg)
186
  proj4string(data_RST_SDF)<-CRS_reg  #Need to assign coordinates...
187

  
188
  #Creating a date column
189
  date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column
190
  date2<-gsub("-","",as.character(as.Date(date1)))
191
  data_RST_SDF$date<-date2                                              #Date format (year,month,day) is the following: "20100627"
192
 
193
  #This allows to change only one name of the data.frame
194
  pos<-match("value",names(data_RST_SDF)) #Find column with name "value"
195
  if (var=="TMAX"){
196
    #names(data_RST_SDF)[pos]<-c("TMax")
197
    data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
198
  }
199
  
200
  if (var=="TMIN"){
201
    #names(data_RST_SDF)[pos]<-c("TMin")
202
    data_RST_SDF$value<-data_RST_SDF$value/10                #TMax is the average max temp for monthy data
203
  }
204
  
205
  #write out a new shapefile (including .prj component)
206
  outfile4<-file.path(out_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep=""))         #Name of the file
207
  writeOGR(data_RST_SDF,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE)
208
  outfile4_rds<-sub(".shp",".rds",outfile4)
209
  saveRDS(data_RST_SDF,outfile4_rds)  
210

  
211
  ###############################################################
212
  ######## STEP 5: Preparing monthly averages from the ProstGres database
213
  drv <- dbDriver("PostgreSQL")
214
  db <- dbConnect(drv, dbname=db.name)
215
  
216
  #year_start_clim: set at the start of the script
217
  time1<-proc.time()    #Start stop watch
218
  list_s<-format_s(stat_reg$STAT_ID)
219
  data_m<-dbGetQuery(db, paste("SELECT *
220
                               FROM ghcn
221
                               WHERE element=",shQuote(var),
222
                               "AND year>=",year_start_clim,
223
                               "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
224
  time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
225
  time_minutes<-time_duration[3]/60
226
  print(time_minutes)
227
  dbDisconnect(db)
228
  
229
  #Clean out this section!!
230
  date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column
231
  date2<-as.POSIXlt(as.Date(date1))
232
  data_m$date<-date2
233
  #Save the query data here...
234
  data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
235
  #Extracting covariates from stack for the monthly dataset...
236
  coords<- data_m[c('longitude','latitude')]              #Define coordinates in a data frame
237
  coordinates(data_m)<-coords                      #Assign coordinates to the data frame
238
  proj4string(data_m)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
239
  data_m<-spTransform(data_m,CRS(CRS_interp))     #Project from WGS84 to new coord. system
240
  outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
241
  writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE)
242
  
243
  outfile5_rds<-sub(".shp",".rds",outfile5)
244
  saveRDS(data_m,outfile5_rds) 
245

  
246
  #In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010.
247

  
248
  #d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2])
249
  d<-subset(data_m,mflag %in% qc_flags_stations)
250
  
251
  #Add screening here ...May need some screeing??? i.e. range of temp and elevation...
252
  
253
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
254
  #d2<-aggregate(value~station+month, data=d, length)  #Calculate monthly mean for every station in OR
255
  is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation
256
  d2<-aggregate(value~station+month, data=d, is_not_na_fun)  #Calculate monthly mean for every station in OR
257
  #names(d2)[names(d2)=="value"] <-"nobs_station"
258
  d1$nobs_station<-d2$value
259
  dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID")   #Inner join all columns are retained
260
  
261
  #This allows to change only one name of the data.frame
262
  pos<-match("value",names(dst)) #Find column with name "value"
263
  if (var=="TMAX"){
264
    names(dst)[pos]<-c("TMax")           #renaming the column "value" extracted from the Postgres database
265
    dst$TMax<-dst$TMax/10                #TMax is the average max temp for monthy data
266
  }
267
  
268
  if (var=="TMIN"){
269
    names(dst)[pos]<-c("TMin")
270
    dst$TMin<-dst$TMin/10                #TMin is the average min temp for monthy data
271
  }
272
  
273
  #Extracting covariates from stack for the monthly dataset...
274
  #names(dst)[5:6] <-c('latitude','longitude')
275
  coords<- dst[c('longitude','latitude')]              #Define coordinates in a data frame
276

  
277
  coordinates(dst)<-coords                      #Assign coordinates to the data frame
278
  proj4string(dst)<-CRS_locs_WGS84                  #Assign coordinates reference system in PROJ4 format
279
  dst_month<-spTransform(dst,CRS(CRS_interp))     #Project from WGS84 to new coord. system
280
  
281
  stations_val<-extract(s_raster,dst_month,df=TRUE)  #extraction of the information at station location in a data frame
282
  #dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
283
  dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack
284
  dst<-dst_extract #problem!!! two column named elev!!! use elev_s??
285
  
286
  #browser()
287
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
288
  index<-!is.na(coords$x)               #remove if NA, may need to revisit at a later stage
289
  dst<-dst[index,]
290
  coords<- dst[c('x','y')]              #Define coordinates in a data frame, this is the local x,y
291
  coordinates(dst)<-coords                    #Assign coordinates to the data frame
292
  proj4string(dst)<-CRS_interp        #Assign coordinates reference system in PROJ4 format
293
  
294
  ### ADD SCREENING HERE BEFORE WRITING OUT DATA
295
  #Covariates ok since screening done in covariate script
296
  #screening on var i.e. value, TMIN, TMAX...
297
  
298
  ####
299
  
300
  ####
301
  #write out a new shapefile (including .prj component)
302
  dst$OID<-1:nrow(dst) #need a unique ID?
303
  outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep=""))  #Name of the file
304
  writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE)
305
  
306
  outfile6_rds<-sub(".shp",".rds",outfile6)
307
  saveRDS(dst,outfile6_rds)
308

  
309
  outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6)
310
  names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data")
311
  save(outfiles_obj,file= file.path(out_path,paste("met_stations_outfiles_obj_",interpolation_method,"_", out_prefix,".RData",sep="")))
312
  
313
  return(outfiles_obj)
314
  
315
  #END OF FUNCTION # 
316
}
317

  
318
########## END OF SCRIPT ##########
climate/research/world/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R
1
#########################    Raster prediction    ####################################
2
############################ Interpolation of temperature for given processing region ##########################################
3
#This script interpolates temperature values using MODIS LST, covariates and GHCND station data.                      
4
#It requires the text file of stations and a shape file of the study area.           
5
#Note that the projection for both GHCND and study area is lonlat WGS84.       
6
#Options to run this program are:
7
#1) Multisampling: vary the porportions of hold out and use random samples for each run
8
#2)Constant sampling: use the same sample over the runs
9
#3)over dates: run over for example 365 dates without mulitsampling
10
#4)use seed number: use seed if random samples must be repeatable
11
#5)possibilty of running GAM+FUSION or GAM+CAI and other options added
12
#The interpolation is done first at the monthly time scale then delta surfaces are added.
13
#AUTHOR: Benoit Parmentier                                                                        
14
#DATE: 07/16/2013                                                                                 
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#568--     
16
#
17
# TO DO:
18
# 1) modify to make it general for any method i.e. make call to method e.g. gam_fus, gam_cai etc.
19
# 2) simplify and bundle validation steps, make it general--method_mod_validation
20
# 3) solve issues with log file recordings
21
# 4) output location folder on the fly???
22

  
23
###################################################################################################
24

  
25
raster_prediction_fun <-function(list_param_raster_prediction){
26

  
27
  ##Function to predict temperature interpolation with 21 input parameters
28
  #9 parameters used in the data preparation stage and input in the current script
29
  #1)list_param_data_prep: used in earlier code for the query from the database and extraction for raster brick
30
  #2)infile_monthly:
31
  #3)infile_daily
32
  #4)infile_locs:
33
  #5)infile_covariates: raster covariate brick, tif file
34
  #6)covar_names: covar_names #remove at a later stage...
35
  #7)var: variable being interpolated-TMIN or TMAX
36
  #8)out_prefix
37
  #9)CRS_locs_WGS84
38
  #10)screen_data_training
39
  #
40
  #6 parameters for sampling function
41
  #10)seed_number
42
  #11)nb_sample
43
  #12)step
44
  #13)constant
45
  #14)prop_minmax
46
  #15)dates_selected
47
  #
48
  #6 additional parameters for monthly climatology and more
49
  #16)list_models: model formulas in character vector
50
  #17)lst_avg: LST climatology name in the brick of covariate--change later
51
  #18)n_path
52
  #19)out_path
53
  #20)script_path: path to script
54
  #21)interpolation_method: c("gam_fusion","gam_CAI") #other otpions to be added later
55
  
56
  ###Loading R library and packages     
57
  
58
  library(gtools)                                         # loading some useful tools 
59
  library(mgcv)                                           # GAM package by Simon Wood
60
  library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
61
  library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
62
  library(rgdal)                               # GDAL wrapper for R, spatial utilities
63
  library(gstat)                               # Kriging and co-kriging by Pebesma et al.
64
  library(fields)                             # NCAR Spatial Interpolation methods such as kriging, splines
65
  library(raster)                              # Hijmans et al. package for raster processing
66
  library(rasterVis)
67
  library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
68
  library(reshape)
69
  library(plotrix)
70
  library(maptools)
71
  library(gdata) #Nesssary to use cbindX
72
  library(automap)  #autokrige function
73
  library(spgwr)   #GWR method
74
  
75
  ### Parameters and arguments
76
  #PARSING INPUTS/ARGUMENTS
77
#   
78
#   names(list_param_raster_prediction)<-c("list_param_data_prep",
79
#                                          "seed_number","nb_sample","step","constant","prop_minmax","dates_selected",
80
#                                          "list_models","lst_avg","in_path","out_path","script_path",
81
#                                          "interpolation_method")
82
  
83
  #9 parameters used in the data preparation stage and input in the current script
84
  list_param_data_prep<-list_param_raster_prediction$list_param_data_prep
85
  infile_monthly<-list_param_data_prep$infile_monthly
86
  infile_daily<-list_param_data_prep$infile_daily
87
  infile_locs<-list_param_data_prep$infile_locs
88
  infile_covariates<-list_param_data_prep$infile_covariates #raster covariate brick, tif file
89
  covar_names<- list_param_data_prep$covar_names #remove at a later stage...
90
  var<-list_param_data_prep$var
91
  out_prefix<-list_param_data_prep$out_prefix
92
  CRS_locs_WGS84<-list_param_data_prep$CRS_locs_WGS84
93

  
94
  #6 parameters for sampling function
95
  seed_number<-list_param_raster_prediction$seed_number
96
  nb_sample<-list_param_raster_prediction$nb_sample
97
  step<-list_param_raster_prediction$step
98
  constant<-list_param_raster_prediction$constant
99
  prop_minmax<-list_param_raster_prediction$prop_minmax
100
  dates_selected<-list_param_raster_prediction$dates_selected
101
  
102
  #6 additional parameters for monthly climatology and more
103
  list_models<-list_param_raster_prediction$list_models
104
  lst_avg<-list_param_raster_prediction$lst_avg
105
  out_path<-list_param_raster_prediction$out_path
106
  script_path<-list_param_raster_prediction$script_path
107
  interpolation_method<-list_param_raster_prediction$interpolation_method
108
  screen_data_training <-list_param_raster_prediction$screen_data_training
109
  
110
  setwd(out_path)
111
  
112
  ###################### START OF THE SCRIPT ########################
113
   
114
  if (var=="TMAX"){
115
    y_var_name<-"dailyTmax"                                       
116
  }
117
  
118
  if (var=="TMIN"){
119
    y_var_name<-"dailyTmin"                                       
120
  }
121
  
122
  ################# CREATE LOG FILE #####################
123
  
124
  #create log file to keep track of details such as processing times and parameters.
125
  
126
  #log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
127
  log_fname<-paste("R_log_raster_prediction",out_prefix, ".log",sep="")
128
  #sink(log_fname) #create new log file
129
  file.create(file.path(out_path,log_fname)) #create new log file
130
  
131
  time1<-proc.time()    #Start stop watch
132
  
133
  cat(paste("Starting script at this local Date and Time: ",as.character(Sys.time()),sep=""),
134
             file=log_fname,sep="\n")
135
  cat("Starting script process time:",file=log_fname,sep="\n",append=TRUE)
136
  cat(as.character(time1),file=log_fname,sep="\n",append=TRUE)    
137
  
138

  
139
  ############### READING INPUTS: DAILY STATION DATA AND OTEHR DATASETS  #################
140
  #infile_daily = gsub("/data/project/layers/commons/","/nobackup/aguzman4/climate/interp/testdata/",infile_daily)
141
  ghcn<-readOGR(dsn=dirname(infile_daily),layer=sub(".shp","",basename(infile_daily)))
142
  CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
143
  stat_loc<-readOGR(dsn=dirname(infile_locs),layer=sub(".shp","",basename(infile_locs)))
144
  #dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file
145
  if (dates_selected==""){
146
    dates<-as.character(sort(unique(ghcn$date))) #dates to be predicted 
147
  }
148
  if (dates_selected!=""){
149
    dates<-dates_selected #dates to be predicted 
150
  }
151
  
152

  
153
  #Reading in covariate brickcan be changed...
154
  
155
  s_raster<-brick(infile_covariates)                   #read in the data brck
156
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
157
    
158
  #Reading monthly data
159
  #Getting this name from command line
160
  dst<-readOGR(dsn=(infile_monthly),layer=sub(".shp","",basename(infile_monthly)))
161
  ########### CREATE SAMPLING -TRAINING AND TESTING STATIONS ###########
162
  #Input for sampling function...
163
  
164
  #dates #list of dates for prediction
165
  #ghcn_name<-"ghcn" #infile daily data 
166
  
167
  list_param_sampling<-list(seed_number,nb_sample,step,constant,prop_minmax,dates,ghcn)
168

  
169
  names(list_param_sampling)<-c("seed_number","nb_sample","step","constant","prop_minmax","dates","ghcn")
170
  
171
  #run function, note that dates must be a character vector!!
172
  sampling_obj<-sampling_training_testing(list_param_sampling)
173
  
174
  ########### PREDICT FOR MONTHLY SCALE  ##################
175
  #First predict at the monthly time scale: climatology
176
  cat("Predictions at monthly scale:",file=log_fname,sep="\n", append=TRUE)
177
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
178
      file=log_fname,sep="\n")
179
  t1<-proc.time()
180
  j=12
181

  
182
  if (interpolation_method=="gam_fusion"){
183
    list_param_runClim_KGFusion<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix,out_path)
184
    names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix","out_path")
185
    
186
    print("Monthly")
187
    clim_method_mod_obj<-mclapply(1:12, list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
188
    print("done")
189
   
190
    file2<-file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep=""))
191
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
192
    list_tmp<-vector("list",length(clim_method_mod_obj))
193
    for (i in 1:length(clim_method_mod_obj)){
194
      tmp<-clim_method_mod_obj[[i]]$clim
195
      list_tmp[[i]]<-tmp
196
    }
197
    clim_yearlist<-list_tmp
198
  }
199

  
200
  if (interpolation_method=="gam_CAI"){
201
    list_param_runClim_KGCAI<-list(j,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, out_prefix,out_path)
202
    names(list_param_runClim_KGCAI)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","out_prefix","out_path")
203
    clim_method_mod_obj<-mclapply(1:12, list_param=list_param_runClim_KGCAI, runClim_KGCAI,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
204
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
205
    list_tmp<-vector("list",length(clim_method_mod_obj))
206
    for (i in 1:length(clim_method_mod_obj)){
207
      tmp<-clim_method_mod_obj[[i]]$clim
208
      list_tmp[[i]]<-tmp
209
    }
210
    clim_yearlist<-list_tmp
211
  }
212
  t2<-proc.time()-t1
213
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
214

  
215
  ################## PREDICT AT DAILY TIME SCALE #################
216
  #Predict at daily time scale from single time scale or multiple time scale methods: 2 methods availabe now
217
  
218
  #put together list of clim models per month...
219
  #rast_clim_yearlist<-list_tmp
220
  
221
  #Second predict at the daily time scale: delta
222
  
223
  #method_mod_obj<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
224
  print("daily")
225
  cat("Predictions at the daily scale:",file=log_fname,sep="\n", append=TRUE)
226
  t1<-proc.time()
227
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
228
      file=log_fname,sep="\n")
229
  
230
  #TODO : Same call for all functions!!! Replace by one "if" for all multi time scale methods...
231
  if (interpolation_method=="gam_CAI" | interpolation_method=="gam_fusion"){
232
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
233
    i<-1
234
    list_param_run_prediction_daily_deviation <-list(i,clim_yearlist,sampling_obj,dst,var,y_var_name, interpolation_method,out_prefix,out_path)
235
    names(list_param_run_prediction_daily_deviation)<-c("list_index","clim_yearlist","sampling_obj","dst","var","y_var_name","interpolation_method","out_prefix","out_path")
236
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_daily_deviation,run_prediction_daily_deviation,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
237
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
238
   }
239
  #TODO : Same call for all functions!!! Replace by one "if" for all daily single time scale methods...
240
  if (interpolation_method=="gam_daily"){
241
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
242
    i<-1
243
    list_param_run_prediction_gam_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,screen_data_training,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
244
    names(list_param_run_prediction_gam_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","screen_data_training","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
245
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_gam_daily,runGAM_day_fun,mc.preschedule=FALSE,mc.cores = 12) #This is the end bracket from mclapply(...) statement
246
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
247
    
248
  }
249
  
250
  if (interpolation_method=="kriging_daily"){
251
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
252
    i<-1
253
    list_param_run_prediction_kriging_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
254
    names(list_param_run_prediction_kriging_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
255
    #test <- runKriging_day_fun(1,list_param_run_prediction_kriging_daily)
256
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 6) #This is the end bracket from mclapply(...) statement
257
    #method_mod_obj<-mclapply(1:18,list_param=list_param_run_prediction_kriging_daily,runKriging_day_fun,mc.preschedule=FALSE,mc.cores = 9) #This is the end bracket from mclapply(...) statement
258
    
259
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
260
    
261
  }
262
  
263
  if (interpolation_method=="gwr_daily"){
264
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
265
    i<-1
266
    list_param_run_prediction_gwr_daily <-list(i,s_raster,covar_names,lst_avg,list_models,dst,var,y_var_name, sampling_obj,interpolation_method,out_prefix,out_path)
267
    names(list_param_run_prediction_gwr_daily)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","var","y_var_name","sampling_obj","interpolation_method","out_prefix","out_path")
268
    #test <- run_interp_day_fun(1,list_param_run_prediction_gwr_daily)
269
    method_mod_obj<-mclapply(1:length(sampling_obj$ghcn_data_day),list_param=list_param_run_prediction_gwr_daily,run_interp_day_fun,mc.preschedule=FALSE,mc.cores = 12) #This is the end bracket from mclapply(...) statement
270
    
271
    save(method_mod_obj,file= file.path(out_path,paste("method_mod_obj_",interpolation_method,"_",y_var_name,out_prefix,".RData",sep="")))
272
    
273
  }
274
  t2<-proc.time()-t1
275
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
276
  #browser()
277
  
278
  ############### NOW RUN VALIDATION #########################
279
  #SIMPLIFY THIS PART: one call
280
  print("validation")
281
 
282
  list_tmp<-vector("list",length(method_mod_obj))
283
  for (i in 1:length(method_mod_obj)){
284
    tmp<-method_mod_obj[[i]][[y_var_name]]  #y_var_name is the variable predicted (dailyTmax or dailyTmin)
285
    list_tmp[[i]]<-tmp
286
  }
287
  rast_day_yearlist<-list_tmp #list of predicted images over full year... 
288

  
289
  cat("Validation step:",file=log_fname,sep="\n", append=TRUE)
290
  t1<-proc.time()
291
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
292
      file=log_fname,sep="\n")
293
  
294
  list_param_validation<-list(i,rast_day_yearlist,method_mod_obj,y_var_name, out_prefix, out_path)
295
  names(list_param_validation)<-c("list_index","rast_day_year_list","method_mod_obj","y_var_name","out_prefix", "out_path") #same names for any method
296
  
297
  validation_mod_obj <-mclapply(1:length(method_mod_obj), list_param=list_param_validation, calculate_accuracy_metrics,mc.preschedule=FALSE,mc.cores = 12) 
298
  #test_val<-calculate_accuracy_metrics(1,list_param_validation)
299
  save(validation_mod_obj,file= file.path(out_path,paste(interpolation_method,"_validation_mod_obj_",y_var_name,out_prefix,".RData",sep="")))
300
  t2<-proc.time()-t1
301
  cat(as.character(t2),file=log_fname,sep="\n", append=TRUE)
302
  
303
  #################### ASSESSMENT OF PREDICTIONS: PLOTS OF ACCURACY METRICS ###########
304
  ##Create data.frame with valiation metrics for a full year
305
  tb_diagnostic_v<-extract_from_list_obj(validation_mod_obj,"metrics_v")
306
  rownames(tb_diagnostic_v)<-NULL #remove row names
307
  
308
  #Call functions to create plots of metrics for validation dataset
309
  metric_names<-c("rmse","mae","me","r","m50")
310
  summary_metrics_v<- boxplot_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
311
  names(summary_metrics_v)<-c("avg","median")
312
  summary_month_metrics_v<- boxplot_month_from_tb(tb_diagnostic_v,metric_names,out_prefix,out_path)
313
  
314
  #################### CLOSE LOG FILE  ####################
315
  
316
  #close log_file connection and add meta data
317
  cat("Finished script process time:",file=log_fname,sep="\n", append=TRUE)
318
  time2<-proc.time()-time1
319
  cat(as.character(time2),file=log_fname,sep="\n", append=TRUE)
320
  #later on add all the parameters used in the script...
321
  cat(paste("Finished script at this local Date and Time: ",as.character(Sys.time()),sep=""),
322
             file=log_fname,sep="\n", append=TRUE)
323
  cat("End of script",file=log_fname,sep="\n", append=TRUE)
324
  
325
  ################### PREPARE RETURN OBJECT ###############
326
  #Will add more information to be returned
327
  if (interpolation_method=="gam_CAI" | interpolation_method=="gam_fusion"){
328
    raster_prediction_obj<-list(clim_method_mod_obj,method_mod_obj,validation_mod_obj,tb_diagnostic_v,
329
                                summary_metrics_v,summary_month_metrics_v)
330
    names(raster_prediction_obj)<-c("clim_method_mod_obj","method_mod_obj","validation_mod_obj","tb_diagnostic_v",
331
                                    "summary_metrics_v","summary_month_metrics_v")  
332
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
333
    
334
  }
335
  
336
  #use %in% instead of "|" operator
337
  if (interpolation_method=="gam_daily" | interpolation_method=="kriging_daily" | interpolation_method=="gwr_daily"){
338
    raster_prediction_obj<-list(method_mod_obj,validation_mod_obj,tb_diagnostic_v,
339
                                summary_metrics_v,summary_month_metrics_v)
340
    names(raster_prediction_obj)<-c("method_mod_obj","validation_mod_obj","tb_diagnostic_v",
341
                                    "summary_metrics_v","summary_month_metrics_v")  
342
    save(raster_prediction_obj,file= file.path(out_path,paste("raster_prediction_obj_",interpolation_method,"_", y_var_name,out_prefix,".RData",sep="")))
343
    
344
  }
345
  return(raster_prediction_obj)
346
}
347

  
348
####################################################################
349
######################## END OF SCRIPT/FUNCTION #####################
climate/research/world/interpolation/GAM_fusion_analysis_raster_prediction_multisampling_01062014.R
60 60
  #30)interp_method2: intepolation method for daily devation step
61 61
  #31)num_cores: How many cores to use
62 62
  #32)max_mem: Max memory to use for predict step
63
  #33)reg_outline: shapefile with region outline used to create nc output file
63 64
  
64 65
  ###Loading R library and packages     
65 66
  
......
132 133
  num_cores <- list_param_raster_prediction$num_cores
133 134
  max_mem<- as.numeric(list_param_raster_prediction$max_mem)
134 135
 
135
  rasterOptions(maxmemory=max_mem,timer=TRUE)
136
  
136
  #Get the region outline
137
  reg_outline<-list_param_raster_prediction$reg_outline
138

  
139
  #rasterOptions(maxmemory=max_mem,timer=TRUE,chunksize=1e+08)
140
  rasterOptions(timer=TRUE,chunksize=5e+05)  
141
  #rasterOptions(timer=TRUE)
142

  
137 143
  setwd(out_path)
138 144
  
139 145
  ###################### START OF THE SCRIPT ########################
......
164 170
  cat("Starting script process time:",file=log_fname,sep="\n",append=TRUE)
165 171
  cat(as.character(time1),file=log_fname,sep="\n",append=TRUE)    
166 172
  
173
  ############### Make nc file from outline ############
174
  
175

  
167 176
  ############### READING INPUTS: DAILY STATION DATA AND OTHER DATASETS  #################
177
  #Takes too long to read shapefiles. Let's try using saveRDS(object,*.rds) and readRDS()
178
  infile_daily_rds <-sub(".shp",".rds",infile_daily)
179
  if (file.exists(infile_daily_rds) == TRUE){
180
     ghcn<-readRDS(infile_daily_rds)
181
  }else{
182
    ghcn<-readOGR(dsn=dirname(infile_daily),layer=sub(".shp","",basename(infile_daily)))
183
    CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
184
    saveRDS(ghcn,infile_daily_rds)
185
  }
186

  
187
  infile_locs_rds<-sub(".shp",".rds",infile_locs)
188
  if (file.exists(infile_locs_rds) == TRUE){
189
     readRDS(infile_locs_rds) 
190
  }else{
191
    stat_loc<-readOGR(dsn=dirname(infile_locs),layer=sub(".shp","",basename(infile_locs)))
192
    saveRDS(stat_loc,infile_locs_rds)
193
  }
168 194
  
169
  ghcn<-readOGR(dsn=dirname(infile_daily),layer=sub(".shp","",basename(infile_daily)))
170
  CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
171
  stat_loc<-readOGR(dsn=dirname(infile_locs),layer=sub(".shp","",basename(infile_locs)))
172
  #dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file
195
  #dates2 <-readLines(file.path(in_path,dates_selected)) #dates to be predicted, now read directly from the file  
173 196
  
174 197
  #Should clean this up, reduce the number of if
175 198
  if (dates_selected==""){
......
188 211
  names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
189 212
    
190 213
  #Reading monthly data
191
  dst<-readOGR(dsn=dirname(infile_monthly),layer=sub(".shp","",basename(infile_monthly)))
192
    
214
  infile_monthly_rds<-sub(".shp",".rds",infile_monthly)
215
  if (file.exists(infile_monthly_rds) == TRUE) {
216
     dst<-readRDS(infile_monthly_rds)
217
  }else{
218
   dst<-readOGR(dsn=dirname(infile_monthly),layer=sub(".shp","",basename(infile_monthly)))
219
   saveRDS(dst,infile_monthly_rds) 
220
  }
221

  
193 222
  #construct date based on input end_year !!!
194 223
  day_tmp <- rep("15",length=nrow(dst))
195 224
  year_tmp <- rep(as.character(end_year),length=nrow(dst))
......
219 248
  sampling_month_obj <- sampling_training_testing(list_param_sampling)
220 249
  
221 250
  ########### PREDICT FOR MONTHLY SCALE  ##################
222
  
223 251
  #First predict at the monthly time scale: climatology
224 252
  cat("Predictions at monthly scale:",file=log_fname,sep="\n", append=TRUE)
225 253
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
226 254
      file=log_fname,sep="\n")
227 255
  t1<-proc.time()
228
  j=6 #12
256
  j=12
229 257
  
230 258
  #browser() #Missing out_path for gam_fusion list param!!!
231 259
  #if (interpolation_method=="gam_fusion"){
......
234 262
    names(list_param_runClim_KGFusion)<-c("list_index","covar_rast","covar_names","lst_avg","list_models","dst","sampling_month_obj","var","y_var_name","out_prefix","out_path")
235 263
    #debug(runClim_KGFusion)
236 264
    #test<-runClim_KGFusion(1,list_param=list_param_runClim_KGFusion)
265

  
237 266
    clim_method_mod_obj<-mclapply(1:length(sampling_month_obj$ghcn_data), list_param=list_param_runClim_KGFusion, runClim_KGFusion,mc.preschedule=FALSE,mc.cores = num_cores) #This is the end bracket from mclapply(...) statement
238
   
239 267
    
240 268
    save(clim_method_mod_obj,file= file.path(out_path,paste(interpolation_method,"_mod_",y_var_name,out_prefix,".RData",sep="")))
241 269
    #Use function to extract list
......
267 295
  
268 296
  #Getting rid of raster temp files
269 297
  removeTmpFiles(h=0)
270
  #quit()
271 298

  
272 299
  ################## PREDICT AT DAILY TIME SCALE #################
273 300
  #Predict at daily time scale from single time scale or multiple time scale methods: 2 methods availabe now
......
277 304
  #Second predict at the daily time scale: delta
278 305
  
279 306
  #method_mod_obj<-mclapply(1:1, runGAMFusion,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement
307

  
308
  #Set raster options for daily steps
309
  rasterOptions(timer=TRUE,chunksize=1e+05)
310

  
280 311
  cat("Predictions at the daily scale:",file=log_fname,sep="\n", append=TRUE)
281 312
  t1<-proc.time()
282 313
  cat(paste("Local Date and Time: ",as.character(Sys.time()),sep=""),
......
310 341
    
311 342
  }
312 343
  
344
  removeTmpFiles(h=0)
345

  
313 346
  #TODO : Same call for all functions!!! Replace by one "if" for all daily single time scale methods...
314 347
  if (interpolation_method=="gam_daily"){
315 348
    #input a list:note that ghcn.subsets is not sampling_obj$data_day_ghcn
......
484 517
    
485 518
  }
486 519
  
487
  print("stage 4 DONE")
488 520
  return(raster_prediction_obj)
489 521
}
490 522

  
climate/research/world/interpolation/GAM_fusion_function_multisampling.R
1
##################  Functions for use in the raster prediction stage   #######################################
2
############################ Interpolation in a given tile/region ##########################################
3
#This script contains 5 functions used in the interpolation of temperature in the specfied study/processing area:                             
4
# 1)predict_raster_model<-function(in_models,r_stack,out_filename)                                                             
5
# 2)fit_models<-function(list_formulas,data_training)           
6
# 3)runClim_KGCAI<-function(j,list_param) : function that peforms GAM CAI method
7
# 4)runClim_KGFusion<-function(j,list_param) function for monthly step (climatology) in the fusion method
8
# 5)runGAMFusion <- function(i,list_param) : daily step for fusion method, perform daily prediction
9
#
10
#AUTHOR: Benoit Parmentier                                                                       
11
#DATE: 05/07/2013                                                                                 
12
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--   
13

  
14
##Comments and TODO:
15
#This script is meant to be for general processing tile by tile or region by region.
16
# Note that the functions are called from GAM_fusion_analysis_raster_prediction_mutlisampling.R.
17
# This will be expanded to other methods.
18
##################################################################################################
19

  
20

  
21
predict_raster_model<-function(in_models,r_stack,out_filename){
22
  #This functions performs predictions on a raster grid given input models.
23
  #Arguments: list of fitted models, raster stack of covariates
24
  #Output: spatial grid data frame of the subset of tiles
25
  list_rast_pred<-vector("list",length(in_models))
26
  for (i in 1:length(in_models)){
27
    mod <-in_models[[i]] #accessing GAM model ojbect "j"
28
    raster_name<-out_filename[[i]]
29
    if (inherits(mod,"gam")) {           #change to c("gam","autoKrige")
30
      raster_pred<- predict(object=r_stack,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
31
      names(raster_pred)<-"y_pred"  
32
      writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
33
      list_rast_pred[[i]]<-raster_name
34
    }
35
  }
36
  if (inherits(mod,"try-error")) {
37
    print(paste("no gam model fitted:",mod[1],sep=" ")) #change message for any model type...
38
  }
39
  return(list_rast_pred)
40
}
41

  
42
fit_models<-function(list_formulas,data_training){
43
  #This functions several models and returns model objects.
44
  #Arguments: - list of formulas for GAM models
45
  #           - fitting data in a data.frame or SpatialPointDataFrame
46
  #Output: list of model objects 
47
  list_fitted_models<-vector("list",length(list_formulas))
48
  for (k in 1:length(list_formulas)){
49
    formula<-list_formulas[[k]]
50
    mod<- try(gam(formula, data=data_training,k=5)) #change to any model!!
51
    #mod<- try(autoKrige(formula, input_data=data_s,new_data=s_sgdf,data_variogram=data_s))
52
    model_name<-paste("mod",k,sep="")
53
    assign(model_name,mod) 
54
    list_fitted_models[[k]]<-mod
55
  }
56
  return(list_fitted_models) 
57
}
58

  
59
####
60
#TODO:
61
#Add log file and calculate time and sizes for processes-outputs
62
runClim_KGCAI <-function(j,list_param){
63

  
64
  #Make this a function with multiple argument that can be used by mcmapply??
65
  #Arguments: 
66
  #1)list_index: j 
67
  #2)covar_rast: covariates raster images used in the modeling
68
  #3)covar_names: names of input variables 
69
  #4)lst_avg: list of LST climatogy names, may be removed later on
70
  #5)list_models: list input models for bias calculation
71
  #6)dst: data at the monthly time scale
72
  #7)var: TMAX or TMIN, variable being interpolated
73
  #8)y_var_name: output name, not used at this stage
74
  #9)out_prefix
75
  #10) out_path
76
  
77
  #The output is a list of four shapefile names produced by the function:
78
  #1) clim: list of output names for raster climatogies 
79
  #2) data_month: monthly training data for bias surface modeling
80
  #3) mod: list of model objects fitted 
81
  #4) formulas: list of formulas used in bias modeling
82
    
83
  ### PARSING INPUT ARGUMENTS
84
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
85
    
86
  index<-list_param$j
87
  s_raster<-list_param$covar_rast
88
  covar_names<-list_param$covar_names
89
  lst_avg<-list_param$lst_avg
90
  list_models<-list_param$list_models
91
  dst<-list_param$dst #monthly station dataset
92
  var<-list_param$var
93
  y_var_name<-list_param$y_var_name
94
  out_prefix<-list_param$out_prefix
95
  out_path<-list_param$out_path
96
  
97
  #Model and response variable can be changed without affecting the script
98
  prop_month<-0 #proportion retained for validation
99
  run_samp<-1
100
  
101
  #### STEP 2: PREPARE DATA
102
    
103
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
104
  LST_name<-lst_avg[j] # name of LST month to be matched
105
  data_month$LST<-data_month[[LST_name]]
106
  
107
  #Create formulas object from list of characters...
108
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!  
109

  
110
  #TMax to model...
111
  if (var=="TMAX"){   
112
    data_month$y_var<-data_month$TMax #Adding TMax as the variable modeled
113
  }
114
  if (var=="TMIN"){   
115
    data_month$y_var<-data_month$TMin #Adding TMin as the variable modeled
116
  }
117
  #Fit gam models using data and list of formulas
118
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
119
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
120
  names(mod_list)<-cname
121
  #Adding layer LST to the raster stack  
122
  
123
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
124
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
125
  LST<-subset(s_raster,LST_name)
126
  names(LST)<-"LST"
127
  s_raster<-addLayer(s_raster,LST)            #Adding current month
128
  
129
  #Now generate file names for the predictions...
130
  list_out_filename<-vector("list",length(mod_list))
131
  names(list_out_filename)<-cname  
132
  
133
  for (k in 1:length(list_out_filename)){
134
    #j indicate which month is predicted
135
    data_name<-paste(var,"_clim_month_",j,"_",cname[k],"_",prop_month,
136
                     "_",run_samp,sep="")
137
    raster_name<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
138
    list_out_filename[[k]]<-raster_name
139
  }
140
  
141
  #now predict values for raster image...
142
  rast_clim_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
143
  names(rast_clim_list)<-cname
144
  #Some models will not be predicted because of the lack of training data...remove empty string from list of models
145
  rast_clim_list<-rast_clim_list[!sapply(rast_clim_list,is.null)] #remove NULL elements in list
146
  
147
  #Adding Kriging for Climatology options
148
  
149
  clim_xy<-coordinates(data_month)
150
  fitclim<-Krig(clim_xy,data_month$y_var,theta=1e5) #use TPS or krige 
151
  #fitclim<-Krig(clim_xy,data_month$TMax,theta=1e5) #use TPS or krige 
152
  mod_krtmp1<-fitclim
153
  model_name<-"mod_kr"
154
  
155
  clim_rast<-interpolate(LST,fitclim) #interpolation using function from raster package
156
  #Saving kriged surface in raster images
157
  #data_name<-paste("clim_month_",j,"_",model_name,"_",prop_month,
158
  #                 "_",run_samp,sep="")
159
  #raster_name_clim<-paste("fusion_",data_name,out_prefix,".tif", sep="")
160
  #writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
161
  
162
  #now climatology layer
163
  #clim_rast<-LST-bias_rast
164
  data_name<-paste(var,"_clim_month_",j,"_",model_name,"_",prop_month,
165
                   "_",run_samp,sep="")
166
  raster_name_clim<-file.path(out_path,paste("CAI_",data_name,out_prefix,".tif", sep=""))
167
  writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
168
  
169
  #Adding to current objects
170
  mod_list[[model_name]]<-mod_krtmp1
171
  #rast_bias_list[[model_name]]<-raster_name_bias
172
  rast_clim_list[[model_name]]<-raster_name_clim
173
  
174
  #Prepare object to return
175
  clim_obj<-list(rast_clim_list,data_month,mod_list,list_formulas)
176
  names(clim_obj)<-c("clim","data_month","mod","formulas")
177
  
178
  save(clim_obj,file= file.path(out_path,paste("clim_obj_CAI_month_",j,"_",var,"_",out_prefix,".RData",sep="")))
179
  
180
  return(clim_obj) 
181
}
182
#
183

  
184
runClim_KGFusion<-function(j,list_param){
185
  
186
  #Make this a function with multiple argument that can be used by mcmapply??
187
  #Arguments: 
188
  #1)list_index: j 
189
  #2)covar_rast: covariates raster images used in the modeling
190
  #3)covar_names: names of input variables 
191
  #4)lst_avg: list of LST climatogy names, may be removed later on
192
  #5)list_models: list input models for bias calculation
193
  #6)dst: data at the monthly time scale
194
  #7)var: TMAX or TMIN, variable being interpolated
195
  #8)y_var_name: output name, not used at this stage
196
  #9)out_prefix
197
  #
198
  #The output is a list of four shapefile names produced by the function:
199
  #1) clim: list of output names for raster climatogies 
200
  #2) data_month: monthly training data for bias surface modeling
201
  #3) mod: list of model objects fitted 
202
  #4) formulas: list of formulas used in bias modeling
203
  
204
  ### PARSING INPUT ARGUMENTS
205
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
206
 
207
  options(error=function()traceback(2))
208
 
209
  index<-list_param$j
210
  s_raster<-list_param$covar_rast
211
  covar_names<-list_param$covar_names
212
  lst_avg<-list_param$lst_avg
213
  list_models<-list_param$list_models
214
  dst<-list_param$dst #monthly station dataset
215
  var<-list_param$var
216
  y_var_name<-list_param$y_var_name
217
  out_prefix<-list_param$out_prefix
218
  out_path<-list_param$out_path
219

  
220
  #Model and response variable can be changed without affecting the script
221
  prop_month<-0 #proportion retained for validation
222
  run_samp<-1 #This option can be added later on if/when neeeded
223
  
224
  #### STEP 2: PREPARE DATA
225
  
226
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
227
  LST_name<-lst_avg[j] # name of LST month to be matched
228
  data_month$LST<-data_month[[LST_name]]
229
  
230
  #Adding layer LST to the raster stack 
231
  covar_rast<-s_raster
232
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
233

  
234
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
235
  LST<-subset(s_raster,LST_name)
236
  names(LST)<-"LST"
237
  s_raster<-addLayer(s_raster,LST)            #Adding current month
238
 
239
  #LST bias to model...
240
  if (var=="TMAX"){
241
    data_month$LSTD_bias<-data_month$LST-data_month$TMax
242
    data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
243
  }
244
  if (var=="TMIN"){
245
    data_month$LSTD_bias<-data_month$LST-data_month$TMin
246
    data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
247
  }
248
  
249
  #### STEP3:  NOW FIT AND PREDICT  MODEL
250

  
251
  list_formulas<-lapply(list_models,as.formula,env=.GlobalEnv) #mulitple arguments passed to lapply!!
252

  
253
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
254
  cname<-paste("mod",1:length(mod_list),sep="") #change to more meaningful name?
255
  names(mod_list)<-cname
256

  
257
  #Now generate file names for the predictions...
258
  list_out_filename<-vector("list",length(mod_list))
259
  names(list_out_filename)<-cname  
260

  
261
  for (k in 1:length(list_out_filename)){
262
    #j indicate which month is predicted, var indicates TMIN or TMAX
263
    data_name<-paste(var,"_bias_LST_month_",j,"_",cname[k],"_",prop_month,
264
                     "_",run_samp,sep="")
265
    raster_name<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
266
    list_out_filename[[k]]<-raster_name
267
  }
268

  
269
  print("predict raster model")
270
  #now predict values for raster image...by providing fitted model list, raster brick and list of output file names
271
  rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
272
  print("done")
273
  names(rast_bias_list)<-cname
274
  #Some modles will not be predicted...remove them
275
  rast_bias_list<-rast_bias_list[!sapply(rast_bias_list,is.null)] #remove NULL elements in list
276

  
277
  mod_rast<-stack(rast_bias_list)  #stack of bias raster images from models
278
  rast_clim_list<-vector("list",nlayers(mod_rast))
279
  names(rast_clim_list)<-names(rast_bias_list)
280
  for (k in 1:nlayers(mod_rast)){
281
    clim_fus_rast<-LST-subset(mod_rast,k)
282
    data_name<-paste(var,"_clim_LST_month_",j,"_",names(rast_clim_list)[k],"_",prop_month,
283
                     "_",run_samp,sep="")
284
    raster_name<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
285
    rast_clim_list[[k]]<-raster_name
286
    writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)  #Wri
287
  }
288
  
289
  #### STEP 4:Adding Kriging for Climatology options
290
  bias_xy<-coordinates(data_month)
291
  #fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige 
292
  fitbias<-try(Krig(bias_xy,data_month$LSTD_bias,theta=1e5)) #use TPS or krige 
293
 
294
  model_name<-"mod_kr"
295

  
296
  if (inherits(fitbias,"Krig")){
297
    
298
    #Saving kriged surface in raster images
299
    bias_rast<-bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
300
    data_name<-paste(var,"_bias_LST_month_",j,"_",model_name,"_",prop_month,
301
                     "_",run_samp,sep="")
302
    raster_name_bias<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
303
    writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
304
    
305
    #now climatology layer
306
    clim_rast<-LST-bias_rast
307
    data_name<-paste(var,"_clim_LST_month_",j,"_",model_name,"_",prop_month,
308
                     "_",run_samp,sep="")
309
    raster_name_clim<-file.path(out_path,paste("fusion_",data_name,out_prefix,".tif", sep=""))
310
    writeRaster(clim_rast, filename=raster_name_clim,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
311
    #Adding to current objects
312
    mod_list[[model_name]]<-fitbias
313
    rast_bias_list[[model_name]]<-raster_name_bias
314
    rast_clim_list[[model_name]]<-raster_name_clim
315
  }
316
  
317
  if (inherits(fitbias,"try-error")){
318
    print("Error with FitBias")
319
    #NEED TO DEAL WITH THIS!!!
320
    
321
    #Adding to current objects
322
    mod_list[[model_name]]<-NULL
323
    rast_bias_list[[model_name]]<-NULL
324
    rast_clim_list[[model_name]]<-NULL
325
  }
326

  
327
  
328
  #### STEP 5: Prepare object and return
329
  clim_obj<-list(rast_bias_list,rast_clim_list,data_month,mod_list,list_formulas)
330
  names(clim_obj)<-c("bias","clim","data_month","mod","formulas")
331
 
332
  save(clim_obj,file= file.path(out_path,paste("clim_obj_month_",j,"_",var,"_",out_prefix,".RData",sep="")))
333
  return(clim_obj)
334
}
335

  
336
## Run function for kriging...?
337

  
338
#runGAMFusion <- function(i,list_param) {            # loop over dates
339
run_prediction_daily_deviation <- function(i,list_param) {            # loop over dates
340
  #This function produce daily prediction using monthly predicted clim surface.
341
  #The output is both daily prediction and daily deviation from monthly steps.
342
  
343
  #### Change this to allow explicitly arguments...
344
  #Arguments: 
345
  #1)index: loop list index for individual run/fit
346
  #2)clim_year_list: list of climatology files for all models...(12*nb of models)
347
  #3)sampling_obj: contains, data per date/fit, sampling information
348
  #4)dst: data at the monthly time scale
349
  #5)var: variable predicted -TMAX or TMIN
350
  #6)y_var_name: name of the variable predicted - dailyTMax, dailyTMin
351
  #7)out_prefix
352
  #8)out_path
353
  #
354
  #The output is a list of four shapefile names produced by the function:
355
  #1) list_temp: y_var_name
356
  #2) rast_clim_list: list of files for temperature climatology predictions
357
  #3) delta: list of files for temperature delta predictions
358
  #4) data_s: training data
359
  #5) data_v: testing data
360
  #6) sampling_dat: sampling information for the current prediction (date,proportion of holdout and sample number)
361
  #7) mod_kr: kriging delta fit, field package model object
362
  
363
  ### PARSING INPUT ARGUMENTS
364
 
365
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
366
  rast_clim_yearlist<-list_param$clim_yearlist
367
  sampling_obj<-list_param$sampling_obj
368
  ghcn.subsets<-sampling_obj$ghcn_data_day
369
  sampling_dat <- sampling_obj$sampling_dat
370
  sampling <- sampling_obj$sampling_index
371
  var<-list_param$var
372
  y_var_name<-list_param$y_var_name
373
  out_prefix<-list_param$out_prefix
374
  dst<-list_param$dst #monthly station dataset
375
  out_path <-list_param$out_path
376
  
377
  ##########
378
  # STEP 1 - Read in information and get traing and testing stations
379
  ############# 
380
  
381
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
382
  month<-strftime(date, "%m")          # current month of the date being processed
383
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
384
  proj_str<-proj4string(dst) #get the local projection information from monthly data
385

  
386
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
387
  data_day<-ghcn.subsets[[i]]
388
  mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
389
  data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset
390
  dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset
391

  
392
  ind.training<-sampling[[i]]
393
  ind.testing <- setdiff(1:nrow(data_day), ind.training)
394
  data_s <- data_day[ind.training, ]   #Training dataset currently used in the modeling
395
  data_v <- data_day[ind.testing, ]    #Testing/validation dataset using input sampling
396
  
397
  ns<-nrow(data_s)
398
  nv<-nrow(data_v)
399
  #i=1
400
  date_proc<-sampling_dat$date[i]
401
  date_proc<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
402
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
403
  day<-as.integer(strftime(date_proc, "%d"))
404
  year<-as.integer(strftime(date_proc, "%Y"))
405
  
406
  ##########
407
  # STEP 2 - JOIN DAILY AND MONTHLY STATION INFORMATION
408
  ##########
409

  
410
  modst<-dst[dst$month==mo,] #Subsetting dataset for the relevant month of the date being processed
411

  
412
  if (var=="TMIN"){
413
    modst$LSTD_bias <- modst$LST-modst$TMin; #That is the difference between the monthly LST mean and monthly station mean
414
  }
415
  if (var=="TMAX"){
416
    modst$LSTD_bias <- modst$LST-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean    
417
  }
418
  #This may be unnecessary since LSTD_bias is already in dst?? check the info
419
  #Some loss of observations: LSTD_bias for January has only 56 out of 66 possible TMIN!!! We may need to look into this issue
420
  #to avoid some losses of station data...
421
  
422
  #Clearn out this part: make this a function call
423
  x<-as.data.frame(data_v)
424
  d<-as.data.frame(data_s)
425
  for (j in 1:nrow(x)){
426
    if (x$value[j]== -999.9){
427
      x$value[j]<-NA
428
    }
429
  }
430
  for (j in 1:nrow(d)){
431
    if (d$value[j]== -999.9){
432
      d$value[j]<-NA
433
    }
434
  }
435
 
436
  d[1] <- NULL
437
  x[1] <- NULL
438
 
439
  pos<-match("value",names(d)) #Find column with name "value"
440
  #names(d)[pos]<-c("dailyTmax")
441
  names(d)[pos]<-y_var_name
442
  pos<-match("value",names(x)) #Find column with name "value"
443
  names(x)[pos]<-y_var_name
444
  pos<-match("station",names(d)) #Find column with station ID
445
  names(d)[pos]<-c("id")
446
  pos<-match("station",names(x)) #Find column with name station ID
447
  names(x)[pos]<-c("id")
448
  pos<-match("station",names(modst)) #Find column with name station ID
449
  names(modst)[pos]<-c("id")       #modst contains the average tmax per month for every stations...
450
  
451

  
452
  mergeTry<-try(dmoday <-merge(modst,d,by="id",suffixes=c("",".y2")))
453
  
454

  
455
  
456
  xmoday <-merge(modst,x,by="id",suffixes=c("",".y2"))
457
 
458
  mod_pat<-glob2rx("*.y2")   #remove duplicate columns that have ".y2" in their names
459
  var_pat<-grep(mod_pat,names(dmoday),value=FALSE) # using grep with "value" extracts the matching names
460
  dmoday<-dmoday[,-var_pat] #dropping relevant columns
461

  
462
  mod_pat<-glob2rx("*.y2")   
463
  var_pat<-grep(mod_pat,names(xmoday),value=FALSE) # using grep with "value" extracts the matching names
464
  xmoday<-xmoday[,-var_pat] #Removing duplicate columns
465
  data_v<-xmoday
466
  
467
  #dmoday contains the daily tmax values for training with TMax/TMin being the monthly station tmax/tmin mean
468
  #xmoday contains the daily tmax values for validation with TMax/TMin being the monthly station tmax/tmin mean
469

  
470
  ##########
471
  # STEP 3 - interpolate daily delta across space
472
  ##########
473
  #Change to take into account TMin and TMax
474
  if (var=="TMIN"){
475
    daily_delta<-dmoday$dailyTmin-dmoday$TMin #daily detl is the difference between monthly and daily temperatures
476
  }
477
  if (var=="TMAX"){
478
    daily_delta<-dmoday$dailyTmax-dmoday$TMax
479
  }
480

  
481
  daily_delta_xy<-as.matrix(cbind(dmoday$x,dmoday$y))
482
  fitdelta<-Krig(daily_delta_xy,daily_delta,theta=1e5) #use TPS or krige
483
  mod_krtmp2<-fitdelta
484
  model_name<-paste("mod_kr","day",sep="_")
485
  data_s<-dmoday #put the 
486
  data_s$daily_delta<-daily_delta
487
  
488
  #########
489
  # STEP 4 - Calculate daily predictions - T(day) = clim(month) + delta(day)
490
  #########
491

  
492
  rast_clim_list<-rast_clim_yearlist[[mo]]  #select relevant month
493
  rast_clim_month<-raster(rast_clim_list[[1]])
494
  
495
  daily_delta_rast<-interpolate(rast_clim_month,fitdelta) #Interpolation of the bias surface...
496
  
497
  #Saving kriged surface in raster images
498
  data_name<-paste("daily_delta_",y_var_name,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
499
                   "_",sampling_dat$run_samp[i],sep="")
500
  raster_name_delta<-file.path(out_path,paste(interpolation_method,"_",var,"_",data_name,out_prefix,".tif", sep=""))
501
  writeRaster(daily_delta_rast, filename=raster_name_delta,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
502
  
503
  #Now predict daily after having selected the relevant month
504
  temp_list<-vector("list",length(rast_clim_list))  
505
  for (j in 1:length(rast_clim_list)){
506
    rast_clim_month<-raster(rast_clim_list[[j]])
507
    temp_predicted<-rast_clim_month+daily_delta_rast
508
    
509
    data_name<-paste(y_var_name,"_predicted_",names(rast_clim_list)[j],"_",
510
                     sampling_dat$date[i],"_",sampling_dat$prop[i],
511
                     "_",sampling_dat$run_samp[i],sep="")
512
    raster_name<-file.path(out_path,paste(interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
513
    writeRaster(temp_predicted, filename=raster_name,overwrite=TRUE) 
514
    temp_list[[j]]<-raster_name
515
  }
516
  
517
  ##########
518
  # STEP 5 - Prepare output object to return
519
  ##########
520
  
521
 
522
  mod_krtmp2<-fitdelta
523
  model_name<-paste("mod_kr","day",sep="_")
524
  names(temp_list)<-names(rast_clim_list)
525

  
526
  
527
  delta_obj<-list(temp_list,rast_clim_list,raster_name_delta,data_s,
528
                  data_v,sampling_dat[i,],mod_krtmp2)
529
  
530
  obj_names<-c(y_var_name,"clim","delta","data_s","data_v",
531
               "sampling_dat",model_name)
532
  names(delta_obj)<-obj_names 
533

  
534
  save(delta_obj,file= file.path(out_path,paste("delta_obj_",var,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
535
                                "_",sampling_dat$run_samp[i],out_prefix,".RData",sep="")))
536
  return(delta_obj)
537
  
538
}
539
 
climate/research/world/interpolation/GAM_fusion_function_multisampling_01062014.R
319 319
  
320 320
  ### PARSING INPUT ARGUMENTS
321 321
  #list_param_runGAMFusion<-list(i,clim_yearlist,sampling_obj,var,y_var_name, out_prefix)
322
  
322

  
323 323
  index<-list_param$j
324 324
  s_raster<-list_param$covar_rast
325

  
325 326
  covar_names<-list_param$covar_names
326 327
  lst_avg<-list_param$lst_avg
327 328
  list_models<-list_param$list_models
......
442 443
  
443 444
  ## Select the relevant method...
444 445
  
445
  if (interpolation_method=="gam_fusion"){
446
  if (interpolation_method== "gam_fusion"){
446 447
    #First fitting
447 448
    mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
449
    
448 450
    names(mod_list)<-cname
449
  
450 451
    #Second predict values for raster image...by providing fitted model list, raster brick and list of output file names
451 452
    rast_bias_list<-predict_raster_model(mod_list,s_raster,list_out_filename)
452 453
    names(rast_bias_list)<-cname
453
  }
454
  
454
  } 
455 455
  if (interpolation_method %in% c("kriging_fusion","gwr_fusion")){
456 456
    if(interpolation_method=="kriging_fusion"){
457 457
      method_interp <- "kriging"
......
466 466
    rast_bias_list <-month_prediction_obj$list_rast_pred
467 467
    names(rast_bias_list)<-cname
468 468
  }
469
  
469

  
470 470
  #Some modles will not be predicted...remove them
471 471
  rast_bias_list<-rast_bias_list[!sapply(rast_bias_list,is.null)] #remove NULL elements in list
472 472

  
......
474 474
 
475 475
  rast_clim_list<-vector("list",nlayers(mod_rast))
476 476
  
477

  
478 477
  names(rast_clim_list)<-names(rast_bias_list)
479 478

  
480 479
  for (k in 1:nlayers(mod_rast)){
......
485 484
    rast_clim_list[[k]]<-raster_name
486 485
    writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)  #Wri
487 486
  }
488
  
487

  
489 488
  #### STEP 4:Adding Kriging for Climatology options
490 489
  bias_xy<-coordinates(data_month)
491 490
  #fitbias<-Krig(bias_xy,data_month$LSTD_bias,theta=1e5) #use TPS or krige 
492 491
  fitbias<-try(Krig(bias_xy,data_month$LSTD_bias,theta=1e5)) #use TPS or krige 
493 492

  
494 493
  model_name<-"mod_kr"
495

  
494
 
496 495
  if (inherits(fitbias,"Krig")){
497 496
    #Saving kriged surface in raster images
498 497
    bias_rast<-bias_rast<-interpolate(LST,fitbias) #interpolation using function from raster package
499 498
    data_name<-paste(var,"_bias_LST_month_",as.integer(month_no),"_",model_name,"_",prop_month,
500 499
                     "_",run_samp,sep="")
501 500
    raster_name_bias<-file.path(out_path,paste("fusion_",interpolation_method,"_",data_name,out_prefix,".tif", sep=""))
502
    writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
503
    
501
    writeRaster(bias_rast, filename=raster_name_bias,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)   
502

  
504 503
    #now climatology layer
505 504
    clim_rast<-LST-bias_rast
506 505
    data_name<-paste(var,"_clim_LST_month_",as.integer(month_no),"_",model_name,"_",prop_month,
......
594 593
  ############# 
595 594
  
596 595
  #use index_d and index_m
597
  
596

  
597
  print(paste("Processing day",i))  
598

  
598 599
  date<-strptime(daily_dev_sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
599 600
  month<-strftime(date, "%m")          # current month of the date being processed
600 601
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
climate/research/world/interpolation/climatologyScripts/mosaicByMonth.py
15 15
  parser = OptionParser(usage=usage)
16 16
  parser.add_option("-m","--months",dest="months",default='1,2,3,4,5,6,7,8,9,10,11,12',
17 17
      help="Comma separated list of months to process (1,2,3...12),default all")
18
  parser.add_option("-d","--daynight",dest="dn",default=0,
19
      help="Use 0 for both(default),1 for day only,2 for night only")
18
  parser.add_option("-d","--daynight",dest="dn",default=1,
19
      help="Use 0 for both,1 for day only(default),2 for night only")
20
  parser.add_option("-l","--lowRes",dest="lowRes",default=True,
21
      help="Create low res version 10 arc-min")
20 22
  opts,args=parser.parse_args()
21 23
  if len(args)<2:
22 24
     sys.exit(parser.print_help())
......
24 26
  inDir = args[0]
25 27
  outDir = args[1]
26 28

  
29
  lowRes = opts.lowRes
27 30
  months = opts.months.split(',')
28
 
31

  
29 32
  if int(opts.dn) == 0:
30 33
     dayNight = ["Day","Night"]
31 34
  elif int(opts.dn) == 1:
......
44 47
     sys.exit(1)
45 48
  if os.path.exists(outDir) is False:
46 49
     print "Warning: %s directory doesn't exists, creating directory" % outDir
47
  
50
     os.mkdir(outDir)  
51

  
48 52
  layers = ["mean","nobs","qa"]
49 53

  
50 54
  print "Processing months %s in directory %s" % (opts.months,inDir)
......
73 77
              fPtr.write(f+"\n")
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff