Project

General

Profile

« Previous | Next » 

Revision 972ac6af

Added by Benoit Parmentier over 11 years ago

GAM Fusion raster prediction, cleaning out of script, reduction of arguments in preparation for function additions

View differences:

climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R
1
##################    MULTI SAMPLING GAM FUSION METHOD ASSESSMENT ####################################
1
##################    MULTI SAMPLING GAM FUSION METHOD ASSESSMENT   ####################################
2 2
############################ Merging LST and station data ##########################################
3 3
#This script interpolates tmax values using MODIS LST and GHCND station data                      
4 4
#interpolation area. It requires the text file of stations and a shape file of the study area.           
......
33 33
### Parameters and argument
34 34

  
35 35
infile2<-"list_365_dates_04212012.txt"
36

  
37
## output param from previous script: Database_stations_covariates_processing_function
36 38
infile_monthly<-"monthly_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp"
37 39
infile_daily<-"daily_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp"
38 40
infile_locs<-"stations_venezuela_region_y2010_2010_VE_02082013.shp"
39 41
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script
42
var<-"TMAX"
43
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013"                #User defined output prefix
44
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84: same as earlier
45
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
46
#infile_monthly<-"monthly_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" #outile4 from database_covar script
47
#infile_daily<-"daily_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp"  #outfile3 from database_covar script
48
#infile_locs<-"stations_venezuela_region_y2010_2010_VE_02082013.shp" #outfile2? from database covar script
40 49

  
41
in_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data"
42
out_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data"
43
script_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/"
44
setwd(in_path)
50
###
45 51

  
46
y_var_name<-"dailyTmax"                                       
47
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013"                #User defined output prefix
52
if (var=="TMAX"){
53
  y_var_name<-"dailyTmax"                                       
54
}
55
if (var=="TMIN"){
56
  y_var_name<-"dailyTmin"                                       
57
}
58

  
59
#Input for sampling function...
48 60
seed_number<- 100  #if seed zero then no seed?     
49 61
nb_sample<-1           #number of time random sampling must be repeated for every hold out proportion
50 62
prop_min<-0.3          #if prop_min=prop_max and step=0 then predicitons are done for the number of dates...
51 63
prop_max<-0.3
52 64
step<-0         
53 65
constant<-0             #if value 1 then use the same samples as date one for the all set of dates
54
#projection used in the interpolation of the study area: should be read directly from the outline of the study area
55
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
56
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84
57 66

  
58 67
#Models to run...this can be change for each run
59 68
list_models<-c("y_var ~ s(elev_1)",
......
66 75
               "y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", 
67 76
               "y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)")
68 77

  
78
#Choose interpolation method...
79
interpolation_method<-c("gam_fusion","gam_CAI") #other otpions to be added later
80

  
69 81
#Default name of LST avg to be matched               
70 82
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")  
71 83

  
84
in_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data"
85
#Create on the fly output folder...
86
out_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data"
87
script_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/"
88
setwd(in_path)
89

  
72 90
source(file.path(script_path,"GAM_fusion_function_multisampling_02262013.R"))
73 91
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02262013.R"))
74 92

  
......
98 116

  
99 117
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",infile_daily))
100 118
CRS_interp<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
101

  
102 119
stat_loc<-readOGR(dsn=in_path,layer=sub(".shp","",infile_locs))
103

  
104
data3<-readOGR(dsn=in_path,layer=sub(".shp","",infile_monthly))
105

  
106
#Remove NA for LC and CANHEIGHT: Need to check this part after
107
ghcn$LC1[is.na(ghcn$LC1)]<-0
108
ghcn$LC3[is.na(ghcn$LC3)]<-0
109
ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0
110
ghcn$LC4[is.na(ghcn$LC4)]<-0
111
ghcn$LC6[is.na(ghcn$LC6)]<-0
112

  
113 120
dates <-readLines(file.path(in_path,infile2)) #dates to be predicted
114 121

  
115
##Extracting the variables values from the raster files                                             
116

  
117
#The names of covariates can be changed...
122
#Reading of covariate brick covariates can be changed...
118 123
rnames <-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC")
119 124
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12")
120 125
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12",
121 126
                    "nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
122
                    "nobs_09","nobs_10","nobs_11","nobs_12")
123
                  
124
covar_names<-c(rnames,lc_names,lst_names)
125
                  
126
s_raster<-stack(infile3)                   #read in the data stack
127
                    "nobs_09","nobs_10","nobs_11","nobs_12")                  
128
covar_names<-c(rnames,lc_names,lst_names)                
129
s_raster<-brick(infile3)                   #read in the data brck
127 130
names(s_raster)<-covar_names               #Assigning names to the raster layers: making sure it is included in the extraction
128 131

  
129
#Deal with no data value and zero      
130
#pos<-match("LC1",layerNames(s_raster)) #Find column with name "value"
131
#LC1<-raster(s_raster,layer=pos)             #Select layer from stack
132
#s_raster<-dropLayer(s_raster,pos)
133
#LC1[is.na(LC1)]<-0
134

  
135
#pos<-match("LC3",layerNames(s_raster)) #Find column with name "value"
136
#LC3<-raster(s_raster,layer=pos)             #Select layer from stack
137
#s_raster<-dropLayer(s_raster,pos)
138
#LC3[is.na(LC3)]<-0
139

  
140
#pos<-match("CANHEIGHT",layerNames(s_raster)) #Find column with name "value"
141
#CANHEIGHT<-raster(s_raster,layer=pos)             #Select layer from stack
142
#s_raster<-dropLayer(s_raster,pos)
143
#CANHEIGHT[is.na(CANHEIGHT)]<-0
144
#pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM"
145
#ELEV_SRTM<-raster(s_raster,layer=pos)             #Select layer from stack on 10/30
146
#s_raster<-dropLayer(s_raster,pos)
147
#ELEV_SRTM[ELEV_SRTM <0]<-NA
148

  
149
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame
150

  
151
######### Preparing daily and monthly values for training and testing
152
                  
153
#Screening for daily bad values: value is tmax in this case
154
#ghcn$value<-as.numeric(ghcn$value)
155
#ghcn_all<-ghcn
156
#ghcn_test<-subset(ghcn,ghcn$value>-150 & ghcn$value<400)
157
#ghcn_test<-ghcn
158
#ghcn_test2<-subset(ghcn_test,ghcn_test$elev_1>0)
159
#ghcn<-ghcn_test2
160
#coords<- ghcn[,c('x_OR83M','y_OR83M')]
161

  
162
#Now clean and screen monthly values
163
#dst_all<-dst
132
#Reading monthly data
133
data3<-readOGR(dsn=in_path,layer=sub(".shp","",infile_monthly))
164 134
dst_all<-data3
165 135
dst<-data3
166
#dst<-subset(dst,dst$TMax>-15 & dst$TMax<45) #may choose different threshold??
167
#dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows
168 136

  
169
##### Create sampling: select training and testing sites ###
137
### TO DO -important ###
138
#Cleaning/sceerniging functions for daily stations, monthly stations and covariates?? do this at the preparation stage!!!
139
##
170 140

  
141
##### Create sampling: select training and testing sites ###
171 142
#Make this a a function!!!!
172 143
                  
173 144
if (seed_number>0) {
......
236 207
  sampling<-list_const_sampling 
237 208
  sampling_station_id<-list_const_sampling_station_id
238 209
}
210
#return()
239 211

  
240 212
########### PREDICT FOR MONTHLY SCALE  #############
241 213

  

Also available in: Unified diff