Revision 972ac6af
Added by Benoit Parmentier almost 12 years ago
climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R | ||
---|---|---|
1 |
################## MULTI SAMPLING GAM FUSION METHOD ASSESSMENT #################################### |
|
1 |
################## MULTI SAMPLING GAM FUSION METHOD ASSESSMENT ####################################
|
|
2 | 2 |
############################ Merging LST and station data ########################################## |
3 | 3 |
#This script interpolates tmax values using MODIS LST and GHCND station data |
4 | 4 |
#interpolation area. It requires the text file of stations and a shape file of the study area. |
... | ... | |
33 | 33 |
### Parameters and argument |
34 | 34 |
|
35 | 35 |
infile2<-"list_365_dates_04212012.txt" |
36 |
|
|
37 |
## output param from previous script: Database_stations_covariates_processing_function |
|
36 | 38 |
infile_monthly<-"monthly_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" |
37 | 39 |
infile_daily<-"daily_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" |
38 | 40 |
infile_locs<-"stations_venezuela_region_y2010_2010_VE_02082013.shp" |
39 | 41 |
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
42 |
var<-"TMAX" |
|
43 |
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013" #User defined output prefix |
|
44 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84: same as earlier |
|
45 |
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
46 |
#infile_monthly<-"monthly_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" #outile4 from database_covar script |
|
47 |
#infile_daily<-"daily_covariates_ghcn_data_TMAXy2010_2010_VE_02082013.shp" #outfile3 from database_covar script |
|
48 |
#infile_locs<-"stations_venezuela_region_y2010_2010_VE_02082013.shp" #outfile2? from database covar script |
|
40 | 49 |
|
41 |
in_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data" |
|
42 |
out_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data" |
|
43 |
script_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/" |
|
44 |
setwd(in_path) |
|
50 |
### |
|
45 | 51 |
|
46 |
y_var_name<-"dailyTmax" |
|
47 |
out_prefix<-"_365d_GAM_fus5_all_lstd_02202013" #User defined output prefix |
|
52 |
if (var=="TMAX"){ |
|
53 |
y_var_name<-"dailyTmax" |
|
54 |
} |
|
55 |
if (var=="TMIN"){ |
|
56 |
y_var_name<-"dailyTmin" |
|
57 |
} |
|
58 |
|
|
59 |
#Input for sampling function... |
|
48 | 60 |
seed_number<- 100 #if seed zero then no seed? |
49 | 61 |
nb_sample<-1 #number of time random sampling must be repeated for every hold out proportion |
50 | 62 |
prop_min<-0.3 #if prop_min=prop_max and step=0 then predicitons are done for the number of dates... |
51 | 63 |
prop_max<-0.3 |
52 | 64 |
step<-0 |
53 | 65 |
constant<-0 #if value 1 then use the same samples as date one for the all set of dates |
54 |
#projection used in the interpolation of the study area: should be read directly from the outline of the study area |
|
55 |
#CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
56 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
|
57 | 66 |
|
58 | 67 |
#Models to run...this can be change for each run |
59 | 68 |
list_models<-c("y_var ~ s(elev_1)", |
... | ... | |
66 | 75 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", |
67 | 76 |
"y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)") |
68 | 77 |
|
78 |
#Choose interpolation method... |
|
79 |
interpolation_method<-c("gam_fusion","gam_CAI") #other otpions to be added later |
|
80 |
|
|
69 | 81 |
#Default name of LST avg to be matched |
70 | 82 |
lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12") |
71 | 83 |
|
84 |
in_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data" |
|
85 |
#Create on the fly output folder... |
|
86 |
out_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data" |
|
87 |
script_path<-"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/" |
|
88 |
setwd(in_path) |
|
89 |
|
|
72 | 90 |
source(file.path(script_path,"GAM_fusion_function_multisampling_02262013.R")) |
73 | 91 |
source(file.path(script_path,"GAM_fusion_function_multisampling_validation_metrics_02262013.R")) |
74 | 92 |
|
... | ... | |
98 | 116 |
|
99 | 117 |
ghcn<-readOGR(dsn=in_path,layer=sub(".shp","",infile_daily)) |
100 | 118 |
CRS_interp<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
101 |
|
|
102 | 119 |
stat_loc<-readOGR(dsn=in_path,layer=sub(".shp","",infile_locs)) |
103 |
|
|
104 |
data3<-readOGR(dsn=in_path,layer=sub(".shp","",infile_monthly)) |
|
105 |
|
|
106 |
#Remove NA for LC and CANHEIGHT: Need to check this part after |
|
107 |
ghcn$LC1[is.na(ghcn$LC1)]<-0 |
|
108 |
ghcn$LC3[is.na(ghcn$LC3)]<-0 |
|
109 |
ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0 |
|
110 |
ghcn$LC4[is.na(ghcn$LC4)]<-0 |
|
111 |
ghcn$LC6[is.na(ghcn$LC6)]<-0 |
|
112 |
|
|
113 | 120 |
dates <-readLines(file.path(in_path,infile2)) #dates to be predicted |
114 | 121 |
|
115 |
##Extracting the variables values from the raster files |
|
116 |
|
|
117 |
#The names of covariates can be changed... |
|
122 |
#Reading of covariate brick covariates can be changed... |
|
118 | 123 |
rnames <-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC") |
119 | 124 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12") |
120 | 125 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
121 | 126 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
122 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
123 |
|
|
124 |
covar_names<-c(rnames,lc_names,lst_names) |
|
125 |
|
|
126 |
s_raster<-stack(infile3) #read in the data stack |
|
127 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
128 |
covar_names<-c(rnames,lc_names,lst_names) |
|
129 |
s_raster<-brick(infile3) #read in the data brck |
|
127 | 130 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
128 | 131 |
|
129 |
#Deal with no data value and zero |
|
130 |
#pos<-match("LC1",layerNames(s_raster)) #Find column with name "value" |
|
131 |
#LC1<-raster(s_raster,layer=pos) #Select layer from stack |
|
132 |
#s_raster<-dropLayer(s_raster,pos) |
|
133 |
#LC1[is.na(LC1)]<-0 |
|
134 |
|
|
135 |
#pos<-match("LC3",layerNames(s_raster)) #Find column with name "value" |
|
136 |
#LC3<-raster(s_raster,layer=pos) #Select layer from stack |
|
137 |
#s_raster<-dropLayer(s_raster,pos) |
|
138 |
#LC3[is.na(LC3)]<-0 |
|
139 |
|
|
140 |
#pos<-match("CANHEIGHT",layerNames(s_raster)) #Find column with name "value" |
|
141 |
#CANHEIGHT<-raster(s_raster,layer=pos) #Select layer from stack |
|
142 |
#s_raster<-dropLayer(s_raster,pos) |
|
143 |
#CANHEIGHT[is.na(CANHEIGHT)]<-0 |
|
144 |
#pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM" |
|
145 |
#ELEV_SRTM<-raster(s_raster,layer=pos) #Select layer from stack on 10/30 |
|
146 |
#s_raster<-dropLayer(s_raster,pos) |
|
147 |
#ELEV_SRTM[ELEV_SRTM <0]<-NA |
|
148 |
|
|
149 |
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
150 |
|
|
151 |
######### Preparing daily and monthly values for training and testing |
|
152 |
|
|
153 |
#Screening for daily bad values: value is tmax in this case |
|
154 |
#ghcn$value<-as.numeric(ghcn$value) |
|
155 |
#ghcn_all<-ghcn |
|
156 |
#ghcn_test<-subset(ghcn,ghcn$value>-150 & ghcn$value<400) |
|
157 |
#ghcn_test<-ghcn |
|
158 |
#ghcn_test2<-subset(ghcn_test,ghcn_test$elev_1>0) |
|
159 |
#ghcn<-ghcn_test2 |
|
160 |
#coords<- ghcn[,c('x_OR83M','y_OR83M')] |
|
161 |
|
|
162 |
#Now clean and screen monthly values |
|
163 |
#dst_all<-dst |
|
132 |
#Reading monthly data |
|
133 |
data3<-readOGR(dsn=in_path,layer=sub(".shp","",infile_monthly)) |
|
164 | 134 |
dst_all<-data3 |
165 | 135 |
dst<-data3 |
166 |
#dst<-subset(dst,dst$TMax>-15 & dst$TMax<45) #may choose different threshold?? |
|
167 |
#dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows |
|
168 | 136 |
|
169 |
##### Create sampling: select training and testing sites ### |
|
137 |
### TO DO -important ### |
|
138 |
#Cleaning/sceerniging functions for daily stations, monthly stations and covariates?? do this at the preparation stage!!! |
|
139 |
## |
|
170 | 140 |
|
141 |
##### Create sampling: select training and testing sites ### |
|
171 | 142 |
#Make this a a function!!!! |
172 | 143 |
|
173 | 144 |
if (seed_number>0) { |
... | ... | |
236 | 207 |
sampling<-list_const_sampling |
237 | 208 |
sampling_station_id<-list_const_sampling_station_id |
238 | 209 |
} |
210 |
#return() |
|
239 | 211 |
|
240 | 212 |
########### PREDICT FOR MONTHLY SCALE ############# |
241 | 213 |
|
Also available in: Unified diff
GAM Fusion raster prediction, cleaning out of script, reduction of arguments in preparation for function additions