Revision ca9c0dff
Added by Benoit Parmentier almost 12 years ago
climate/research/oregon/interpolation/GAM_CAI_analysis_raster_prediction_multisampling.R | ||
---|---|---|
6 | 6 |
#Method is assedsed using constant sampling with variation of validation sample with different # |
7 | 7 |
#hold out proportions. # |
8 | 8 |
#AUTHOR: Benoit Parmentier # |
9 |
#DATE: 10/25/2012 #
|
|
9 |
#DATE: 10/30/2012 #
|
|
10 | 10 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491-- # |
11 | 11 |
################################################################################################### |
12 | 12 |
|
... | ... | |
43 | 43 |
#GHCN Database for 1980-2010 for study area (OR) |
44 | 44 |
data3<-read.table(paste(path,"/","ghcn_data_TMAXy1980_2010_OR_0602012.txt",sep=""),sep=",", header=TRUE) |
45 | 45 |
|
46 |
nmodels<-8 #number of models running
|
|
46 |
nmodels<-5 #number of models running
|
|
47 | 47 |
y_var_name<-"dailyTmax" |
48 | 48 |
climgam=1 #if 1, then GAM is run on the climatology rather than the daily deviation surface... |
49 | 49 |
predval<-1 |
... | ... | |
51 | 51 |
|
52 | 52 |
seed_number<- 100 #Seed number for random sampling, if seed_number<0, no seed number is used.. |
53 | 53 |
#out_prefix<-"_365d_GAM_CAI2_const_10222012_" #User defined output prefix |
54 |
out_prefix<-"_365d_GAM_CAI2_all_lstd_10262012" #User defined output prefix |
|
54 |
#out_prefix<-"_365d_GAM_CAI2_const_all_lstd_10272012" #User defined output prefix |
|
55 |
out_prefix<-"_365d_GAM_CAI3_all_10302012" #User defined output prefix |
|
55 | 56 |
|
56 | 57 |
bias_val<-0 #if value 1 then daily training data is used in the bias surface rather than the all monthly stations (added on 07/11/2012) |
57 | 58 |
bias_prediction<-1 #if value 1 then use GAM for the BIAS prediction otherwise GAM direct reprediction for y_var (daily tmax) |
... | ... | |
64 | 65 |
CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
65 | 66 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
66 | 67 |
|
67 |
source("GAM_CAI_function_multisampling_10252012.R") |
|
68 |
#source("GAM_CAI_function_multisampling_10252012.R") |
|
69 |
source("GAM_CAI_function_multisampling_10302012.R") |
|
68 | 70 |
|
69 | 71 |
############ START OF THE SCRIPT ################## |
70 | 72 |
|
... | ... | |
139 | 141 |
|
140 | 142 |
LC_s<-stack(LC1,LC3,LC4,LC6) |
141 | 143 |
layerNames(LC_s)<-c("LC1_forest","LC3_grass","LC4_crop","LC6_urban") |
142 |
plot(LC_s) |
|
144 |
#plot(LC_s)
|
|
143 | 145 |
|
144 | 146 |
pos<-match("CANHEIGHT",layerNames(s_raster)) #Find column with name "value" |
145 | 147 |
CANHEIGHT<-raster(s_raster,layer=pos) #Select layer from stack |
146 | 148 |
s_raster<-dropLayer(s_raster,pos) |
147 | 149 |
CANHEIGHT[is.na(CANHEIGHT)]<-0 |
150 |
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM" |
|
151 |
ELEV_SRTM<-raster(s_raster,layer=pos) #Select layer from stack on 10/30 |
|
152 |
s_raster<-dropLayer(s_raster,pos) |
|
153 |
ELEV_SRTM[ELEV_SRTM <0]<-NA |
|
148 | 154 |
|
149 | 155 |
xy<-coordinates(r1) #get x and y projected coordinates... |
150 | 156 |
xy_latlon<-project(xy, CRS, inv=TRUE) # find lat long for projected coordinats (or pixels...) |
... | ... | |
157 | 163 |
values(lon)<-xy_latlon[,1] |
158 | 164 |
values(lat)<-xy_latlon[,2] |
159 | 165 |
|
160 |
r<-stack(N,E,Nw,Ew,lon,lat,LC1,LC3,LC4,LC6, CANHEIGHT) |
|
161 |
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat","LC1","LC3","LC4","LC6","CANHEIGHT") |
|
166 |
r<-stack(N,E,Nw,Ew,lon,lat,LC1,LC3,LC4,LC6, CANHEIGHT,ELEV_SRTM)
|
|
167 |
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat","LC1","LC3","LC4","LC6","CANHEIGHT","ELEV_SRTM")
|
|
162 | 168 |
layerNames(r)<-rnames |
163 | 169 |
s_raster<-addLayer(s_raster, r) |
164 | 170 |
|
... | ... | |
222 | 228 |
stations_val<-as.data.frame(stations_val) |
223 | 229 |
dst_extract<-cbind(dst_month,stations_val) |
224 | 230 |
dst<-dst_extract |
231 |
#Now clean and screen monthly values |
|
232 |
dst_all<-dst |
|
233 |
dst<-subset(dst,dst$TMax>-15 & dst$TMax<40) |
|
234 |
dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows |
|
225 | 235 |
|
226 | 236 |
######### Preparing daily values for training and testing |
227 | 237 |
|
... | ... | |
263 | 273 |
#ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates |
264 | 274 |
ghcn.subsets <-lapply(as.character(sampling_dat$date), function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 subsets dataset based on dates |
265 | 275 |
|
266 |
sampling<-vector("list",length(ghcn.subsets)) |
|
276 |
if (seed_number>0) { |
|
277 |
set.seed(seed_number) #Using a seed number allow results based on random number to be compared... |
|
278 |
} |
|
267 | 279 |
|
280 |
sampling<-vector("list",length(ghcn.subsets)) |
|
281 |
sampling_station_id<-vector("list",length(ghcn.subsets)) |
|
268 | 282 |
for(i in 1:length(ghcn.subsets)){ |
269 | 283 |
n<-nrow(ghcn.subsets[[i]]) |
270 | 284 |
prop<-(sampling_dat$prop[i])/100 |
... | ... | |
272 | 286 |
nv<-n-ns #create a sample for validation with prop of the rows |
273 | 287 |
ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
274 | 288 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training) |
289 |
#Find the corresponding |
|
290 |
data_sampled<-ghcn.subsets[[i]][ind.training,] #selected the randomly sampled stations |
|
291 |
station_id.training<-data_sampled$station #selected id for the randomly sampled stations (115) |
|
292 |
#Save the information |
|
275 | 293 |
sampling[[i]]<-ind.training |
294 |
sampling_station_id[[i]]<- station_id.training |
|
276 | 295 |
} |
277 |
|
|
296 |
## Use same samples across the year... |
|
278 | 297 |
if (constant==1){ |
279 | 298 |
sampled<-sampling[[1]] |
299 |
data_sampled<-ghcn.subsets[[1]][sampled,] #selected the randomly sampled stations |
|
300 |
station_sampled<-data_sampled$station #selected id for the randomly sampled stations (115) |
|
280 | 301 |
list_const_sampling<-vector("list",sn) |
302 |
list_const_sampling_station_id<-vector("list",sn) |
|
281 | 303 |
for(i in 1:sn){ |
282 |
list_const_sampling[[i]]<-sampled |
|
304 |
station_id.training<-intersect(station_sampled,ghcn.subsets[[i]]$station) |
|
305 |
ind.training<-match(station_id.training,ghcn.subsets[[i]]$station) |
|
306 |
list_const_sampling[[i]]<-ind.training |
|
307 |
list_const_sampling_station_id[[i]]<-station_id.training |
|
283 | 308 |
} |
284 |
sampling<-list_const_sampling |
|
309 |
sampling<-list_const_sampling |
|
310 |
sampling_station_id<-list_const_sampling_station_id |
|
285 | 311 |
} |
286 | 312 |
|
287 | 313 |
######## Prediction for the range of dates |
... | ... | |
330 | 356 |
|
331 | 357 |
# Save before plotting |
332 | 358 |
#sampling_obj<-list(sampling_dat=sampling_dat,training=sampling) |
333 |
sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, tb=tb_diagnostic) |
|
359 |
#sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, tb=tb_diagnostic) |
|
360 |
sampling_obj<-list(sampling_dat=sampling_dat,training=sampling, training_id=sampling_station_id, tb=tb_diagnostic) |
|
334 | 361 |
|
335 | 362 |
write.table(avg_tb, file= paste(path,"/","results2_fusion_Assessment_measure_avg_",out_prefix,".txt",sep=""), sep=",") |
336 | 363 |
write.table(median_tb, file= paste(path,"/","results2_fusion_Assessment_measure_median_",out_prefix,".txt",sep=""), sep=",") |
337 | 364 |
write.table(tb_diagnostic, file= paste(path,"/","results2_fusion_Assessment_measure",out_prefix,".txt",sep=""), sep=",") |
338 | 365 |
write.table(tb, file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",") |
339 | 366 |
|
340 |
|
|
341 |
# tb_RMSE<-subset(tb, metric=="RMSE") |
|
342 |
# tb_MAE<-subset(tb,metric=="MAE") |
|
343 |
# tb_ME<-subset(tb,metric=="ME") |
|
344 |
# tb_R2<-subset(tb,metric=="R2") |
|
345 |
# tb_RMSE_f<-subset(tb, metric=="RMSE_f") |
|
346 |
# tb_MAE_f<-subset(tb,metric=="MAE_f") |
|
347 |
# tb_R2_f<-subset(tb,metric=="R2_f") |
|
348 |
# |
|
349 |
# tb_diagnostic1<-rbind(tb_RMSE,tb_MAE,tb_ME,tb_R2) |
|
350 |
# #tb_diagnostic2<-rbind(tb_,tb_MAE,tb_ME,tb_R2) |
|
351 |
# |
|
352 |
# mean_RMSE<-sapply(tb_RMSE[,4:(nmodels+4)],mean) |
|
353 |
# mean_MAE<-sapply(tb_MAE[,4:(nmodels+4)],mean) |
|
354 |
# mean_R2<-sapply(tb_R2[,4:(nmodels+4)],mean) |
|
355 |
# mean_ME<-sapply(tb_ME[,4:(nmodels+4)],mean) |
|
356 |
# mean_MAE_f<-sapply(tb_MAE[,4:(nmodels+4)],mean) |
|
357 |
# mean_RMSE_f<-sapply(tb_RMSE_f[,4:(nmodels+4)],mean) |
|
358 |
# |
|
359 |
# #Wrting metric results in textfile and model objects in .RData file |
|
360 |
# write.table(tb_diagnostic1, file= paste(path,"/","results2_CAI_Assessment_measure1",out_prefix,".txt",sep=""), sep=",") |
|
361 |
# write.table(tb, file= paste(path,"/","results2_CAI_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",") |
|
362 | 367 |
save(sampling_obj, file= paste(path,"/","results2_CAI_sampling_obj",out_prefix,".RData",sep="")) |
363 | 368 |
save(gam_CAI_mod,file= paste(path,"/","results2_CAI_Assessment_measure_all",out_prefix,".RData",sep="")) |
364 | 369 |
|
Also available in: Unified diff
GAM CAI raster prediction OR tmax,corretion constant sampling and GAM climatology models