Revision 0f4426cb
Added by Benoit Parmentier about 12 years ago
climate/research/oregon/interpolation/GAM_fusion_function_multisampling.R | ||
---|---|---|
3 | 3 |
date<-strptime(sampling_dat$date[i], "%Y%m%d") # interpolation date being processed |
4 | 4 |
month<-strftime(date, "%m") # current month of the date being processed |
5 | 5 |
LST_month<-paste("mm_",month,sep="") # name of LST month to be matched |
6 |
|
|
6 |
proj_str<-proj4string(dst) |
|
7 | 7 |
#Adding layer LST to the raster stack |
8 | 8 |
|
9 | 9 |
pos<-match("LST",layerNames(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA |
... | ... | |
16 | 16 |
#r1[r1 < (min_val)]<-NA |
17 | 17 |
s_raster<-addLayer(s_raster,r1) #Adding current month |
18 | 18 |
|
19 |
pos<-match("elev",layerNames(s_raster)) |
|
20 |
layerNames(s_raster)[pos]<-"elev_1" |
|
19 | 21 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
20 |
#Problem here... |
|
21 |
mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))] #Match interpolation date and monthly LST average |
|
22 |
ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = as.data.frame(mod_LST)) #Add the variable LST to the subset dataset |
|
23 |
dst$LST<-dst[[LST_month]] |
|
24 |
#n<-nrow(ghcn.subsets[[i]]) |
|
25 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
26 |
#nv<-n-ns #create a sample for validation with prop of the rows |
|
27 |
#ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
22 |
data_day<-ghcn.subsets[[i]] |
|
23 |
mod_LST <- ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))] #Match interpolation date and monthly LST average |
|
24 |
data_day$LST <- as.data.frame(mod_LST)[,1] #Add the variable LST to the dataset |
|
25 |
dst$LST<-dst[[LST_month]] #Add the variable LST to the monthly dataset |
|
26 |
|
|
28 | 27 |
ind.training<-sampling[[i]] |
29 |
ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
|
|
30 |
data_s <- ghcn.subsets[[i]][ind.training, ] #Training dataset currently used in the modeling
|
|
31 |
data_v <- ghcn.subsets[[i]][ind.testing, ] #Testing/validation dataset using input sampling
|
|
28 |
ind.testing <- setdiff(1:nrow(data_day), ind.training)
|
|
29 |
data_s <- data_day[ind.training, ] #Training dataset currently used in the modeling
|
|
30 |
data_v <- data_day[ind.testing, ] #Testing/validation dataset using input sampling
|
|
32 | 31 |
|
33 | 32 |
ns<-nrow(data_s) |
34 | 33 |
nv<-nrow(data_v) |
... | ... | |
50 | 49 |
#min_val<-(-15) #Screening for extreme values |
51 | 50 |
#themolst[themolst < (min_val)]<-NA |
52 | 51 |
|
53 |
plot(themolst) |
|
54 |
|
|
55 | 52 |
########### |
56 | 53 |
# STEP 2 - Weather station means across same days: Monthly mean calculation |
57 | 54 |
########### |
... | ... | |
65 | 62 |
#sta_lola=modst[,c("lon","lat")] #Extracting locations of stations for the current month.. |
66 | 63 |
|
67 | 64 |
#proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
68 |
#lookup<-function(r,lat,lon) {
|
|
69 |
# xy<-project(cbind(lon,lat),proj_str);
|
|
70 |
# cidx<-cellFromXY(r,xy);
|
|
71 |
# return(r[cidx])
|
|
72 |
#}
|
|
65 |
lookup<-function(r,lat,lon) { |
|
66 |
xy<-project(cbind(lon,lat),proj_str); |
|
67 |
cidx<-cellFromXY(r,xy); |
|
68 |
return(r[cidx]) |
|
69 |
} |
|
73 | 70 |
#sta_tmax_from_lst=lookup(themolst,sta_lola$lat,sta_lola$lon) #Extracted values of LST for the stations |
74 | 71 |
sta_tmax_from_lst<-modst$LST |
75 | 72 |
######### |
... | ... | |
80 | 77 |
#Added by Benoit |
81 | 78 |
modst$LSTD_bias<-sta_bias #Adding bias to data frame modst containning the monthly average for 10 years |
82 | 79 |
|
83 |
bias_xy=project(as.matrix(sta_lola),proj_str) |
|
80 |
#bias_xy=project(as.matrix(sta_lola),proj_str)
|
|
84 | 81 |
png(paste("LST_TMax_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i], out_prefix,".png", sep="")) |
85 | 82 |
plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax",main=paste("LST vs TMax for",datelabel,sep=" ")) |
86 | 83 |
abline(0,1) |
84 |
nb_point<-paste("n=",length(modst$TMax),sep="") |
|
85 |
mean_bias<-paste("LST bias= ",format(mean(modst$LSTD_bias,na.rm=TRUE),digits=3),sep="") |
|
86 |
#Add the number of data points on the plot |
|
87 |
legend("topleft",legend=c(mean_bias,nb_point),bty="n") |
|
87 | 88 |
dev.off() |
88 | 89 |
|
89 | 90 |
#added by Benoit |
90 | 91 |
#x<-ghcn.subsets[[i]] #Holds both training and testing for instance 161 rows for Jan 1 |
91 |
x<-data_v |
|
92 |
d<-data_s |
|
93 |
|
|
92 |
x<-as.data.frame(data_v) |
|
93 |
d<-as.data.frame(data_s) |
|
94 |
#x[x$value==-999.9]<-NA |
|
95 |
for (j in 1:nrow(x)){ |
|
96 |
if (x$value[j]== -999.9){ |
|
97 |
x$value[j]<-NA |
|
98 |
} |
|
99 |
} |
|
100 |
for (j in 1:nrow(d)){ |
|
101 |
if (d$value[j]== -999.9){ |
|
102 |
d$value[j]<-NA |
|
103 |
} |
|
104 |
} |
|
105 |
#x[x$value==-999.9]<-NA |
|
106 |
#d[d$value==-999.9]<-NA |
|
94 | 107 |
pos<-match("value",names(d)) #Find column with name "value" |
95 | 108 |
#names(d)[pos]<-c("dailyTmax") |
96 | 109 |
names(d)[pos]<-y_var_name |
97 | 110 |
names(x)[pos]<-y_var_name |
98 | 111 |
#names(x)[pos]<-c("dailyTmax") |
99 |
d$dailyTmax=(as.numeric(d$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage |
|
100 |
x$dailyTmax=(as.numeric(x$dailyTmax))/10 #stored as 1/10 degree C to allow integer storage |
|
101 | 112 |
pos<-match("station",names(d)) #Find column with name "value" |
102 | 113 |
names(d)[pos]<-c("id") |
103 | 114 |
names(x)[pos]<-c("id") |
... | ... | |
131 | 142 |
#fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige |
132 | 143 |
|
133 | 144 |
#Adding options to use only training stations : 07/11/2012 |
134 |
bias_xy=project(as.matrix(sta_lola),proj_str) |
|
145 |
#bias_xy=project(as.matrix(sta_lola),proj_str)
|
|
135 | 146 |
#bias_xy2=project(as.matrix(c(dmoday$lon,dmoday$lat),proj_str) |
147 |
bias_xy<-coordinates(modst) |
|
136 | 148 |
if(bias_val==1){ |
137 | 149 |
sta_bias<-dmoday$LSTD_bias |
138 | 150 |
bias_xy<-cbind(dmoday$x_OR83M,dmoday$y_OR83M) |
... | ... | |
152 | 164 |
|
153 | 165 |
daily_sta_lola=dmoday[,c("lon","lat")] #could be same as before but why assume merge does this - assume not |
154 | 166 |
daily_sta_xy=project(as.matrix(daily_sta_lola),proj_str) |
167 |
|
|
155 | 168 |
daily_delta=dmoday$dailyTmax-dmoday$TMax |
156 | 169 |
|
157 | 170 |
#fitdelta<-Tps(daily_sta_xy,daily_delta) #use TPS or krige |
... | ... | |
213 | 226 |
######## |
214 | 227 |
# check: assessment of results: validation |
215 | 228 |
######## |
216 |
RMSE<-function(x,y) {return(mean((x-y)^2)^0.5)}
|
|
217 |
MAE_fun<-function(x,y) {return(mean(abs(x-y)))}
|
|
229 |
RMSE<-function(res) {return(((mean(res,na.rm=TRUE))^2)^0.5)}
|
|
230 |
MAE_fun<-function(res) {return(mean(abs(res),na.rm=TRUE))}
|
|
218 | 231 |
#ME_fun<-function(x,y){return(mean(abs(y)))} |
219 | 232 |
#FIT ASSESSMENT |
220 | 233 |
sta_pred_data_s=lookup(tmax_predicted,data_s$lat,data_s$lon) |
221 |
rmse_fit=RMSE(sta_pred_data_s,data_s$dailyTmax) |
|
222 |
mae_fit=MAE_fun(sta_pred_data_s,data_s$dailyTmax) |
|
234 |
|
|
235 |
rmse_fit=RMSE(sta_pred_data_s-data_s$dailyTmax) |
|
236 |
mae_fit=MAE_fun(sta_pred_data_s-data_s$dailyTmax) |
|
223 | 237 |
|
224 | 238 |
sta_pred=lookup(tmax_predicted,data_v$lat,data_v$lon) |
225 | 239 |
#sta_pred=lookup(tmax_predicted,daily_sta_lola$lat,daily_sta_lola$lon) |
... | ... | |
228 | 242 |
#names(data_v)[pos]<-c("dailyTmax") |
229 | 243 |
tmax<-data_v$dailyTmax |
230 | 244 |
#data_v$dailyTmax<-tmax |
231 |
rmse=RMSE(sta_pred,tmax)
|
|
232 |
mae<-MAE_fun(sta_pred,tmax)
|
|
245 |
rmse=RMSE(sta_pred-tmax)
|
|
246 |
mae<-MAE_fun(sta_pred-tmax)
|
|
233 | 247 |
r2<-cor(sta_pred,tmax)^2 #R2, coef. of var |
234 |
me<-mean(sta_pred-tmax) |
|
248 |
me<-mean(sta_pred-tmax,na.rm=T)
|
|
235 | 249 |
|
236 | 250 |
png(paste("Predicted_tmax_versus_observed_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i], |
237 | 251 |
"_",sampling_dat$run_samp[i],out_prefix,".png", sep="")) |
... | ... | |
243 | 257 |
|
244 | 258 |
###BEFORE GAM prediction the data object must be transformed to SDF |
245 | 259 |
|
246 |
coords<- data_v[,c('x_OR83M','y_OR83M')]
|
|
260 |
coords<- data_v[,c('x','y')]
|
|
247 | 261 |
coordinates(data_v)<-coords |
248 |
proj4string(data_v)<-CRS #Need to assign coordinates...
|
|
249 |
coords<- data_s[,c('x_OR83M','y_OR83M')]
|
|
262 |
proj4string(data_v)<-proj_str #Need to assign coordinates...
|
|
263 |
coords<- data_s[,c('x','y')]
|
|
250 | 264 |
coordinates(data_s)<-coords |
251 |
proj4string(data_s)<-CRS #Need to assign coordinates..
|
|
252 |
coords<- modst[,c('x_OR83M','y_OR83M')]
|
|
253 |
coordinates(modst)<-coords |
|
254 |
proj4string(modst)<-CRS #Need to assign coordinates..
|
|
265 |
proj4string(data_s)<-proj_str #Need to assign coordinates..
|
|
266 |
coords<- modst[,c('x','y')]
|
|
267 |
#coordinates(modst)<-coords
|
|
268 |
#proj4string(modst)<-proj_str #Need to assign coordinates..
|
|
255 | 269 |
|
256 | 270 |
ns<-nrow(data_s) #This is added to because some loss of data might have happened because of the averaging... |
257 | 271 |
nv<-nrow(data_v) |
... | ... | |
274 | 288 |
|
275 | 289 |
list_formulas<-vector("list",nmodels) |
276 | 290 |
|
277 |
list_formulas[[1]] <- as.formula("y_var ~ s(ELEV_SRTM)", env=.GlobalEnv)
|
|
291 |
list_formulas[[1]] <- as.formula("y_var ~ s(elev_1)", env=.GlobalEnv)
|
|
278 | 292 |
list_formulas[[2]] <- as.formula("y_var ~ s(LST)", env=.GlobalEnv) |
279 |
list_formulas[[3]] <- as.formula("y_var ~ s(ELEV_SRTM,LST)", env=.GlobalEnv)
|
|
280 |
list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(ELEV_SRTM)", env=.GlobalEnv)
|
|
281 |
list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv)
|
|
282 |
list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST)", env=.GlobalEnv)
|
|
283 |
list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST) + s(LC1)", env=.GlobalEnv)
|
|
284 |
list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST) + s(LC3)", env=.GlobalEnv)
|
|
285 |
list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(ELEV_SRTM) + s(Northness_w,Eastness_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
|
|
293 |
list_formulas[[3]] <- as.formula("y_var ~ s(elev_1,LST)", env=.GlobalEnv)
|
|
294 |
list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(elev_1)", env=.GlobalEnv)
|
|
295 |
list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,elev_1)", env=.GlobalEnv)
|
|
296 |
list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", env=.GlobalEnv)
|
|
297 |
list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", env=.GlobalEnv)
|
|
298 |
list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
|
|
299 |
list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
|
|
286 | 300 |
|
287 | 301 |
#list_formulas[[1]] <- as.formula("y_var ~ s(ELEV_SRTM)", env=.GlobalEnv) |
288 | 302 |
#list_formulas[[2]] <- as.formula("y_var ~ s(lat,lon)", env=.GlobalEnv) |
Also available in: Unified diff
GAM fusion function, modifications for first fusion prediction in Venezuela