Revision cf45083f
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R | ||
---|---|---|
4 | 4 |
#different covariates using two baselines. Accuracy methods are added in the the function script to evaluate results. |
5 | 5 |
#Figures, tables and data for the contribution of covariate paper are also produced in the script. |
6 | 6 |
#AUTHOR: Benoit Parmentier |
7 |
#MODIFIED ON: 07/03/2014
|
|
7 |
#MODIFIED ON: 07/09/2014
|
|
8 | 8 |
#Version: 5 |
9 | 9 |
#PROJECT: Environmental Layers project |
10 | 10 |
################################################################################################# |
... | ... | |
31 | 31 |
library(automap) # Kriging automatic fitting of variogram using gstat |
32 | 32 |
library(rgeos) # Geometric, topologic library of functions |
33 | 33 |
#RPostgreSQL # Interface R and Postgres, not used in this script |
34 |
|
|
34 |
library(gridExtra) |
|
35 |
library(colorRamps) |
|
35 | 36 |
#Additional libraries not used in workflow |
36 | 37 |
library(pgirmess) # Krusall Wallis test with mulitple options, Kruskalmc {pgirmess} |
37 | 38 |
library(ncf) |
... | ... | |
264 | 265 |
sub(".shp","",basename(met_stations_obj$monthly_covar_ghcn_data))) |
265 | 266 |
ghcn_dat_WGS84 <-spTransform(ghcn_dat,CRS_locs_WGS84) # Project from WGS84 to new coord. system |
266 | 267 |
|
268 |
|
|
267 | 269 |
interp_area <- readOGR(dsn=dirname(infile_reg_outline),sub(".shp","",basename(infile_reg_outline))) |
268 | 270 |
interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84) # Project from WGS84 to new coord. system |
269 | 271 |
|
... | ... | |
322 | 324 |
lst_md<- lst_md - 273.16 |
323 | 325 |
lst_mm_01<-subset(s_raster,"mm_01") |
324 | 326 |
|
325 |
no_brks <- length(seq(min_val,max_val,by=0.1))-1 |
|
327 |
#no_brks <- length(seq(min_val,max_val,by=0.1))-1
|
|
326 | 328 |
#temp.colors <- colorRampPalette(c('blue', 'white', 'red')) |
327 | 329 |
#temp.colors <- colorRampPalette(c('blue', 'lightgoldenrodyellow', 'red')) |
328 | 330 |
#temp.colors <- matlab.like(no_brks) |
... | ... | |
330 | 332 |
|
331 | 333 |
png(filename=paste("Figure_3_paper_Comparison_daily_monthly_mean_lst",out_prefix,".png",sep=""),width=960,height=480) |
332 | 334 |
par(mfrow=c(1,2)) |
333 |
plot(lst_md,col=temp.colors(25))
|
|
335 |
plot(lst_md,col=temp.colors(25),axes=F) #use axes=F to remove lat and lon or coordinates
|
|
334 | 336 |
plot(interp_area,add=TRUE) |
335 | 337 |
title("Mean for January 1") |
336 |
plot(lst_mm_01,col=temp.colors(25)) |
|
338 |
plot(lst_mm_01,col=temp.colors(25),axes=F)
|
|
337 | 339 |
plot(interp_area,add=TRUE) |
338 | 340 |
title("Mean for month of January") |
339 | 341 |
dev.off() |
340 | 342 |
|
343 |
path_in_tmp <- "/home/parmentier/Data/IPLANT_project/region_outlines_ref_files" |
|
344 |
interp_area_tmp <- readOGR(dsn=path_in_tmp,"OR83M_state_outline") |
|
345 |
proj4string(interp_area_tmp ) <- proj4string(interp_area) |
|
346 |
projection(lst_md) <- proj4string(interp_area) |
|
347 |
r_region_ref <- rasterize(x=interp_area_tmp,y=lst_md,field="State_ID_s",fun=max) |
|
348 |
|
|
341 | 349 |
################################################ |
342 | 350 |
################### Figure 4. MAE/RMSE and distance to closest fitting station. |
343 | 351 |
|
... | ... | |
1093 | 1101 |
#plot.gstatVariogram |
1094 | 1102 |
#Plot a sample variogram, and possibly a fitted model |
1095 | 1103 |
#model 1 lat,lon and elev |
1096 |
layout_m<-c(1,1) # works if set to this?? ok set the resolution...
|
|
1104 |
layout_m <- c(1,2) # works if set to this?? ok set the resolution...
|
|
1097 | 1105 |
|
1098 | 1106 |
date_selected <- c("20100101","20100901") |
1099 | 1107 |
png(paste("Figure9_paper_","_variogram_",date_selected[1],"_",date_selected[2],"_",out_prefix,".png", sep=""), |
... | ... | |
1101 | 1109 |
#height=480*6,width=480*4) |
1102 | 1110 |
|
1103 | 1111 |
#p3 <- list_plots_spt[[3]] |
1104 |
p1<-plot(raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$var_model,justPosition=F) |
|
1112 |
p1<-plot(raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$var_model, |
|
1113 |
ylim=c(0,9), |
|
1114 |
ylab=list(label="Semivariance",cex=1.5), |
|
1115 |
xlab=list(label="Distance (meter)",cex=1.5), |
|
1116 |
main=list(label="Mod1 January 1, 2010",cex=1.8)) |
|
1105 | 1117 |
#plot(p1) |
1106 |
p241<-plot(raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$var_model) |
|
1118 |
p241<-plot(raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$var_model, |
|
1119 |
ylim=c(0,9), |
|
1120 |
ylab=list(label="Semivariance",cex=1.5), |
|
1121 |
xlab=list(label="Distance (meter)",cex=1.5), |
|
1122 |
main=list(label="Mod1 September 1, 2010",cex=1.8)) |
|
1107 | 1123 |
#plot(p241) |
1108 | 1124 |
|
1109 | 1125 |
#grid.arrange(p1,p2,p3,ncol=1) |
1110 |
grid.arrange(p1,p241,ncol=1)
|
|
1126 |
grid.arrange(p1,p241,ncol=2)
|
|
1111 | 1127 |
|
1112 | 1128 |
dev.off() |
1113 | 1129 |
#Combine both plot? + plot info on sill, nugget and range? and most frequent model selected |
... | ... | |
1131 | 1147 |
tb_variogram["date"] <- dates |
1132 | 1148 |
tb_variogram$month <- add_month_tag(tb_variogram) |
1133 | 1149 |
#add dates and month?? |
1134 |
histogram(tt1$model) |
|
1150 |
|
|
1151 |
|
|
1152 |
#layout_m <- c(2,2) # works if set to this?? ok set the resolution... |
|
1153 |
res_pix<-480 |
|
1154 |
col_mfrow<-2 |
|
1155 |
#row_mfrow<-2 |
|
1156 |
row_mfrow<-2 |
|
1157 |
|
|
1158 |
png_file_name<- paste("Figure_14_paper_variogram_statistics_",out_prefix,".png", sep="") |
|
1159 |
png(filename=file.path(out_dir,png_file_name), |
|
1160 |
width=col_mfrow*res_pix,height=row_mfrow*res_pix) |
|
1161 |
par(mfrow=c(row_mfrow,col_mfrow)) |
|
1162 |
|
|
1163 |
#date_selected <- c("20100101","20100901") |
|
1164 |
#png(paste("Figure14_paper_","_variogram_statistics","_",out_prefix,".png", sep=""), |
|
1165 |
# height=960*layout_m[1],width=960*layout_m[2]) |
|
1166 |
#height=480*6,width=480*4) |
|
1167 |
#mfrow(2,2) |
|
1168 |
p_hist<-histogram(tb_variogram$model,col="grey", |
|
1169 |
main=list(label="Variogram model type",cex=1.8), |
|
1170 |
ylab=list(label="Percent of total",cex=1.5), |
|
1171 |
xlab=list(label="Variogram model",cex=1.5)) |
|
1172 |
print(p_hist) |
|
1135 | 1173 |
#hist(tt1$range) |
1136 | 1174 |
#hist(tt1$psill) |
1137 | 1175 |
#hist(tt1$Nug) |
... | ... | |
1139 | 1177 |
boxplot(tb_variogram$Nug~tb_variogram$month,outline=F) |
1140 | 1178 |
boxplot(tb_variogram$psill~tb_variogram$month,outline=F) |
1141 | 1179 |
#boxplot(tb_variogram$model~tb_variogram$month,outline=F) |
1142 |
|
|
1180 |
|
|
1181 |
dev.off() |
|
1182 |
|
|
1143 | 1183 |
########################################### |
1144 | 1184 |
### Figure 10: map of residuals...for a specific date... |
1145 | 1185 |
index <- 244 |
... | ... | |
1148 | 1188 |
#date_selected <- c("20100101") |
1149 | 1189 |
date_selected <- c("20100901") |
1150 | 1190 |
|
1151 |
data_v <-raster_prediction_obj_2$validation_mod_obj[[index]]$data_v #testing with residuals |
|
1191 |
data_v <- raster_prediction_obj_2$validation_mod_obj[[index]]$data_v #testing with residuals
|
|
1152 | 1192 |
#data_s<-validation_mod_obj[[index]]$data_s |
1153 | 1193 |
date_proc<-strptime(date_selected, "%Y%m%d") # interpolation date being processed |
1154 | 1194 |
mo<-as.integer(strftime(date_proc, "%m")) # current month of the date being processed |
... | ... | |
1156 | 1196 |
year<-as.integer(strftime(date_proc, "%Y")) |
1157 | 1197 |
datelabel=format(ISOdate(year,mo,day),"%b %d, %Y") |
1158 | 1198 |
|
1159 |
#height=480*6,width=480*4)
|
|
1199 |
#height=480*6,width=480*4) |
|
1160 | 1200 |
list_p <- vector("list", length(names_mod)) |
1161 | 1201 |
for (k in 1:length(names_mod)){ |
1162 | 1202 |
model_name <- names_mod[k] |
... | ... | |
1169 | 1209 |
#p1 <- levelplot(elev,scales = list(draw = FALSE), colorkey = FALSE,col.regions=rev(terrain.colors(255)),contour=T) |
1170 | 1210 |
#add legend..par.settings = GrTheme |
1171 | 1211 |
cx <- ((data_v[[res_model_name]])*2) |
1172 |
#p1 <- levelplot(elev,scales = list(draw = FALSE), colorkey = FALSE,par.settings = GrTheme)
|
|
1212 |
p1 <- levelplot(elev,scales = list(draw = FALSE), colorkey = FALSE,par.settings = GrTheme) |
|
1173 | 1213 |
|
1174 | 1214 |
#p2 <- spplot(data_v,res_model_name, cex=1 * cx,main=paste("Residuals for ",res_model_name," ",datelabel,sep=""), |
1175 | 1215 |
# col.regions=matlab.like(25)) |
1176 | 1216 |
p2 <- bubble(data_v,res_model_name, main=paste("Residuals for ",res_model_name," ",datelabel,sep=""), |
1177 |
col.regions=matlab.like(25))
|
|
1217 |
col=matlab.like(25)) |
|
1178 | 1218 |
p3 <- p2 + p1 + p2 #to force legend... |
1179 | 1219 |
#p1 <- spplot(interp_area) |
1180 | 1220 |
#p3 <- p1+p2 #to force legend... |
... | ... | |
1187 | 1227 |
list_p[[k]] <- p3 |
1188 | 1228 |
} |
1189 | 1229 |
|
1190 |
layout_m<-c(2,5) # works if set to this?? ok set the resolution...
|
|
1191 |
png(paste("Figure10_paper_","_residuals_",date_selected,"_",out_prefix,".png", sep=""),
|
|
1230 |
layout_m<-c(1,3) # works if set to this?? ok set the resolution...
|
|
1231 |
png(paste("Figure13_paper_","_residuals_",date_selected,"_",out_prefix,".png", sep=""),
|
|
1192 | 1232 |
height=480*layout_m[1],width=480*layout_m[2]) |
1193 | 1233 |
|
1194 |
grid.arrange(list_p[[1]],list_p[[2]],list_p[[3]],list_p[[4]],list_p[[5]], |
|
1195 |
list_p[[6]],list_p[[7]],list_p[[8]],list_p[[9]],list_p[[10]],ncol=5) |
|
1234 |
grid.arrange(list_p_mae[[1]],list_p_mae[[2]],list_p_mae[[3]],ncol=3) |
|
1196 | 1235 |
|
1197 | 1236 |
dev.off() |
1198 | 1237 |
|
1199 |
|
|
1200 | 1238 |
######## NOW GET A ACCURACY BY STATIONS |
1201 |
|
|
1202 |
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s") |
|
1203 |
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v") |
|
1239 |
l_formulas<-(extract_from_list_obj(raster_prediction_obj_2$method_mod_obj,"formulas")) #get vector of dates |
|
1240 |
# y_var ~ s(lat,lon) |
|
1241 |
# y_var ~ s(lat,lon) + s(elev_s) |
|
1242 |
# y_var ~ s(lat,lon) + s(N_w) |
|
1243 |
# y_var ~ s(lat,lon) + s(E_w) |
|
1244 |
# y_var ~ s(lat,lon) + s(LST) |
|
1245 |
# y_var ~ s(lat,lon) + s(DISTOC) |
|
1246 |
# y_var ~ s(lat,lon) + s(LC1) |
|
1247 |
# y_var ~ s(lat,lon) + s(CANHGHT) |
|
1248 |
# y_var ~ s(lat,lon) + s(LST) + ti(LST,LC1) |
|
1249 |
# y_var ~ s(lat,lon) + s(LST) + ti(LST,CANHGHT) |
|
1250 |
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_2$validation_mod_obj,"data_s") |
|
1251 |
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_2$validation_mod_obj,"data_v") |
|
1252 |
names_mod <- names(raster_prediction_obj_2$method_mod_obj[[1]][[y_var_name]]) #names of models to plot |
|
1204 | 1253 |
|
1205 | 1254 |
#number of observations per day |
1206 | 1255 |
year_nbv <- sapply(list_data_v,FUN=length) |
... | ... | |
1213 | 1262 |
|
1214 | 1263 |
#Convert sp data.frame and combined them in one unique df, see function define earlier |
1215 | 1264 |
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames |
1216 |
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8") |
|
1217 |
|
|
1218 |
limit_val<- c(-30,-2.57,0,2.57,30) |
|
1219 |
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val) |
|
1220 |
data_v_combined$res_mod1_rc1 |
|
1221 |
|
|
1222 |
t<-melt(data_v_combined, |
|
1223 |
measure=names_var, |
|
1224 |
id=c("res_mod1_rc1","id"), |
|
1225 |
na.rm=T) |
|
1226 |
|
|
1227 |
n_tb<-cast(t,res_mod1_rc1+id~variable,length) |
|
1228 |
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation |
|
1229 |
|
|
1230 |
merge(n_tb$id |
|
1231 |
dim(n_tb) |
|
1232 |
#mae_tb <-cast(t,dst_cat1~variable,mae_fun) |
|
1233 |
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun) |
|
1234 |
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
1235 |
|
|
1236 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
1237 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
1265 |
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8","res_mod9","res_mod10") |
|
1238 | 1266 |
|
1239 | 1267 |
t<-melt(data_v_combined, |
1240 | 1268 |
measure=names_var, |
... | ... | |
1249 | 1277 |
|
1250 | 1278 |
mae_tb<-cast(t,id~variable,mae_fun) #join to station location... |
1251 | 1279 |
|
1252 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
1253 |
|
|
1254 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
1255 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
1256 |
#n_tb<-cast(t,dst_cat1~variable,length) |
|
1257 |
|
|
1258 | 1280 |
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1)) |
1259 | 1281 |
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations))) |
1260 | 1282 |
|
... | ... | |
1267 | 1289 |
coordinates(data_v_mae)<-coords #Assign coordinates to the data frame |
1268 | 1290 |
proj4string(data_v_mae)<- proj4string(stat_loc) #Assign coordinates reference system in PROJ4 format |
1269 | 1291 |
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
1292 |
elev <- subset(s_raster,"elev_s") |
|
1293 |
list_p_mae <- vector("list", length(names_mod)) |
|
1294 |
|
|
1295 |
for (k in 1:length(names_mod)){ |
|
1296 |
|
|
1297 |
model_name <- names_mod[k] |
|
1298 |
res_model_name <- paste("res",model_name,sep="_") |
|
1299 |
#res_model_name <- "res_mod1" |
|
1300 |
p1 <- levelplot(elev,scales = list(draw = FALSE), colorkey = FALSE,par.settings = GrTheme) |
|
1301 |
tt <- na.omit(data_v_mae) |
|
1302 |
tt <- data_v_mae[data_v_mae[res_model_name],] |
|
1303 |
p2 <- bubble(data_v_mae,res_model_name, main=paste("MAE per station for ",model_name,sep=""), |
|
1304 |
col=matlab.like(25),na.rm=TRUE) |
|
1305 |
p3 <- p2 + p1 + p2 #to force legend... |
|
1306 |
print(p3) |
|
1307 |
#p <- bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE) |
|
1308 |
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5)) |
|
1309 |
list_p_mae[[k]] <- p3 |
|
1310 |
} |
|
1270 | 1311 |
|
1271 |
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE) |
|
1272 |
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5)) |
|
1312 |
layout_m<-c(1,3) # works if set to this?? ok set the resolution... |
|
1313 |
png(paste("Figure13_paper_","_residuals_",date_selected,"_",out_prefix,".png", sep=""), |
|
1314 |
height=480*layout_m[1],width=480*layout_m[2]) |
|
1273 | 1315 |
|
1274 |
p |
|
1316 |
grid.arrange(list_p_mae[[1]],list_p_mae[[2]],list_p_mae[[5]],ncol=3) |
|
1317 |
|
|
1318 |
dev.off() |
|
1275 | 1319 |
|
1276 | 1320 |
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon |
1277 | 1321 |
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline))) |
1278 | 1322 |
|
1279 | 1323 |
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
1280 | 1324 |
|
1281 |
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE)
|
|
1325 |
p4 <- bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE)
|
|
1282 | 1326 |
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
1283 | 1327 |
|
1284 | 1328 |
col_t <- colorRampPalette(c('blue', 'white', 'red')) |
... | ... | |
1318 | 1362 |
plot(res_mod1~LST,data=test) |
1319 | 1363 |
plot(res_mod5~LST,data=test) |
1320 | 1364 |
|
1321 |
|
|
1322 | 1365 |
brks<-c(0,500,1000,1500,2000,2500,4000) |
1323 | 1366 |
lab_brks<-1:6 |
1324 |
brks<-c(0,500,1000,1500,2000,2500)
|
|
1325 |
lab_brks<-1:5
|
|
1367 |
brks<-c(0,500,1000,1500,2000) |
|
1368 |
lab_brks<-1:4
|
|
1326 | 1369 |
elev_rcstat<-cut(data_v_ag$elev,breaks=brks,labels=lab_brks,right=F) |
1327 | 1370 |
boxplot(data_v_ag$res_mod2~elev_rcstat,ylim=c(-15,15)) |
1328 | 1371 |
boxplot(data_v_ag$res_mod1~elev_rcstat,ylim=c(-15,15)) |
1329 | 1372 |
boxplot(data_v_ag$res_mod5~elev_rcstat,ylim=c(-15,15)) |
1330 |
|
|
1373 |
|
|
1374 |
#brks<-c(0,20,40,60,80,100) |
|
1375 |
#lab_brks<-1:5 |
|
1376 |
#rcstat<-cut(data_v_ag$LC1,breaks=brks,labels=lab_brks,right=F) |
|
1377 |
#plot(data_v_ag$res_mod5~rcstat,ylim=c(-5,5)) |
|
1378 |
#plot(data_v_ag$res_mod5~rcstat,ylim=c(-5,5)) |
|
1379 |
|
|
1331 | 1380 |
brks<-c(-20,-10,0,10,20,30,40) |
1332 | 1381 |
lab_brks<-1:6 |
1333 | 1382 |
rcstat<-cut(data_v_ag$LST,breaks=brks,labels=lab_brks,right=F) |
Also available in: Unified diff
contributions of covariates and methods paper: revisions - addtional improvements to figures (variograms, residuals etc.)