Revision 0b28f27e
Added by Benoit Parmentier over 10 years ago
climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R | ||
---|---|---|
973 | 973 |
# names.attr=names_layers,col.regions=temp.colors) |
974 | 974 |
dev.off |
975 | 975 |
|
976 |
######## NOW GET A ACCURACY BY STATIONS |
|
977 |
|
|
978 |
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s") |
|
979 |
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v") |
|
980 |
|
|
981 |
#number of observations per day |
|
982 |
year_nbv <- sapply(list_data_v,FUN=length) |
|
983 |
year_nbs <- sapply(list_data_s,FUN=length) |
|
984 |
nb_df <- data.frame(nv=year_nbv,ns=year_nbs) |
|
985 |
nb_df$n_tot <- year_nbv + year_nbs |
|
986 |
range(nb_df$n_tot) |
|
987 |
|
|
988 |
data_v_test <- list_data_v[[1]] |
|
989 |
|
|
990 |
#Convert sp data.frame and combined them in one unique df, see function define earlier |
|
991 |
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames |
|
992 |
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8") |
|
993 |
|
|
994 |
limit_val<- c(-30,-2.57,0,2.57,30) |
|
995 |
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val) |
|
996 |
data_v_combined$res_mod1_rc1 |
|
997 |
|
|
998 |
t<-melt(data_v_combined, |
|
999 |
measure=names_var, |
|
1000 |
id=c("res_mod1_rc1","id"), |
|
1001 |
na.rm=T) |
|
1002 |
|
|
1003 |
n_tb<-cast(t,res_mod1_rc1+id~variable,length) |
|
1004 |
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation |
|
1005 |
|
|
1006 |
merge(n_tb$id |
|
1007 |
dim(n_tb) |
|
1008 |
#mae_tb <-cast(t,dst_cat1~variable,mae_fun) |
|
1009 |
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun) |
|
1010 |
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
1011 |
|
|
1012 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
1013 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
1014 |
|
|
1015 |
t<-melt(data_v_combined, |
|
1016 |
measure=names_var, |
|
1017 |
id=c("id"), |
|
1018 |
na.rm=T) |
|
1019 |
|
|
1020 |
hist(data_v_combined) |
|
1021 |
names(data_v_combined) |
|
1022 |
|
|
1023 |
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector |
|
1024 |
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector |
|
1025 |
|
|
1026 |
mae_tb<-cast(t,id~variable,mae_fun) #join to station location... |
|
1027 |
|
|
1028 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
1029 |
|
|
1030 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
1031 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
1032 |
#n_tb<-cast(t,dst_cat1~variable,length) |
|
1033 |
|
|
1034 |
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1)) |
|
1035 |
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations))) |
|
1036 |
|
|
1037 |
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID")) |
|
1038 |
hist(data_v_mae$res_mod1) |
|
1039 |
mean(data_v_mae$res_mod1) |
|
1040 |
|
|
1041 |
coords<- data_v_mae[c('longitude','latitude')] #Define coordinates in a data frame |
|
1042 |
CRS_interp<-proj4string(data_v_test) |
|
1043 |
coordinates(data_v_mae)<-coords #Assign coordinates to the data frame |
|
1044 |
proj4string(data_v_mae)<- proj4string(stat_loc) #Assign coordinates reference system in PROJ4 format |
|
1045 |
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
1046 |
|
|
1047 |
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE) |
|
1048 |
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5)) |
|
1049 |
|
|
1050 |
p |
|
1051 |
|
|
1052 |
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon |
|
1053 |
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline))) |
|
1054 |
|
|
1055 |
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
|
1056 |
|
|
1057 |
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE) |
|
1058 |
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
|
1059 |
|
|
1060 |
col_t <- colorRampPalette(c('blue', 'white', 'red')) |
|
1061 |
|
|
1062 |
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE) |
|
1063 |
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE) |
|
1064 |
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green")) |
|
1065 |
title("mod4") |
|
1066 |
|
|
1067 |
p_elev <-levelplot(subset(s_raster,"elev_s")) |
|
1068 |
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE) |
|
1069 |
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green")) |
|
1070 |
#bubble(data_v_mae,"res_mod1") |
|
1071 |
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red")) |
|
1072 |
#p |
|
1073 |
#stations that are outliers in one but not the other... |
|
1074 |
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id) |
|
1075 |
|
|
1076 |
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,] |
|
1077 |
|
|
1078 |
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink")) |
|
1079 |
|
|
1080 | 976 |
############################### |
1081 | 977 |
########## Prepare table 6 |
1082 | 978 |
# Now get p values and other things... |
... | ... | |
1205 | 1101 |
#height=480*6,width=480*4) |
1206 | 1102 |
|
1207 | 1103 |
#p3 <- list_plots_spt[[3]] |
1208 |
p1<-plot(raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$var_model) |
|
1104 |
p1<-plot(raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$var_model,justPosition=F)
|
|
1209 | 1105 |
#plot(p1) |
1210 | 1106 |
p241<-plot(raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$var_model) |
1211 | 1107 |
#plot(p241) |
... | ... | |
1216 | 1112 |
dev.off() |
1217 | 1113 |
#Combine both plot? + plot info on sill, nugget and range? and most frequent model selected |
1218 | 1114 |
|
1115 |
list_kg_var_model <- lapply(1:365,function(i){raster_prediction_obj_3$method_mod_obj[[i]]$mod[[1]]$var_model}) |
|
1116 |
list_kg_var_model <- lapply(1:365,function(i){raster_prediction_obj_3$method_mod_obj[[i]]$mod[[1]]$var_model}) |
|
1117 |
|
|
1118 |
list_kg_var_model |
|
1119 |
|
|
1120 |
test <- do.call(rbind,list_kg_var_model) |
|
1121 |
tb_variogram <- subset(test,model!="Nug") |
|
1122 |
tt2 <- subset(test,model=="Nug") |
|
1123 |
tb_variogram["Nug"] <- (tt2$psill) |
|
1124 |
|
|
1125 |
add_month_tag<-function(tb){ |
|
1126 |
date<-strptime(tb$date, "%Y%m%d") # interpolation date being processed |
|
1127 |
month<-strftime(date, "%m") # current month of the date being processed |
|
1128 |
} |
|
1129 |
|
|
1130 |
dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates |
|
1131 |
tb_variogram["date"] <- dates |
|
1132 |
tb_variogram$month <- add_month_tag(tb_variogram) |
|
1133 |
#add dates and month?? |
|
1134 |
histogram(tt1$model) |
|
1135 |
#hist(tt1$range) |
|
1136 |
#hist(tt1$psill) |
|
1137 |
#hist(tt1$Nug) |
|
1138 |
boxplot(tb_variogram$range~tb_variogram$month,outline=F) |
|
1139 |
boxplot(tb_variogram$Nug~tb_variogram$month,outline=F) |
|
1140 |
boxplot(tb_variogram$psill~tb_variogram$month,outline=F) |
|
1141 |
#boxplot(tb_variogram$model~tb_variogram$month,outline=F) |
|
1219 | 1142 |
|
1220 | 1143 |
########################################### |
1221 | 1144 |
### Figure 10: map of residuals...for a specific date... |
... | ... | |
1273 | 1196 |
|
1274 | 1197 |
dev.off() |
1275 | 1198 |
|
1276 |
|
|
1199 |
|
|
1200 |
######## NOW GET A ACCURACY BY STATIONS |
|
1201 |
|
|
1202 |
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s") |
|
1203 |
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v") |
|
1204 |
|
|
1205 |
#number of observations per day |
|
1206 |
year_nbv <- sapply(list_data_v,FUN=length) |
|
1207 |
year_nbs <- sapply(list_data_s,FUN=length) |
|
1208 |
nb_df <- data.frame(nv=year_nbv,ns=year_nbs) |
|
1209 |
nb_df$n_tot <- year_nbv + year_nbs |
|
1210 |
range(nb_df$n_tot) |
|
1211 |
|
|
1212 |
data_v_test <- list_data_v[[1]] |
|
1213 |
|
|
1214 |
#Convert sp data.frame and combined them in one unique df, see function define earlier |
|
1215 |
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames |
|
1216 |
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8") |
|
1217 |
|
|
1218 |
limit_val<- c(-30,-2.57,0,2.57,30) |
|
1219 |
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val) |
|
1220 |
data_v_combined$res_mod1_rc1 |
|
1221 |
|
|
1222 |
t<-melt(data_v_combined, |
|
1223 |
measure=names_var, |
|
1224 |
id=c("res_mod1_rc1","id"), |
|
1225 |
na.rm=T) |
|
1226 |
|
|
1227 |
n_tb<-cast(t,res_mod1_rc1+id~variable,length) |
|
1228 |
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation |
|
1229 |
|
|
1230 |
merge(n_tb$id |
|
1231 |
dim(n_tb) |
|
1232 |
#mae_tb <-cast(t,dst_cat1~variable,mae_fun) |
|
1233 |
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun) |
|
1234 |
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
1235 |
|
|
1236 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
1237 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
1238 |
|
|
1239 |
t<-melt(data_v_combined, |
|
1240 |
measure=names_var, |
|
1241 |
id=c("id"), |
|
1242 |
na.rm=T) |
|
1243 |
|
|
1244 |
#hist(data_v_combined) |
|
1245 |
names(data_v_combined) |
|
1246 |
|
|
1247 |
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector |
|
1248 |
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector |
|
1249 |
|
|
1250 |
mae_tb<-cast(t,id~variable,mae_fun) #join to station location... |
|
1251 |
|
|
1252 |
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun) |
|
1253 |
|
|
1254 |
#avg_tb<-cast(t,dst_cat1~variable,mean) |
|
1255 |
#sd_tb<-cast(t,dst_cat1~variable,sd) |
|
1256 |
#n_tb<-cast(t,dst_cat1~variable,length) |
|
1257 |
|
|
1258 |
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1)) |
|
1259 |
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations))) |
|
1260 |
|
|
1261 |
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID")) |
|
1262 |
hist(data_v_mae$res_mod1) |
|
1263 |
mean(data_v_mae$res_mod1) |
|
1264 |
|
|
1265 |
coords<- data_v_mae[c('longitude','latitude')] #Define coordinates in a data frame |
|
1266 |
CRS_interp<-proj4string(data_v_test) |
|
1267 |
coordinates(data_v_mae)<-coords #Assign coordinates to the data frame |
|
1268 |
proj4string(data_v_mae)<- proj4string(stat_loc) #Assign coordinates reference system in PROJ4 format |
|
1269 |
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
1270 |
|
|
1271 |
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE) |
|
1272 |
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5)) |
|
1273 |
|
|
1274 |
p |
|
1275 |
|
|
1276 |
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp" #input region outline defined by polygon: Oregon |
|
1277 |
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline))) |
|
1278 |
|
|
1279 |
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
|
1280 |
|
|
1281 |
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE) |
|
1282 |
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray")) |
|
1283 |
|
|
1284 |
col_t <- colorRampPalette(c('blue', 'white', 'red')) |
|
1285 |
|
|
1286 |
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE) |
|
1287 |
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE) |
|
1288 |
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green")) |
|
1289 |
title("mod4") |
|
1290 |
|
|
1291 |
p_elev <-levelplot(subset(s_raster,"elev_s")) |
|
1292 |
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE) |
|
1293 |
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green")) |
|
1294 |
#bubble(data_v_mae,"res_mod1") |
|
1295 |
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red")) |
|
1296 |
#p |
|
1297 |
#stations that are outliers in one but not the other... |
|
1298 |
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id) |
|
1299 |
|
|
1300 |
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,] |
|
1301 |
|
|
1302 |
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink")) |
|
1303 |
|
|
1304 |
|
|
1305 |
#raster_prediciton object for baseline 1 () s(lat,lon) + s(elev)) and baseline 2 (slat,lon)) |
|
1277 | 1306 |
list_data_v <- lapply(1:365,function(i){raster_prediction_obj_2$validation_mod_obj[[i]]$data_v} #testing with residuals |
1307 |
l_formulas<-(extract_from_list_obj(raster_prediction_obj_2$method_mod_obj,"formulas")) #get vector of dates |
|
1278 | 1308 |
|
1279 | 1309 |
test <- do.call(rbind,list_data_v) |
1280 |
|
|
1310 |
data_v_ag <-test |
|
1281 | 1311 |
plot(res_mod1~elev,data=test) |
1282 | 1312 |
plot(res_mod2~elev,data=test) |
1283 | 1313 |
cor(test$res_mod1,test$elev) |
... | ... | |
1286 | 1316 |
cor(test$res_mod5,test$LST,use="complete.obs") #decrease in correlation when using LST |
1287 | 1317 |
|
1288 | 1318 |
plot(res_mod1~LST,data=test) |
1289 |
plot(res_mod9~LST,data=test) |
|
1319 |
plot(res_mod5~LST,data=test) |
|
1320 |
|
|
1321 |
|
|
1322 |
brks<-c(0,500,1000,1500,2000,2500,4000) |
|
1323 |
lab_brks<-1:6 |
|
1324 |
brks<-c(0,500,1000,1500,2000,2500) |
|
1325 |
lab_brks<-1:5 |
|
1326 |
elev_rcstat<-cut(data_v_ag$elev,breaks=brks,labels=lab_brks,right=F) |
|
1327 |
boxplot(data_v_ag$res_mod2~elev_rcstat,ylim=c(-15,15)) |
|
1328 |
boxplot(data_v_ag$res_mod1~elev_rcstat,ylim=c(-15,15)) |
|
1329 |
boxplot(data_v_ag$res_mod5~elev_rcstat,ylim=c(-15,15)) |
|
1330 |
|
|
1331 |
brks<-c(-20,-10,0,10,20,30,40) |
|
1332 |
lab_brks<-1:6 |
|
1333 |
rcstat<-cut(data_v_ag$LST,breaks=brks,labels=lab_brks,right=F) |
|
1334 |
boxplot(data_v_ag$res_mod4 ~ rcstat,ylim=c(-15,15)) |
|
1335 |
|
|
1336 |
y_range<-range(c(diff_fc)) |
|
1337 |
x_range<-range(c(elev_rcstat)) |
|
1338 |
plot(elev_rcstat,res_mod, ylab="diff_cf", xlab="ELEV_SRTM (m) ", |
|
1339 |
ylim=y_range, xlim=x_range) |
|
1340 |
text(elev_rcstat,diff_cf,labels=data_vf$idx,pos=3) |
|
1341 |
grid(lwd=0.5,col="black") |
|
1342 |
title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" ")) |
|
1343 |
|
|
1344 |
lat<- aggregate(lat~id,data=data_v_ag,FUN=mean) |
|
1345 |
lon<- aggregate(lon~id,data=data_v_ag,FUN=mean) |
|
1346 |
|
|
1347 |
data_v_mae <- data.frame(lon=lon$lon,lat=lat$lat) |
|
1348 |
coordinates(data_v_mae)<- cbind(lon$lon,lat$lat) |
|
1349 |
|
|
1350 |
mae_fun<-function(x){mean(abs(x))} |
|
1351 |
test<- aggregate(res_mod1~id,data=data_v_ag,FUN=mae_fun) |
|
1352 |
data_v_mae$mod1 <- test$res_mod1 |
|
1353 |
test<- aggregate(res_mod2~id,data=data_v_ag,FUN=mae_fun) |
|
1354 |
data_v_mae$mod2 <- test$res_mod2 |
|
1355 |
p<- bubble(data_v_mae,"mod1") |
|
1356 |
print(p) |
|
1357 |
p1 <- levelplot(elev,scales = list(draw = FALSE), colorkey = FALSE,par.settings = GrTheme) |
|
1358 |
hist(data_v_mae$mod1) |
|
1359 |
p<- bubble(data_v_mae,"mod1") |
|
1360 |
spplot() |
|
1290 | 1361 |
|
1362 |
p_elev <- levelplot(elev) |
|
1363 |
pc <-p+p1 + p |
|
1364 |
print(pc) |
|
1365 |
|
|
1366 |
t<-melt(data_v_ag, |
|
1367 |
measure=names_var, |
|
1368 |
id=c("res_mod1","res_mod2,"id"), |
|
1369 |
na.rm=T) |
|
1370 |
|
|
1371 |
n_tb<-cast(t,res_mod1_rc1+id~variable,mae_fun) |
|
1372 |
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation |
|
1373 |
|
|
1291 | 1374 |
###################### END OF SCRIPT ####################### |
1292 | 1375 |
|
1293 | 1376 |
|
Also available in: Unified diff
contribution of covariates and methods: revisions-adding summary of variogram param and accuracy summarized by stations