Project

General

Profile

« Previous | Next » 

Revision 0b28f27e

Added by Benoit Parmentier over 10 years ago

contribution of covariates and methods: revisions-adding summary of variogram param and accuracy summarized by stations

View differences:

climate/research/oregon/interpolation/contribution_of_covariates_paper_interpolation.R
973 973
#          names.attr=names_layers,col.regions=temp.colors)
974 974
dev.off
975 975

  
976
######## NOW GET A ACCURACY BY STATIONS
977

  
978
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s")
979
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
980

  
981
#number of observations per day
982
year_nbv <- sapply(list_data_v,FUN=length)
983
year_nbs <- sapply(list_data_s,FUN=length)
984
nb_df <- data.frame(nv=year_nbv,ns=year_nbs)
985
nb_df$n_tot <- year_nbv + year_nbs
986
range(nb_df$n_tot)
987

  
988
data_v_test <- list_data_v[[1]]
989

  
990
#Convert sp data.frame and combined them in one unique df, see function define earlier
991
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames
992
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8")
993

  
994
limit_val<- c(-30,-2.57,0,2.57,30)
995
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val)
996
data_v_combined$res_mod1_rc1
997

  
998
t<-melt(data_v_combined,
999
        measure=names_var, 
1000
        id=c("res_mod1_rc1","id"),
1001
        na.rm=T)
1002

  
1003
n_tb<-cast(t,res_mod1_rc1+id~variable,length)
1004
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation
1005

  
1006
merge(n_tb$id
1007
dim(n_tb)
1008
#mae_tb <-cast(t,dst_cat1~variable,mae_fun)
1009
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun)
1010
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
1011

  
1012
#avg_tb<-cast(t,dst_cat1~variable,mean)
1013
#sd_tb<-cast(t,dst_cat1~variable,sd)
1014

  
1015
t<-melt(data_v_combined,
1016
        measure=names_var, 
1017
        id=c("id"),
1018
        na.rm=T)
1019

  
1020
hist(data_v_combined)
1021
names(data_v_combined)
1022

  
1023
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector
1024
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector
1025

  
1026
mae_tb<-cast(t,id~variable,mae_fun) #join to station location...
1027

  
1028
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
1029

  
1030
#avg_tb<-cast(t,dst_cat1~variable,mean)
1031
#sd_tb<-cast(t,dst_cat1~variable,sd)
1032
#n_tb<-cast(t,dst_cat1~variable,length)
1033

  
1034
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1))
1035
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations)))
1036

  
1037
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID"))
1038
hist(data_v_mae$res_mod1)
1039
mean(data_v_mae$res_mod1)
1040

  
1041
coords<- data_v_mae[c('longitude','latitude')]              #Define coordinates in a data frame
1042
CRS_interp<-proj4string(data_v_test)
1043
coordinates(data_v_mae)<-coords                      #Assign coordinates to the data frame
1044
proj4string(data_v_mae)<- proj4string(stat_loc)                #Assign coordinates reference system in PROJ4 format
1045
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp))     #Project from WGS84 to new coord. system
1046

  
1047
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE)
1048
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5))
1049

  
1050
p
1051

  
1052
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp"  #input region outline defined by polygon: Oregon
1053
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline)))
1054

  
1055
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
1056

  
1057
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE)
1058
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
1059

  
1060
col_t <- colorRampPalette(c('blue', 'white', 'red'))
1061

  
1062
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE)
1063
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE)
1064
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
1065
title("mod4")
1066

  
1067
p_elev <-levelplot(subset(s_raster,"elev_s"))
1068
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE)
1069
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
1070
#bubble(data_v_mae,"res_mod1")
1071
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red"))
1072
#p
1073
#stations that are outliers in one but not the other...
1074
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id)
1075

  
1076
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,]
1077

  
1078
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink"))
1079

  
1080 976
###############################
1081 977
########## Prepare table 6
1082 978
# Now get p values and other things...
......
1205 1101
    #height=480*6,width=480*4)
1206 1102

  
1207 1103
#p3 <- list_plots_spt[[3]]
1208
p1<-plot(raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$var_model)
1104
p1<-plot(raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[1]]$mod[[1]]$var_model,justPosition=F)
1209 1105
#plot(p1)
1210 1106
p241<-plot(raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$exp_var,raster_prediction_obj_3$method_mod_obj[[241]]$mod[[1]]$var_model)
1211 1107
#plot(p241)
......
1216 1112
dev.off()
1217 1113
#Combine both plot?     + plot info on sill, nugget and range? and most frequent model selected
1218 1114

  
1115
list_kg_var_model <- lapply(1:365,function(i){raster_prediction_obj_3$method_mod_obj[[i]]$mod[[1]]$var_model})
1116
list_kg_var_model <- lapply(1:365,function(i){raster_prediction_obj_3$method_mod_obj[[i]]$mod[[1]]$var_model})
1117

  
1118
list_kg_var_model
1119
   
1120
test <- do.call(rbind,list_kg_var_model)
1121
tb_variogram <- subset(test,model!="Nug")      
1122
tt2 <- subset(test,model=="Nug")      
1123
tb_variogram["Nug"] <- (tt2$psill)
1124

  
1125
add_month_tag<-function(tb){
1126
  date<-strptime(tb$date, "%Y%m%d")   # interpolation date being processed
1127
  month<-strftime(date, "%m")          # current month of the date being processed
1128
}
1129

  
1130
dates<-(extract_from_list_obj(raster_prediction_obj_1$method_mod_obj,"sampling_dat"))$date #get vector of dates
1131
tb_variogram["date"] <- dates
1132
tb_variogram$month <- add_month_tag(tb_variogram)
1133
#add dates and month??
1134
histogram(tt1$model)      
1135
#hist(tt1$range)
1136
#hist(tt1$psill)
1137
#hist(tt1$Nug)
1138
boxplot(tb_variogram$range~tb_variogram$month,outline=F)
1139
boxplot(tb_variogram$Nug~tb_variogram$month,outline=F)
1140
boxplot(tb_variogram$psill~tb_variogram$month,outline=F)
1141
#boxplot(tb_variogram$model~tb_variogram$month,outline=F)
1219 1142
      
1220 1143
###########################################
1221 1144
### Figure 10: map of residuals...for a specific date...
......
1273 1196
      
1274 1197
dev.off()   
1275 1198

  
1276
      
1199

  
1200
######## NOW GET A ACCURACY BY STATIONS
1201

  
1202
list_data_s <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_s")
1203
list_data_v <-extract_list_from_list_obj(raster_prediction_obj_1$validation_mod_obj,"data_v")
1204

  
1205
#number of observations per day
1206
year_nbv <- sapply(list_data_v,FUN=length)
1207
year_nbs <- sapply(list_data_s,FUN=length)
1208
nb_df <- data.frame(nv=year_nbv,ns=year_nbs)
1209
nb_df$n_tot <- year_nbv + year_nbs
1210
range(nb_df$n_tot)
1211

  
1212
data_v_test <- list_data_v[[1]]
1213

  
1214
#Convert sp data.frame and combined them in one unique df, see function define earlier
1215
data_v_combined <-convert_spdf_to_df_from_list(list_data_v) #long rownames
1216
names_var<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_mod6","res_mod7","res_mod8")
1217

  
1218
limit_val<- c(-30,-2.57,0,2.57,30)
1219
data_v_combined$res_mod1_rc1 <- cut(data_v_combined$res_mod1,include.lowest=TRUE,breaks=limit_val)
1220
data_v_combined$res_mod1_rc1
1221

  
1222
t<-melt(data_v_combined,
1223
        measure=names_var, 
1224
        id=c("res_mod1_rc1","id"),
1225
        na.rm=T)
1226

  
1227
n_tb<-cast(t,res_mod1_rc1+id~variable,length)
1228
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation
1229

  
1230
merge(n_tb$id
1231
dim(n_tb)
1232
#mae_tb <-cast(t,dst_cat1~variable,mae_fun)
1233
#rmse_tb <-cast(t,dst_cat1~variable,rmse_fun)
1234
#sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
1235

  
1236
#avg_tb<-cast(t,dst_cat1~variable,mean)
1237
#sd_tb<-cast(t,dst_cat1~variable,sd)
1238

  
1239
t<-melt(data_v_combined,
1240
        measure=names_var, 
1241
        id=c("id"),
1242
        na.rm=T)
1243

  
1244
#hist(data_v_combined)
1245
names(data_v_combined)
1246

  
1247
mae_fun<-function(x){mean(abs(x))} #Mean Absolute Error give a residuals vector
1248
sd_abs_fun<-function(x){sd(abs(x))} #sd Absolute Error give a residuals vector
1249

  
1250
mae_tb<-cast(t,id~variable,mae_fun) #join to station location...
1251

  
1252
sd_abs_tb<-cast(t,dst_cat1~variable,sd_abs_fun)
1253

  
1254
#avg_tb<-cast(t,dst_cat1~variable,mean)
1255
#sd_tb<-cast(t,dst_cat1~variable,sd)
1256
#n_tb<-cast(t,dst_cat1~variable,length)
1257

  
1258
met_obj <-load_obj(file.path(in_dir1,met_obj_file_1))
1259
stat_loc<-readOGR(dsn=in_dir1,layer=sub(".shp","",basename(met_obj$loc_stations)))
1260

  
1261
data_v_mae <-merge(mae_tb,stat_loc,by.x=c("id"),by.y=c("STAT_ID"))
1262
hist(data_v_mae$res_mod1)
1263
mean(data_v_mae$res_mod1)
1264

  
1265
coords<- data_v_mae[c('longitude','latitude')]              #Define coordinates in a data frame
1266
CRS_interp<-proj4string(data_v_test)
1267
coordinates(data_v_mae)<-coords                      #Assign coordinates to the data frame
1268
proj4string(data_v_mae)<- proj4string(stat_loc)                #Assign coordinates reference system in PROJ4 format
1269
data_v_mae<-spTransform(data_v_mae,CRS(CRS_interp))     #Project from WGS84 to new coord. system
1270

  
1271
p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE)
1272
#p<-bubble(data_v_mae,"res_mod1",maxsize=4,col=c("red"),fill=FALSE,key.entries=c(1,1.5,2,2.5,3,3.5,4,4.5))
1273

  
1274
p
1275

  
1276
infile_reg_outline <- "/data/project/layers/commons/data_workflow/inputs/region_outlines_ref_files/OR83M_state_outline.shp"  #input region outline defined by polygon: Oregon
1277
reg_outline <- readOGR(dsn=dirname(infile_reg_outline),layer=sub(".shp","",basename(infile_reg_outline)))
1278

  
1279
p + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
1280

  
1281
p4<-bubble(data_v_mae,"res_mod4",maxsize=4,col=c("red"),fill=FALSE)
1282
p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="darkgray"))
1283

  
1284
col_t <- colorRampPalette(c('blue', 'white', 'red'))
1285

  
1286
p_elev <-levelplot(subset(s_raster,"elev_s"),margin=FALSE)
1287
p4 <-bubble(data_v_mae[data_v_mae$res_mod4>2.134,],"res_mod4",maxsize=4,col=c("blue"),fill=FALSE)
1288
p_elev + p4 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
1289
title("mod4")
1290

  
1291
p_elev <-levelplot(subset(s_raster,"elev_s"))
1292
p1 <-bubble(data_v_mae[data_v_mae$res_mod1>2.109,],"res_mod1",maxsize=4,col=c("blue"),fill=FALSE)
1293
p_elev + p1 + layer(sp.polygons(reg_outline,lwd=0.9,col="green"))
1294
#bubble(data_v_mae,"res_mod1")
1295
#p<-spplot(data_v_mae,"res_mod1",maxsize=4,col=c("red"))
1296
#p
1297
#stations that are outliers in one but not the other...
1298
id_setdiff<-setdiff(data_v_mae[data_v_mae$res_mod1>2.109,]$id,data_v_mae[data_v_mae$res_mod4>2.134,]$id)
1299

  
1300
data_id_setdiff <- data_v_mae[data_v_mae$id %in% id_setdiff,]
1301

  
1302
p_elev +layer(sp.polygons(reg_outline,lwd=0.9,col="green")) + layer(sp.points(data_id_setdiff,pch=4,cex=2,col="pink"))
1303

  
1304

  
1305
#raster_prediciton object for baseline 1 () s(lat,lon) + s(elev)) and baseline 2 (slat,lon))      
1277 1306
list_data_v <- lapply(1:365,function(i){raster_prediction_obj_2$validation_mod_obj[[i]]$data_v} #testing with residuals
1307
l_formulas<-(extract_from_list_obj(raster_prediction_obj_2$method_mod_obj,"formulas")) #get vector of dates
1278 1308

  
1279 1309
test <- do.call(rbind,list_data_v)        
1280
                
1310
data_v_ag <-test                
1281 1311
plot(res_mod1~elev,data=test)                    
1282 1312
plot(res_mod2~elev,data=test)                    
1283 1313
cor(test$res_mod1,test$elev)
......
1286 1316
cor(test$res_mod5,test$LST,use="complete.obs") #decrease in correlation when using LST
1287 1317

  
1288 1318
plot(res_mod1~LST,data=test)                    
1289
plot(res_mod9~LST,data=test)                    
1319
plot(res_mod5~LST,data=test)                    
1320
                     
1321

  
1322
brks<-c(0,500,1000,1500,2000,2500,4000)
1323
lab_brks<-1:6
1324
brks<-c(0,500,1000,1500,2000,2500)
1325
lab_brks<-1:5
1326
elev_rcstat<-cut(data_v_ag$elev,breaks=brks,labels=lab_brks,right=F)
1327
boxplot(data_v_ag$res_mod2~elev_rcstat,ylim=c(-15,15))           
1328
boxplot(data_v_ag$res_mod1~elev_rcstat,ylim=c(-15,15))                      
1329
boxplot(data_v_ag$res_mod5~elev_rcstat,ylim=c(-15,15))                      
1330
                
1331
brks<-c(-20,-10,0,10,20,30,40)
1332
lab_brks<-1:6
1333
rcstat<-cut(data_v_ag$LST,breaks=brks,labels=lab_brks,right=F)
1334
boxplot(data_v_ag$res_mod4 ~ rcstat,ylim=c(-15,15))           
1335
                    
1336
y_range<-range(c(diff_fc))
1337
x_range<-range(c(elev_rcstat))
1338
plot(elev_rcstat,res_mod, ylab="diff_cf", xlab="ELEV_SRTM (m) ",
1339
       ylim=y_range, xlim=x_range)
1340
text(elev_rcstat,diff_cf,labels=data_vf$idx,pos=3)
1341
grid(lwd=0.5,col="black")
1342
title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
1343

  
1344
lat<- aggregate(lat~id,data=data_v_ag,FUN=mean)
1345
lon<- aggregate(lon~id,data=data_v_ag,FUN=mean)                     
1346

  
1347
data_v_mae <- data.frame(lon=lon$lon,lat=lat$lat)
1348
coordinates(data_v_mae)<- cbind(lon$lon,lat$lat)
1349

  
1350
mae_fun<-function(x){mean(abs(x))}                      
1351
test<- aggregate(res_mod1~id,data=data_v_ag,FUN=mae_fun)
1352
data_v_mae$mod1 <- test$res_mod1  
1353
test<- aggregate(res_mod2~id,data=data_v_ag,FUN=mae_fun)
1354
data_v_mae$mod2 <- test$res_mod2   
1355
p<- bubble(data_v_mae,"mod1")
1356
print(p)                    
1357
p1 <- levelplot(elev,scales = list(draw = FALSE), colorkey = FALSE,par.settings = GrTheme)
1358
hist(data_v_mae$mod1)
1359
p<- bubble(data_v_mae,"mod1")
1360
spplot()
1290 1361
                     
1362
p_elev <- levelplot(elev)
1363
pc <-p+p1 + p
1364
print(pc)
1365

  
1366
t<-melt(data_v_ag,
1367
        measure=names_var, 
1368
        id=c("res_mod1","res_mod2,"id"),
1369
        na.rm=T)
1370

  
1371
n_tb<-cast(t,res_mod1_rc1+id~variable,mae_fun)
1372
n_tb_tot <-cast(t,id~variable,length) #number of times the stations was used for validation
1373

  
1291 1374
###################### END OF SCRIPT #######################
1292 1375

  
1293 1376

  

Also available in: Unified diff