1
|
##################################### METHODS COMPARISON part 4 ##########################################
|
2
|
#################################### Spatial Analysis ############################################
|
3
|
#This script utilizes the R ojbects created during the interpolation phase. #
|
4
|
#At this stage the script produces figures of various accuracy metrics and compare methods: #
|
5
|
#- rank station in terms of residuals and metrics (MAE and RMSE) #
|
6
|
#- calculate accuary metrics for climtology predictions... #
|
7
|
#- spatial density of station network and accuracy metric #
|
8
|
#- visualization of maps of prediction and difference for comparison #
|
9
|
#AUTHOR: Benoit Parmentier #
|
10
|
#DATE: 11/23/2012 #
|
11
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491 -- #
|
12
|
###################################################################################################
|
13
|
|
14
|
###Loading R library and packages
|
15
|
library(gtools) # loading some useful tools such as mixedsort
|
16
|
library(mgcv) # GAM package by Wood 2006 (version 2012)
|
17
|
library(sp) # Spatial pacakge with class definition by Bivand et al. 2008
|
18
|
library(spdep) # Spatial package with methods and spatial stat. by Bivand et al. 2012
|
19
|
library(rgdal) # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
|
20
|
library(gstat) # Kriging and co-kriging by Pebesma et al. 2004
|
21
|
library(automap) # Automated Kriging based on gstat module by Hiemstra et al. 2008
|
22
|
library(spgwr)
|
23
|
library(gpclib)
|
24
|
library(maptools)
|
25
|
library(graphics)
|
26
|
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing
|
27
|
library(raster)
|
28
|
library(rasterVis)
|
29
|
library(plotrix) #Draw circle on graph
|
30
|
library(reshape)
|
31
|
## Functions
|
32
|
#loading R objects that might have similar names
|
33
|
load_obj <- function(f)
|
34
|
{
|
35
|
env <- new.env()
|
36
|
nm <- load(f, env)[1]
|
37
|
env[[nm]]
|
38
|
}
|
39
|
|
40
|
###Parameters and arguments
|
41
|
|
42
|
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp" #GHCN shapefile containing variables for modeling 2010
|
43
|
#infile2<-"list_10_dates_04212012.txt" #List of 10 dates for the regression
|
44
|
infile2<-"list_365_dates_04212012.txt" #list of dates
|
45
|
infile3<-"LST_dates_var_names.txt" #LST dates name
|
46
|
infile4<-"models_interpolation_05142012.txt" #Interpolation model names
|
47
|
infile5<-"mean_day244_rescaled.rst" #mean LST for day 244
|
48
|
inlistf<-"list_files_05032012.txt" #list of raster images containing the Covariates
|
49
|
infile6<-"OR83M_state_outline.shp"
|
50
|
#stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
|
51
|
|
52
|
out_prefix<-"methods_11092012_"
|
53
|
|
54
|
##### LOAD USEFUL DATA
|
55
|
|
56
|
#obj_list<-"list_obj_08262012.txt" #Results of fusion from the run on ATLAS
|
57
|
path<-"/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison_10242012" #Jupiter LOCATION on Atlas for kriging #Jupiter Location on XANDERS
|
58
|
#path<-"/Users/benoitparmentier/Dropbox/Data/NCEAS/Oregon_covariates/" #Local dropbox folder on Benoit's laptop
|
59
|
setwd(path)
|
60
|
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
|
61
|
#User defined output prefix
|
62
|
|
63
|
#CRS<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.)
|
64
|
lines<-read.table(paste(path,"/",inlistf,sep=""), sep="") #Column 1 contains the names of raster files
|
65
|
inlistvar<-lines[,1]
|
66
|
inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
|
67
|
covar_names<-as.character(lines[,2]) #Column two contains short names for covaraites
|
68
|
|
69
|
s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables.
|
70
|
layerNames(s_raster)<-covar_names #Assigning names to the raster layers
|
71
|
projection(s_raster)<-proj_str
|
72
|
|
73
|
#Create mask using land cover data
|
74
|
pos<-match("LC10",layerNames(s_raster)) #Find the layer which contains water bodies
|
75
|
LC10<-subset(s_raster,pos)
|
76
|
LC10[is.na(LC10)]<-0 #Since NA values are 0, we assign all zero to NA
|
77
|
mask_land<-LC10<100 #All values below 100% water are assigned the value 1, value 0 is "water"
|
78
|
mask_land_NA<-mask_land
|
79
|
mask_land_NA[mask_land_NA==0]<-NA #Water bodies are assigned value 1
|
80
|
|
81
|
data_name<-"mask_land_OR"
|
82
|
raster_name<-paste(data_name,".rst", sep="")
|
83
|
writeRaster(mask_land, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
84
|
#writeRaster(r2, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI)
|
85
|
|
86
|
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM"
|
87
|
ELEV_SRTM<-raster(s_raster,layer=pos) #Select layer from stack on 10/30
|
88
|
s_raster<-dropLayer(s_raster,pos)
|
89
|
ELEV_SRTM[ELEV_SRTM <0]<-NA
|
90
|
mask_ELEV_SRTM<-ELEV_SRTM>0
|
91
|
|
92
|
#Change this a in loop...
|
93
|
pos<-match("LC1",layerNames(s_raster)) #Find column with name "value"
|
94
|
LC1<-raster(s_raster,layer=pos) #Select layer from stack
|
95
|
s_raster<-dropLayer(s_raster,pos)
|
96
|
LC1[is.na(LC1)]<-0
|
97
|
pos<-match("LC2",layerNames(s_raster)) #Find column with name "value"
|
98
|
LC2<-raster(s_raster,layer=pos) #Select layer from stack
|
99
|
s_raster<-dropLayer(s_raster,pos)
|
100
|
LC2[is.na(LC2)]<-0
|
101
|
pos<-match("LC3",layerNames(s_raster)) #Find column with name "value"
|
102
|
LC3<-raster(s_raster,layer=pos) #Select layer from stack
|
103
|
s_raster<-dropLayer(s_raster,pos)
|
104
|
LC3[is.na(LC3)]<-0
|
105
|
pos<-match("LC4",layerNames(s_raster)) #Find column with name "value"
|
106
|
LC4<-raster(s_raster,layer=pos) #Select layer from stack
|
107
|
s_raster<-dropLayer(s_raster,pos)
|
108
|
LC4[is.na(LC4)]<-0
|
109
|
pos<-match("LC6",layerNames(s_raster)) #Find column with name "value"
|
110
|
LC6<-raster(s_raster,layer=pos) #Select layer from stack
|
111
|
s_raster<-dropLayer(s_raster,pos)
|
112
|
LC6[is.na(LC6)]<-0
|
113
|
pos<-match("LC7",layerNames(s_raster)) #Find column with name "value"
|
114
|
LC7<-raster(s_raster,layer=pos) #Select layer from stack
|
115
|
s_raster<-dropLayer(s_raster,pos)
|
116
|
LC7[is.na(LC7)]<-0
|
117
|
pos<-match("LC9",layerNames(s_raster)) #Find column with name "LC9", this is wetland...
|
118
|
LC9<-raster(s_raster,layer=pos) #Select layer from stack
|
119
|
s_raster<-dropLayer(s_raster,pos)
|
120
|
LC9[is.na(LC9)]<-0
|
121
|
|
122
|
LC_s<-stack(LC1,LC2,LC3,LC4,LC6,LC7)
|
123
|
layerNames(LC_s)<-c("LC1_forest","LC2_shrub","LC3_grass","LC4_crop","LC6_urban","LC7_barren")
|
124
|
LC_s <-mask(LC_s,mask_ELEV_SRTM)
|
125
|
plot(LC_s)
|
126
|
|
127
|
s_raster<-addLayer(s_raster, LC_s)
|
128
|
|
129
|
#mention this is the last... files
|
130
|
|
131
|
#Read region outline...
|
132
|
filename<-sub(".shp","",infile6) #Removing the extension from file.
|
133
|
reg_outline<-readOGR(".", filename) #reading shapefile
|
134
|
|
135
|
############ PART 4: RESIDUALS ANALYSIS: ranking, plots, focus regions ##################
|
136
|
############## EXAMINING STATION RESIDUALS ###########
|
137
|
########### CONSTANT OVER 365 AND SAMPLING OVER 365
|
138
|
#Plot daily_deltaclim_rast, bias_rast,add data_s and data_v
|
139
|
|
140
|
# RANK STATION by average or median RMSE
|
141
|
# Count the number of times a station is in the extremum group of outliers...
|
142
|
# LOOK at specific date...
|
143
|
|
144
|
#Examine residuals for a spciefic date...Jan, 1 using run of const_all i.e. same training over 365 dates
|
145
|
path_data_cai<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" #Change to constant
|
146
|
path_data_fus<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM"
|
147
|
|
148
|
date_selected<-"20100103"
|
149
|
|
150
|
oldpath<-getwd()
|
151
|
setwd(path_data_cai)
|
152
|
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_CAI2_const_all_10312012.rst",sep="")) #Search for files in relation to fusion
|
153
|
lf_cai2c<-list.files(pattern=file_pat) #Search for files in relation to fusion
|
154
|
rast_cai2c<-stack(lf_cai2c) #lf_cai2c CAI results with constant sampling over 365 dates
|
155
|
rast_cai2c<-mask(rast_cai2c,mask_ELEV_SRTM)
|
156
|
|
157
|
oldpath<-getwd()
|
158
|
setwd(path_data_fus)
|
159
|
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_fusion_const_all_lstd_11022012.rst",sep="")) #Search for files in relation to fusion
|
160
|
lf_fus1c<-list.files(pattern=file_pat) #Search for files in relation to fusion
|
161
|
rast_fus1c<-stack(lf_fus1c)
|
162
|
rast_fus1c<-mask(rast_fus1c,mask_ELEV_SRTM)
|
163
|
|
164
|
rast_fus_pred<-raster(rast_fus1c,1) # Select the first model from the stack i.e fusion with kriging for both steps
|
165
|
rast_cai_pred<-raster(rast_cai2c,1)
|
166
|
|
167
|
#list files that contain model objects and ratingin-testing information for CAI and Fusion
|
168
|
|
169
|
gam_fus<-load_obj(file.path(path_data_fus,
|
170
|
"results_mod_obj__365d_GAM_fusion_const_all_lstd_11022012.RData"))
|
171
|
gam_cai<-load_obj(file.path(path_data_cai,
|
172
|
"results_mod_obj__365d_GAM_CAI2_const_all_10312012.RData")) #This contains all the info
|
173
|
sampling_date_list<-gam_fus$sampling_obj$sampling_dat$date
|
174
|
|
175
|
#Create a residual table...
|
176
|
res_mod9f_list<-vector("list",365)
|
177
|
res_mod9c_list<-vector("list",365)
|
178
|
data_vf_list<-vector("list",365)
|
179
|
data_vc_list<-vector("list",365)
|
180
|
|
181
|
for(k in 1:365){
|
182
|
data_vf<-as.data.frame(c)
|
183
|
data_vc<-as.data.frame(gam_cai$gam_CAI_mod[[k]]$data_v)
|
184
|
data_vf<-data_vf[,c("id","date","res_mod7","x_OR83M", "y_OR83M")]
|
185
|
data_vc<-data_vc[,c("id","date","res_mod9","x_OR83M", "y_OR83M")]
|
186
|
#subset data frame? or rbind them...and reshape?? think about it
|
187
|
data_vf_list[[k]]<-data_vf
|
188
|
data_vc_list[[k]]<-data_vc
|
189
|
}
|
190
|
tab_resmod9f<-do.call(rbind,data_vf_list)
|
191
|
tab_resmod9c<-do.call(rbind,data_vc_list)
|
192
|
|
193
|
#Get the unique location information...
|
194
|
tab_locsc<-melt(tab_resmod9c,
|
195
|
measure=c("x_OR83M","y_OR83M"),
|
196
|
id=c("id"),
|
197
|
na.rm=F)
|
198
|
tab_locsc_cast<-cast(tab_locsc,id~variable,mean)
|
199
|
tab_locsc<-as.data.frame(tab_locsc_cast)
|
200
|
coords<- tab_locsc[,c('x_OR83M','y_OR83M')]
|
201
|
coordinates(tab_locsc)<-coords
|
202
|
proj4string(tab_locsc)<-proj_str #Need to assign coordinates...
|
203
|
|
204
|
tab_locsf<-melt(tab_resmod9f,
|
205
|
measure=c("x_OR83M","y_OR83M"),
|
206
|
id=c("id"),
|
207
|
na.rm=F)
|
208
|
tab_locsf_cast<-cast(tab_locsf,id~variable,mean)
|
209
|
tab_locsf<-as.data.frame(tab_locsf_cast)
|
210
|
coords<- tab_locsf[,c('x_OR83M','y_OR83M')]
|
211
|
coordinates(tab_locsf)<-coords
|
212
|
proj4string(tab_locsf)<-proj_str #Need to assign coordinates..
|
213
|
|
214
|
all.equal(tab_locsc,tab_locsf) #Checking that stations used over the year are equal!!!
|
215
|
#since all equal we can use one object, it will be sufficient...
|
216
|
tab_locs<-tab_locsf
|
217
|
|
218
|
####### Now join and summarize objects together...
|
219
|
|
220
|
tabf_melt<-melt(tab_resmod9f[c("id","date","res_mod7")],
|
221
|
measure=c("res_mod7"),
|
222
|
id=c("id","date"),
|
223
|
na.rm=F)
|
224
|
tabc_melt<-melt(tab_resmod9c[c("id","date","res_mod9")],
|
225
|
measure=c("res_mod9"),
|
226
|
id=c("id","date"),
|
227
|
na.rm=F)
|
228
|
tabf_cast<-cast(tabf_melt,id~date~variable,mean)
|
229
|
tabf_resmod9_locs<-as.data.frame(tabf_cast[,,1])
|
230
|
tabc_cast<-cast(tabc_melt,id~date~variable,mean)
|
231
|
tabc_resmod9_locs<-as.data.frame(tabc_cast[,,1])
|
232
|
|
233
|
|
234
|
#Now reclass the data into outliers: Class with -1 (negative exteme),class 0 (not extreme), class 1 (positive extre)
|
235
|
calculate_extremes_stat <-function (tab_resmod9_locs,tab_locs){
|
236
|
|
237
|
#tab_resmod9_locs<-tabf_resmod9_locs #comment out when running function
|
238
|
#tab_locs<-tab_locsf
|
239
|
#tab_res_rec<-tabf_resmod9_locs
|
240
|
|
241
|
#start here
|
242
|
sd_v<-sapply(tab_resmod9_locs,sd,na.rm=T) #This the sandard deviation of residuals for every day for the fusion prediction
|
243
|
mean_v<-sapply(tab_resmod9_locs,mean,na.rm=T) #This the mean of residuals for every day for the fusion prediction
|
244
|
|
245
|
for (k in 1:365){
|
246
|
mean_val<-mean_v[k]
|
247
|
sd_val<-sd_v[k]
|
248
|
y<-tab_resmod9_locs[,k]
|
249
|
breaks<-c("-inf",mean_val-2*sd_val,mean_val+2*sd_val,"+inf")
|
250
|
rec<-cut(y,breaks,labels=c(-1,0,1))
|
251
|
rec<-as.numeric(as.character(rec))
|
252
|
tab_res_rec[,k]<-rec
|
253
|
}
|
254
|
|
255
|
tab_res_rec<-tab_res_rec[,1:365] #mae_fun<-function (x){mean(abs(x),na.rm=T)}
|
256
|
|
257
|
tab_res_rec$id<-as.character(rownames(tab_res_rec))
|
258
|
#mae_fun<-function (x){mean(abs(x),na.rm=T)}
|
259
|
for (i in 1:nrow(tab_resmod9_locs)){
|
260
|
x<-tab_resmod9_locs[i,]
|
261
|
tab_locs$mae[i]<-mean(abs(x),na.rm=T)
|
262
|
#tab_locs$mae2[i]<-mae_fun(x)
|
263
|
rec<-as.numeric(tab_res_rec[i,])
|
264
|
tmp<-table(rec)
|
265
|
tab_res_rec$c1[i]<-as.numeric(tmp[1])
|
266
|
tab_res_rec$c2[i]<-as.numeric(tmp[2])
|
267
|
tab_res_rec$c3[i]<-as.numeric(tmp[3])
|
268
|
}
|
269
|
|
270
|
projstr_info<-proj4string(tab_locs)
|
271
|
tab_resmod9_locs$id<-rownames(tab_resmod9_locs)
|
272
|
tab_resmod9_locs[is.na(tab_resmod9_locs)]<-NA #replace NaN by NA
|
273
|
tab_locs$id<-as.character(tab_locs$id)
|
274
|
data_v_res<-merge(tab_locs,tab_resmod9_locs,by="id")
|
275
|
data_v_res<-merge(tab_res_rec[,c("id","c1","c2","c3")],data_v_res,by="id")
|
276
|
|
277
|
coords<- data_v_res[,c('x_OR83M','y_OR83M')]
|
278
|
coordinates(data_v_res)<-coords
|
279
|
proj4string(data_v_res)<-projstr_info #Need to assign coordinates...
|
280
|
|
281
|
#tmp<-subset(data_v_res,select=date_selected)
|
282
|
data_v_res$idx<-1:length(data_v_res)
|
283
|
#Plotting results...
|
284
|
return (data_v_res)
|
285
|
}
|
286
|
|
287
|
### usef the function...
|
288
|
data_v_resf<-calculate_extremes_stat(tabf_resmod9_locs,tab_locs) # call function
|
289
|
data_v_resc<-calculate_extremes_stat(tabc_resmod9_locs,tab_locs) # call functio
|
290
|
|
291
|
#Some plotting of results...
|
292
|
data_v_plot<-subset(data_v_resf,!is.na(data_v_resf$mae),"mae")
|
293
|
bubble(data_v_plot,"mae",add=TRUE)
|
294
|
spplot(data_v_resf,"mae")
|
295
|
|
296
|
data_v_plot<-subset(data_v_res,!is.na(c1),"c1")
|
297
|
bubble(data_v_res,"c1")
|
298
|
plot(data_v_res[5,])
|
299
|
plot(reg_outline,add=TRUE)
|
300
|
#Figure on 11/07
|
301
|
s.range <- c(min(minValue(rast_fus_pred)), max(maxValue(rast_fus_pred)))
|
302
|
col.breaks <- pretty(s.range, n=50)
|
303
|
lab.breaks <- pretty(s.range, n=5)
|
304
|
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
|
305
|
|
306
|
#Training plot
|
307
|
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
|
308
|
axis=list(at=lab.breaks, labels=lab.breaks))
|
309
|
plot(reg_outline,add=TRUE)
|
310
|
|
311
|
plot(data_v_res, pch=1,cex=sqrt(data_v_res$c1)/5,add=TRUE)
|
312
|
|
313
|
plot(data_v_plot, pch=1,cex=sqrt(data_v_plot$mae),add=TRUE)
|
314
|
# Calculate RMSE and MAE for each station over 365 dates and plot it on a map as well as on a scatterplot
|
315
|
#Look at the number of observation.
|
316
|
|
317
|
#bubble(data_v_res,"X20101230",na.rm=T,do.sqrt=TRUE)
|
318
|
|
319
|
#1) Digitize features from high resolution imagery and see if you can see differences in the way it is detected in CAI and fusion method...
|
320
|
#in particular how much variation there is in such polygons...
|
321
|
#Polygon to digitize: valley, crop area, mountain...Show that CAI does not capture crop as well as Fusion
|
322
|
#2) Transect with slope changes and examining variation in temperatures...: calculate average MAE on the transect for examle river
|
323
|
#3) X Land cover: examine MAE per land cover...for CAI and Fusion, LST bias and ecoregions...: boxplots at
|
324
|
#4) Look at differences...regions with box plot of MAE inside and outside regions of differences...
|
325
|
#5) X Summarize by season/month
|
326
|
#6) PCA on differences and CAI-Fusion
|
327
|
#7) X PCA on residuals...
|
328
|
#8) X Plot residuals at station by buble plot and kriging...
|
329
|
#9) PCA MAE and other variables ...
|
330
|
|
331
|
### PLOTS OF RESIDUALS AND COVARIATES FOR SPECIFIC DATE
|
332
|
|
333
|
#This should be in a loop...
|
334
|
setwd(path)
|
335
|
date_selected<-"20100103"
|
336
|
dates<-c("20100103","20100901")
|
337
|
|
338
|
for (i in 1:length(dates)){
|
339
|
date_selected<-dates[i]
|
340
|
k<-match(date_selected,sampling_date_list)
|
341
|
names(gam_fus$gam_fus_mod[[k]]) #Show the name structure of the object/list
|
342
|
|
343
|
#Extract the training and testing information for the given date...
|
344
|
data_sf<-gam_fus$gam_fus_mod[[k]]$data_s #object for the first date...20100103
|
345
|
data_vf<-gam_fus$gam_fus_mod[[k]]$data_v #object for the first date...20100103
|
346
|
data_sc<-gam_cai$gam_CAI_mod[[k]]$data_s #object for the first date...20100103
|
347
|
data_vc<-gam_cai$gam_CAI_mod[[k]]$data_v #object for the first date...20100103
|
348
|
|
349
|
#Join information extracted using the function previously
|
350
|
id_selected<-intersect(data_v_resf$id,data_vf$id) #Match station using their official IDSs
|
351
|
pos<-match(id_selected,data_v_resf$id)
|
352
|
tmp<-as.data.frame(data_v_resf[pos,c("id","idx","mae","c1","c2","c3")])
|
353
|
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
|
354
|
data_vf<-merge(data_vf,tmp,by="id")
|
355
|
id_selected<-intersect(data_v_resc$id,data_vc$id) #Match station using their official IDSs
|
356
|
pos<-match(id_selected,data_v_resc$id)
|
357
|
tmp<-as.data.frame(data_v_resc[pos,c("id","idx","mae","c1","c2","c3")])
|
358
|
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
|
359
|
data_vc<-merge(data_vc,tmp,by="id")
|
360
|
#Turn the data frame back into a spdf
|
361
|
coords<- data_vf[,c('x_OR83M','y_OR83M')]
|
362
|
coordinates(data_vf)<-coords
|
363
|
proj4string(data_vf)<-proj_str #Need to assign coordinates...
|
364
|
coords<- data_vc[,c('x_OR83M','y_OR83M')]
|
365
|
coordinates(data_vc)<-coords
|
366
|
proj4string(data_vc)<-proj_str #Need to assign coordinates...
|
367
|
|
368
|
#Prepare for plotting
|
369
|
X11(width=16,height=9)
|
370
|
par(mfrow=c(1,2))
|
371
|
|
372
|
#Select background image for plotting and Plot validation residuals for fusion on 20100103
|
373
|
s.range <- c(min(minValue(rast_fus_pred)), max(maxValue(rast_fus_pred)))
|
374
|
col.breaks <- pretty(s.range, n=50)
|
375
|
lab.breaks <- pretty(s.range, n=5)
|
376
|
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
|
377
|
|
378
|
#Training plot
|
379
|
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
|
380
|
axis=list(at=lab.breaks, labels=lab.breaks))
|
381
|
plot(data_sf,cex=1, add=TRUE)
|
382
|
plot(reg_outline,add=TRUE)
|
383
|
title(paste("Training stations",date_selected,sep=" "))
|
384
|
#Testing plot
|
385
|
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
|
386
|
axis=list(at=lab.breaks, labels=lab.breaks))
|
387
|
plot(data_vf,cex=1,pch=1,add=TRUE)
|
388
|
plot(reg_outline,add=TRUE)
|
389
|
text(data_vf,labels=data_vf$idx,pos=3,cex=1.3)
|
390
|
title(paste("Testing stations",date_selected,sep=" "))
|
391
|
|
392
|
#savePlot here...
|
393
|
savePlot(paste("fig1_training_testing_",date_selected,out_prefix,".png", sep=""), type="png")
|
394
|
|
395
|
#plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
|
396
|
# axis=list(at=lab.breakts, labels=lab.breaks))
|
397
|
#plot(data_vf,cex=sqrt(data_vf$res_mod7),pch=1,add=TRUE)
|
398
|
#plot(reg_outline,add=TRUE)
|
399
|
#text(data_vf,labels=data_vf$idx,pos=3)
|
400
|
|
401
|
#X11(width=16,height=9)
|
402
|
#par(mfrow=c(1,2))
|
403
|
|
404
|
# CREATE A FUNCTION TO AUTOMATE THE PLOT: improve code here
|
405
|
#Plot residuals
|
406
|
y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
|
407
|
plot(data_vf$idx,data_vf$res_mod7, ylab="Residuals", xlab="Index", ylim=y_range)
|
408
|
text(data_vf$idx,data_vf$res_mod7,labels=data_vf$idx,pos=3)
|
409
|
grid(lwd=0.5,col="black")
|
410
|
title(paste("Testing stations residuals fusion",date_selected,sep=" "))
|
411
|
plot(data_vc$idx,data_vc$res_mod9,ylab="Residuals", xlab="Index", ylim=y_range)
|
412
|
text(data_vc$idx,data_vc$res_mod9,data_vc$idx,labels=data_vc$idx,pos=3)
|
413
|
grid(lwd=0.5, col="black")
|
414
|
title(paste("Testing stations residuals CAI",date_selected,sep=" "))
|
415
|
#savePlot here...
|
416
|
savePlot(paste("fig2_testing_residuals_fusion_CAI_",date_selected,out_prefix,".png", sep=""), type="png")
|
417
|
|
418
|
#Plot predicted vs observed
|
419
|
y_range<-range(c(data_vf$pred_mod7,data_vc$pred_mod9))
|
420
|
x_range<-range(c(data_vf$dailyTmax,data_vc$dailyTamx))
|
421
|
plot(data_vf$pred_mod7,data_vf$dailyTmax, ylab="Observed daily tmax (C)", xlab="Predicted daily tmax (C)",
|
422
|
ylim=y_range,xlim=x_range)
|
423
|
text(data_vf$pred_mod7,data_vf$dailyTmax,labels=data_vf$idx,pos=3)
|
424
|
abline(0,1) #takes intercept at 0 and slope as 1 so display 1:1 ine
|
425
|
grid(lwd=0.5,col="black")
|
426
|
title(paste("Testing stations tmax fusion vs daily tmax",date_selected,sep=" "))
|
427
|
plot(data_vc$pred_mod9,data_vc$dailyTmax,ylab="Observed daily tmax (C)", xlab="Predicted daily tmax (C)",
|
428
|
ylim=y_range,xlim=x_range)
|
429
|
text(data_vc$pred_mod9,data_vc$dailyTmax,labels=data_vc$idx,pos=3)
|
430
|
abline(0,1) #takes intercept at 0 and slope as 1 so display 1:1 ine
|
431
|
grid(lwd=0.5, col="black")
|
432
|
title(paste("Testing stations tmax CAI vs daily tmax",date_selected,sep=" "))
|
433
|
#savePlot here...
|
434
|
savePlot(paste("fig3_testing_predicted_observed_fusion_CAI_",date_selected,out_prefix,".png", sep=""), type="png")
|
435
|
|
436
|
###########Plot residuals and covariates
|
437
|
##Elevation and residulas
|
438
|
y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
|
439
|
x_range<-range(c(data_vf$ELEV_SRTM,data_vc$ELEV_SRTM))
|
440
|
plot(data_vf$ELEV_SRTM,data_vf$res_mod7, ylab="Residuals", xlab="ELEV_SRTM (m) ",
|
441
|
ylim=y_range, xlim=x_range)
|
442
|
text(data_vf$ELEV_SRTM,data_vf$res_mod7,labels=data_vf$idx,pos=3)
|
443
|
grid(lwd=0.5,col="black")
|
444
|
title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
|
445
|
plot(data_vc$ELEV_SRTM,data_vc$res_mod9, ylab="Residuals", xlab="ELEV_SRTM (m) ",
|
446
|
ylim=y_range, xlim=x_range)
|
447
|
text(data_vc$ELEV_SRTM,data_vc$res_mod9,labels=data_vc$idx,pos=3)
|
448
|
grid(lwd=0.5,col="black")
|
449
|
title(paste("Testing stations residuals CAI vs Elevation",date_selected,sep=" "))
|
450
|
savePlot(paste("fig4_testing_residuals_fusion_CAI_Elev_",date_selected,out_prefix,".png", sep=""), type="png")
|
451
|
|
452
|
##Forest (LC1) and residuals
|
453
|
y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
|
454
|
x_range<-range(c(data_vf$LC1,data_vc$LC1))
|
455
|
plot(data_vf$LC1,data_vf$res_mod7,ylab="Residuals", xlab="LC1 (%)",
|
456
|
ylim=y_range, xlim=x_range)
|
457
|
text(data_vf$LC1,data_vf$res_mod7,labels=data_vf$idx,pos=3)
|
458
|
grid(lwd=0.5,col="black")
|
459
|
title(paste("Testing stations residuals CAI vs LC1 (forest)",date_selected,sep=" "))
|
460
|
plot(data_vc$LC1,data_vc$res_mod9,ylab="Residuals", xlab="LC1 (%)",
|
461
|
ylim=y_range, xlim=x_range)
|
462
|
text(data_vc$LC1,data_vc$res_mod9,labels=data_vc$idx,pos=3)
|
463
|
grid(lwd=0.5,col="black")
|
464
|
title(paste("Testing stations residuals CAI vs LC1(forest)",date_selected,sep=" "))
|
465
|
savePlot(paste("fig5_testing_residuals_fusion_CAI_LC1_",date_selected,out_prefix,".png", sep=""), type="png")
|
466
|
|
467
|
##Grass (LC3) and residuals
|
468
|
y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
|
469
|
x_range<-range(c(data_vf$LC3,data_vc$LC3))
|
470
|
plot(data_vf$LC3,data_vf$res_mod7,ylab="Residuals", xlab="LC3 (%)",
|
471
|
ylim=y_range, xlim=x_range)
|
472
|
text(data_vf$LC3,data_vf$res_mod7,labels=data_vf$idx,pos=3)
|
473
|
grid(lwd=0.5,col="black")
|
474
|
title(paste("Testing stations residuals CAI vs LC3 (grass)",date_selected,sep=" "))
|
475
|
plot(data_vc$LC3,data_vc$res_mod9,ylab="Residuals", xlab="LC3 (%)",
|
476
|
ylim=y_range, xlim=x_range)
|
477
|
text(data_vc$LC3,data_vc$res_mod9,labels=data_vc$idx,pos=3)
|
478
|
grid(lwd=0.5,col="black")
|
479
|
title(paste("Testing stations residuals CAI vs LC3(grass)",date_selected,sep=" "))
|
480
|
savePlot(paste("fig6_testing_residuals_fusion_CAI_LC3_",date_selected,out_prefix,".png", sep=""), type="png")
|
481
|
|
482
|
#LSTD_bias and residuals
|
483
|
y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
|
484
|
x_range<-range(c(data_vf$LSTD_bias,data_vc$LSTD_bias))
|
485
|
plot(data_vf$LSTD_bias,data_vf$res_mod7)
|
486
|
text(data_vf$LSTD_bias,data_vf$res_mod7,labels=data_vf$idx,pos=3)
|
487
|
grid(lwd=0.5,col="black")
|
488
|
title(paste("Testing stations LST bias vs residuals",date_selected,sep=" "))
|
489
|
plot(data_sf$LSTD_bias,data_sf$res_mod7)
|
490
|
#text(data_sf$LSTD_bias,data_sf$res_mod7,labels=data_vf$idx,pos=3) # Also labels for training?
|
491
|
grid(lwd=0.5,col="black")
|
492
|
title(paste("Training stations LST bias vs residuals",date_selected,sep=" "))
|
493
|
savePlot(paste("fig7_testing_training_residuals_fusion_LSTD_bias_",date_selected,out_prefix,".png", sep=""), type="png")
|
494
|
|
495
|
#LSTD vs TMax and residuals
|
496
|
y_range<-range(c(data_vf$TMax,data_vc$TMax))
|
497
|
x_range<-range(c(data_vf$LSTD_bias,data_vc$LSTD_bias))
|
498
|
plot(data_vf$LSTD_bias,data_vf$res_mod7,xlab="LST bias", ylab="Residuals")
|
499
|
text(data_vf$LSTD_bias,data_vf$res_mod7,labels=data_vf$idx,pos=3)
|
500
|
grid(lwd=0.5,col="black")
|
501
|
title(paste("Testing stations LST bias vs fusion residuals",date_selected,sep=" "))
|
502
|
plot((data_vf$LST-273.16),data_vf$TMax,xlab="LST", ylab="TMax (monthly tmax in C)")
|
503
|
text((data_vf$LST-273.16),data_vf$TMax,labels=data_vf$idx,pos=3)
|
504
|
abline(0,1) #takes intercept at 0 and slope as 1 so display 1:1 ine
|
505
|
grid(lwd=0.5,col="black")
|
506
|
title(paste("Testing stations LST vs TMax ",date_selected,sep=" "))
|
507
|
savePlot(paste("fig8_testing_TMax_fusion_",date_selected,out_prefix,".png", sep=""), type="png")
|
508
|
|
509
|
##Elevation and residuals of temperature prediction
|
510
|
diff_fc<-data_vf$pred_mod7-data_vc$pred_mod9
|
511
|
|
512
|
|
513
|
y_range<-range(c(diff_fc))
|
514
|
x_range<-range(c(data_vf$ELEV_SRTM,data_vc$ELEV_SRTM))
|
515
|
plot(data_vf$ELEV_SRTM,diff_fc, ylab="diff_fc", xlab="ELEV_SRTM (m) ",
|
516
|
ylim=y_range, xlim=x_range)
|
517
|
text(data_vf$ELEV_SRTM,diff_fc,labels=data_vf$idx,pos=3)
|
518
|
grid(lwd=0.5,col="black")
|
519
|
title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
|
520
|
|
521
|
brks<-c(0,500,1000,1500,2000,2500,4000)
|
522
|
lab_brks<-1:6
|
523
|
elev_rcstat<-cut(data_vf$ELEV_SRTM,breaks=brks,labels=lab_brks,right=F)
|
524
|
y_range<-range(c(diff_fc))
|
525
|
x_range<-range(c(elev_rcstat))
|
526
|
plot(elev_rcstat,diff_fc, ylab="diff_cf", xlab="ELEV_SRTM (m) ",
|
527
|
ylim=y_range, xlim=x_range)
|
528
|
text(elev_rcstat,diff_cf,labels=data_vf$idx,pos=3)
|
529
|
grid(lwd=0.5,col="black")
|
530
|
title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
|
531
|
|
532
|
# Combine both training and testing
|
533
|
pred_fus<-c(data_vf$pred_mod7,data_sf$pred_mod7)
|
534
|
pred_cai<-c(data_vc$pred_mod9,data_sc$pred_mod9)
|
535
|
elev_station<-c(data_vf$ELEV_SRTM,data_sf$ELEV_SRTM)
|
536
|
diff_fc<-pred_fus-pred_cai
|
537
|
|
538
|
elev_rcstat<-cut(elev_station,breaks=brks,labels=lab_brks,right=F)
|
539
|
y_range<-range(diff_fc)
|
540
|
x_range<-range(elev_station)
|
541
|
plot(elev_station,diff_fc, ylab="diff_fc", xlab="ELEV_SRTM (m) ",
|
542
|
ylim=y_range, xlim=x_range)
|
543
|
text(elev_rcstat,diff_fc,labels=data_vf$idx,pos=3)
|
544
|
grid(lwd=0.5,col="black")
|
545
|
title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
|
546
|
|
547
|
#USING BOTH validation and training
|
548
|
|
549
|
dev.off()
|
550
|
}
|
551
|
|
552
|
##Check...difference at diff stations...
|
553
|
|
554
|
|
555
|
#Extract the training and testing information for the given date...
|
556
|
data_sf<-gam_fus$gam_fus_mod[[k]]$data_s #object for the first date...20100103
|
557
|
data_vf<-gam_fus$gam_fus_mod[[k]]$data_v #object for the first date...20100103
|
558
|
data_sc<-gam_cai$gam_CAI_mod[[k]]$data_s #object for the first date...20100103
|
559
|
data_vc<-gam_cai$gam_CAI_mod[[k]]$data_v #object for the first date...20100103
|
560
|
|
561
|
#Join information extracted using the function previously
|
562
|
id_selected<-intersect(data_v_resf$id,data_vf$id) #Match station using their official IDSs
|
563
|
pos<-match(id_selected,data_v_resf$id)
|
564
|
tmp<-as.data.frame(data_v_resf[pos,c("id","idx","mae","c1","c2","c3")])
|
565
|
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
|
566
|
data_vf<-merge(data_vf,tmp,by="id")
|
567
|
id_selected<-intersect(data_v_resc$id,data_vc$id) #Match station using their official IDSs
|
568
|
pos<-match(id_selected,data_v_resc$id)
|
569
|
tmp<-as.data.frame(data_v_resc[pos,c("id","idx","mae","c1","c2","c3")])
|
570
|
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
|
571
|
data_vc<-merge(data_vc,tmp,by="id")
|
572
|
#Turn the data frame back into a spdf
|
573
|
coords<- data_vf[,c('x_OR83M','y_OR83M')]
|
574
|
coordinates(data_vf)<-coords
|
575
|
proj4string(data_vf)<-proj_str #Need to assign coordinates...
|
576
|
coords<- data_vc[,c('x_OR83M','y_OR83M')]
|
577
|
coordinates(data_vc)<-coords
|
578
|
proj4string(data_vc)<-proj_str #Need to assign coordinates...
|
579
|
|
580
|
###################################################
|
581
|
############## RESIDUALS BY TIME!!! ##############
|
582
|
|
583
|
#tab_resmod9
|
584
|
for (i in 1:nrow(tab_resmod9f)){
|
585
|
date<-strptime(tab_resmod9f$date[i], "%Y%m%d") # interpolation date being processed, converting the string using specific format
|
586
|
month<-as.integer(strftime(date, "%m"))
|
587
|
tab_resmod9f$month[i]<-month
|
588
|
}
|
589
|
|
590
|
mae_function<-function(res){mae<-mean(abs(res),na.rm=T)}
|
591
|
tab_test<-melt(tab_resmod9f,
|
592
|
measure=c("res_mod7","x_OR83M","y_OR83M"),
|
593
|
id=c("id","month"),
|
594
|
na.rm=F)
|
595
|
tab_test_cast<-cast(data=tab_test,formula=id~month~variable,mean)
|
596
|
table_id_month<-tab_test_cast[,,1]
|
597
|
tab_month_cast<-cast(data=tab_test,formula=month~variable,mean)
|
598
|
tab_month_cast<-cast(data=tab_test,formula=month~variable,mae_function)
|
599
|
X11()
|
600
|
barplot(tab_month_cast$res_mod7,
|
601
|
names.arg=1:12,cex.names=0.8, #names of the teleconnections indices and size of fonts of axis labes
|
602
|
xlab="Month", # font.lab is 2 to make the font bold
|
603
|
ylab="MAE",font.lab=2)
|
604
|
|
605
|
savePlot(paste("fig9_mae_fusion_per_season_",date_selected,out_prefix,".png", sep=""), type="png")
|
606
|
|
607
|
plot(table_id_month[55,])
|
608
|
plot(table_id_month[5,])
|
609
|
|
610
|
#tab_resmod9c
|
611
|
|
612
|
for (i in 1:nrow(tab_resmod9c)){
|
613
|
date<-strptime(tab_resmod9c$date[i], "%Y%m%d") # interpolation date being processed, converting the string using specific format
|
614
|
month<-as.integer(strftime(date, "%m"))
|
615
|
tab_resmod9c$month[i]<-month
|
616
|
}
|
617
|
|
618
|
mae_function<-function(res){mae<-mean(abs(res),na.rm=T)}
|
619
|
tab_test<-melt(tab_resmod9c,
|
620
|
measure=c("res_mod9","x_OR83M","y_OR83M"),
|
621
|
id=c("id","month"),
|
622
|
na.rm=F)
|
623
|
tab_test_cast<-cast(data=tab_test,formula=id~month~variable,mean)
|
624
|
table_id_month_cai<-tab_test_cast[,,1]
|
625
|
tab_month_cast_cai<-cast(data=tab_test,formula=month~variable,mean)
|
626
|
tab_month_cast_cai<-cast(data=tab_test,formula=month~variable,mae_function)
|
627
|
|
628
|
barplot(tab_month_cast_cai$res_mod9,
|
629
|
names.arg=1:12,cex.names=0.8, #names of the teleconnections indices and size of fonts of axis labes
|
630
|
xlab="Month", # font.lab is 2 to make the font bold
|
631
|
ylab="MAE",font.lab=2)
|
632
|
grid(nx=12,ny=10)
|
633
|
|
634
|
savePlot(paste("fig10_mae_cai_per_season_",date_selected,out_prefix,".png", sep=""), type="png")
|
635
|
|
636
|
plot(table_id_month_cai[55,])
|
637
|
plot(table_id_month_cai[5,])
|
638
|
|
639
|
######
|
640
|
overlay(data_v_resf,diff_r)
|
641
|
plot(avg_LC_rec)
|
642
|
#Wihtout setting range
|
643
|
s_range<-c(-12,18)
|
644
|
#col_breaks <- pretty(s_range, n=50)
|
645
|
#lab_breaks <- pretty(s_range, n=5)
|
646
|
col_breaks<-49
|
647
|
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
|
648
|
lab_breaks <- pretty(s_range, n=5)
|
649
|
X11(width=18,height=12)
|
650
|
par(mfrow=c(3,3))
|
651
|
mod_name<-paste("mod",1:8, sep="")
|
652
|
rname<-c("FUS_kr",mod_name)
|
653
|
for (k in 1:length(lf_fus1a)){
|
654
|
fus1a_r<-raster(rast_fus1a,k)
|
655
|
plot(fus1a_r, breaks=col_breaks, col=temp_colors(col_breaks-1),
|
656
|
axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k])
|
657
|
}
|
658
|
|
659
|
### Wihtout setting range
|
660
|
s_range<-c(-12,23)
|
661
|
#col_breaks <- pretty(s_range, n=50)
|
662
|
#lab_breaks <- pretty(s_range, n=5)
|
663
|
col_breaks<-49
|
664
|
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
|
665
|
lab_breaks <- pretty(s_range, n=5)
|
666
|
X11(width=18,height=12)
|
667
|
par(mfrow=c(3,3))
|
668
|
mod_name<-paste("mod",1:8, sep="")
|
669
|
rname<-c("FUS_kr",mod_name)
|
670
|
for (k in 1:length(lf_fus1s)){
|
671
|
fus1s_r<-raster(rast_fus1s,k)
|
672
|
plot(fus1s_r, breaks=col_breaks, col=temp_colors(col_breaks-1),
|
673
|
axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k])
|
674
|
}
|
675
|
|
676
|
############### NOW USE RESHAPE TO CREATE TABLE....##########
|
677
|
# DO A PCA HERE...also look at the difference between prediction at stations...
|
678
|
|
679
|
var_pat<-glob2rx("*2010*") #Search for files in relation to fusion
|
680
|
var<-match("*2010*",names(data_v_resf)) #Search for files in relation to fusion
|
681
|
xp<-data_v_resf[,names(data_v_resf)[10:374]] # subsetting the original training data, note that that this is a "SpatialPointsDataFrame"
|
682
|
|
683
|
#xp<-data_s[,var] # subsetting the original training data, note that that this is a "SpatialPointsDataFrame"
|
684
|
xp<-as.data.frame(xp)
|
685
|
dropsc<-c("x_OR83M","y_OR83M") #columns to be removed
|
686
|
xp<-xp[,!(names(xp) %in% dropsc)] # dropping columns
|
687
|
|
688
|
X<-as.matrix(na.omit(xp))
|
689
|
|
690
|
PCA9var<-prcomp(~.,data=xp, retx = TRUE, center= TRUE, scale = TRUE, na.action=na.omit)
|
691
|
E<-matrix()
|
692
|
E<-PCA9var$rotation #E are the eigenvectors of the standardized PCA base on xp data.frame
|
693
|
Xs<-scale(X) #Use scale to standardize the original data matrix (center on mean and divide by standard deviation)
|
694
|
Xpc<- Xs %*% E #Rotate the original standardized matrix Xs by the eigenvectors from the standardized PCA
|
695
|
plot(PCA9var)
|
696
|
#plot(PCA9var$scores)
|
697
|
loadings<-cor(X, Xpc) #Correlation between the original variables and the principal components...
|
698
|
loadings<-as.data.frame(loadings)
|
699
|
#quartz(width=6, height=6)
|
700
|
plot(PC2~PC1, xlab= "PC1", ylab= "PC2", xlim=c(-1, 1), ylim=c(-1,1), pch =16, width=1,
|
701
|
main= paste("This is PCA for ",date_selected, sep=""), height=1,asp=1,data=loadings) #Add aspect options to get
|
702
|
axis(1,pos=0)
|
703
|
axis(2,pos=0)
|
704
|
text(loadings$PC1,loadings$PC2,rownames(loadings), adj = c(0,0),offset=2, col="red",cex=1.2)
|
705
|
draw.circle(0,0,radius=1)
|
706
|
|
707
|
#DO PCA ON SELECTED DATE...
|
708
|
|