Project

General

Profile

« Previous | Next » 

Revision c5732f39

Added by Benoit Parmentier about 12 years ago

Methods comp part4- task#491-, residuals analses, cleaning, added functions, residuals by land cover and temporal profiles

View differences:

climate/research/oregon/interpolation/methods_comparison_assessment_part4.R
1
#####################################  METHODS COMPARISON part 3 ##########################################
1
#####################################  METHODS COMPARISON part 4 ##########################################
2 2
#################################### Spatial Analysis ############################################
3 3
#This script utilizes the R ojbects created during the interpolation phase.                       #
4 4
#At this stage the script produces figures of various accuracy metrics and compare methods:       #
......
7 7
#- spatial density of station network and accuracy metric                                         #
8 8
#- visualization of maps of prediction and difference for comparison                              #
9 9
#AUTHOR: Benoit Parmentier                                                                        #
10
#DATE: 10/23/2012                                                                                 #
10
#DATE: 11/23/2012                                                                                 #
11 11
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491 --                                  #
12 12
###################################################################################################
13 13

  
......
60 60
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
61 61
#User defined output prefix
62 62

  
63
#CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
64
lines<-read.table(paste(path,"/",inlistf,sep=""), sep="")                      #Column 1 contains the names of raster files
65
inlistvar<-lines[,1]
66
inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
67
covar_names<-as.character(lines[,2])                                         #Column two contains short names for covaraites
68

  
69
s_raster<- stack(inlistvar)                                                  #Creating a stack of raster images from the list of variables.
70
layerNames(s_raster)<-covar_names                                            #Assigning names to the raster layers
71
projection(s_raster)<-proj_str
72

  
73
#Create mask using land cover data
74
pos<-match("LC10",layerNames(s_raster))            #Find the layer which contains water bodies
75
LC10<-subset(s_raster,pos)
76
LC10[is.na(LC10)]<-0                               #Since NA values are 0, we assign all zero to NA
77
mask_land<-LC10<100                                #All values below 100% water are assigned the value 1, value 0 is "water"
78
mask_land_NA<-mask_land                            
79
mask_land_NA[mask_land_NA==0]<-NA                  #Water bodies are assigned value 1
80

  
81
data_name<-"mask_land_OR"
82
raster_name<-paste(data_name,".rst", sep="")
83
writeRaster(mask_land, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
84
#writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
85

  
86
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM"
87
ELEV_SRTM<-raster(s_raster,layer=pos)             #Select layer from stack on 10/30
88
s_raster<-dropLayer(s_raster,pos)
89
ELEV_SRTM[ELEV_SRTM <0]<-NA
90
mask_ELEV_SRTM<-ELEV_SRTM>0
91

  
92
#Change this a in loop...
93
pos<-match("LC1",layerNames(s_raster)) #Find column with name "value"
94
LC1<-raster(s_raster,layer=pos)             #Select layer from stack
95
s_raster<-dropLayer(s_raster,pos)
96
LC1[is.na(LC1)]<-0
97
pos<-match("LC2",layerNames(s_raster)) #Find column with name "value"
98
LC2<-raster(s_raster,layer=pos)             #Select layer from stack
99
s_raster<-dropLayer(s_raster,pos)
100
LC2[is.na(LC2)]<-0
101
pos<-match("LC3",layerNames(s_raster)) #Find column with name "value"
102
LC3<-raster(s_raster,layer=pos)             #Select layer from stack
103
s_raster<-dropLayer(s_raster,pos)
104
LC3[is.na(LC3)]<-0
105
pos<-match("LC4",layerNames(s_raster)) #Find column with name "value"
106
LC4<-raster(s_raster,layer=pos)             #Select layer from stack
107
s_raster<-dropLayer(s_raster,pos)
108
LC4[is.na(LC4)]<-0
109
pos<-match("LC6",layerNames(s_raster)) #Find column with name "value"
110
LC6<-raster(s_raster,layer=pos)             #Select layer from stack
111
s_raster<-dropLayer(s_raster,pos)
112
LC6[is.na(LC6)]<-0
113
pos<-match("LC7",layerNames(s_raster)) #Find column with name "value"
114
LC7<-raster(s_raster,layer=pos)             #Select layer from stack
115
s_raster<-dropLayer(s_raster,pos)
116
LC7[is.na(LC7)]<-0
117
pos<-match("LC9",layerNames(s_raster)) #Find column with name "LC9", this is wetland...
118
LC9<-raster(s_raster,layer=pos)             #Select layer from stack
119
s_raster<-dropLayer(s_raster,pos)
120
LC9[is.na(LC9)]<-0
121

  
122
LC_s<-stack(LC1,LC2,LC3,LC4,LC6,LC7)
123
layerNames(LC_s)<-c("LC1_forest","LC2_shrub","LC3_grass","LC4_crop","LC6_urban","LC7_barren")
124
LC_s <-mask(LC_s,mask_ELEV_SRTM)
125
plot(LC_s)
126

  
127
s_raster<-addLayer(s_raster, LC_s)
128

  
129
#mention this is the last... files
130

  
131
#Read region outline...
132
filename<-sub(".shp","",infile6)             #Removing the extension from file.
133
reg_outline<-readOGR(".", filename)                 #reading shapefile 
134

  
63 135
############ PART 4: RESIDUALS ANALYSIS: ranking, plots, focus regions  ##################
64 136
############## EXAMINING STATION RESIDUALS ###########
65 137
########### CONSTANT OVER 365 AND SAMPLING OVER 365
......
93 165
rast_cai_pred<-raster(rast_cai2c,1)                  
94 166

  
95 167
#list files that contain model objects and ratingin-testing information for CAI and Fusion
96
fus1_c<-load_obj("results2_fusion_Assessment_measure_all_365d_GAM_fusion_const_10172012.RData")
97 168

  
98 169
gam_fus<-load_obj(file.path(path_data_fus,
99 170
                            "results_mod_obj__365d_GAM_fusion_const_all_lstd_11022012.RData"))
......
102 173
sampling_date_list<-gam_fus$sampling_obj$sampling_dat$date
103 174

  
104 175
#Create a residual table...
105
res_mod9_list<-vector("list",365)
106
res_mod2_list<-vector("list",365)
107

  
108
tab_nv<-matrix(NA,365,1)
109
for(k in 1:365){
110
  tab_nv[k]<-length(fus1_c[[k]]$data_v$res_mod9)
111
}
112
#note that there might be some variation in the number!!!
176
res_mod9f_list<-vector("list",365)
177
res_mod9c_list<-vector("list",365)
178
data_vf_list<-vector("list",365)
179
data_vc_list<-vector("list",365)
113 180

  
114 181
for(k in 1:365){
115
  res_mod9<-gam_fus$gam_fus_mod[[k]]$data_v$res_mod7
116
  res_mod2<-gam_fus$gam_fus_mod[[k]]$data_v$res_mod2
117
  res_mod9_list[[k]]<-res_mod9
118
  res_mod2_list[[k]]<-res_mod2
182
  data_vf<-as.data.frame(c)
183
  data_vc<-as.data.frame(gam_cai$gam_CAI_mod[[k]]$data_v)
184
  data_vf<-data_vf[,c("id","date","res_mod7","x_OR83M", "y_OR83M")]
185
  data_vc<-data_vc[,c("id","date","res_mod9","x_OR83M", "y_OR83M")]
119 186
  #subset data frame? or rbind them...and reshape?? think about it
187
  data_vf_list[[k]]<-data_vf
188
  data_vc_list[[k]]<-data_vc
120 189
}
121
tab_resmod9<-do.call(rbind,res_mod9_list)
122

  
123
data_v_list<-vector("list",365)
190
tab_resmod9f<-do.call(rbind,data_vf_list)
191
tab_resmod9c<-do.call(rbind,data_vc_list)
124 192

  
125
for(k in 1:365){
126
  data_v<-as.data.frame(gam_fus$gam_fus_mod[[k]]$data_v)
127
  data_v<-data_v[,c("id","date","res_mod7","x_OR83M", "y_OR83M")]
128
  #subset data frame? or rbind them...and reshape?? think about it
129
  data_v_list[[k]]<-data_v
130
}
131
tab_resmod9<-do.call(rbind,data_v_list)
132
tab_locs<-melt(tab_resmod9,
193
#Get the unique location information...
194
tab_locsc<-melt(tab_resmod9c,
133 195
               measure=c("x_OR83M","y_OR83M"),
134 196
               id=c("id"),
135 197
               na.rm=F)
136
tab_locs_cast<-cast(tab_locs,id~variable,mean)
137
tab_locs<-as.data.frame(tab_locs_cast)
138
coords<- tab_locs[,c('x_OR83M','y_OR83M')]
139
coordinates(tab_locs)<-coords
140
proj4string(tab_locs)<-proj_str  #Need to assign coordinates...
141

  
142
tab_melt<-melt(tab_resmod9[c("id","date","res_mod7")],
198
tab_locsc_cast<-cast(tab_locsc,id~variable,mean)
199
tab_locsc<-as.data.frame(tab_locsc_cast)
200
coords<- tab_locsc[,c('x_OR83M','y_OR83M')]
201
coordinates(tab_locsc)<-coords
202
proj4string(tab_locsc)<-proj_str  #Need to assign coordinates...
203

  
204
tab_locsf<-melt(tab_resmod9f,
205
                measure=c("x_OR83M","y_OR83M"),
206
                id=c("id"),
207
                na.rm=F)
208
tab_locsf_cast<-cast(tab_locsf,id~variable,mean)
209
tab_locsf<-as.data.frame(tab_locsf_cast)
210
coords<- tab_locsf[,c('x_OR83M','y_OR83M')]
211
coordinates(tab_locsf)<-coords
212
proj4string(tab_locsf)<-proj_str  #Need to assign coordinates..
213

  
214
all.equal(tab_locsc,tab_locsf) #Checking that stations used over the year are equal!!!
215
#since all equal we can use one object, it will be sufficient...
216
tab_locs<-tab_locsf
217

  
218
####### Now join and summarize objects together...
219

  
220
tabf_melt<-melt(tab_resmod9f[c("id","date","res_mod7")],
143 221
               measure=c("res_mod7"), 
144 222
               id=c("id","date"),
145 223
               na.rm=F)
224
tabc_melt<-melt(tab_resmod9c[c("id","date","res_mod9")],
225
               measure=c("res_mod9"), 
226
               id=c("id","date"),
227
               na.rm=F)
228
tabf_cast<-cast(tabf_melt,id~date~variable,mean)
229
tabf_resmod9_locs<-as.data.frame(tabf_cast[,,1])
230
tabc_cast<-cast(tabc_melt,id~date~variable,mean)
231
tabc_resmod9_locs<-as.data.frame(tabc_cast[,,1])
146 232

  
147
tab_cast<-cast(tab_melt,id~date~variable,mean)
148
tab_resmod9_locs<-as.data.frame(tab_cast[,,1])
149
sd_v<-sapply(tab_resmod9_locs,sd,na.rm=T)
150
mean_v<-sapply(tab_resmod9_locs,mean,na.rm=T)
151
tab_res_rec<-tab_resmod9_locs
152

  
153
for (k in 1:365){
154
  mean<-mean_v[k]
155
  sd<-sd_v[k]
156
  y<-tab_resmod9_locs[,k]
157
  breaks<-c("-inf",mean-2*sd,mean+2*sd,"+inf")
158
  rec<-cut(y,breaks,labels=c(-1,0,1))
159
  rec<-as.numeric(as.character(rec))
160
  tab_res_rec[,k]<-rec
161
}
162

  
163

  
164
tab_res_rec<-tab_res_rec[,1:365]                  #mae_fun<-function (x){mean(abs(x),na.rm=T)}                                    
165
for (i in 1:nrow(tab_resmod9_locs)){
166
  rec<-as.numeric(tab_res_rec[i,])
167
  tmp<-table(rec)
168
  tab_res_rec$c1[i]<-as.numeric(tmp[1])
169
  tab_res_rec$c2[i]<-as.numeric(tmp[2])
170
  tab_res_rec$c3[i]<-as.numeric(tmp[3])                  
171
}
172 233

  
173
tab_res_rec$id<-as.character(rownames(tab_res_rec))
174
#mae_fun<-function (x){mean(abs(x),na.rm=T)}                                    
175
for (i in 1:nrow(tab_resmod9_locs)){
176
  x<-tab_resmod9_locs[i,]
177
  tab_locs$mae[i]<-mean(abs(x),na.rm=T)
178
  #tab_locs$mae2[i]<-mae_fun(x)
179
  rec<-as.numeric(tab_res_rec[i,])
180
  tmp<-table(rec)
181
  tab_res_rec$c1<-as.numeric(tmp[1])
182
  tab_res_rec$c2<-as.numeric(tmp[2])
183
  tab_res_rec$c3<-as.numeric(tmp[3])
234
#Now reclass the data into outliers: Class with -1 (negative exteme),class 0 (not extreme), class 1 (positive extre)
235
calculate_extremes_stat <-function (tab_resmod9_locs,tab_locs){
236
  
237
  #tab_resmod9_locs<-tabf_resmod9_locs #comment out when running function
238
  #tab_locs<-tab_locsf
239
  #tab_res_rec<-tabf_resmod9_locs
240
  
241
  #start here
242
  sd_v<-sapply(tab_resmod9_locs,sd,na.rm=T) #This the sandard deviation of residuals for every day for the fusion prediction
243
  mean_v<-sapply(tab_resmod9_locs,mean,na.rm=T)  #This the mean of residuals for every day for the fusion prediction
244
  
245
  for (k in 1:365){
246
    mean_val<-mean_v[k]
247
    sd_val<-sd_v[k]
248
    y<-tab_resmod9_locs[,k]
249
    breaks<-c("-inf",mean_val-2*sd_val,mean_val+2*sd_val,"+inf")
250
    rec<-cut(y,breaks,labels=c(-1,0,1))
251
    rec<-as.numeric(as.character(rec))
252
    tab_res_rec[,k]<-rec
253
  }
184 254
  
255
  tab_res_rec<-tab_res_rec[,1:365]                  #mae_fun<-function (x){mean(abs(x),na.rm=T)}                                    
256
  
257
  tab_res_rec$id<-as.character(rownames(tab_res_rec))
258
  #mae_fun<-function (x){mean(abs(x),na.rm=T)}                                    
259
  for (i in 1:nrow(tab_resmod9_locs)){
260
    x<-tab_resmod9_locs[i,]
261
    tab_locs$mae[i]<-mean(abs(x),na.rm=T)
262
    #tab_locs$mae2[i]<-mae_fun(x)
263
    rec<-as.numeric(tab_res_rec[i,])
264
    tmp<-table(rec)
265
    tab_res_rec$c1[i]<-as.numeric(tmp[1])
266
    tab_res_rec$c2[i]<-as.numeric(tmp[2])
267
    tab_res_rec$c3[i]<-as.numeric(tmp[3])  
268
  }
269
  
270
  projstr_info<-proj4string(tab_locs)
271
  tab_resmod9_locs$id<-rownames(tab_resmod9_locs)
272
  tab_resmod9_locs[is.na(tab_resmod9_locs)]<-NA  #replace NaN by NA
273
  tab_locs$id<-as.character(tab_locs$id)
274
  data_v_res<-merge(tab_locs,tab_resmod9_locs,by="id")
275
  data_v_res<-merge(tab_res_rec[,c("id","c1","c2","c3")],data_v_res,by="id")
276

  
277
  coords<- data_v_res[,c('x_OR83M','y_OR83M')]
278
  coordinates(data_v_res)<-coords
279
  proj4string(data_v_res)<-projstr_info  #Need to assign coordinates...
280
  
281
  #tmp<-subset(data_v_res,select=date_selected)
282
  data_v_res$idx<-1:length(data_v_res)
283
  #Plotting results...
284
  return (data_v_res)
185 285
}
186
tab_resmod9_locs$id<-rownames(tab_resmod9_locs)
187
tab_resmod9_locs[is.na(tab_resmod9_locs)]<-NA  #replace NaN by NA
188
tab_locs$id<-as.character(tab_locs$id)
189
data_v_res<-merge(tab_locs,tab_resmod9_locs,by="id")
190
data_v_res<-merge(tab_res_rec[,c("id","c1","c2","c3")],data_v_res,by="id")
191
coords<- data_v_res[,c('x_OR83M','y_OR83M')]
192
coordinates(data_v_res)<-coords
193
proj4string(data_v_res)<-proj_str  #Need to assign coordinates...
194
tmp<-subset(data_v_res,select=date_selected)
195

  
196
data_v_res$idx<-1:length(data_v_res)
197
#Plotting results...
198

  
199
data_v_plot<-subset(data_v_res,!is.na(mae),"mae")      
286

  
287
### usef the function...
288
data_v_resf<-calculate_extremes_stat(tabf_resmod9_locs,tab_locs) # call function
289
data_v_resc<-calculate_extremes_stat(tabc_resmod9_locs,tab_locs) # call functio
290

  
291
#Some plotting of results...
292
data_v_plot<-subset(data_v_resf,!is.na(data_v_resf$mae),"mae")      
200 293
bubble(data_v_plot,"mae",add=TRUE)
201
spplot(data_v_res,"mae")
294
spplot(data_v_resf,"mae")
202 295

  
203 296
data_v_plot<-subset(data_v_res,!is.na(c1),"c1")
204
bubble(data_v_res,"c3")
297
bubble(data_v_res,"c1")
205 298
plot(data_v_res[5,])
206 299
plot(reg_outline,add=TRUE)
207 300
#Figure on 11/07
208
s_range<-c(minValue(rast_fus_pred),maxValue(rast_fus_pred)) #stack min and max
209
s_range<-c(min(s_range),max(s_range))
210
col_breaks <- pretty(s_range, n=50)
211
lab_breaks <- pretty(s_range, n=5)
212
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
213
plot(rast_fus_pred, breaks=col_breaks, col=temp_colors(49),   
214
     axis=list(at=lab_breaks, labels=lab_breaks))              
301
s.range <- c(min(minValue(rast_fus_pred)), max(maxValue(rast_fus_pred)))
302
col.breaks <- pretty(s.range, n=50)
303
lab.breaks <- pretty(s.range, n=5)
304
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
305

  
306
#Training plot
307
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
308
     axis=list(at=lab.breaks, labels=lab.breaks))
215 309
plot(reg_outline,add=TRUE)
216 310

  
217 311
plot(data_v_res, pch=1,cex=sqrt(data_v_res$c1)/5,add=TRUE)
......
220 314
# Calculate RMSE and MAE for each station over 365 dates and plot it on a map as well as on a scatterplot
221 315
#Look at the number of observation.
222 316

  
317
#bubble(data_v_res,"X20101230",na.rm=T,do.sqrt=TRUE)
223 318

  
224 319
#1) Digitize features from high resolution imagery and see if you can see differences in the way it is detected in CAI and fusion method...
225 320
#in particular how much variation there is in such polygons...
226 321
#Polygon to digitize: valley, crop area, mountain...Show that CAI does not capture crop as well as Fusion
227
#2) Select transect with slope changes and examining variation in temperatures...: calculate average MAE on the transect for examle river
228
#3) Land cover: examine MAE per land cover...for CAI and Fusion, LST bias and ecoregions...
229
#4) Look at differences...regions
230
#5) Summarize by season/month
322
#2) Transect with slope changes and examining variation in temperatures...: calculate average MAE on the transect for examle river
323
#3) X Land cover: examine MAE per land cover...for CAI and Fusion, LST bias and ecoregions...: boxplots at 
324
#4) Look at differences...regions with box plot of MAE inside and outside regions of differences...
325
#5) X Summarize by season/month
231 326
#6) PCA on differences and CAI-Fusion
232
#7) PCA on residuals...
233
#8) Plot residuals at station by buble plot and kriging...
327
#7) X PCA on residuals...
328
#8) X Plot residuals at station by buble plot and kriging...
234 329
#9) PCA MAE and other variables ...                  
235 330

  
236
### RESIDUALS FOR SPECIFIC DATE
331
### PLOTS OF RESIDUALS AND COVARIATES FOR SPECIFIC DATE
237 332

  
333
#This should be in a loop...
334
setwd(path)
238 335
date_selected<-"20100103"
336
dates<-c("20100103","20100901")
337

  
338
for (i in 1:length(dates)){
339
  date_selected<-dates[i]
340
  k<-match(date_selected,sampling_date_list)
341
  names(gam_fus$gam_fus_mod[[k]])               #Show the name structure of the object/list
342
  
343
  #Extract the training and testing information for the given date...
344
  data_sf<-gam_fus$gam_fus_mod[[k]]$data_s #object for the first date...20100103                  
345
  data_vf<-gam_fus$gam_fus_mod[[k]]$data_v #object for the first date...20100103                  
346
  data_sc<-gam_cai$gam_CAI_mod[[k]]$data_s #object for the first date...20100103                  
347
  data_vc<-gam_cai$gam_CAI_mod[[k]]$data_v #object for the first date...20100103                  
348
  
349
  #Join information extracted using the function previously
350
  id_selected<-intersect(data_v_resf$id,data_vf$id) #Match station using their official IDSs
351
  pos<-match(id_selected,data_v_resf$id)
352
  tmp<-as.data.frame(data_v_resf[pos,c("id","idx","mae","c1","c2","c3")])
353
  tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
354
  data_vf<-merge(data_vf,tmp,by="id")
355
  id_selected<-intersect(data_v_resc$id,data_vc$id) #Match station using their official IDSs
356
  pos<-match(id_selected,data_v_resc$id)
357
  tmp<-as.data.frame(data_v_resc[pos,c("id","idx","mae","c1","c2","c3")])
358
  tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
359
  data_vc<-merge(data_vc,tmp,by="id")
360
  #Turn the data frame back into a spdf
361
  coords<- data_vf[,c('x_OR83M','y_OR83M')]
362
  coordinates(data_vf)<-coords
363
  proj4string(data_vf)<-proj_str  #Need to assign coordinates...
364
  coords<- data_vc[,c('x_OR83M','y_OR83M')]
365
  coordinates(data_vc)<-coords
366
  proj4string(data_vc)<-proj_str  #Need to assign coordinates...
367
  
368
  #Prepare for plotting
369
  X11(width=16,height=9)
370
  par(mfrow=c(1,2))
371
  
372
  #Select background image for plotting and Plot validation residuals for fusion on 20100103
373
  s.range <- c(min(minValue(rast_fus_pred)), max(maxValue(rast_fus_pred)))
374
  col.breaks <- pretty(s.range, n=50)
375
  lab.breaks <- pretty(s.range, n=5)
376
  temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
377
  
378
  #Training plot
379
  plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
380
       axis=list(at=lab.breaks, labels=lab.breaks))
381
  plot(data_sf,cex=1, add=TRUE)
382
  plot(reg_outline,add=TRUE)
383
  title(paste("Training stations",date_selected,sep=" "))
384
  #Testing plot
385
  plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
386
       axis=list(at=lab.breaks, labels=lab.breaks))
387
  plot(data_vf,cex=1,pch=1,add=TRUE)
388
  plot(reg_outline,add=TRUE)
389
  text(data_vf,labels=data_vf$idx,pos=3,cex=1.3)
390
  title(paste("Testing stations",date_selected,sep=" "))
391
  
392
  #savePlot here...
393
  savePlot(paste("fig1_training_testing_",date_selected,out_prefix,".png", sep=""), type="png")
394
  
395
  #plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
396
  #     axis=list(at=lab.breakts, labels=lab.breaks))
397
  #plot(data_vf,cex=sqrt(data_vf$res_mod7),pch=1,add=TRUE)
398
  #plot(reg_outline,add=TRUE)
399
  #text(data_vf,labels=data_vf$idx,pos=3)
400
  
401
  #X11(width=16,height=9)
402
  #par(mfrow=c(1,2))
403
  
404
  # CREATE A FUNCTION TO AUTOMATE THE PLOT: improve code here
405
  #Plot residuals
406
  y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
407
  plot(data_vf$idx,data_vf$res_mod7, ylab="Residuals", xlab="Index", ylim=y_range)
408
  text(data_vf$idx,data_vf$res_mod7,labels=data_vf$idx,pos=3)
409
  grid(lwd=0.5,col="black")
410
  title(paste("Testing stations residuals fusion",date_selected,sep=" "))
411
  plot(data_vc$idx,data_vc$res_mod9,ylab="Residuals", xlab="Index", ylim=y_range)
412
  text(data_vc$idx,data_vc$res_mod9,data_vc$idx,labels=data_vc$idx,pos=3)
413
  grid(lwd=0.5, col="black")
414
  title(paste("Testing stations residuals CAI",date_selected,sep=" "))
415
  #savePlot here...
416
  savePlot(paste("fig2_testing_residuals_fusion_CAI_",date_selected,out_prefix,".png", sep=""), type="png")
417
  
418
  #Plot predicted vs observed
419
  y_range<-range(c(data_vf$pred_mod7,data_vc$pred_mod9))
420
  x_range<-range(c(data_vf$dailyTmax,data_vc$dailyTamx))
421
  plot(data_vf$pred_mod7,data_vf$dailyTmax, ylab="Observed daily tmax (C)", xlab="Predicted daily tmax (C)", 
422
       ylim=y_range,xlim=x_range)
423
  text(data_vf$pred_mod7,data_vf$dailyTmax,labels=data_vf$idx,pos=3)
424
  abline(0,1) #takes intercept at 0 and slope as 1 so display 1:1 ine
425
  grid(lwd=0.5,col="black")
426
  title(paste("Testing stations tmax fusion vs daily tmax",date_selected,sep=" "))
427
  plot(data_vc$pred_mod9,data_vc$dailyTmax,ylab="Observed daily tmax (C)", xlab="Predicted daily tmax (C)", 
428
       ylim=y_range,xlim=x_range)
429
  text(data_vc$pred_mod9,data_vc$dailyTmax,labels=data_vc$idx,pos=3)
430
  abline(0,1) #takes intercept at 0 and slope as 1 so display 1:1 ine
431
  grid(lwd=0.5, col="black")
432
  title(paste("Testing stations tmax CAI vs daily tmax",date_selected,sep=" "))
433
  #savePlot here...
434
  savePlot(paste("fig3_testing_predicted_observed_fusion_CAI_",date_selected,out_prefix,".png", sep=""), type="png")
435
 
436
  ###########Plot residuals and covariates
437
  ##Elevation and residulas
438
  y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
439
  x_range<-range(c(data_vf$ELEV_SRTM,data_vc$ELEV_SRTM))
440
  plot(data_vf$ELEV_SRTM,data_vf$res_mod7, ylab="Residuals", xlab="ELEV_SRTM (m) ", 
441
       ylim=y_range, xlim=x_range)
442
  text(data_vf$ELEV_SRTM,data_vf$res_mod7,labels=data_vf$idx,pos=3)
443
  grid(lwd=0.5,col="black")
444
  title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
445
  plot(data_vc$ELEV_SRTM,data_vc$res_mod9, ylab="Residuals", xlab="ELEV_SRTM (m) ", 
446
       ylim=y_range, xlim=x_range)
447
  text(data_vc$ELEV_SRTM,data_vc$res_mod9,labels=data_vc$idx,pos=3)
448
  grid(lwd=0.5,col="black")
449
  title(paste("Testing stations residuals CAI vs Elevation",date_selected,sep=" "))
450
  savePlot(paste("fig4_testing_residuals_fusion_CAI_Elev_",date_selected,out_prefix,".png", sep=""), type="png")
451
  
452
  ##Forest (LC1) and residuals
453
  y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
454
  x_range<-range(c(data_vf$LC1,data_vc$LC1))
455
  plot(data_vf$LC1,data_vf$res_mod7,ylab="Residuals", xlab="LC1 (%)", 
456
       ylim=y_range, xlim=x_range)
457
  text(data_vf$LC1,data_vf$res_mod7,labels=data_vf$idx,pos=3)
458
  grid(lwd=0.5,col="black")
459
  title(paste("Testing stations residuals CAI vs LC1 (forest)",date_selected,sep=" "))
460
  plot(data_vc$LC1,data_vc$res_mod9,ylab="Residuals", xlab="LC1 (%)", 
461
       ylim=y_range, xlim=x_range)
462
  text(data_vc$LC1,data_vc$res_mod9,labels=data_vc$idx,pos=3)
463
  grid(lwd=0.5,col="black")
464
  title(paste("Testing stations residuals CAI vs LC1(forest)",date_selected,sep=" "))
465
  savePlot(paste("fig5_testing_residuals_fusion_CAI_LC1_",date_selected,out_prefix,".png", sep=""), type="png")
466
  
467
  ##Grass (LC3)  and residuals
468
  y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
469
  x_range<-range(c(data_vf$LC3,data_vc$LC3))
470
  plot(data_vf$LC3,data_vf$res_mod7,ylab="Residuals", xlab="LC3 (%)", 
471
       ylim=y_range, xlim=x_range)
472
  text(data_vf$LC3,data_vf$res_mod7,labels=data_vf$idx,pos=3)
473
  grid(lwd=0.5,col="black")
474
  title(paste("Testing stations residuals CAI vs LC3 (grass)",date_selected,sep=" "))
475
  plot(data_vc$LC3,data_vc$res_mod9,ylab="Residuals", xlab="LC3 (%)", 
476
       ylim=y_range, xlim=x_range)
477
  text(data_vc$LC3,data_vc$res_mod9,labels=data_vc$idx,pos=3)
478
  grid(lwd=0.5,col="black")
479
  title(paste("Testing stations residuals CAI vs LC3(grass)",date_selected,sep=" "))
480
  savePlot(paste("fig6_testing_residuals_fusion_CAI_LC3_",date_selected,out_prefix,".png", sep=""), type="png")
481
  
482
  #LSTD_bias and residuals
483
  y_range<-range(c(data_vf$res_mod7,data_vc$res_mod9))
484
  x_range<-range(c(data_vf$LSTD_bias,data_vc$LSTD_bias))
485
  plot(data_vf$LSTD_bias,data_vf$res_mod7)
486
  text(data_vf$LSTD_bias,data_vf$res_mod7,labels=data_vf$idx,pos=3)
487
  grid(lwd=0.5,col="black")
488
  title(paste("Testing stations LST bias vs residuals",date_selected,sep=" "))
489
  plot(data_sf$LSTD_bias,data_sf$res_mod7)
490
  #text(data_sf$LSTD_bias,data_sf$res_mod7,labels=data_vf$idx,pos=3) # Also labels for training?
491
  grid(lwd=0.5,col="black")
492
  title(paste("Training stations LST bias vs residuals",date_selected,sep=" "))
493
  savePlot(paste("fig7_testing_training_residuals_fusion_LSTD_bias_",date_selected,out_prefix,".png", sep=""), type="png")
494
   
495
  #LSTD vs TMax and residuals
496
  y_range<-range(c(data_vf$TMax,data_vc$TMax))
497
  x_range<-range(c(data_vf$LSTD_bias,data_vc$LSTD_bias))
498
  plot(data_vf$LSTD_bias,data_vf$res_mod7,xlab="LST bias", ylab="Residuals")
499
  text(data_vf$LSTD_bias,data_vf$res_mod7,labels=data_vf$idx,pos=3)
500
  grid(lwd=0.5,col="black")
501
  title(paste("Testing stations LST bias vs fusion residuals",date_selected,sep=" "))
502
  plot((data_vf$LST-273.16),data_vf$TMax,xlab="LST", ylab="TMax (monthly tmax in C)")
503
  text((data_vf$LST-273.16),data_vf$TMax,labels=data_vf$idx,pos=3)
504
  abline(0,1) #takes intercept at 0 and slope as 1 so display 1:1 ine
505
  grid(lwd=0.5,col="black")
506
  title(paste("Testing stations LST vs TMax ",date_selected,sep=" "))
507
  savePlot(paste("fig8_testing_TMax_fusion_",date_selected,out_prefix,".png", sep=""), type="png")
508
  dev.off()
509
}
510
      
511
##Check...difference at diff stations...
239 512

  
240
k<-match(date_selected,sampling_date_list)
241
names(gam_fus$gam_fus_mod[[k]])               #Show the name structure of the object/list
242 513

  
514
#Extract the training and testing information for the given date...
243 515
data_sf<-gam_fus$gam_fus_mod[[k]]$data_s #object for the first date...20100103                  
244 516
data_vf<-gam_fus$gam_fus_mod[[k]]$data_v #object for the first date...20100103                  
245 517
data_sc<-gam_cai$gam_CAI_mod[[k]]$data_s #object for the first date...20100103                  
246 518
data_vc<-gam_cai$gam_CAI_mod[[k]]$data_v #object for the first date...20100103                  
247 519

  
248
id_selected<-intersect(data_v_res$id,data_vf$id)
249
pos<-match(id_selected,data_v_res$id)
250
tmp<-as.data.frame(data_v_res[pos,c("id","idx","mae","c1","c2","c3")])
520
#Join information extracted using the function previously
521
id_selected<-intersect(data_v_resf$id,data_vf$id) #Match station using their official IDSs
522
pos<-match(id_selected,data_v_resf$id)
523
tmp<-as.data.frame(data_v_resf[pos,c("id","idx","mae","c1","c2","c3")])
251 524
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
252 525
data_vf<-merge(data_vf,tmp,by="id")
253

  
526
id_selected<-intersect(data_v_resc$id,data_vc$id) #Match station using their official IDSs
527
pos<-match(id_selected,data_v_resc$id)
528
tmp<-as.data.frame(data_v_resc[pos,c("id","idx","mae","c1","c2","c3")])
529
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
530
data_vc<-merge(data_vc,tmp,by="id")
531
#Turn the data frame back into a spdf
254 532
coords<- data_vf[,c('x_OR83M','y_OR83M')]
255 533
coordinates(data_vf)<-coords
256 534
proj4string(data_vf)<-proj_str  #Need to assign coordinates...
535
coords<- data_vc[,c('x_OR83M','y_OR83M')]
536
coordinates(data_vc)<-coords
537
proj4string(data_vc)<-proj_str  #Need to assign coordinates...
538

  
539
###################################################
540
############## RESIDUALS BY TIME!!! ##############
541

  
542
#tab_resmod9
543
for (i in 1:nrow(tab_resmod9f)){
544
  date<-strptime(tab_resmod9f$date[i], "%Y%m%d")   # interpolation date being processed, converting the string using specific format
545
  month<-as.integer(strftime(date, "%m"))
546
  tab_resmod9f$month[i]<-month
547
}
257 548

  
258
#Select background image for plotting and Plot validation residuals for fusion on 20100103
259
s.range <- c(min(minValue(rast_fus_pred)), max(maxValue(rast_fus_pred)))
260
col.breaks <- pretty(s.range, n=50)
261
lab.breaks <- pretty(s.range, n=5)
262
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
263

  
264
#Training plot
265
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
266
     axis=list(at=lab.breaks, labels=lab.breaks))
267
plot(data_sf,cex=1,main="Training stations", add=TRUE)
268
plot(reg_outline,add=TRUE)
269

  
270
#savePlot...
271

  
272
#Testing plot
273
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
274
     axis=list(at=lab.breaks, labels=lab.breaks))
275
plot(data_vf,cex=1,pch=1,add=TRUE)
276
plot(reg_outline,add=TRUE)
277
text(data_vf,labels=data_vf$idx,pos=3)
278
#savePlot...
279

  
280
#Plot residuals
281
res_modvf<-res_mod7
282

  
283
plot(data_vf$res_mod7)
284
text(data_vf$res_mod7,labels=data_vf$idx,pos=3)
285

  
286
#Plot residuals
287
res_modvf<-res_mod7
288

  
289
plot(data_vf$res_mod7)
290
text(data_vf$res_mod7,labels=data_vf$idx,pos=3)
291

  
292

  
293

  
294

  
295

  
296
#NOW USE RESHAPE TO CREATE TABLE....
549
mae_function<-function(res){mae<-mean(abs(res),na.rm=T)}
550
tab_test<-melt(tab_resmod9f,
551
               measure=c("res_mod7","x_OR83M","y_OR83M"),
552
               id=c("id","month"),
553
               na.rm=F)
554
tab_test_cast<-cast(data=tab_test,formula=id~month~variable,mean)
555
table_id_month<-tab_test_cast[,,1]
556
tab_month_cast<-cast(data=tab_test,formula=month~variable,mean)
557
tab_month_cast<-cast(data=tab_test,formula=month~variable,mae_function)
558
X11()
559
barplot(tab_month_cast$res_mod7,
560
        names.arg=1:12,cex.names=0.8,   #names of the teleconnections indices and size of fonts of axis labes
561
        xlab="Month",       # font.lab is 2 to make the font bold
562
        ylab="MAE",font.lab=2)
563

  
564
savePlot(paste("fig9_mae_fusion_per_season_",date_selected,out_prefix,".png", sep=""), type="png")
565

  
566
plot(table_id_month[55,])
567
plot(table_id_month[5,])
568

  
569
#tab_resmod9c
570

  
571
for (i in 1:nrow(tab_resmod9c)){
572
  date<-strptime(tab_resmod9c$date[i], "%Y%m%d")   # interpolation date being processed, converting the string using specific format
573
  month<-as.integer(strftime(date, "%m"))
574
  tab_resmod9c$month[i]<-month
575
}
297 576

  
298
#updated the analysis
577
mae_function<-function(res){mae<-mean(abs(res),na.rm=T)}
578
tab_test<-melt(tab_resmod9c,
579
               measure=c("res_mod9","x_OR83M","y_OR83M"),
580
               id=c("id","month"),
581
               na.rm=F)
582
tab_test_cast<-cast(data=tab_test,formula=id~month~variable,mean)
583
table_id_month_cai<-tab_test_cast[,,1]
584
tab_month_cast_cai<-cast(data=tab_test,formula=month~variable,mean)
585
tab_month_cast_cai<-cast(data=tab_test,formula=month~variable,mae_function)
299 586

  
300
"tmax_predicted*20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst"
301
oldpath<-getwd()
302
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM"
303
setwd(path)
304
date_selected<-"20100103"
305
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion
306
file_pat<-glob2rx(paste("*tmax_pred*",date_selected,"*_365d_GAM_fusion_all_lstd_10242012.rst",sep="")) #Search for files in relation to fusion
307
lf_fus1a<-list.files(pattern=file_pat) #Search for files in relation to fusion
587
barplot(tab_month_cast_cai$res_mod9,
588
    names.arg=1:12,cex.names=0.8,   #names of the teleconnections indices and size of fonts of axis labes
589
    xlab="Month",       # font.lab is 2 to make the font bold
590
    ylab="MAE",font.lab=2)
591
grid(nx=12,ny=10)    
308 592

  
309
rast_fus1a<-stack(lf_fus1a)
310
rast_fus1a<-mask(rast_fus1a,mask_ELEV_SRTM)
593
savePlot(paste("fig10_mae_cai_per_season_",date_selected,out_prefix,".png", sep=""), type="png")
311 594

  
312
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_07192012_GAM"
313
setwd(path)
314
date_selected<-"20100103"
315
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion
316
#lf_fus1<-list.files(pattern=paste("*.tmax_predicted.*.",date_selected,".*._365d_GAM_fusion_lstd_10062012.rst$",sep=""))                  
317
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_fusion_lstd_10062012.rst",sep="")) #Search for files in relation to fusion
318
lf_fus1s<-list.files(pattern=file_pat) #Search for files in relation to fusion                  
319

  
320
rast_fus1s<-stack(lf_fus1s)
321
rast_fus1s<-mask(rast_fus1s,mask_ELEV_SRTM)
322

  
323
s_range<-c(minValue(rast_fusa1),maxValue(rast_fusa1)) #stack min and max
324
s_range<-c(min(s_range),max(s_range))
325
col_breaks <- pretty(s_range, n=50)
326
lab_breaks <- pretty(s_range, n=5)
327
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
328
X11(width=18,height=12)
329
par(mfrow=c(3,3))
330
for (k in 1:length(lf_fus1a)){
331
  fus1a_r<-raster(rast_fus1a,k)
332
  plot(fus1a_r, breaks=col_breaks, col=temp_colors(length(col_breaks)-1),   
333
       axis=list(at=lab_breaks, labels=lab_breaks))
334
}
595
plot(table_id_month_cai[55,])
596
plot(table_id_month_cai[5,])
335 597

  
598
###### 
599
overlay(data_v_resf,diff_r)
600
plot(avg_LC_rec)
336 601
#Wihtout setting range
337 602
s_range<-c(-12,18)
338 603
#col_breaks <- pretty(s_range, n=50)
......
350 615
       axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k])
351 616
}
352 617

  
353
#Wihtout setting range
618
### Wihtout setting range
354 619
s_range<-c(-12,23)
355 620
#col_breaks <- pretty(s_range, n=50)
356 621
#lab_breaks <- pretty(s_range, n=5)
......
366 631
  plot(fus1s_r, breaks=col_breaks, col=temp_colors(col_breaks-1),   
367 632
       axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k])
368 633
}
369

  
634
                 
635
############### NOW USE RESHAPE TO CREATE TABLE....##########
636
# DO A PCA HERE...also look at the difference between prediction at stations...
637
                 
638
var_pat<-glob2rx("*2010*") #Search for files in relation to fusion
639
var<-match("*2010*",names(data_v_resf)) #Search for files in relation to fusion                  
640
xp<-data_v_resf[,names(data_v_resf)[10:374]]      # subsetting the original training data, note that that this is a "SpatialPointsDataFrame"
641
                 
642
#xp<-data_s[,var]      # subsetting the original training data, note that that this is a "SpatialPointsDataFrame"
643
xp<-as.data.frame(xp)
644
dropsc<-c("x_OR83M","y_OR83M")  #columns to be removed
645
xp<-xp[,!(names(xp) %in% dropsc)]  # dropping columns
646
                 
647
X<-as.matrix(na.omit(xp))
648
                 
649
PCA9var<-prcomp(~.,data=xp, retx = TRUE, center= TRUE, scale = TRUE, na.action=na.omit)
650
E<-matrix()
651
E<-PCA9var$rotation    #E are the eigenvectors of the standardized PCA base on xp data.frame
652
Xs<-scale(X)           #Use scale to standardize the original data matrix (center on mean and divide by standard deviation)
653
Xpc<- Xs %*% E          #Rotate the original standardized matrix Xs by the eigenvectors from the standardized PCA
654
plot(PCA9var)
655
#plot(PCA9var$scores)
656
loadings<-cor(X, Xpc)   #Correlation between the original variables and the principal components...
657
loadings<-as.data.frame(loadings)
658
#quartz(width=6, height=6)
659
plot(PC2~PC1, xlab= "PC1", ylab= "PC2", xlim=c(-1, 1), ylim=c(-1,1), pch =16, width=1, 
660
              main= paste("This is PCA for ",date_selected, sep=""), height=1,asp=1,data=loadings) #Add aspect options to get 
661
axis(1,pos=0)
662
axis(2,pos=0)
663
text(loadings$PC1,loadings$PC2,rownames(loadings), adj = c(0,0),offset=2, col="red",cex=1.2)
664
draw.circle(0,0,radius=1)
665
                 
370 666

  
371 667

  

Also available in: Unified diff