Project

General

Profile

« Previous | Next » 

Revision acf32061

Added by Benoit Parmentier about 12 years ago

Methods comp part4 task#491 initial commit, residuals analyses at specific station for FUS and CAI

View differences:

climate/research/oregon/interpolation/methods_comparison_assessment_part4.R
1
#####################################  METHODS COMPARISON part 3 ##########################################
2
#################################### Spatial Analysis ############################################
3
#This script utilizes the R ojbects created during the interpolation phase.                       #
4
#At this stage the script produces figures of various accuracy metrics and compare methods:       #
5
#- rank station in terms of residuals and metrics (MAE and RMSE)                                  #
6
#- calculate accuary metrics for climtology predictions...                                        #
7
#- spatial density of station network and accuracy metric                                         #
8
#- visualization of maps of prediction and difference for comparison                              #
9
#AUTHOR: Benoit Parmentier                                                                        #
10
#DATE: 10/23/2012                                                                                 #
11
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491 --                                  #
12
###################################################################################################
13

  
14
###Loading R library and packages                                                      
15
library(gtools)                                        # loading some useful tools such as mixedsort
16
library(mgcv)                                           # GAM package by Wood 2006 (version 2012)
17
library(sp)                                             # Spatial pacakge with class definition by Bivand et al. 2008
18
library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al. 2012
19
library(rgdal)                                          # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
20
library(gstat)                                          # Kriging and co-kriging by Pebesma et al. 2004
21
library(automap)                                        # Automated Kriging based on gstat module by Hiemstra et al. 2008
22
library(spgwr)
23
library(gpclib)
24
library(maptools)
25
library(graphics)
26
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
27
library(raster)
28
library(rasterVis)
29
library(plotrix)   #Draw circle on graph
30
library(reshape)
31
## Functions
32
#loading R objects that might have similar names
33
load_obj <- function(f)
34
{
35
  env <- new.env()
36
  nm <- load(f, env)[1]
37
  env[[nm]]
38
}
39

  
40
###Parameters and arguments
41

  
42
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp"    #GHCN shapefile containing variables for modeling 2010                 
43
#infile2<-"list_10_dates_04212012.txt"                    #List of 10 dates for the regression
44
infile2<-"list_365_dates_04212012.txt"                    #list of dates
45
infile3<-"LST_dates_var_names.txt"                        #LST dates name
46
infile4<-"models_interpolation_05142012.txt"              #Interpolation model names
47
infile5<-"mean_day244_rescaled.rst"                       #mean LST for day 244
48
inlistf<-"list_files_05032012.txt"                        #list of raster images containing the Covariates
49
infile6<-"OR83M_state_outline.shp"
50
#stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
51

  
52
out_prefix<-"methods_11092012_"
53

  
54
##### LOAD USEFUL DATA
55

  
56
#obj_list<-"list_obj_08262012.txt"                                  #Results of fusion from the run on ATLAS
57
path<-"/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison_10242012" #Jupiter LOCATION on Atlas for kriging                              #Jupiter Location on XANDERS
58
#path<-"/Users/benoitparmentier/Dropbox/Data/NCEAS/Oregon_covariates/"            #Local dropbox folder on Benoit's laptop
59
setwd(path) 
60
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
61
#User defined output prefix
62

  
63
############ PART 4: RESIDUALS ANALYSIS: ranking, plots, focus regions  ##################
64
############## EXAMINING STATION RESIDUALS ###########
65
########### CONSTANT OVER 365 AND SAMPLING OVER 365
66
#Plot daily_deltaclim_rast, bias_rast,add data_s and data_v
67

  
68
# RANK STATION by average or median RMSE
69
# Count the number of times a station is in the extremum group of outliers...
70
# LOOK at specific date...
71

  
72
#Examine residuals for a spciefic date...Jan, 1 using run of const_all i.e. same training over 365 dates
73
path_data_cai<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI"  #Change to constant
74
path_data_fus<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM"
75

  
76
date_selected<-"20100103"
77

  
78
oldpath<-getwd()
79
setwd(path_data_cai)
80
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_CAI2_const_all_10312012.rst",sep="")) #Search for files in relation to fusion                  
81
lf_cai2c<-list.files(pattern=file_pat) #Search for files in relation to fusion                  
82
rast_cai2c<-stack(lf_cai2c)                   #lf_cai2c CAI results with constant sampling over 365 dates
83
rast_cai2c<-mask(rast_cai2c,mask_ELEV_SRTM)
84

  
85
oldpath<-getwd()
86
setwd(path_data_fus)
87
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_fusion_const_all_lstd_11022012.rst",sep="")) #Search for files in relation to fusion                  
88
lf_fus1c<-list.files(pattern=file_pat) #Search for files in relation to fusion                        
89
rast_fus1c<-stack(lf_fus1c)
90
rast_fus1c<-mask(rast_fus1c,mask_ELEV_SRTM)
91

  
92
rast_fus_pred<-raster(rast_fus1c,1)  # Select the first model from the stack i.e fusion with kriging for both steps
93
rast_cai_pred<-raster(rast_cai2c,1)                  
94

  
95
#list files that contain model objects and ratingin-testing information for CAI and Fusion
96
fus1_c<-load_obj("results2_fusion_Assessment_measure_all_365d_GAM_fusion_const_10172012.RData")
97

  
98
gam_fus<-load_obj(file.path(path_data_fus,
99
                            "results_mod_obj__365d_GAM_fusion_const_all_lstd_11022012.RData"))
100
gam_cai<-load_obj(file.path(path_data_cai,
101
                            "results_mod_obj__365d_GAM_CAI2_const_all_10312012.RData"))  #This contains all the info
102
sampling_date_list<-gam_fus$sampling_obj$sampling_dat$date
103

  
104
#Create a residual table...
105
res_mod9_list<-vector("list",365)
106
res_mod2_list<-vector("list",365)
107

  
108
tab_nv<-matrix(NA,365,1)
109
for(k in 1:365){
110
  tab_nv[k]<-length(fus1_c[[k]]$data_v$res_mod9)
111
}
112
#note that there might be some variation in the number!!!
113

  
114
for(k in 1:365){
115
  res_mod9<-gam_fus$gam_fus_mod[[k]]$data_v$res_mod7
116
  res_mod2<-gam_fus$gam_fus_mod[[k]]$data_v$res_mod2
117
  res_mod9_list[[k]]<-res_mod9
118
  res_mod2_list[[k]]<-res_mod2
119
  #subset data frame? or rbind them...and reshape?? think about it
120
}
121
tab_resmod9<-do.call(rbind,res_mod9_list)
122

  
123
data_v_list<-vector("list",365)
124

  
125
for(k in 1:365){
126
  data_v<-as.data.frame(gam_fus$gam_fus_mod[[k]]$data_v)
127
  data_v<-data_v[,c("id","date","res_mod7","x_OR83M", "y_OR83M")]
128
  #subset data frame? or rbind them...and reshape?? think about it
129
  data_v_list[[k]]<-data_v
130
}
131
tab_resmod9<-do.call(rbind,data_v_list)
132
tab_locs<-melt(tab_resmod9,
133
               measure=c("x_OR83M","y_OR83M"),
134
               id=c("id"),
135
               na.rm=F)
136
tab_locs_cast<-cast(tab_locs,id~variable,mean)
137
tab_locs<-as.data.frame(tab_locs_cast)
138
coords<- tab_locs[,c('x_OR83M','y_OR83M')]
139
coordinates(tab_locs)<-coords
140
proj4string(tab_locs)<-proj_str  #Need to assign coordinates...
141

  
142
tab_melt<-melt(tab_resmod9[c("id","date","res_mod7")],
143
               measure=c("res_mod7"), 
144
               id=c("id","date"),
145
               na.rm=F)
146

  
147
tab_cast<-cast(tab_melt,id~date~variable,mean)
148
tab_resmod9_locs<-as.data.frame(tab_cast[,,1])
149
sd_v<-sapply(tab_resmod9_locs,sd,na.rm=T)
150
mean_v<-sapply(tab_resmod9_locs,mean,na.rm=T)
151
tab_res_rec<-tab_resmod9_locs
152

  
153
for (k in 1:365){
154
  mean<-mean_v[k]
155
  sd<-sd_v[k]
156
  y<-tab_resmod9_locs[,k]
157
  breaks<-c("-inf",mean-2*sd,mean+2*sd,"+inf")
158
  rec<-cut(y,breaks,labels=c(-1,0,1))
159
  rec<-as.numeric(as.character(rec))
160
  tab_res_rec[,k]<-rec
161
}
162

  
163

  
164
tab_res_rec<-tab_res_rec[,1:365]                  #mae_fun<-function (x){mean(abs(x),na.rm=T)}                                    
165
for (i in 1:nrow(tab_resmod9_locs)){
166
  rec<-as.numeric(tab_res_rec[i,])
167
  tmp<-table(rec)
168
  tab_res_rec$c1[i]<-as.numeric(tmp[1])
169
  tab_res_rec$c2[i]<-as.numeric(tmp[2])
170
  tab_res_rec$c3[i]<-as.numeric(tmp[3])                  
171
}
172

  
173
tab_res_rec$id<-as.character(rownames(tab_res_rec))
174
#mae_fun<-function (x){mean(abs(x),na.rm=T)}                                    
175
for (i in 1:nrow(tab_resmod9_locs)){
176
  x<-tab_resmod9_locs[i,]
177
  tab_locs$mae[i]<-mean(abs(x),na.rm=T)
178
  #tab_locs$mae2[i]<-mae_fun(x)
179
  rec<-as.numeric(tab_res_rec[i,])
180
  tmp<-table(rec)
181
  tab_res_rec$c1<-as.numeric(tmp[1])
182
  tab_res_rec$c2<-as.numeric(tmp[2])
183
  tab_res_rec$c3<-as.numeric(tmp[3])
184
  
185
}
186
tab_resmod9_locs$id<-rownames(tab_resmod9_locs)
187
tab_resmod9_locs[is.na(tab_resmod9_locs)]<-NA  #replace NaN by NA
188
tab_locs$id<-as.character(tab_locs$id)
189
data_v_res<-merge(tab_locs,tab_resmod9_locs,by="id")
190
data_v_res<-merge(tab_res_rec[,c("id","c1","c2","c3")],data_v_res,by="id")
191
coords<- data_v_res[,c('x_OR83M','y_OR83M')]
192
coordinates(data_v_res)<-coords
193
proj4string(data_v_res)<-proj_str  #Need to assign coordinates...
194
tmp<-subset(data_v_res,select=date_selected)
195

  
196
data_v_res$idx<-1:length(data_v_res)
197
#Plotting results...
198

  
199
data_v_plot<-subset(data_v_res,!is.na(mae),"mae")      
200
bubble(data_v_plot,"mae",add=TRUE)
201
spplot(data_v_res,"mae")
202

  
203
data_v_plot<-subset(data_v_res,!is.na(c1),"c1")
204
bubble(data_v_res,"c3")
205
plot(data_v_res[5,])
206
plot(reg_outline,add=TRUE)
207
#Figure on 11/07
208
s_range<-c(minValue(rast_fus_pred),maxValue(rast_fus_pred)) #stack min and max
209
s_range<-c(min(s_range),max(s_range))
210
col_breaks <- pretty(s_range, n=50)
211
lab_breaks <- pretty(s_range, n=5)
212
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
213
plot(rast_fus_pred, breaks=col_breaks, col=temp_colors(49),   
214
     axis=list(at=lab_breaks, labels=lab_breaks))              
215
plot(reg_outline,add=TRUE)
216

  
217
plot(data_v_res, pch=1,cex=sqrt(data_v_res$c1)/5,add=TRUE)
218

  
219
plot(data_v_plot, pch=1,cex=sqrt(data_v_plot$mae),add=TRUE)
220
# Calculate RMSE and MAE for each station over 365 dates and plot it on a map as well as on a scatterplot
221
#Look at the number of observation.
222

  
223

  
224
#1) Digitize features from high resolution imagery and see if you can see differences in the way it is detected in CAI and fusion method...
225
#in particular how much variation there is in such polygons...
226
#Polygon to digitize: valley, crop area, mountain...Show that CAI does not capture crop as well as Fusion
227
#2) Select transect with slope changes and examining variation in temperatures...: calculate average MAE on the transect for examle river
228
#3) Land cover: examine MAE per land cover...for CAI and Fusion, LST bias and ecoregions...
229
#4) Look at differences...regions
230
#5) Summarize by season/month
231
#6) PCA on differences and CAI-Fusion
232
#7) PCA on residuals...
233
#8) Plot residuals at station by buble plot and kriging...
234
#9) PCA MAE and other variables ...                  
235

  
236
### RESIDUALS FOR SPECIFIC DATE
237

  
238
date_selected<-"20100103"
239

  
240
k<-match(date_selected,sampling_date_list)
241
names(gam_fus$gam_fus_mod[[k]])               #Show the name structure of the object/list
242

  
243
data_sf<-gam_fus$gam_fus_mod[[k]]$data_s #object for the first date...20100103                  
244
data_vf<-gam_fus$gam_fus_mod[[k]]$data_v #object for the first date...20100103                  
245
data_sc<-gam_cai$gam_CAI_mod[[k]]$data_s #object for the first date...20100103                  
246
data_vc<-gam_cai$gam_CAI_mod[[k]]$data_v #object for the first date...20100103                  
247

  
248
id_selected<-intersect(data_v_res$id,data_vf$id)
249
pos<-match(id_selected,data_v_res$id)
250
tmp<-as.data.frame(data_v_res[pos,c("id","idx","mae","c1","c2","c3")])
251
tmp<-tmp[,c("id","idx","mae","c1","c2","c3")]
252
data_vf<-merge(data_vf,tmp,by="id")
253

  
254
coords<- data_vf[,c('x_OR83M','y_OR83M')]
255
coordinates(data_vf)<-coords
256
proj4string(data_vf)<-proj_str  #Need to assign coordinates...
257

  
258
#Select background image for plotting and Plot validation residuals for fusion on 20100103
259
s.range <- c(min(minValue(rast_fus_pred)), max(maxValue(rast_fus_pred)))
260
col.breaks <- pretty(s.range, n=50)
261
lab.breaks <- pretty(s.range, n=5)
262
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
263

  
264
#Training plot
265
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
266
     axis=list(at=lab.breaks, labels=lab.breaks))
267
plot(data_sf,cex=1,main="Training stations", add=TRUE)
268
plot(reg_outline,add=TRUE)
269

  
270
#savePlot...
271

  
272
#Testing plot
273
plot(rast_fus_pred, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
274
     axis=list(at=lab.breaks, labels=lab.breaks))
275
plot(data_vf,cex=1,pch=1,add=TRUE)
276
plot(reg_outline,add=TRUE)
277
text(data_vf,labels=data_vf$idx,pos=3)
278
#savePlot...
279

  
280
#Plot residuals
281
res_modvf<-res_mod7
282

  
283
plot(data_vf$res_mod7)
284
text(data_vf$res_mod7,labels=data_vf$idx,pos=3)
285

  
286
#Plot residuals
287
res_modvf<-res_mod7
288

  
289
plot(data_vf$res_mod7)
290
text(data_vf$res_mod7,labels=data_vf$idx,pos=3)
291

  
292

  
293

  
294

  
295

  
296
#NOW USE RESHAPE TO CREATE TABLE....
297

  
298
#updated the analysis
299

  
300
"tmax_predicted*20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst"
301
oldpath<-getwd()
302
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM"
303
setwd(path)
304
date_selected<-"20100103"
305
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion
306
file_pat<-glob2rx(paste("*tmax_pred*",date_selected,"*_365d_GAM_fusion_all_lstd_10242012.rst",sep="")) #Search for files in relation to fusion
307
lf_fus1a<-list.files(pattern=file_pat) #Search for files in relation to fusion
308

  
309
rast_fus1a<-stack(lf_fus1a)
310
rast_fus1a<-mask(rast_fus1a,mask_ELEV_SRTM)
311

  
312
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_07192012_GAM"
313
setwd(path)
314
date_selected<-"20100103"
315
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion
316
#lf_fus1<-list.files(pattern=paste("*.tmax_predicted.*.",date_selected,".*._365d_GAM_fusion_lstd_10062012.rst$",sep=""))                  
317
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_fusion_lstd_10062012.rst",sep="")) #Search for files in relation to fusion
318
lf_fus1s<-list.files(pattern=file_pat) #Search for files in relation to fusion                  
319

  
320
rast_fus1s<-stack(lf_fus1s)
321
rast_fus1s<-mask(rast_fus1s,mask_ELEV_SRTM)
322

  
323
s_range<-c(minValue(rast_fusa1),maxValue(rast_fusa1)) #stack min and max
324
s_range<-c(min(s_range),max(s_range))
325
col_breaks <- pretty(s_range, n=50)
326
lab_breaks <- pretty(s_range, n=5)
327
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
328
X11(width=18,height=12)
329
par(mfrow=c(3,3))
330
for (k in 1:length(lf_fus1a)){
331
  fus1a_r<-raster(rast_fus1a,k)
332
  plot(fus1a_r, breaks=col_breaks, col=temp_colors(length(col_breaks)-1),   
333
       axis=list(at=lab_breaks, labels=lab_breaks))
334
}
335

  
336
#Wihtout setting range
337
s_range<-c(-12,18)
338
#col_breaks <- pretty(s_range, n=50)
339
#lab_breaks <- pretty(s_range, n=5)
340
col_breaks<-49
341
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
342
lab_breaks <- pretty(s_range, n=5)  
343
X11(width=18,height=12)
344
par(mfrow=c(3,3))
345
mod_name<-paste("mod",1:8, sep="")
346
rname<-c("FUS_kr",mod_name)
347
for (k in 1:length(lf_fus1a)){
348
  fus1a_r<-raster(rast_fus1a,k)
349
  plot(fus1a_r, breaks=col_breaks, col=temp_colors(col_breaks-1),   
350
       axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k])
351
}
352

  
353
#Wihtout setting range
354
s_range<-c(-12,23)
355
#col_breaks <- pretty(s_range, n=50)
356
#lab_breaks <- pretty(s_range, n=5)
357
col_breaks<-49
358
temp_colors <- colorRampPalette(c('blue', 'white', 'red'))
359
lab_breaks <- pretty(s_range, n=5)  
360
X11(width=18,height=12)
361
par(mfrow=c(3,3))
362
mod_name<-paste("mod",1:8, sep="")
363
rname<-c("FUS_kr",mod_name)
364
for (k in 1:length(lf_fus1s)){
365
  fus1s_r<-raster(rast_fus1s,k)
366
  plot(fus1s_r, breaks=col_breaks, col=temp_colors(col_breaks-1),   
367
       axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k])
368
}
369

  
370

  
371

  

Also available in: Unified diff