Project

General

Profile

Download (16 KB) Statistics
| Branch: | Revision:
1
#####################################  METHOD COMPARISON ##########################################
2
#################################### Spatial Analysis ########################################
3
#This script utilizes the R ojbects created during the interpolation phase.      
4
# R ojbects must be supplied in a text file with along with their names. 
5
# Five mehods are compared over set of year: Kriging, GWR, GAM, CAI and FUSION.
6
# At this stage the script produces figures of various accuracy metrics and compare x: #
7
#- boxplots for MAE, RMSE and other accuracy metrics   
8
#- MAE, RMSE plots per month
9
#- visualization of map results  for all predictions method  
10

    
11
#AUTHOR: Benoit Parmentier                                                                        
12
#DATE: 10/12/2012                                                                                 
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491--  
14

    
15
###################################################################################################
16

    
17
###Loading R library and packages                                                      
18
#library(gtools)                                        # loading some useful tools 
19
library(mgcv)                                           # GAM package by Wood 2006 (version 2012)
20
library(sp)                                             # Spatial pacakge with class definition by Bivand et al. 2008
21
library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al. 2012
22
library(rgdal)                                          # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
23
library(gstat)                                          # Kriging and co-kriging by Pebesma et al. 2004
24
library(automap)                                        # Automated Kriging based on gstat module by Hiemstra et al. 2008
25
library(spgwr)
26
library(maptools)
27
library(graphics)
28
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
29
library(raster)
30
library(rasterVis)
31
library(plotrix)      #Draw circle on graph and additional plotting options
32
library(reshape)      #Data format and type transformation
33
## Functions
34
#loading R objects that might have similar names
35
load_obj <- function(f)
36
{
37
  env <- new.env()
38
  nm <- load(f, env)[1]
39
  env[[nm]]
40
}
41

    
42
###Parameters and arguments
43

    
44
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp"    #GHCN shapefile containing variables for modeling 2010                 
45
#infile2<-"list_10_dates_04212012.txt"                    #List of 10 dates for the regression
46
infile2<-"list_365_dates_04212012.txt"                    #list of dates
47
infile3<-"LST_dates_var_names.txt"                        #LST dates name
48
infile4<-"models_interpolation_05142012.txt"              #Interpolation model names
49
infile5<-"mean_day244_rescaled.rst"                       #mean LST for day 244
50
inlistf<-"list_files_05032012.txt"                        #list of raster images containing the Covariates
51
infile6<-"OR83M_state_outline.shp"
52
#stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
53

    
54
obj_list<-"list_obj_10172012.txt"                                  #Results of fusion from the run on ATLAS
55
#obj_list<-"list_obj_08262012.txt"                                  #Results of fusion from the run on ATLAS
56
path<-"/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison" #Jupiter LOCATION on Atlas for kriging                              #Jupiter Location on XANDERS
57
setwd(path) 
58
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
59
                                                                                #Number of kriging model
60
out_prefix<-"methods_10172012_"                                              #User defined output prefix
61

    
62
filename<-sub(".shp","",infile1)             #Removing the extension from file.
63
ghcn<-readOGR(".", filename)                 #reading shapefile 
64

    
65
### PREPARING RASTER COVARIATES STACK #######
66

    
67
#CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
68
lines<-read.table(paste(path,"/",inlistf,sep=""), sep="")                      #Column 1 contains the names of raster files
69
inlistvar<-lines[,1]
70
inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
71
covar_names<-as.character(lines[,2])                                         #Column two contains short names for covaraites
72

    
73
s_raster<- stack(inlistvar)                                                  #Creating a stack of raster images from the list of variables.
74
layerNames(s_raster)<-covar_names                                            #Assigning names to the raster layers
75
projection(s_raster)<-proj_str
76

    
77
#Create mask
78
pos<-match("LC10",layerNames(s_raster))
79
LC10<-subset(s_raster,pos)
80
LC10[is.na(LC10)]<-0   #Since NA values are 0, we assign all zero to NA
81
mask_land<-LC10<100
82
mask_land_NA<-mask_land
83
mask_land_NA[mask_land_NA==0]<-NA
84

    
85
data_name<-"mask_land_OR"
86
raster_name<-paste(data_name,".rst", sep="")
87
writeRaster(mask_land, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
88
#writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
89

    
90
pos<-match("mm_01",layerNames(s_raster))
91
mm_01<-subset(s_raster,pos)
92
mm_01<-mm_01-273.15
93
mm_01<-mask(mm_01,mask_land_NA)
94
#mention this is the last... files
95

    
96
############# METHODS COMPARISON  ###########
97

    
98
######################################################################
99
# PART 1 : USING ACCURACY METRICS FOR FIVE METHODS COMPARISON
100
# Boxplots and histograms
101

    
102
lines<-read.table(paste(path,"/",obj_list,sep=""), sep=",")   #Column 1 contains the names RData objects
103
inlistobj<-lines[,1]
104
inlistobj<-paste(path,"/",as.character(inlistobj),sep="")
105
obj_names<-as.character(lines[,2])                    #Column two contains short names for obj. model
106

    
107
nel<-length(inlistobj)
108
method_mod <-vector("list",nel) #list of one row data.frame
109
method_tb <-vector("list",nel) #list of one row data.frame
110
method_mean<-vector("list",nel)
111

    
112
for (i in 1:length(inlistobj)){
113
  obj_tmp<-load_obj(inlistobj[i])
114
  method_mod[[i]]<-obj_tmp
115
  #names(method_mod[[i]])<-obj_names[i]
116
}
117
obj_tmp<-load_obj(inlistobj[i])
118

    
119
names(method_mod)<-obj_names
120

    
121
#Condense and add other comparison, transform in function??
122

    
123
for(k in 1:length(method_mod)){            # start of the for main loop to all methods
124
  
125
  tb<-method_mod[[k]][[1]][[3]][0,] #copy
126
  mod_tmp<-method_mod[[k]]
127
  
128
  for (i in 1:365){                     # Assuming 365 days of prediction
129
    tmp<-mod_tmp[[i]][[3]]
130
    tb<-rbind(tb,tmp)
131
  }
132
  
133
  rm(mod_tmp)
134
  
135
  for(i in 4:(ncol(tb))){            # start of the for loop #1
136
    tb[,i]<-as.numeric(as.character(tb[,i]))  
137
  }
138
  
139
  method_tb[[k]]<-tb
140
  tb_RMSE<-subset(tb, metric=="RMSE")
141
  tb_MAE<-subset(tb,metric=="MAE")
142
  tb_ME<-subset(tb,metric=="ME")
143
  tb_R2<-subset(tb,metric=="R2")
144
  tb_RMSE_f<-subset(tb, metric=="RMSE_f")
145
  tb_MAE_f<-subset(tb,metric=="MAE_f")
146
  
147
  tb_diagnostic1<-rbind(tb_RMSE,tb_MAE,tb_ME,tb_R2)
148
  
149
  na_mod<-colSums(!is.na(tb_RMSE[,4:ncol(tb)]))
150
  for (j in 4:ncol(tb)){
151
    
152
    if (na_mod[j-3]<183){
153
      tb_RMSE<-tb_RMSE[,-j]   #Remove columns that have too many missing values!!!
154
    }
155
  }
156
  
157
  na_mod<-colSums(!is.na(tb_MAE[,4:ncol(tb)]))
158
  for (j in 4:ncol(tb)){
159
    
160
    if (na_mod[j-3]<183){
161
      tb_MAE<-tb_MAE[,-j]   #Remove columns that have too many missing values!!!
162
    }
163
  }
164
  
165
  na_mod<-colSums(!is.na(tb_MAE_f[,4:ncol(tb)])) 
166
  for (j in 4:ncol(tb)){
167
    
168
    if (na_mod[j-3]<183){
169
      tb_MAE_f<-tb_MAE_f[,-j]   #Remove columns that have too many missing values!!!
170
    }
171
  }
172
  
173
  na_mod<-colSums(!is.na(tb_ME[,4:ncol(tb)]))
174
  for (j in 4:ncol(tb)){
175
    
176
    if (na_mod[j-3]<183){
177
      tb_ME<-tb_ME[,-j]   #Remove columns that have too many missing values!!!
178
    }
179
  }
180
  
181
  #Add assessment of missing prediction over the year.
182
  
183
  mean_RMSE<-sapply(tb_RMSE[,4:ncol(tb_RMSE)],mean,na.rm=T
184
  mean_MAE<-sapply(tb_MAE[,4:ncol(tb_MAE)],mean,na.rm=T)
185
  mean_R2<-sapply(tb_R2[,4:ncol(tb_R2)],mean, n.rm=T)
186
  mean_ME<-sapply(tb_ME[,4:ncol(tb_ME)],mean,na.rm=T)
187
  mean_MAE_f<-sapply(tb_MAE[,4:ncol(tb_MAE_f)],mean,na.rm=T)
188
  mean_RMSE_f<-sapply(tb_RMSE_f[,4:ncol(tb_RMSE_f)],mean,na.rm=T)
189
  mean_list<-list(mean_RMSE,mean_MAE,mean_R2,mean_ME,mean_MAE_f,mean_RMSE_f)
190
  names(mean_list)<-c("RMSE","MAE","R2","ME","MAE_f","RMSE_f")
191
  method_mean[[k]]<-mean_list
192
  names_methods<-obj_names
193
  
194
  sd_RMSE<-sapply(tb_RMSE[,4:ncol(tb_RMSE)],sd,na.rm=T)
195
  sd_MAE<-sapply(tb_MAE[,4:ncol(tb_MAE)],sd,na.rm=T)
196
  
197
  # Now create plots
198
  
199
  png(paste("RMSE_for_",names_methods[k],out_prefix,".png", sep=""))
200
  boxplot(tb_RMSE[,4:ncol(tb_RMSE)],main=names_methods[k],ylim=c(1,4.5),
201
          ylab= "RMSE", outline=FALSE) #ADD TITLE RELATED TO METHODS...
202
  dev.off()
203
  
204
  #boxplot(tb_RMSE[,4:ncol(tb_RMSE)],main=names_methods[k],outline=FALSE) #ADD TITLE RELATED TO METHODS...
205
  png(paste("MAE_for_",names_methods[k],out_prefix,".png", sep=""))
206
  boxplot(tb_MAE[,4:ncol(tb_MAE)],main=names_methods[k], ylim=c(1,3.5),
207
          ylab= "MAE", outline=FALSE) #ADD TITLE RELATED TO METHODS...
208
  dev.off()
209
  
210
  #boxplot(tb_RMSE[,4:ncol(tb_RMSE)],main=names_methods[k],outline=FALSE) #ADD TITLE RELATED TO METHODS...
211
  png(paste("ME_for_",names_methods[k],out_prefix,".png", sep=""))
212
  boxplot(tb_ME[,4:ncol(tb_MAE)],main=names_methods[k],
213
          ylab= "ME", outline=FALSE) #ADD TITLE RELATED TO METHODS...
214
  dev.off()
215

    
216
  # OVER THE YEAR
217
  #...
218
  for(i in 1:nrow(tb)){
219
    date<-tb$dates[i]
220
    date<-strptime(date, "%Y%m%d")
221
    tb$month[i]<-as.integer(strftime(date, "%m"))
222
  }
223
  # USE RESHAPE...
224
  mod_pat<-glob2rx("mod*")   
225
  var_pat<-grep(mod_pat,names(tb),value=TRUE) # using grep with "value" extracts the matching names
226
  tb_melt<-melt(tb,
227
                    measure=var_pat, 
228
                     id=c("metric","month"),
229
                   na.rm=F)
230
  tb_cast<-cast(tb_melt,metric+month~variable,mean)
231
  
232
                    metrics<-as.character(unique(tb$metric))            #Name of accuracy metrics (RMSE,MAE etc.)
233
                    tb_metric_list<-vector("list",length(metrics))
234
               
235
                    
236
  png(paste("MAE_for_",names_methods[k],out_prefix,".png", sep=""))
237
  boxplot(tb__m_MAE[,4:ncol(tb_MAE)],main=names_methods[k], ylim=c(1,3.5),
238
          ylab= "MAE", outline=FALSE) #ADD TITLE RELATED TO METHODS...
239
  dev.off()
240
  metrics<-as.character(unique(tb$metric))            #Name of accuracy metrics (RMSE,MAE etc.)
241
  tb_metric_list<-vector("list",length(metrics))
242
                                        
243
  for(i in 1:length(metrics)){            # Reorganizing information in terms of metrics 
244
        metric_name<-paste("tb_t_",metrics[i],sep="")
245
         tb_metric<-subset(tb, metric==metrics[i])
246
         assign(metric_name,tb_metric)
247
        tb_metric_list[[i]]<-tb_metric
248
  }     
249
  
250
   tb_processed<-tb_metric_list[[i]]     
251
   mod_pat<-glob2rx("mod*")   
252
   var_pat<-grep(mod_pat,names(tb_processed),value=FALSE) # using grep with "value" extracts the matching names         
253
   na_mod<-colSums(!is.na(tb_processed[,var_pat]))
254
   for (j in 4:ncol(tb)){    
255
      if (na_mod[j-3]<183){
256
      tb_ME<-tb_ME[,-j]   #Remove columns that have too many missing values!!!
257
   }
258
   
259
}
260

    
261
mod_formulas<-vector("list",length(method_mod))
262
for(k in 1:length(method_mod)){            # start of the for main loop to all methods
263
  models_tmp<-method_mod[[k]][[1]][[5]]  #day 1 for model k
264
  list_formulas<-vector("list",length(models_tmp))
265
  for (j in 1:length(models_tmp)){  #
266
    formula<-try(formula(models_tmp[[j]]))
267
    list_formulas[[j]]<-formula
268
  }
269
  names(list_formulas)<-names(models_tmp)
270
  mod_formulas[[k]]<-list_formulas
271
}
272

    
273
names(method_mean)<-obj_names
274
#Add summary mean graphs!! HERE
275

    
276
write.table(as.data.frame(method_mean$gam_fus_mod1$MAE), "methods_mean_gam_MAE_test1.txt", sep=",")
277
write.table(as.data.frame(method_mean$fus_CAI$MAE), "methods_mean_fus_CAI_MAE_test1.txt", sep=",")
278

    
279
######### Average per month
280

    
281
#Add code here...
282
gam_fus_mod1<-method_mod[[1]]
283

    
284

    
285
#####################   PART II   #######################
286
# VISUALIZATION OF RESULTS PLOTS ACROSS MODELS FOR METHODS
287

    
288
date_selected<-"20100103"
289

    
290
lf_krig<-list.files(pattern=paste("*",date_selected,"_07312012_365d_Kriging_autokrig2.rst$",sep=""))
291
lf_gwr<-list.files(pattern=paste("*",date_selected,".*08152012_1d_gwr4.rst$",sep=""))
292
lf_gam1<-list.files(pattern=paste("^GAM.*",date_selected,"_07242012_365d_GAM_fusion5.rst$",sep=""))
293
lf_fus1<-list.files(pattern=paste("*.tmax_predicted.*.",date_selected,".*._365d_GAM_fusion_lstd_10062012.rst$",sep=""))
294
lf_cai1<-list.files(pattern=paste("*CAI_tmax_pred.*",date_selected,"*.08072012_365d_GAM_CAI2.rst$",sep="")) #Search for files in relation to fusion
295
lf_gam2<-list.files(pattern=paste("^GAM.*",date_selected,"_08122012_365d_GAM_fusion6.rst$",sep=""))
296

    
297
#lf2_fus<-list.files(pattern=paste("*",date_selected,"*._365d_GAM_fusion_lstd_10062012.rst$",sep=""))
298
#lf2_fus<-list.files(pattern=paste("*.20100103.*._365d_GAM_fusion_lstd_10062012.rst$",sep=""))
299
lf2_fus<-list.files(pattern=paste("^fusion_tmax.*",date_selected,"_07242012_365d_GAM_fusion5.rst$",sep="")) #Search for files in relation to fusion
300

    
301

    
302
d_krig_rast<-stack(lf_krig)
303
d_gwr_rast<-stack(lf_gwr)
304
d_gam1_rast<-stack(lf_gam1)
305
d_fus1_rast<-stack(lf_fus1)
306
d_cai1_rast<-stack(lf_cai1)
307
d_gam2_rast<-stack(lf_gam2)
308

    
309
list_day_method<-list(d_krig_rast,d_gwr_rast,d_gam1_rast,d_fus1_rast,d_cai1_rast,d_gam2_rast)
310
names(list_day_method)<-paste(c("krig_","gwr_","gam1_","fus1_","cai1_","gam2_"),date_selected,sep="")
311
out_prefix2<-"_10172012"
312

    
313
for (k in 1:length(list_day_method)){
314
  
315
  predictions<-list_day_method[[k]]
316
  projection(predictions)<-proj_str
317
  predictions<-mask(predictions,mask_elev_NA)
318
  #layerNames(predictions)<-c(paste('fusion',date_selected,sep=" "),paste('CAI',date_list2[[k]],sep=" "))
319
  # use overall min and max values to generate an nice, consistent set
320
  # of breaks for both colors (50 values) and legend labels (5 values)
321
  #s.range <- c(min(minValue(predictions)), max(maxValue(predictions)))
322
  s.range<-c(-12,18)
323
  col.breaks <- pretty(s.range, n=60)
324
  lab.breaks <- pretty(s.range, n=6)
325
  temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
326
  
327
  # plot using these (common) breaks; note use of _reverse_ heat.colors,
328
  # making it so that larger numbers are redder
329
  X11(6,12)
330
  plot(predictions, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
331
       axis=list(at=lab.breaks, labels=lab.breaks))
332
  
333
  savePlot(paste(names(list_day_method)[[k]],"_method_prediction_",out_prefix2,".png", sep=""), type="png")
334
  dev.off()  
335
}
336

    
337
                
338
##PLOTING OF ONE DATE TO COMPARE METHODS!!!
339

    
340
pos<-match("ELEV_SRTM",layerNames(s_raster))
341
ELEV_SRTM<-raster(s_raster,pos)
342
elev<-ELEV_SRTM
343
elev[-0.050<elev]<-NA  #Remove all negative elevation lower than 50 meters...
344

    
345
mask_elev_NA<-elev> (-0.050)
346
  
347
date_selected<-"20100103"
348
lf_fus<-list.files(pattern=paste("^fusion_tmax.*",date_selected,"_07242012_365d_GAM_fusion5.rst$",sep="")) #Search for files in relation to fusion
349
lf_cai<-list.files(pattern=paste("*CAI_tmax_pred.*",date_selected,"*.08072012_365d_GAM_CAI2.rst$",sep="")) #Search for files in relation to fusion
350

    
351
r11<-raster(lf_fus) #Fusion
352
r12<-raster(lf_cai[1]) #CAI
353
predictions<-stack(r11,r12)
354
predictions<-mask(predictions,mask_land_elev_NA)
355
layerNames(predictions)<-c(paste('fusion',"20100103",sep=" "),paste('CAI',"20100103",sep=" "))
356

    
357
s.range <- c(min(minValue(predictions)), max(maxValue(predictions)))
358
col.breaks <- pretty(s.range, n=50)
359
lab.breaks <- pretty(s.range, n=5)
360
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
361

    
362
# plot using these (common) breaks; note use of _reverse_ heat.colors,
363
# making it so that larger numbers are redder
364
X11(6,12)
365
#plot(predictions, breaks=col.breaks, col=rev(heat.colors(length(col.breaks)-1)),
366
#   axis=list(at=lab.breaks, labels=lab.breaks))
367
plot(predictions, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
368
     axis=list(at=lab.breaks, labels=lab.breaks))
369
#plot(reg_outline, add=TRUE)
370
savePlot(paste("comparison_one_date_CAI_fusion_tmax_prediction_",date_selected,out_prefix,".png", sep=""),type="png")
371

    
372
#results2_fusion_Assessment_measure_all_365d_GAM_fusion_lstd_10062012.RData
373
#### END OF THE SCRIPT
(30-30/36)