Project

General

Profile

Download (18.7 KB) Statistics
| Branch: | Revision:
1
#####################################  METHODS COMPARISON part 7 ##########################################
2
#################################### Spatial Analysis: validation CAI-fusion  ############################################
3
#This script utilizes the R objects created during the interpolation phase.                       #
4
#We use the SNOTEL dataset and the GHCN network to assess the prediction accuracy.
5
#This script focuses on a detailed study of differences in the predictions of CAI_kr and Fusion_Kr #
6
#AUTHOR: Benoit Parmentier                                                                        #
7
#DATE: 12/03/2012                                                                                 #
8
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491 --                                  #
9
###################################################################################################
10

    
11
###Loading R library and packages                                                      
12
library(gtools)                                        # loading some useful tools such as mixedsort
13
library(mgcv)                                           # GAM package by Wood 2006 (version 2012)
14
library(sp)                                             # Spatial pacakge with class definition by Bivand et al. 2008
15
library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al. 2012
16
library(rgdal)                                          # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
17
library(gstat)                                          # Kriging and co-kriging by Pebesma et al. 2004
18
library(automap)                                        # Automated Kriging based on gstat module by Hiemstra et al. 2008
19
library(spgwr)
20
library(gpclib)
21
library(maptools)
22
library(graphics)
23
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
24
library(raster)
25
library(rasterVis)
26
library(plotrix)   #Draw circle on graph
27
library(reshape)
28
library(RCurl)
29
######### Functions used in the script
30
#
31

    
32
# Load an .RData file into a private environment and return the FIRST object
# stored in it, so the caller can bind it to any name without polluting (or
# clobbering objects in) the global environment.
#   f : path to a file written by save()
# Returns the first object listed by load(); stops if the file holds none.
load_obj <- function(f) {
  env <- new.env()
  obj_names <- load(f, envir = env)   # load() returns the names it restored
  if (length(obj_names) == 0) {
    stop("No object found in file: ", f, call. = FALSE)
  }
  env[[obj_names[1]]]
}
38

    
39
# Left-pad month-day-year date strings so the month-day part has 4 characters.
# The last two characters are taken as the year and everything before them as
# month+day; a 3-character month+day (e.g. "103") gets a leading "0".
# Example: "10310" -> "010310", "120110" -> "120110".
#   date_str : character vector (anything coercible with as.character)
# Returns a character vector of the same length; NA inputs stay NA and an
# empty input returns character(0).
format_padding_month <- function(date_str) {
  date_str <- as.character(date_str)
  nc <- nchar(date_str)
  year <- substr(date_str, nc - 1, nc)
  md <- substr(date_str, 1, nc - 2)
  needs_pad <- !is.na(md) & nchar(md) == 3
  md[needs_pad] <- paste0("0", md[needs_pad])
  date_trans <- paste0(md, year)
  date_trans[is.na(date_str)] <- NA_character_   # paste0 would yield "NANA"
  date_trans
}
54

    
55
# Successively merge a list of data frames on a common key with a full outer
# join (all = TRUE), i.e. merge(merge(df1, df2), df3) and so on.
#   df_list : non-empty list of data frames
#   by_name : name(s) of the key column(s) shared by all data frames
# Returns the merged data frame; a one-element list returns that data frame
# unchanged.
merge_multiple_df <- function(df_list, by_name) {
  if (length(df_list) == 0) {
    stop("df_list must contain at least one data frame", call. = FALSE)
  }
  Reduce(function(df1, df2) merge(df1, df2, by = by_name, all = TRUE), df_list)
}
68

    
69
# Reclassify one or more numeric columns into classes with cut() and compute
# the mean of a summary variable per class.
#   df          : data frame
#   var_name    : character vector of columns to reclassify
#   brks        : break points passed to cut() (intervals closed on the right)
#   lab_brks    : labels for the resulting classes
#   suffix      : suffix of the new class column ("<var>_<suffix>"); also used
#                 as the name of the class column in the aggregated tables
#   summary_var : name of the column averaged within each class
# Returns a list with elements
#   agg_df : list (one per var_name) of data frames; the class column is named
#            <suffix> and the per-class mean column is named "<var>_<suffix>"
#   df     : input data frame with the new class columns appended
# The elements keep their original positions, so [[1]] / [[2]] still work.
reclassify_df <- function(df, var_name, brks, lab_brks, suffix, summary_var) {
  var_tab <- vector("list", length(var_name))
  for (i in seq_along(var_name)) {
    var_rec_name <- paste(var_name[i], suffix, sep = "_")
    df[[var_rec_name]] <- cut(df[[var_name[i]]], breaks = brks,
                              labels = lab_brks, right = TRUE)
    tmp <- aggregate(df[[summary_var]] ~ df[[var_rec_name]], data = df, FUN = mean)
    names(tmp) <- c(suffix, var_rec_name)
    var_tab[[i]] <- tmp
  }
  # The original built this named list and then returned an unnamed copy.
  list(agg_df = var_tab, df = df)
}
83

    
84
# Extract the station data stored in an interpolation result object for one
# date.
#   date_str           : a single date, matched against
#                        obj$sampling_obj$sampling_dat$date
#   obj_mod_interp_str : path to the .RData file holding the result object
#                        (read with load_obj)
#   training, testing  : which dataset(s) to return
# Returns
#   - list(testing = data_v, training = data_s) when both flags are TRUE
#   - data_v when only testing is TRUE, data_s when only training is TRUE
#   - NULL (invisibly) when both flags are FALSE
# Stops with an explicit message when the date is not in the sampling list.
station_data_interp <- function(date_str, obj_mod_interp_str, training = TRUE, testing = TRUE) {
  obj_mod_interp <- load_obj(obj_mod_interp_str)
  sampling_date_list <- obj_mod_interp$sampling_obj$sampling_dat$date
  k <- match(date_str, sampling_date_list)
  if (length(k) != 1 || is.na(k)) {
    stop("date_str must be a single date present in the sampling date list",
         call. = FALSE)
  }

  # The first element of the result object holds one entry per sampling date.
  date_obj <- obj_mod_interp[[1]][[k]]
  data_s <- date_obj$data_s   # returned as "training"
  data_v <- date_obj$data_v   # returned as "testing"

  if (training && testing) {
    return(list(testing = data_v, training = data_s))
  }
  if (testing) {
    return(data_v)
  }
  if (training) {
    return(data_s)
  }
  invisible(NULL)
}
107

    
108
### Calculate accuracy metrics
109
# Compute accuracy metrics from two numeric vectors of equal length.
# Residuals are defined as x - y; missing values are dropped from every metric.
#   x, y : numeric vectors
# Returns a one-row data frame with columns
#   mae  : mean absolute residual
#   rmse : root mean squared residual
#   me   : mean residual
#   r    : Pearson correlation between x and y (complete pairs only)
#   avg  : mean residual (same value as me; kept so the columns callers
#          expect do not change)
#   m50  : median residual
calc_accuracy_metrics <- function(x, y) {
  residuals <- x - y
  mean_res <- mean(residuals, na.rm = TRUE)
  data.frame(
    mae = mean(abs(residuals), na.rm = TRUE),
    rmse = sqrt(mean(residuals^2, na.rm = TRUE)),
    me = mean_res,
    r = cor(x, y, use = "complete.obs"),
    avg = mean_res,
    m50 = median(residuals, na.rm = TRUE)
  )
}
121

    
122
#MODIFY LATER
123
# raster_pred_interp<-function(date_str,rast_file_name_list,path_data,data_sp){
124
#   date_selected<-date_str
125
#   #load interpolation object
126
#   setwd(path_data)
127
#   file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_CAI2_const_all_10312012.rst",sep="")) #Search for files in relation to fusion                  
128
#   lf_pred<-list.files(pattern=file_pat) #Search for files in relation to fusion                  
129
#   
130
#   rast_cai2c<-stack(lf_cai2c)                   #lf_cai2c CAI results with constant sampling over 365 dates
131
#   rast_cai2c<-mask(rast_cai2c,mask_ELEV_SRTM)
132
#   
133
#   obj_mod_interp<-load_obj(obj_mod_interp_str)
134
#   sampling_date_list<-obj_mod_interp$sampling_obj$sampling_dat$date
135
#   k<-match(date_selected,sampling_date_list)
136
#   names(obj_mod_interp[[1]][[k]])               #Show the name structure of the object/list
137
#   
138
#   #Extract the training and testing information for the given date...
139
#   data_s<-obj_mod_interp[[1]][[k]]$data_s #object for the first date...20100103                  
140
#   data_v<-obj_mod_interp[[1]][[k]]$data_v #object for the first date...20100103                  
141
#   if (testing==TRUE & training==FALSE){
142
#     return(data_v)
143
#   }
144
#   if (training==TRUE & testing==FALSE){
145
#     return(data_s)
146
#   }
147
#   if (training==TRUE & testing==TRUE ){
148
#     dataset_stat<-list(data_v,data_s)
149
#     names(dataset_stat)<-c("testing","training")
150
#     return(dataset_stat)
151
#   }
152
# }
153

    
154
#########
155
#loading R objects that might have similar names
156

    
157
# Input/output names used throughout the script.
out_prefix <- "_method_comp7_12102012b_"                      #Suffix appended to every output file name
infile1 <- "ghcn_or_tmax_covariates_06262012_OR83M.shp"       #GHCN shapefile containing variables for modeling 2010
#infile2 <- "list_10_dates_04212012.txt"                      #List of 10 dates for the regression
infile2 <- "list_365_dates_04212012.txt"                      #list of dates
infile3 <- "LST_dates_var_names.txt"                          #LST dates name
infile4 <- "models_interpolation_05142012.txt"                #Interpolation model names
infile5 <- "mean_day244_rescaled.rst"                         #mean LST for day 244
inlistf <- "list_files_05032012.txt"                          #list of raster images containing the Covariates
infile6 <- "OR83M_state_outline.shp"                          #Region outline shapefile (read further down)
167
#stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
168

    
169
i <- 2
##### LOAD USEFUL DATA

#obj_list<-"list_obj_08262012.txt"                                  #Results of fusion from the run on ATLAS
# Working directory: relative file names used below (rasters, shapefiles,
# sourced script, outputs) are resolved against it.
path <- "/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison_10242012"
path_wd <- "/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison_10242012"
#path<-"/Users/benoitparmentier/Dropbox/Data/NCEAS/Oregon_covariates/"            #Local dropbox folder on Benoit's laptop
setwd(path)

# Folders holding the station data for the CAI and fusion (GAM) runs
path_data_cai <- "/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI"  #Change to constant
path_data_fus <- "/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM"

# Files that contain the model objects and training-testing information for CAI and Fusion
obj_mod_fus_name <- "results_mod_obj__365d_GAM_fusion_const_all_lstd_11022012.RData"
obj_mod_cai_name <- "results_mod_obj__365d_GAM_CAI2_const_all_10312012.RData"

# External functions used by this script
source("function_methods_comparison_assessment_part7_12102012.R")
185

    
186
### Projection for the current region
187
# Projection string for the current region
proj_str <- "+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"
#User defined output prefix

### MAKE THIS A FUNCTION TO LOAD STACK AND DEFINE VALID RANGE...
#CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
# The list file has two columns: raster file names and covariate short names.
lines <- read.table(file.path(path, inlistf), sep = "")
inlistvar <- lines[, 1]
inlistvar <- file.path(path, as.character(inlistvar))     #Full paths to the raster files
covar_names <- as.character(lines[, 2])                   #Short names for the covariates

# Build the covariate stack, name its layers and set its projection
s_raster <- stack(inlistvar)
layerNames(s_raster) <- covar_names
projection(s_raster) <- proj_str
200

    
201
#Create mask using land cover data
202
# Land mask derived from the LC10 layer (the water layer, per the original
# author's note).
pos <- match("LC10", layerNames(s_raster))
LC10 <- subset(s_raster, pos)
LC10[is.na(LC10)] <- 0                       #NA cells are set to 0
mask_land <- LC10 < 100                      #1 where LC10 is below 100, 0 otherwise
mask_land_NA <- mask_land
mask_land_NA[mask_land_NA == 0] <- NA        #Same mask with the 0 cells set to NA

data_name <- "mask_land_OR"
raster_name <- paste0(data_name, ".rst")
writeRaster(mask_land, filename = raster_name, overwrite = TRUE)  #Writing the data in a raster file format...(IDRISI)
212
#writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
213

    
214
# Pull the elevation layer out of the stack and build a mask from it.
pos <- match("ELEV_SRTM", layerNames(s_raster))   #Position of the "ELEV_SRTM" layer
ELEV_SRTM <- raster(s_raster, layer = pos)
s_raster <- dropLayer(s_raster, pos)              #The layer is no longer part of the stack
ELEV_SRTM[ELEV_SRTM < 0] <- NA                    #Negative elevations are set to NA
mask_ELEV_SRTM <- ELEV_SRTM > 0                   #1 where elevation is above 0
219

    
220
#Change this a in loop...
221
# Extract each land-cover layer from the stack into its own object (LC1, LC2,
# LC3, LC4, LC6, LC7, LC9), removing it from s_raster and replacing NA by 0.
# LC9 (wetland, per the original author's note) is extracted but is not part
# of the LC_s stack built below.
lc_names <- c("LC1", "LC2", "LC3", "LC4", "LC6", "LC7", "LC9")
for (lc_name in lc_names) {
  pos <- match(lc_name, layerNames(s_raster))
  if (is.na(pos)) {
    stop("Layer not found in s_raster: ", lc_name, call. = FALSE)
  }
  lc_layer <- raster(s_raster, layer = pos)
  s_raster <- dropLayer(s_raster, pos)
  lc_layer[is.na(lc_layer)] <- 0
  assign(lc_name, lc_layer)      # keep the LC1 ... LC9 objects available by name
}

# Stack the six land-cover layers, give them descriptive names and mask them
# with the elevation mask.
LC_s <- stack(LC1, LC2, LC3, LC4, LC6, LC7)
layerNames(LC_s) <- c("LC1_forest", "LC2_shrub", "LC3_grass", "LC4_crop", "LC6_urban", "LC7_barren")
LC_s <- mask(LC_s, mask_ELEV_SRTM)
plot(LC_s)

# Put the renamed, masked land-cover layers back into the covariate stack
s_raster <- addLayer(s_raster, LC_s)
256

    
257
#mention this is the last... files
258

    
259
#Read region outline...
260
# Region outline: readOGR() takes the layer name, i.e. the file name without
# its extension. The pattern is escaped and anchored so only a trailing
# ".shp" is removed (an unescaped "." matches any character).
filename <- sub("\\.shp$", "", infile6)
reg_outline <- readOGR(".", filename)                 #reading shapefile

########## Load Snotel data
infile_snotname <- "snot_OR_2010_sp2_methods_11012012_.shp" #load Snotel data
snot_OR_2010_sp <- readOGR(".", sub("\\.shp$", "", infile_snotname))
snot_OR_2010_sp$date <- as.character(snot_OR_2010_sp$date)   #dates are handled as strings below
267

    
268
#dates<-c("20100103","20100901")
269
#dates_snot<-c("10310","90110")
270
#dates<-c("20100101","20100103","20100301","20100302","20100501","20100502","20100801","20100802","20100901","20100902")
271
#dates_snot<-c("10110","10310","30110","30210","50110","50210","80110","80210","90110","90210")
272

    
273
#Use file with date
274
# Dates to process: one date per line in the date list file
dates <- readLines(file.path(path, infile2))
#Or use list of date in string
#dates<-c("20100103","20100901")

# SNOTEL dates: pad the month-day part to 4 characters, then parse the result
# as month, day and 2-digit year ("%m%d%y").
dates_snot_tmp <- snot_OR_2010_sp$date
dates_snot_formatted <- format_padding_month(dates_snot_tmp)
date_test <- strptime(dates_snot_formatted, "%m%d%y")
snot_OR_2010_sp$date_formatted <- date_test
282
#Load GHCN data used in modeling: training and validation site
283

    
284
### load specific date...and plot: make a function to extract the diff and prediction...
285
#rast_diff_fc<-rast_fus_pred-rast_cai_pred
286
#layerNames(rast_diff)<-paste("diff",date_selected,sep="_")
287

    
288
####COMPARE WITH LOCATION OF GHCN and SNOTEL NETWORK
289

    
290

    
291
# Figure 1: SNOTEL stations on top of the elevation raster.
i <- 1
date_selected <- dates[i]      #first date; used in the output file names below

# Width/height are passed by name: positionally, the first argument of X11()
# is 'display', not the width.
X11(width = 12, height = 12)
# #plot(rast_diff_fc)
# plot(snot_OR_2010_sp,pch=2,col="red",add=T)
# plot(data_stat,add=T) #This is the GHCN network
# legend("bottom",legend=c("SNOTEL", "GHCN"),
#        cex=0.8, col=c("red","black"),
#        pch=c(2,1))
# title(paste("SNOTEL and GHCN networks on ", date_selected, sep=""))

plot(ELEV_SRTM)
plot(snot_OR_2010_sp, pch = 2, col = "red", add = TRUE)
#plot(data_stat,add=T)
# NOTE(review): the legend still lists GHCN although the GHCN plot call above
# is commented out -- confirm whether it should be restored.
legend("bottom", legend = c("SNOTEL", "GHCN"),
       cex = 0.8, col = c("red", "black"),
       pch = c(2, 1))
title(paste("SNOTEL and GHCN networks", sep = ""))
savePlot(paste("fig1_map_SNOT_GHCN_network_diff_elev_bckgd", date_selected, out_prefix, ".png", sep = ""), type = "png")
dev.off()
312

    
313
#add histogram of elev for SNOT and GHCN
314
#X11(width=16,height=9)
315
#par(mfrow=c(1,2))
316
#hist(snot_data_selected$ELEV_SRTM,main="")
317
#title(paste("SNOT stations and Elevation",date_selected,sep=" "))
318
#hist(data_vc$ELEV_SRTM,main="")
319
#title(paste("GHCN stations and Elevation",date_selected,sep=" "))
320
#savePlot(paste("fig2_hist_elev_SNOT_GHCN_",out_prefix,".png", sep=""), type="png")
321
#dev.off()
322
## Select date from SNOT
323
#not_selected<-subset(snot_OR_2010_sp, date=="90110" )
324
# Container for the accuracy metric data frames, one slot per date
list_ac_tab <- vector("list", length(dates))
names(list_ac_tab) <- paste("date", seq_along(dates), sep = "")

#ac_mod<-mclapply(1:length(dates), accuracy_comp_CAI_fus_function,mc.preschedule=FALSE,mc.cores = 8) #This is the end bracket from mclapply(...) statement
source("function_methods_comparison_assessment_part7_12102012.R")
#Use mcMap or mapply for function with multiple arguments...
#ac_mod<-mclapply(1:6, accuracy_comp_CAI_fus_function,mc.preschedule=FALSE,mc.cores = 1) #This is the end bracket from mclapply(...) statement

# Run the comparison function (defined in the sourced file) once per date
# index, on 8 cores.
ac_mod <- mclapply(seq_along(dates), accuracy_comp_CAI_fus_function, mc.preschedule = FALSE, mc.cores = 8)

# mclapply() returns "try-error" objects for jobs that failed; report which
# dates are affected instead of letting them surface later as obscure errors.
failed_dates <- vapply(ac_mod, inherits, logical(1), what = "try-error")
if (any(failed_dates)) {
  warning("accuracy_comp_CAI_fus_function failed for date(s): ",
          paste(dates[failed_dates], collapse = ", "), call. = FALSE)
}
333

    
334
# Collect accuracy information for the different dates: the 4th element of
# each per-date result is the metric table; stack them into one data frame.
tb <- do.call(rbind, lapply(ac_mod, function(date_result) date_result[[4]]))
#ac_data_xdates<-do.call(rbind,tb)
ac_data_xdates <- tb

##Now subset for each model...
# One data frame per model id, created as data_ac_<mod_id>.
# The loop runs over mod_names (the original iterated over 'rowstr', which is
# not defined in this script).
mod_names <- unique(ac_data_xdates$mod_id)
for (i in seq_along(mod_names)) {
  data_ac <- subset(ac_data_xdates, mod_id == mod_names[i])
  data_name <- paste("data_ac_", mod_names[i], sep = "")
  assign(data_name, data_ac)
}
353

    
354
# Boxplots of MAE and RMSE across dates for fusion and CAI, evaluated against
# the GHCN and SNOTEL stations.
# Width/height are passed by name: positionally, the first argument of X11()
# is 'display', not the width.
X11(width = 12, height = 12)
boxplot(data_ac_ghcn_fus$mae)
boxplot(data_ac_snot_fus$mae)
boxplot(data_ac_ghcn_cai$mae)
boxplot(data_ac_snot_cai$mae)
boxplot(data_ac_snot_fus$mae, data_ac_snot_cai$mae, names = c("fus", "CAI"))
boxplot(data_ac_ghcn_fus$mae, data_ac_ghcn_cai$mae, names = c("fus", "CAI"))

# Labels follow the order of the data arguments: GHCN (fus, CAI), then SNOTEL
# (fus, CAI). The original labels had the two networks swapped.
boxplot(data_ac_ghcn_fus$mae, data_ac_ghcn_cai$mae, data_ac_snot_fus$mae, data_ac_snot_cai$mae,
        names = c("fus_GHCN", "CAI_GHCN", "fus_SNOT", "CAI_SNOT"))
savePlot(paste("fig12_prediction_tmax_MAE_boxplot_fus_CAI_GHCN_SNOT_", date_selected, out_prefix, ".png", sep = ""), type = "png")

boxplot(data_ac_ghcn_fus$rmse, data_ac_ghcn_cai$rmse, data_ac_snot_fus$rmse, data_ac_snot_cai$rmse,
        names = c("fus_GHCN", "CAI_GHCN", "fus_SNOT", "CAI_SNOT"))
savePlot(paste("fig12_prediction_tmax_RMSE_boxplot_fus_CAI_GHCN_SNOT_", date_selected, out_prefix, ".png", sep = ""), type = "png")

# Save the accuracy table for all dates
filename <- paste("accuracy_table_GHCN_SNOT_", date_selected, out_prefix, ".RData", sep = "")
save(ac_data_xdates, file = filename)

# Column means of the numeric metrics for each table. mean() on a whole data
# frame returns NA with a warning in current R, so the numeric columns are
# averaged explicitly.
mean_numeric_cols <- function(df) {
  colMeans(df[vapply(df, is.numeric, logical(1))])
}
mean_numeric_cols(data_ac_snot_fus)
mean_numeric_cols(data_ac_snot_cai)
mean_numeric_cols(data_ac_ghcn_fus)
mean_numeric_cols(data_ac_ghcn_cai)
374

    
375
### END OF CODE
376
### END OF CODE
377
#Write a part to caculate MAE per date...
378
#ac_table_metrics<-do.call(rbind,ac_tab_list)
379

    
380
#Subset and present the average MAE and RMSE for the dataset...
381

    
382
#calculate average per month, extract LST too...?
383

    
384
####################################################################
385
#From this line on: code is exploratory...
386
####################################################################
387
#### DO THIS FOR IMAGE STACK...DIFF and LAND COVER...#### RESIDUALS AND LAND COVER...
388
# 
389
# dat_stack<-stack(rast_diff,rast_fus_pred,rast_cai_pred, ELEV_SRTM)
390
# dat_analysis<-as(dat_stack,"SpatialGridDataFrame")
391
# names(dat_analysis)<-c("diff_fc","pred_fus","pred_cai","E_SRTM")
392
# brks<-c(0,500,1000,1500,2000,2500,4000)
393
# lab_brks<-1:6
394
# elev_rcstat<-cut(dat_analysis$E_SRTM,breaks=brks,labels=lab_brks,right=F)
395
# dat_analysis$elev_rec<-elev_rcstat
396
# 
397
# spplot(dat_analysis,"elev_rec")
398
# spplot(dat_analysis,"diff_fc")
399
# mean_diff_fc<-aggregate(diff_fc~elev_rec,data=dat_analysis,mean)
400
# table(dat_analysis$elev_rec) #Number of observation per class
401
# 
402
# diffelev_mod<-lm(diff_fc~elev_rec,data=dat_analysis)
403
# summary(diffelev_mod)
404
# mean_rec6_val<-0.65993+(-8.56327)
405
# mean_diff_fc
406
# 
407
# brks<-c(0,500,1000,1500,2000,2500,4000)
408
# lab_brks<-1:6
409
# elev_rcstat<-cut(data_vf$ELEV_SRTM,breaks=brks,labels=lab_brks,right=F)
410
# y_range<-range(c(diff_fc))
411
# x_range<-range(c(elev_rcstat))
412
# plot(elev_rcstat,diff_fc, ylab="diff_cf", xlab="ELEV_SRTM (m) ", 
413
#      ylim=y_range, xlim=x_range)
414
# text(elev_rcstat,diff_cf,labels=data_vf$idx,pos=3)
415
# grid(lwd=0.5,col="black")
416
# title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
417
# 
418
# # Combine both training and testing
419
# pred_fus<-c(data_vf$pred_mod7,data_sf$pred_mod7)
420
# pred_cai<-c(data_vc$pred_mod9,data_sc$pred_mod9)
421
# elev_station<-c(data_vf$ELEV_SRTM,data_sf$ELEV_SRTM)
422
# diff_fc<-pred_fus-pred_cai
423
# 
424
# elev_rcstat<-cut(elev_station,breaks=brks,labels=lab_brks,right=F)
425
# y_range<-range(diff_fc)
426
# x_range<-range(elev_station)
427
# plot(elev_station,diff_fc, ylab="diff_fc", xlab="ELEV_SRTM (m) ", 
428
#      ylim=y_range, xlim=x_range)
429
# text(elev_rcstat,diff_fc,labels=data_vf$idx,pos=3)
430
# grid(lwd=0.5,col="black")
431
# title(paste("Testing stations residuals fusion vs Elevation",date_selected,sep=" "))
432
# 
(34-34/34)