Project

General

Profile

« Previous | Next » 

Revision 618bf412

Added by Benoit Parmentier about 12 years ago

Initial commit-task#491-methods comparison part 1: kriging, GAM, GWR, FUS, CAI

View differences:

climate/research/oregon/interpolation/methods_comparison_assessment_part1.R
1
#####################################  METHOD COMPARISON ##########################################
2
#################################### Spatial Analysis ########################################
3
#This script utilizes the R ojbects created during the interpolation phase.                       #
4
#At this stage the script produces figures of various accuracy metrics and compare methods:       #
5
#- multisampling plots                                                                            #
6
#- spatial accuracy in terms of distance to closest station                                       #
7
#- spatial density of station network and accuracy metric                                         # 
8
#AUTHOR: Benoit Parmentier                                                                        #
9
#DATE: 09/25/2012                                                                                 #
10
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#??--                                    #
11
###################################################################################################
12

  
13
###Loading R library and packages                                                      
14
#library(gtools)                                        # loading some useful tools 
15
library(mgcv)                                           # GAM package by Wood 2006 (version 2012)
16
library(sp)                                             # Spatial pacakge with class definition by Bivand et al. 2008
17
library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al. 2012
18
library(rgdal)                                          # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
19
library(gstat)                                          # Kriging and co-kriging by Pebesma et al. 2004
20
library(automap)                                        # Automated Kriging based on gstat module by Hiemstra et al. 2008
21
library(spgwr)
22
library(gpclib)
23
library(maptools)
24
library(graphics)
25
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
26
library(raster)
27
library(rasterVis)
28
library(plotrix)   #Draw circle on graph
29
library(reshape)
30
## Functions
31
#loading R objects that might have similar names
32
load_obj <- function(f)
33
{
34
  env <- new.env()
35
  nm <- load(f, env)[1]
36
  env[[nm]]
37
}
38

  
39
###Parameters and arguments
40

  
41
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp"    #GHCN shapefile containing variables for modeling 2010                 
42
#infile2<-"list_10_dates_04212012.txt"                    #List of 10 dates for the regression
43
infile2<-"list_365_dates_04212012.txt"                    #list of dates
44
infile3<-"LST_dates_var_names.txt"                        #LST dates name
45
infile4<-"models_interpolation_05142012.txt"              #Interpolation model names
46
infile5<-"mean_day244_rescaled.rst"                       #mean LST for day 244
47
inlistf<-"list_files_05032012.txt"                        #list of raster images containing the Covariates
48
infile6<-"OR83M_state_outline.shp"
49
#stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
50

  
51

  
52
obj_list<-"list_obj_08262012.txt"                                  #Results of fusion from the run on ATLAS
53
path<-"/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison" #Jupiter LOCATION on Atlas for kriging                              #Jupiter Location on XANDERS
54
#path<-"/Users/benoitparmentier/Dropbox/Data/NCEAS/Oregon_covariates/"            #Local dropbox folder on Benoit's laptop
55
setwd(path) 
56
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
57
                                                                                #Number of kriging model
58
out_prefix<-"methods_09262012_"                                              #User defined output prefix
59

  
60
sampling_CAI<-load_obj("results2_CAI_sampling_obj_09132012_365d_GAM_CAI2_multisampling2.RData")
61
sampling_fus<-load_obj("results2_fusion_sampling_obj_10d_GAM_fusion_multisamp4_09192012.RData")
62
fus_CAI_mod<-load_obj("results2_CAI_Assessment_measure_all_09132012_365d_GAM_CAI2_multisampling2.RData")
63
gam_fus_mod1<-load_obj("results2_fusion_Assessment_measure_all_10d_GAM_fusion_multisamp4_09192012.RData")
64

  
65
filename<-sub(".shp","",infile1)             #Removing the extension from file.
66
ghcn<-readOGR(".", filename)                 #reading shapefile 
67

  
68
#CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
69
lines<-read.table(paste(path,"/",inlistf,sep=""), sep="")                      #Column 1 contains the names of raster files
70
inlistvar<-lines[,1]
71
inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
72
covar_names<-as.character(lines[,2])                                         #Column two contains short names for covaraites
73

  
74
s_raster<- stack(inlistvar)                                                  #Creating a stack of raster images from the list of variables.
75
layerNames(s_raster)<-covar_names                                            #Assigning names to the raster layers
76
projection(s_raster)<-proj_str
77

  
78
#Create mask
79
pos<-match("LC10",layerNames(s_raster))
80
LC10<-subset(s_raster,pos)
81
LC10[is.na(LC10)]<-0   #Since NA values are 0, we assign all zero to NA
82
mask_land<-LC10<100
83
mask_land_NA<-mask_land
84
mask_land_NA[mask_land_NA==0]<-NA
85

  
86
data_name<-"mask_land_OR"
87
raster_name<-paste(data_name,".rst", sep="")
88
writeRaster(mask_land, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
89
#writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
90

  
91
pos<-match("mm_01",layerNames(s_raster))
92
mm_01<-subset(s_raster,pos)
93
mm_01<-mm_01-273.15
94
mm_01<-mask(mm_01,mask_land_NA)
95
#mention this is the last... files
96

  
97
### RESULTS COMPARISON
98

  
99
### CODE BEGIN #####
100

  
101
### PART I MULTISAMPLING COMPARISON ####
102

  
103
tb_diagnostic<-sampling_CAI$tb
104
tb_diagnostic2<-sampling_fus$tb
105

  
106
tb_diagnostic[["prop"]]<-as.factor(tb_diagnostic[["prop"]])
107
tb_diagnostic2[["prop"]]<-as.factor(tb_diagnostic2[["prop"]])
108

  
109
#Preparing the data for the plot
110
#fus data
111
t<-melt(tb_diagnostic,
112
        measure=c("mod1","mod2","mod3","mod4", "mod5", "mod6", "mod7", "mod8","mod9"), 
113
        id=c("dates","metric","prop"),
114
        na.rm=F)
115
avg_tb<-cast(t,metric+prop~variable,mean)
116
sd_tb<-cast(t,metric+prop~variable,sd)
117
n_tb<-cast(t,metric+prop~variable,length)
118
avg_tb[["prop"]]<-as.numeric(as.character(avg_tb[["prop"]]))
119
avg_RMSE<-subset(avg_tb,metric=="RMSE")
120

  
121
#CAI data
122
t2<-melt(tb_diagnostic2,
123
        measure=c("mod1","mod2","mod3","mod4", "mod5", "mod6", "mod7", "mod8","mod9"), 
124
        id=c("dates","metric","prop"),
125
        na.rm=F)
126
avg_tb2<-cast(t2,metric+prop~variable,mean)
127
sd_tb2<-cast(t2,metric+prop~variable,sd)
128
n_tb2<-cast(t2,metric+prop~variable,length)
129
avg_tb2[["prop"]]<-as.numeric(as.character(avg_tb2[["prop"]]))
130
avg_RMSE2<-subset(avg_tb2,metric=="RMSE")
131

  
132
#Select only information related to FUSION
133

  
134
x<-avg_RMSE[["prop"]]
135
i=9
136
mod_name<-paste("mod",i,sep="")
137
y<-avg_RMSE[[mod_name]]
138

  
139
sd_tb_RMSE <- subset(sd_tb, metric=="RMSE") 
140
x_sd<-sd_tb_RMSE[["prop"]]
141
i=9
142
mod_name<-paste("mod",i,sep="")
143
y_sd<-sd_tb_RMSE[[mod_name]]
144

  
145
#Select only information related to CAI
146

  
147
x2<-avg_RMSE2[["prop"]]
148
i=9
149
mod_name<-paste("mod",i,sep="")
150
y2<-avg_RMSE2[[mod_name]]
151

  
152
sd_tb_RMSE2 <- subset(sd_tb2, metric=="RMSE") 
153
x_sd2<-sd_tb_RMSE2[["prop"]]
154
i=9
155
mod_name<-paste("mod",i,sep="")
156
y_sd2<-sd_tb_RMSE2[[mod_name]]
157

  
158
n=150
159
ciw   <- qt(0.975, n) * y_sd / sqrt(n)
160
ciw2   <- qt(0.975, n) * y_sd2 / sqrt(n)
161

  
162
X11()
163
plotCI(y=y, x=x, uiw=ciw, col="black", main=" FUS: RMSE proportion of hold out", barcol="blue", lwd=1)
164
lines(x,y,col="grey")
165
plotCI(y=y2, x=x2, uiw=ciw2, col="black", main=" CAI: RMSE proportion of hold out", barcol="blue", lwd=1)
166
lines(x2,y2,col="grey")
167
plot(x,y,col="grey",type="b")
168
lines(x2,y2,col="blue")
169

  
170
savePlot(paste("fus_CAI_multisapling_",out_prefix,".png", sep=""), type="png")
171
dev.off()
172

  
173

  
174
### PART II SPATIAL PATTERN COMPARISON: TEMPORAL PROFILES ####
175

  
176

  
177
#gam_fus_mod1<-method_mod$gam_fus_mod1
178
#fus_CAI_mod<- method_mod$fus_CAI_mod
179
#gwr_mod<-method_mod$gwr_mod
180
l_f<-list.files(pattern="*tmax_predicted.*fusion5.rst$") #Search for files in relation to fusion
181
l_f2<-list.files(pattern="CAI_tmax_predicted.*_GAM_CAI2.rst$")
182
inlistpred<-paste(path,"/",as.character(l_f),sep="")
183
inlistpred2<-paste(path,"/",as.character(l_f2),sep="")
184

  
185
fus_rast<- stack(inlistpred)                                                  #Creating a stack of raster images from the list of variables.
186
cai_rast<- stack(inlistpred2)                                                  #Creating a stack of raster images from the list of variables.
187

  
188
id<-unique(ghcn$station)
189
ghcn_id<-as.data.frame(subset(ghcn,select=c("station","x_OR83M","y_OR83M")))
190

  
191
ghcn_melt<-melt(ghcn_id,
192
                measure=c("x_OR83M","y_OR83M"), 
193
                id=c("station"),
194
                na.rm=F)
195

  
196
ghcn_cast<-cast(ghcn_melt,station~variable,mean)
197
ghcn_locs<-as.data.frame(ghcn_cast)
198

  
199
coords<- ghcn_locs[,c('x_OR83M','y_OR83M')]
200
coordinates(ghcn_locs)<-coords
201
proj4string(ghcn_locs)<-proj_str  #Need to assign coordinates...
202

  
203
tmp<-extract(fus_rast,ghcn_locs)
204
tmp2<-extract(cai_rast,ghcn_locs)
205

  
206
tmp_names<-paste("fusd",seq(1,365),sep="")
207
colnames(tmp)<-tmp_names
208
tmp_names<-paste("caid",seq(1,365),sep="")
209
colnames(tmp2)<-tmp_names
210
ghcn_fus_pred<-cbind(as.data.frame(ghcn_locs),as.data.frame(tmp))
211
ghcn_cai_pred<-cbind(as.data.frame(ghcn_locs),as.data.frame(tmp2))
212

  
213
write.table(ghcn_fus_pred,file="extract3_fus_y2010.txt",sep=",")
214
write.table(ghcn_cai_pred,file="extract3_cai_y2010.txt",sep=",")
215

  
216
ghcn$value[ghcn$value< -150 | ghcn$value>400]<-NA #screenout values out of range
217
ghcn$value<-ghcn$value/10
218
ghcn_m<-melt(as.data.frame(ghcn),
219
                measure=c("value"), 
220
                id=c("station","date"),
221
                na.rm=F)
222

  
223
ghcn_mc<-cast(ghcn_m,station~date~variable,mean) #This creates an array of dimension 186,366,1
224

  
225
ghcn_value<-as.data.frame(ghcn_mc[,,1])
226
ghcn_value<-cbind(ghcn_locs,ghcn_value[,1:365])
227
write.table(ghcn_value,na="",file="extract3_dailyTmax_y2010.txt",sep=",")
228

  
229
id<-c("USW00094261","USW00004141","USC00356252","USC00357208")
230
m<-match(id,ghcn_locs$station)
231
dat_id<-ghcn_locs[m,]  #creating new subset
232
#dat_id<-subset(ghcn_locs[gj])
233

  
234
filename<-sub(".shp","",infile6)             #Removing the extension from file.
235
reg_outline<-readOGR(".", filename)                 #reading shapefile 
236
X11()
237
s.range <- c(min(minValue(mm_01)), max(maxValue(mm_01)))
238
col.breaks <- pretty(s.range, n=50)
239
lab.breaks <- pretty(s.range, n=5)
240
temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
241
plot(mm_01, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
242
     axis=list(at=lab.breaks, labels=lab.breaks))
243
plot(reg_outline, add=TRUE)
244
plot(dat_id,cex=1.5,add=TRUE)
245
title("Selected stations for comparison",line=3)
246
title("(Background: mean January LST)", cex=0.5, line=2)
247
coords<-coordinates(dat_id)
248
text(x=coords[,1],y=coords[,2],labels=id,cex=0.8, adj=c(0,1),offset=2) #c(0,1) for lower right corner!
249
savePlot(paste("temporal_profile_station_locations_map",out_prefix,".png", sep=""), type="png")
250
dev.off()
251

  
252
stat_list<-vector("list",3 )
253
stat_list[[1]]<-ghcn_fus_pred
254
stat_list[[2]]<-ghcn_cai_pred
255
stat_list[[3]]<-ghcn_value
256
ac_temp<-matrix(NA,length(id),2)
257

  
258
#id<-ghcn_value$station #if runinng on all the station...
259
for (i in 1:length(id)){
260
  m1<-match(id[i],ghcn_fus_pred$station)
261
  m2<-match(id[i],ghcn_cai_pred$station)
262
  m3<-match(id[i],ghcn_value$station)
263
  y1<-as.numeric(ghcn_fus_pred[m1,6:ncol(ghcn_fus_pred)])
264
  y2<-as.numeric(ghcn_cai_pred[m2,6:ncol(ghcn_cai_pred)])
265
  y3<-as.numeric(ghcn_value[m3,6:ncol(ghcn_value)])
266
  res2<-y2-y3
267
  res1<-y1-y3
268
  x<-1:365
269
  X11(6,15)
270
  plot(x,y1,type="l",col="black")
271
  lines(x,y2,col="blue")
272
  lines(x,y3,col="red")
273
  title(paste("temporal profile for station ", id[i],sep=""))
274
  # add a legend
275
  legend("topright",legend=c("fus","CAI","OBS"), cex=1.2, col=c("black","blue","red"),
276
         lty=1, title="tmax")
277
  savePlot(paste("Temporal_profile_",id[i],out_prefix,".png", sep=""), type="png")
278
  zero<-rep(0,365)
279
  plot(x,res2,type="l",col="black")
280
  lines(x,res1,col="blue")
281
  lines(x,zero,col="red")
282
  legend("topright",legend=c("fus","CAI"), cex=1.2, col=c("black","blue"),
283
         lty=1, title="tmax")
284
  savePlot(paste("Temporal_profile_res",id[i],out_prefix,".png", sep=""), type="png")
285
  
286
  ac_temp[i,1]<-mean(abs(res1),na.rm=T)
287
  ac_temp[i,2]<-mean(abs(res2),na.rm=T)
288
  dev.off()
289
}
290
ac_temp<-as.data.frame(ac_temp)
291
ac_temp$station<-id
292
names(ac_temp)<-c("fus","CAI","station")
293

  
294
id<-ghcn_value$station #if runinng on all the station...
295
ac_temp2<-matrix(NA,length(id),2)
296

  
297
for (i in 1:length(id)){
298
  m1<-match(id[i],ghcn_fus_pred$station)
299
  m2<-match(id[i],ghcn_cai_pred$station)
300
  m3<-match(id[i],ghcn_value$station)
301
  y1<-as.numeric(ghcn_fus_pred[m1,6:ncol(ghcn_fus_pred)])
302
  y2<-as.numeric(ghcn_cai_pred[m2,6:ncol(ghcn_cai_pred)])
303
  y3<-as.numeric(ghcn_value[m3,6:ncol(ghcn_value)])
304
  res2<-y2-y3
305
  res1<-y1-y3
306
  ac_temp2[i,1]<-mean(abs(res1),na.rm=T)
307
  ac_temp2[i,2]<-mean(abs(res2),na.rm=T)
308
}  
309

  
310
ac_temp2<-as.data.frame(ac_temp2)
311
ac_temp2$station<-id
312
names(ac_temp2)<-c("fus","CAI","station")
313

  
314
ac_temp2<-ac_temp2[order(ac_temp2$fus,ac_temp2$CAI), ]
315
ghcn_MAE<-merge(ghcn_locs,ac_temp2,by.x=station,by.y=station)
316
##### USING TEMPORAL IMAGES...
317

  
318
date_list<-vector("list", length(l_f))
319
for (k in 1:length(l_f)){
320
  tmp<-(unlist(strsplit(l_f[k],"_"))) #spliting file name to obtain the prediction date
321
  date_list[k]<-tmp[4] 
322
}
323

  
324

  
325

  
326
date_list2<-vector("list", length(l_f2))
327
for (k in 1:length(l_f2)){
328
  tmp<-(unlist(strsplit(l_f2[k],"_"))) #spliting file name to obtain the prediction date
329
  date_list2[k]<-tmp[4] 
330
}
331

  
332
setdiff(date_list,date_list2)
333
all.equal(date_list,date_list2) #This checks that both lists are equals
334

  
335
list_fus_data_s<-vector("list", 365)
336
list_cai_data_s<-vector("list", 365)
337
list_fus_data_v<-vector("list", 365)
338
list_cai_data_v<-vector("list", 365)
339

  
340
list_fus_data<-vector("list", 365)
341
list_cai_data<-vector("list", 365)
342

  
343
list_dstspat_er<-vector("list", 365)
344
k=1
345
for (k in 1:365){
346
  
347
  #Start loop over the full year!!!
348
  names(gam_fus_mod1[[k]])
349
  data_s<-gam_fus_mod1[[k]]$data_s
350
  data_v<-gam_fus_mod1[[k]]$data_v
351
  
352
  date_proc<-unique(data_s$date)
353
  index<-match(as.character(date_proc),unlist(date_list)) #find the correct date..
354
  #raster_pred<-raster(rp_raster,index)
355
  raster_pred<-raster(l_f[[index]])
356
  layerNames(raster_pred)<-"y_pred"
357
  projection(raster_pred)<-proj_str
358
  pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame
359
  
360
  rpred_val_s <- overlay(pred_sgdf,data_s)             #This overlays the kriged surface tmax and the location of weather stations
361
  rpred_val_v <- overlay(pred_sgdf,data_v)             #This overlays the kriged surface tmax and the location of weather stations
362
  
363
  pred_mod<-"pred_fus" #Change for the name of the method
364
  #Adding the results back into the original dataframes.
365
  data_s[[pred_mod]]<-rpred_val_s$y_pred
366
  
367
  data_v[[pred_mod]]<-rpred_val_v$y_pred  
368
  res_mod_s<- data_s$dailyTmax - data_s[[pred_mod]]           #Residuals from kriging training
369
  res_mod_v<- data_v$dailyTmax - data_v[[pred_mod]]           #Residuals from kriging validation
370
  
371
  res_mod<-"res_fus"
372
  data_v[[res_mod]]<-res_mod_v
373
  data_s[[res_mod]]<-res_mod_s
374
  
375
  #####second series added
376
  data_v2<-fus_CAI_mod[[k]]$data_v
377
  data_s2<-fus_CAI_mod[[k]]$data_s
378
  
379
  date_proc<-unique(data_s$date)
380
  index<-match(as.character(date_proc),unlist(date_list)) #find the correct date..
381
  #raster_pred<-raster(rp_raster,index)
382
  raster_pred2<-raster(l_f2[[index]])
383
  layerNames(raster_pred2)<-"y_pred"
384
  projection(raster_pred2)<-proj_str
385
  pred_sgdf2<-as(raster_pred2,"SpatialGridDataFrame") #Conversion to spatial grid data frame
386
  
387
  rpred_val_s2 <- overlay(pred_sgdf2,data_s2)             #This overlays the kriged surface tmax and the location of weather stations
388
  rpred_val_v2 <- overlay(pred_sgdf2,data_v2)             #This overlays the kriged surface tmax and the location of weather stations
389
  
390
  pred_mod2<-"pred_CAI" #Change for the name of the method
391
  #Adding the results back into the original dataframes.
392
  data_s2[[pred_mod2]]<-rpred_val_s2$y_pred
393
  
394
  data_v2[[pred_mod2]]<-rpred_val_v2$y_pred  
395
  res_mod_s2<- data_s2$dailyTmax - data_s2[[pred_mod2]]           #Residuals from kriging training
396
  res_mod_v2<- data_v2$dailyTmax - data_v2[[pred_mod2]]           #Residuals from kriging validation
397
  
398
  res_mod2<-"res_CAI"
399
  data_v2[[res_mod2]]<-res_mod_v2
400
  data_s2[[res_mod2]]<-res_mod_s2
401
  
402
  ###Checking if training and validation have the same columns
403
  nd<-setdiff(names(data_s),names(data_v))
404
  nd2<-setdiff(names(data_s2),names(data_v2))
405
  
406
  data_v[[nd]]<-NA #daily_delta is not the same
407
  
408
  data_v$training<-rep(0,nrow(data_v))
409
  data_v2$training<-rep(0,nrow(data_v2))
410
  data_s$training<-rep(1,nrow(data_s))
411
  data_s2$training<-rep(1,nrow(data_s2))
412
  
413
  #if length(nd)!=0 {
414
  #  for (j in 1:length(nd))
415
  #  data_s
416
  #}
417
  
418
  list_fus_data_s[[k]]<-data_s
419
  list_cai_data_s[[k]]<-data_s2
420
  list_fus_data_v[[k]]<-data_v
421
  list_cai_data_v[[k]]<-data_v2
422
  list_fus_data[[k]]<-rbind(data_v,data_s)
423
  list_cai_data[[k]]<-rbind(data_v2,data_s2)
424
  
425
  d_s_v<-matrix(0,nrow(data_v),nrow(data_s))
426
  for(i in 1:nrow(data_s)){
427
    pt<-data_s[i,]
428
    d_pt<-(spDistsN1(data_v,pt,longlat=FALSE))/1000  #Distance to stataion i in km
429
    d_s_v[,i]<-d_pt
430
  }
431
  
432
  #Create data.frame with position, ID, dst and residuals...
433
  pos<-vector("numeric",nrow(data_v))
434
  y<-vector("numeric",nrow(data_v))
435
  dst<-vector("numeric",nrow(data_v))
436
  for (i in 1:nrow(data_v)){
437
    pos[i]<-match(min(d_s_v[i,]),d_s_v[i,])
438
    dst[i]<-min(d_s_v[i,]) 
439
  }
440
  
441
  #Check if 8 models exist in data_v, if it doesn't then add column with name and "NA"
442
  #mod_name<-paste(rep("res_mod",8),1:8,sep="")
443
  #t2<-match(names(data_v),mod_name)
444
  #dstspat_er<-as.data.frame(cbind(as.vector(data_v$id),as.vector(data_s$id[pos]),pos, dst,res_mod_v))
445
  dstspat_er<-as.data.frame(cbind(as.vector(data_v$id),as.vector(data_s$id[pos]),pos, data_v$lat, data_v$lon, data_v$x_OR83M,data_v$y_OR83M,
446
                                  dst,
447
                                  res_mod_v,
448
                                  data_v$res_mod1,
449
                                  data_v$res_mod2,
450
                                  data_v$res_mod3,
451
                                  data_v$res_mod4,
452
                                  data_v$res_mod5,
453
                                  res_mod_v2))
454
  
455
  names(dstspat_er)[1:7]<-c("v_id","s_id","pos","lat","lon","x_OR83M","y_OR83M")
456
  names(dstspat_er)[10:15]<-c("res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","res_CAI")
457
  list_dstspat_er[[k]]<-dstspat_er
458

  
459
}  
460
save(list_dstspat_er,file="spat_ac5.RData")
461
#obj_tmp2<-load_obj("spat_ac4.RData")
462
save(list_fus_data,file="list_fus_data_combined.RData")
463
save(list_cai_data,file="list_cai_data_combined.RData")
464

  
465
save(list_fus_data_s,file="list_fus_data_s_combined.RData")
466
save(list_cai_data_s,file="list_cai_data_s_combined.RData")
467
save(list_fus_data_v,file="list_fus_data_v_combined.RData")
468
save(list_cai_data_v,file="list_cai_data_v_combined.RData")
469

  
470
for (k in 1:365){
471
  data_s<-as.data.frame(list_fus_data_s[[k]])
472
  data_v<-as.data.frame(list_fus_data_v[[k]])
473
  list_fus_data[[k]]<-rbind(data_s,data_v)
474
  data_s2<-as.data.frame(list_cai_data_s[[k]])
475
  data_v2<-as.data.frame(list_cai_data_v[[k]])
476
  list_cai_data[[k]]<-rbind(data_s2,data_v2)
477
}
478
data_fus<-do.call(rbind.fill,list_fus_data)
479
data_cai<-do.call(rbind.fill,list_cai_data)
480

  
481
data_fus_melt<-melt(data_fus,
482
                   measure=c("x_OR83M","y_OR83M","res_fus","res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","pred_fus","dailyTmax","TMax","LST","training"), 
483
                   id=c("id","date"),
484
                   na.rm=F)
485
data_fus_cast<-cast(data_fus_melt,id+date~variable,mean)
486
id1="USC00350036"
487
id2="USW00004128"
488
dat_id1<-subset(data_fus_cast,id==id1)
489
dat_id2<-subset(data_fus_cast,id==id2)
490
write.table(dat_id1,file=paste("station_",id1,".txt",sep=""),sep=",")
491
#list_fus_data<-vector("list", 365)
492
#list_cai_data<-vector("list", 365)
493

  
494
test_dst<-list_dstspat_er
495
test<-do.call(rbind,list_dstspat_er)
496

  
497
for(i in 4:ncol(test)){            # start of the for loop #1
498
  test[,i]<-as.numeric(as.character(test[,i]))	
499
}
500

  
501
# Plot results
502
plot(test$dst,abs(test$res_mod_v))
503
limit<-seq(0,150, by=10)
504
tmp<-cut(test$dst,breaks=limit)
505
erd1<-tapply(test$res_mod_v,tmp, mean)
506
erd2<-as.numeric(tapply(abs(test$res_mod_v),tmp, mean))
507
plot(erd2)
508

  
509
erd1_mod1<-tapply(test$res_mod1,tmp, mean)
510
erd2_mod1<-tapply(abs(test$res_mod1),tmp, mean)
511
erd2_mod2<-tapply(abs(test$res_mod2),tmp, mean)
512
erd2_mod3<-tapply(abs(test$res_mod3),tmp, mean)
513
erd2_mod4<-tapply(abs(test$res_mod4),tmp, mean)
514
erd2_mod5<-tapply(abs(test$res_mod5),tmp, mean)
515
erd2_CAI<-tapply(abs(test$res_CAI),tmp, mean)
516
n<-tapply(abs(test$res_mod1),tmp, length)
517
distance<-seq(5,145,by=10)
518

  
519
X11()
520
plot(distance,erd2,ylim=c(1,4), type="b", col="red",ylab=" Average MAE", 
521
     xlab="distance to closest training station (km)")
522
lines(distance,erd2_mod1,col="black")
523
lines(distance,erd2_mod2,col="green")
524
lines(distance,erd2_mod3,col="blue")
525
lines(distance,erd2_mod4,col="yellow")
526
lines(distance,erd2_mod5,col="pink")
527
lines(distance,erd2_CAI,col="grey")
528

  
529
# add a title and subtitle 
530
title("MAE in terms of distance to closest station GAM and FUSION")
531

  
532
#colused<-
533
# add a legend 
534
#legend("bottomright",legend=1:(5), cex=1.2, col=colors,
535
       #pch=plotchar, lty=linetype, title="mod")
536
savePlot(paste("Comparison_models_er_spat",out_prefix,".png", sep=""), type="png")
537
dev.off()
538

  
539
means <- erd2_CAI
540
means2<- erd2
541
stdev <-tapply(abs(test$res_CAI),tmp, sd)
542
stdev2 <-tapply(abs(test$res_mod_v),tmp, sd)
543

  
544
ciw   <- qt(0.975, n) * stdev / sqrt(n)
545
ciw2   <- qt(0.975, n) * stdev2 / sqrt(n)
546

  
547
X11()
548
plotCI(y=means, x=distance, uiw=ciw, col="black", main=" CAI: MAE and distance to clostest training station", barcol="blue", lwd=1)
549
lines(distance,erd2_CAI,col="grey")
550
savePlot(paste("CI_CAI_er_spat_",out_prefix,".png", sep=""), type="png")
551
dev.off()
552

  
553
X11()
554
plotCI(y=means2, x=distance, uiw=ciw2, col="black", main=" FUSION: MAE and distance to clostest training station", barcol="blue", lwd=1)
555
lines(distance,erd2,col="black")
556
savePlot(paste("CI_fusion_er_spat_",out_prefix,".png", sep=""), type="png")
557
dev.off()
558

  
559
X11()
560
barplot(n,names.arg=as.character(distance))
561
savePlot(paste("Barplot_freq_er_spat_",out_prefix,".png", sep=""), type="png")
562
dev.off()
563

  
564
### Average MAE per station and coarse grid box (0.5 deg)
565

  
566
test$abs_res_fus<-abs(test$res_mod_v)
567
test$abs_res_CAI<-abs(test$res_CAI)
568

  
569
station_melt<-melt(test,
570
        measure=c("x_OR83M","y_OR83M","res_mod_v","res_mod1","res_mod2","res_mod3","res_mod4","res_mod5","abs_res_fus","abs_res_CAI"), 
571
        id=c("v_id"),
572
        na.rm=F)
573
station_v_er<-cast(station_melt,v_id~variable,mean)
574
#station_v_er2<-as.data.frame(station_v_er)
575
station_v_er<-as.data.frame(station_v_er)
576
oc<-vector("numeric",nrow(station_v_er))
577
oc<-oc+1
578
station_v_er$oc<-oc
579

  
580
unique(ghcn$id)
581

  
582
coords<- station_v_er[,c('x_OR83M','y_OR83M')]
583
coordinates(station_v_er)<-coords
584
proj4string(station_v_er)<-CRS  #Need to assign coordinates...
585

  
586
bubble(station_v_er,"abs_res_fus")
587

  
588
rast_agg<-aggregate(raster_pred,fact=50,fun=mean,na.rm=TRUE) #Changing the raster resolution by aggregation factor
589
rast_MAE_fus<-rasterize(station_v_er,rast_agg,"abs_res_fus",na.rm=TRUE,fun=mean)
590
rast_MAE_CAI<-rasterize(station_v_er,rast_agg,"abs_res_CAI",na.rm=TRUE,fun=mean)
591
rast_oc<-rasterize(station_v_er,rast_agg,"oc",na.rm=TRUE,fun=sum)
592
ac_agg50<-as.data.frame(values(rast_oc))
593
ac_agg50$MAE_fus<-as.numeric(values(rast_MAE_fus))
594
ac_agg50$MAE_CAI<-as.numeric(values(rast_MAE_CAI))
595
names(ac_agg50)<-c("oc","MAE_fus","MAE_CAI")
596

  
597
ghcn_sub<-as.data.frame(subset(ghcn, select=c("station","x_OR83M","y_OR83M")))
598
ghcn_sub_melt<-melt(ghcn_sub,
599
                   measure=c("x_OR83M","y_OR83M"), 
600
                   id=c("station"),
601
                   na.rm=F)
602
ghcn_stations<-as.data.frame(cast(ghcn_sub_melt,station~variable,mean))
603
coords<- ghcn_stations[,c('x_OR83M','y_OR83M')]
604
coordinates(ghcn_stations)<-coords
605
proj4string(ghcn_stations)<-CRS  #Need to assign coordinates...
606
oc_all<-vector("numeric",nrow(ghcn_stations))
607
oc_all<-oc_all+1
608

  
609
ghcn_stations$oc_all<-oc_all
610
rast_oc_all<-rasterize(ghcn_stations,rast_agg,"oc_all",na.rm=TRUE,fun=sum)
611
ac_agg50$oc_all<-values(rast_oc_all)
612

  
613
td1<-aggregate(MAE_fus~oc,data=ac_agg50,mean) 
614
td2<-aggregate(MAE_CAI~oc,data=ac_agg50,mean)
615
td<-merge(td1,td2,by="oc")
616

  
617
td1_all<-aggregate(MAE_fus~oc_all,data=ac_agg50,mean) 
618
td2_all<-aggregate(MAE_CAI~oc_all,data=ac_agg50,mean)
619
td_all<-merge(td1_all,td2_all,by="oc_all")
620

  
621
plot(MAE_fus~oc,data=td,type="b")
622
lines(td$oc,td$MAE_CAI, type="b", lwd=1.5,co="red")
623
plot(MAE_fus~oc_all,data=td_all,type="b")
624
lines(td_all$oc_all,td_all$MAE_CAI, type="b", lwd=1.5,co="red")
625

  
626
filename<-sub(".shp","",infile6)             #Removing the extension from file.
627
reg_outline<-readOGR(".", filename)                 #reading shapefile 
628
plot(rast_MAE_fus, main="Fusion MAE in coarsened 50km grid")
629
plot(reg_outline, add=TRUE)
630

  
631
plot(rast_MAE_CAI, main="CAI MAE in coarsened 50km grid")
632
plot(reg_outline, add=TRUE)
633

  
634
plot(rast_oc, main="Number of val stations in coarsened 50km grid")
635
plot(reg_outline, add=TRUE)
636
plot(rast_oc_all, main="Number of stations in coarsened 50km grid")
637
plot(reg_outline, add=TRUE)
638

  
639
list_var_stat<-vector("list", 365)
640
#list_var_stat<-vector("list", 2)
641
#k=2
642
for (k in 1:length(l_f)){
643
  
644
  raster_pred<-raster(l_f[[k]])
645
  layerNames(raster_pred)<-"fus"
646
  projection(raster_pred)<-proj_str
647
  
648
  raster_pred2<-raster(l_f2[[k]])
649
  layerNames(raster_pred2)<-"fus"
650
  projection(raster_pred2)<-proj_str
651
  
652
  tmp_rast<-mask(raster_pred2,raster_pred)
653
  
654
  t1<-cellStats(raster_pred,na.rm=TRUE,stat=sd)
655
  t2<-cellStats(raster_pred2,na.rm=TRUE,stat=sd)
656
  t2_b<-cellStats(tmp_rast,na.rm=TRUE,stat=sd)
657
  
658
  m1<-Moran(raster_pred,w=3)
659
  m2<-Moran(tmp_rast,w=3)
660
  stat<-as.data.frame(t(c(m1,m2,t1,t2)))
661
  names(stat)<-c("moran_fus","moran_CAI","sd_fus","sd_CAI")
662
  list_var_stat[[k]]<-stat
663
}
664

  
665
var_stat<-do.call(rbind,list_var_stat)
666

  
667

  
668
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "value"
669
elev<-raster(s_raster,layer=pos)             #Select layer from stack
670
elev<-mask(elev,raster_pred)
671
te<-cellStats(elev,na.rm=TRUE,stat=sd)
672

  
673
pos<-match("mm_12",layerNames(s_raster)) #Find column with name "value"
674
m_12<-raster(s_raster,layer=pos)             #Select layer from stack
675
m_LST<-Moran(m_12,w=3)
676
m_e<-Moran(elev,w=3)
677
m_12<-m_12-273.15
678
plot(MAE_fus~oc,data=td,type="b")
679
lines(td$oc,td$MAE_CAI, type="b", lwd=1.5,co="red")
680

  
681
data_dist<-as.data.frame(cbind(distance,erd2,erd2_mod1,erd2_mod2,erd2_mod3,erd2_mod4,erd2_mod5,erd2_CAI,n))
682
rownames(data_dist)<-NULL
683

  
684
#PLOTING CAI AND FUSION TO COMPARE
685

  
686
infile2<-"list_10_dates_04212012.txt"                             #List of 10 dates for the regression
687
dates2<-read.table(paste(path,"/",infile2,sep=""), sep="")         #Column 1 contains the names of raster files
688
date_list2<-as.list(as.character(dates2[,1]))
689

  
690

  
691
for (k in 1:length(date_list2)){
692
  
693
  date_proc2<-date_list2[[k]]
694
  #date_proc<-date_list[[k]]
695
  index<-match(as.character(date_proc2),unlist(date_list)) #find the correct date... in the 365 stack
696
  #raster_pred<-raster(rp_raster,index)
697
  raster_pred1<-raster(l_f[[index]])
698
  projection(raster_pred1)<-proj_str
699
  raster_pred1<-mask(raster_pred1,mask_land_NA)
700
  
701
  raster_pred2<-raster(l_f2[[index]])
702
  projection(raster_pred2)<-proj_str
703
  raster_pred2<-mask(raster_pred2,mask_land_NA)
704
  
705
  predictions <- stack(raster_pred1,raster_pred2)
706
  layerNames(predictions)<-c(paste('fusion',date_list2[[k]],sep=" "),paste('CAI',date_list2[[k]],sep=" "))
707
  # use overall min and max values to generate an nice, consistent set
708
  # of breaks for both colors (50 values) and legend labels (5 values)
709
  s.range <- c(min(minValue(predictions)), max(maxValue(predictions)))
710
  col.breaks <- pretty(s.range, n=50)
711
  lab.breaks <- pretty(s.range, n=5)
712
  temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
713
  
714
  # plot using these (common) breaks; note use of _reverse_ heat.colors,
715
  # making it so that larger numbers are redder
716
  X11(6,12)
717
  #plot(predictions, breaks=col.breaks, col=rev(heat.colors(length(col.breaks)-1)),
718
    #   axis=list(at=lab.breaks, labels=lab.breaks))
719
  plot(predictions, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
720
       axis=list(at=lab.breaks, labels=lab.breaks))
721
  
722
  savePlot(paste("comparison_raster1_CAI_fusion_tmax_prediction_",date_list2[[k]],out_prefix,".png", sep=""), type="png")
723
  diff<-raster_pred1-raster_pred2
724
  s.range <- c(min(minValue(dif)), max(maxValue(d)))
725
  
726
  plot(diff,col=temp.colors(50))
727
  savePlot(paste("comparison_raster1_diff_CAI_fusion_tmax_prediction_",date_list2[[k]],out_prefix,".png", sep=""), type="png")
728
  
729
  
730
  
731
  #hist(predictions, freq=FALSE,maxpixels=ncells(predictions))
732
  hist(predictions, breaks=col.breaks,freq=FALSE,maxpixels=ncells(predictions))
733
  savePlot(paste("comparison_histo_CAI_fusion_tmax_prediction_",date_list2[[k]],out_prefix,".png", sep=""), type="png")
734
  #plot(predictions)
735
  dev.off()
736
  
737
}  
738

  
739

  
740
write.table(data_dist,file=paste("data_dist_",out_prefix,".txt",sep=""),sep=",")
741
write.table(test,file=paste("ac_spat_dist",out_prefix,".txt",sep=""),sep=",")
742
write.table(var_stat,file=paste("moran_var_stat_",out_prefix,".txt",sep=""),sep=",")
743
write.table(td,file=paste("MAE_density_station_",out_prefix,".txt",sep=""),sep=",")
744
write.table(td_all,file=paste("MAE_density_station_all_",out_prefix,".txt",sep=""),sep=",")
745

  
746
# VISUALIZATION OF RESULTS PLOTS ACROSS MODELS FOR METHODS
747

  
748
date_selected<-"20100103"
749

  
750
lf_gwr<-list.files(pattern=paste("*",date_selected,".*08152012_1d_gwr4.rst$",sep=""))
751
lf_krig<-list.files(pattern=paste("*",date_selected,"_07312012_365d_Kriging_autokrig2.rst$",sep=""))
752
lf_gam<-list.files(pattern=paste("^GAM.*",date_selected,"_07242012_365d_GAM_fusion5.rst$",sep=""))
753
lf_fus<-list.files(pattern=paste("^fusion_tmax.*",date_selected,"_07242012_365d_GAM_fusion5.rst$",sep="")) #Search for files in relation to fusion
754
lf_cai<-list.files(pattern=paste("*CAI_tmax_pred.*",date_selected,"*.08072012_365d_GAM_CAI2.rst$",sep="")) #Search for files in relation to fusion
755
lf_gam2<-list.files(pattern=paste("^GAM.*",date_selected,"_08122012_365d_GAM_fusion6.rst$",sep=""))
756

  
757
d_gwr_rast<-stack(lf_gwr)
758
d_krig_rast<-stack(lf_krig)
759
d_gam_rast<-stack(lf_gam)
760
d_fus_rast<-stack(lf_fus)
761
d_cai_rast<-stack(lf_cai)
762
d_gam2_rast<-stack(lf_gam2)
763

  
764
list_day_method<-list(d_gwr_rast,d_krig_rast,d_gam_rast,d_fus_rast,d_cai_rast,d_gam2_rast)
765
names(list_day_method)<-paste(c("gwr_","krig_","gam1_","fus_","cai_","gam2_"),date_selected,sep="")
766
out_prefix2<-"_10042012"
767

  
768
for (k in 1:length(list_day_method)){
769
  
770
  predictions<-list_day_method[[k]]
771
  projection(predictions)<-proj_str
772
  predictions<-mask(predictions,mask_land_NA)
773
  #layerNames(predictions)<-c(paste('fusion',date_selected,sep=" "),paste('CAI',date_list2[[k]],sep=" "))
774
  # use overall min and max values to generate an nice, consistent set
775
  # of breaks for both colors (50 values) and legend labels (5 values)
776
  s.range <- c(min(minValue(predictions)), max(maxValue(predictions)))
777
  s.range<-c(-12,18)
778
  col.breaks <- pretty(s.range, n=60)
779
  lab.breaks <- pretty(s.range, n=6)
780
  temp.colors <- colorRampPalette(c('blue', 'white', 'red'))
781
  
782
  # plot using these (common) breaks; note use of _reverse_ heat.colors,
783
  # making it so that larger numbers are redder
784
  X11(6,12)
785
  plot(predictions, breaks=col.breaks, col=temp.colors(length(col.breaks)-1),
786
       axis=list(at=lab.breaks, labels=lab.breaks))
787
  
788
  savePlot(paste(names(list_day_method)[[k]],"_method_prediction_",out_prefix2,".png", sep=""), type="png")
789
  dev.off()  
790
}
791

  
792
#### END OF THE SCRIPT

Also available in: Unified diff