Project

General

Profile

« Previous | Next » 

Revision b8e2ba85

Added by Benoit Parmentier over 11 years ago

Kriging tmax OR raster predictions

View differences:

climate/research/oregon/interpolation/kriging_prediction_reg.R
1
##################    Interpolation of Tmax Using Kriging  #######################################
2
########################### Kriging and Cokriging   ###############################################
3
#This script interpolates station values for the Oregon case study using Kriging and Cokring.    #
4
#The script uses LST monthly averages as input variables and  loads the station data             # 
5
#from a shape file with projection information.                                                  #
6
#Note that this program:                                                                         #
7
#1)assumes that the shape file is in the current working.                                        # 
8
#2)relevant variables were extracted from raster images before performing the regressions        #
9
#  and stored shapefile                                                                          #
10
#This scripts predicts tmax using autokrige, gstat and LST derived from MOD11A1.                 #
11
#also included and assessed using the RMSE,MAE,ME and R2 from validation dataset.                #
12
#TThe dates must be provided as a textfile.                                                      #
13
#AUTHOR: Benoit Parmentier                                                                       #
14
#DATE: 07/15/2012                                                                                #
15
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#364--                                  #
16
##################################################################################################
17

  
18
###Loading R library and packages                                                      
19
#library(gtools)                                         # loading some useful tools 
20
library(mgcv)                                           # GAM package by Wood 2006 (version 2012)
21
library(sp)                                             # Spatial pacakge with class definition by Bivand et al. 2008
22
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al. 2012
23
library(rgdal)                                          # GDAL wrapper for R, spatial utilities (Keitt et al. 2012)
24
library(gstat)                                          # Kriging and co-kriging by Pebesma et al. 2004
25
library(automap)                                        # Automated Kriging based on gstat module by Hiemstra et al. 2008
26
library(spgwr)
27
library(gpclib)
28
library(maptools)
29
library(graphics)
30
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
31
library(raster)
32

  
33
###Parameters and arguments
34

  
35
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp"             #GHCN shapefile containing variables for modeling 2010                 
36
#infile2<-"list_10_dates_04212012.txt"                     #List of 10 dates for the regression
37
infile2<-"list_365_dates_04212012.txt"
38
infile3<-"LST_dates_var_names.txt"                        #LST dates name
39
infile4<-"models_interpolation_05142012.txt"              #Interpolation model names
40
infile5<-"mean_day244_rescaled.rst"                       
41
inlistf<-"list_files_05032012.txt"                        #Stack of images containing the Covariates
42

  
43
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_07152012"     #Jupiter LOCATION on Atlas for kriging
44
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                                 #Jupiter Location on XANDERS
45

  
46
setwd(path) 
47
prop<-0.3                                                                       #Proportion of testing retained for validation   
48
seed_number<- 100                                                               #Seed number for random sampling
49
models<-7                                                                       #Number of kriging model
50
out_prefix<-"_07202012_auto_krig1"                                              #User defined output prefix
51

  
52
source("krigingUK_function_07192012b.R")
53

  
54
###STEP 1 DATA PREPARATION AND PROCESSING#####
55

  
56
###Reading the station data and setting up for models' comparison
57
filename<-sub(".shp","",infile1)             #Removing the extension from file.
58
ghcn<-readOGR(".", filename)                 #reading shapefile 
59

  
60
CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
61

  
62
mean_LST<- readGDAL(infile5)                 #Reading the whole raster in memory. This provides a grid for kriging
63
proj4string(mean_LST)<-CRS                   #Assigning coordinate information to prediction grid.
64

  
65
##Extracting the variables values from the raster files                                             
66

  
67
lines<-read.table(paste(path,"/",inlistf,sep=""), sep=" ")                  #Column 1 contains the names of raster files
68
inlistvar<-lines[,1]
69
inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
70
covar_names<-as.character(lines[,2])                                         #Column two contains short names for covaraites
71

  
72
s_raster<- stack(inlistvar)                                                  #Creating a stack of raster images from the list of variables.
73
layerNames(s_raster)<-covar_names                                            #Assigning names to the raster layers
74
projection(s_raster)<-CRS
75

  
76
#stat_val<- extract(s_raster, ghcn3)                                          #Extracting values from the raster stack for every point location in coords data frame.
77
pos<-match("ASPECT",layerNames(s_raster)) #Find column with name "value"
78
r1<-raster(s_raster,layer=pos)             #Select layer from stack
79
pos<-match("slope",layerNames(s_raster)) #Find column with name "value"
80
r2<-raster(s_raster,layer=pos)             #Select layer from stack
81
N<-cos(r1*pi/180)
82
E<-sin(r1*pi/180)
83
Nw<-sin(r2*pi/180)*cos(r1*pi/180)   #Adding a variable to the dataframe
84
Ew<-sin(r2*pi/180)*sin(r1*pi/180)   #Adding variable to the dataframe.
85
#r<-stack(N,E,Nw,Ew)
86
#rnames<-c("Northness","Eastness","Northness_w","Eastness_w")
87
#layerNames(r)<-rnames
88
#s_raster<-addLayer(s_raster, r)
89
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame
90
xy<-coordinates(r1)  #get x and y projected coordinates...
91
xy_latlon<-project(xy, CRS, inv=TRUE) # find lat long for projected coordinats (or pixels...)
92
tmp<-raster(xy_latlon) #, ncol=ncol(r1), nrow=nrow(r1))
93
ncol(tmp)<-ncol(r1)
94
nrow(tmp)<-nrow(r1)
95
extent(tmp)<-extent(r1)
96
projection(tmp)<-CRS
97
tmp2<-tmp
98
values(tmp)<-xy_latlon[,1]
99
values(tmp2)<-xy_latlon[,2]
100

  
101
r<-stack(N,E,Nw,Ew,tmp,tmp2)
102
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat")
103
layerNames(r)<-rnames
104
s_raster<-addLayer(s_raster, r)
105
rm(r)
106
### adding var
107
ghcn = transform(ghcn,Northness = cos(ASPECT*pi/180)) #Adding a variable to the dataframe
108
ghcn = transform(ghcn,Eastness = sin(ASPECT*pi/180))  #adding variable to the dataframe.
109
ghcn = transform(ghcn,Northness_w = sin(slope*pi/180)*cos(ASPECT*pi/180)) #Adding a variable to the dataframe
110
ghcn = transform(ghcn,Eastness_w = sin(slope*pi/180)*sin(ASPECT*pi/180))  #adding variable to the dataframe.
111

  
112
#Remove NA for LC and CANHEIGHT
113
ghcn$LC1[is.na(ghcn$LC1)]<-0
114
ghcn$LC3[is.na(ghcn$LC3)]<-0
115
ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0
116

  
117
dates <-readLines(paste(path,"/",infile2, sep=""))
118
LST_dates <-readLines(paste(path,"/",infile3, sep=""))
119
#models <-readLines(paste(path,"/",infile4, sep=""))
120

  
121
#Model assessment: specific diagnostic/metrics for GAM
122
results_AIC<- matrix(1,1,models+3)  
123
results_GCV<- matrix(1,1,models+3)
124
results_DEV<- matrix(1,1,models+3)
125
#results_RMSE_f<- matrix(1,length(models)+3)
126

  
127
#Model assessment: general diagnostic/metrics 
128
results_RMSE <- matrix(1,1,models+3)
129
results_MAE <- matrix(1,1,models+3)
130
results_ME <- matrix(1,1,models+3)       #There are 8+1 models
131
results_R2 <- matrix(1,1,models+3)       #Coef. of determination for the validation dataset
132

  
133
results_RMSE_f<- matrix(1,1,models+3)    #RMSE fit, RMSE for the training dataset
134
results_MAE_f <- matrix(1,1,models+3)
135
#Screening for bad values: value is tmax in this case
136
#ghcn$value<-as.numeric(ghcn$value)
137
ghcn_all<-ghcn
138
ghcn_test<-subset(ghcn,ghcn$value>-150 & ghcn$value<400)
139
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
140
ghcn<-ghcn_test2
141
#coords<- ghcn[,c('x_OR83M','y_OR83M')]
142

  
143
###CREATING SUBSETS BY INPUT DATES AND SAMPLING
144
set.seed(seed_number)                        #Using a seed number allow results based on random number to be compared...
145
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, ghcn$date==as.numeric(d)))   #Producing a list of data frame, one data frame per date.
146
sampling<-vector("list",length(dates))
147

  
148
for(i in 1:length(dates)){
149
  n<-nrow(ghcn.subsets[[i]])
150
  ns<-n-round(n*prop)   #Create a sample from the data frame with 70% of the rows
151
  nv<-n-ns              #create a sample for validation with prop of the rows
152
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
153
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
154
  sampling[[i]]<-ind.training
155
}
156

  
157

  
158
kriging_mod<-mclapply(1:length(dates), runKriging, mc.cores = 8)#This is the end bracket from mclapply(...) statement
159

  
160
#for(i in 1:length(dates)){            # start of the for loop #1
161
#i<-3                                           #Date 10 is used to test kriging
162

  
163
## Plotting and saving diagnostic measures
164
accuracy_tab_fun<-function(i,f_list){
165
  tb<-f_list[[i]][[3]]
166
  return(tb)
167
}
168

  
169
tb<-kriging_mod[[1]][[3]][0,]  #empty data frame with metric table structure that can be used in rbinding...
170
tb_tmp<-kriging_mod #copy
171

  
172
for (i in 1:length(tb_tmp)){
173
  tmp<-tb_tmp[[i]][[3]]
174
  tb<-rbind(tb,tmp)
175
}
176
rm(tb_tmp)
177

  
178
for(i in 4:(models+3)){            # start of the for loop #1
179
  tb[,i]<-as.numeric(as.character(tb[,i]))  
180
}
181

  
182
tb_RMSE<-subset(tb, metric=="RMSE")
183
tb_MAE<-subset(tb,metric=="MAE")
184
tb_ME<-subset(tb,metric=="ME")
185
tb_R2<-subset(tb,metric=="R2")
186
tb_RMSE_f<-subset(tb, metric=="RMSE_f")
187
tb_MAE_f<-subset(tb,metric=="MAE_f")
188

  
189
tb_diagnostic1<-rbind(tb_RMSE,tb_MAE,tb_ME,tb_R2)
190
#tb_diagnostic2<-rbind(tb_,tb_MAE,tb_ME,tb_R2)
191

  
192
mean_RMSE<-sapply(tb_RMSE[,4:(models+3)],mean)
193
mean_MAE<-sapply(tb_MAE[,4:(models+3)],mean)
194
mean_R2<-sapply(tb_R2[,4:(models+3)],mean)
195
mean_ME<-sapply(tb_ME[,4:(models+3)],mean)
196
mean_MAE_f<-sapply(tb_MAE[,4:(models+3)],mean)
197
mean_RMSE_f<-sapply(tb_RMSE_f[,4:(models+3)],mean)
198

  
199
write.table(tb_diagnostic1, file= paste(path,"/","results2_fusion_Assessment_measure1",out_prefix,".txt",sep=""), sep=",")
200
write.table(tb, file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".txt",sep=""), sep=",")
201
save(kriging_mod,file= paste(path,"/","results2_fusion_Assessment_measure_all",out_prefix,".RData",sep=""))
202

  
203
#### END OF SCRIPT #####

Also available in: Unified diff