
Revision 30d4022b

Added by Benoit Parmentier over 12 years ago

Initial modification by Benoit: adding testing points for 10 dates

View differences:

climate/research/oregon/interpolation/fusion_reg.R
1
#setup
2
mo=9
3
day=2
4
year=2010
5
datelabel=format(ISOdate(year,mo,day),"%b %d, %Y")
6
#
7
#  Step 1 - 10 year monthly averages
8
#
1
##################    CLIMATE INTERPOLATION FUSION METHOD   #######################################
2
############################ Merging LST and station data ##########################################
3
#This script interpolates tmax values using MODIS LST and GHCND station data for the             #
4
#interpolation area. It requires the text file of stations and a shape file of the study area.   #       
5
#Note that the projection for both GHCND and study area is lonlat WGS84.                         #
6
#AUTHOR: Brian McGill                                                                            #
7
#DATE: 06/09/2012                                                                                #
8
#PROJECT: NCEAS IPLANT: Environment and Organisms --TASK#363--                                   #
9
###################################################################################################
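####### Overview of the fusion logic (sketch added for clarity; see Steps 4-8 below) ##############
# bias_month(x)  = LST_month(x) - station monthly Tmax(x)          -> interpolated in Step 5      #
# delta_day(x)   = station daily Tmax(x) - station monthly Tmax(x) -> interpolated in Step 7      #
# Tmax_day(x)   ~= LST_month(x) + delta_day(x) - bias_month(x)     -> assembled in Step 8         #
# At a station the LST and monthly terms cancel, so the estimate returns the observed daily Tmax  #
# up to interpolation error; away from stations the monthly LST field supplies the spatial pattern#
####################################################################################################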
10

  
11
###Loading R library and packages                                                      
12
library(gtools)                                         # loading some useful tools 
13
library(mgcv)                                           # GAM package by Simon Wood
14
library(sp)                                             # Spatial package with class definitions by Bivand et al.
15
library(spdep)                              # Spatial package with methods and spatial stat. by Bivand et al.
16
library(rgdal)                               # GDAL wrapper for R, spatial utilities
17
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
18
library(fields)                              # Spatial Interpolation methods such as kriging, splines
9 19
library(raster)
10
old<-getwd()
11
setwd("c:/data/benoit/data_Oregon_stations_Brian_04242012")
12
l=list.files(pattern="mean_month.*rescaled.tif")
13
molst<-stack(l)
14
setwd(old)
15
molst=molst-273.16  #K->C
16
idx <- seq(as.Date('2010-01-15'), as.Date('2010-12-15'), 'month')
17
molst <- setZ(molst, idx)
18
layerNames(molst) <- month.abb
19
themolst<-raster(molst,mo)
20
plot(themolst)
21
#
22
# Step 2 - Weather station means across same days
23
#
24
# ??? which years & what quality flags???
25
#select ghcn.id, lat,lon, elev, month, avg(value/10) as "TMax", count(*) as "NumDays" from ghcn, stations where ghcn.id in (select id from stations where state=='OR') and ghcn.id==stations.id and value<>-9999 and year>=2000 and  element=='TMAX' group by stations.id, month;
26
#below table from above SQL query
27
dst<-read.csv('/data/benoit/data_oregon_stations_brian_04242012/station_means.csv',h=T)
28
modst=dst[dst$month==mo,]
20
### Parameters and arguments
21

  
22
infile1<- "ghcn_or_tmax_b_04142012_OR83M.shp"             #GHCN shapefile containing variables for modeling 2010                 
23
infile2<-"list_10_dates_04212012.txt"                     #List of 10 dates for the regression
24
#infile2<-"list_365_dates_04212012.txt"
25
infile3<-"LST_dates_var_names.txt"                        #LST dates name
26
infile4<-"models_interpolation_05142012.txt"              #Interpolation model names
27
infile5<-"mean_day244_rescaled.rst"                       #Raster or grid for the locations of predictions
28

  
29
#path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations"
30
path<-"M:/Data/IPLANT_project/data_Oregon_stations"   #Locations on Atlas
31

  
32
#Station location of the study area
33
stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE)
34
#GHCN Database for 1980-2010 for study area (OR) 
35
data3<-read.table(paste(path,"/","ghcn_data_TMAXy1980_2010_OR_0602012.txt",sep=""),sep=",", header=TRUE)
36

  
37
prop<-0.3                                                                           #Proportion of the data withheld for testing/validation
38
seed_number<- 100                                                                   #Seed number for random sampling
39
out_prefix<-"_06142012_10d_fusion2"                                                   #User defined output prefix
40
setwd(path)
41
############ START OF THE SCRIPT ##################
42

  
29 43
#
30
# Step 3 - get LST at stations
44
### Step 0/Step 6 in Brian's code...preparing year 2010 data for modeling 
31 45
#
32
sta_lola=modst[,c("lon","lat")]
33
library(rgdal)
34
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
35
lookup<-function(r,lat,lon) {
36
	xy<-project(cbind(lon,lat),proj_str);
37
	cidx<-cellFromXY(r,xy);
38
	return(r[cidx])
46

  
47

  
48
###Reading the station data and setting up for models' comparison
49
filename<-sub(".shp","",infile1)             #Removing the extension from file.
50
ghcn<-readOGR(".", filename)                 #reading shapefile 
51

  
52
CRS<-proj4string(ghcn)                       #Storing projection information (ellipsoid, datum,etc.)
53

  
54
mean_LST<- readGDAL(infile5)                 #Reading the whole raster in memory. This provides a grid for kriging
55
proj4string(mean_LST)<-CRS                   #Assigning coordinate information to prediction grid.
56

  
57
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe
58
ghcn = transform(ghcn,Eastness = sin(ASPECT))  #adding variable to the dataframe.
59
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe
60
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT))  #adding variable to the dataframe.
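#Note: cos()/sin() operate on radians; if ASPECT and slope are stored in degrees in the shapefile,
#a conversion such as ASPECT*pi/180 would be needed (assumption - the units are not documented here).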
61

  
62
set.seed(seed_number)                        #Using a seed number allows results based on random numbers to be compared...
63

  
64
dates <-readLines(paste(path,"/",infile2, sep=""))
65
LST_dates <-readLines(paste(path,"/",infile3, sep=""))
66
models <-readLines(paste(path,"/",infile4, sep=""))
67

  
68
#Model assessment: specific diagnostic/metrics for GAM
69
# results_AIC<- matrix(1,length(dates),length(models)+3)  
70
# results_GCV<- matrix(1,length(dates),length(models)+3)
71
# results_DEV<- matrix(1,length(dates),length(models)+3)
72
# results_RMSE_f<- matrix(1,length(dates),length(models)+3)
73

  
74
#Model assessment: general diagnostic/metrics 
75
results_RMSE <- matrix(1,length(dates),length(models)+3)
76
results_MAE <- matrix(1,length(dates),length(models)+3)
77
results_ME <- matrix(1,length(dates),length(models)+3)
78
results_R2 <- matrix(1,length(dates),length(models)+3)       #Coef. of determination for the validation dataset
79
results_RMSE_f<- matrix(1,length(dates),length(models)+3)    #RMSE fit, RMSE for the training dataset
80
results_RMSE_f_kr<- matrix(1,length(dates),length(models)+3)
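#The matrices above also reserve space for MAE and ME, but only the RMSE column is filled in this
#revision; possible definitions (sketch, mirroring the RMSE function defined further below):
MAE <- function(x,y) {return(mean(abs(x-y)))}    #mean absolute error
ME  <- function(x,y) {return(mean(x-y))}         #mean error (bias of the predictions)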
81

  
82
# #Tracking relationship between LST AND LC
83
# cor_LST_LC1<-matrix(1,10,1)      #correlation LST-LC1
84
# cor_LST_LC3<-matrix(1,10,1)      #correlation LST-LC3
85
# cor_LST_tmax<-matrix(1,10,1)     #correlation LST-tmax
86

  
87
#Screening for bad values
88
ghcn_all<-ghcn
89
ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
90
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
91
ghcn<-ghcn_test2
92
#coords<- ghcn[,c('x_OR83M','y_OR83M')]
93

  
94
month_var<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09", "mm_10", "mm_11", "mm_12")
95
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 or 365 data subsets, one per date
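#Each entry of infile2 is expected to be a YYYYMMDD string (e.g. "20100902"), matching the
#strptime(dates[i],"%Y%m%d") call used inside the loop below.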
96

  
97

  
98
#Start loop here...
99

  
100
## looping through the dates...this is the main part of the code
101
#i=1 #for debugging
102
#j=1 #for debugging
103
for(i in 1:length(dates)){            # start of the for loop #1
104
  
105
  date<-strptime(dates[i], "%Y%m%d")   # interpolation date being processed
106
  month<-strftime(date, "%m")          # current month of the date being processed
107
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
108
  
109
  ###Regression part 1: Creating a validation dataset by creating training and testing datasets
110
  
111
  mod_LST <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]  #Match interpolation date and monthly LST average
112
  ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod_LST)            #Add the variable LST to the subset dataset
113
  n<-nrow(ghcn.subsets[[i]])
114
  ns<-n-round(n*prop)   #Create a sample from the data frame with 70% of the rows
115
  nv<-n-ns              #create a sample for validation with prop of the rows
116
  ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
117
  ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
118
  data_s <- ghcn.subsets[[i]][ind.training, ]   #Training dataset currently used in the modeling
119
  data_v <- ghcn.subsets[[i]][ind.testing, ]    #Testing/validation dataset
120
  
121
  #i=1
122
  date_proc<-dates[i]
123
  date_proc<-strptime(dates[i], "%Y%m%d")   # interpolation date being processed
124
  mo<-as.integer(strftime(date_proc, "%m"))          # current month of the date being processed
125
  day<-as.integer(strftime(date_proc, "%d"))
126
  year<-as.integer(strftime(date_proc, "%Y"))
127
  
128
  #setup
129
  
130
  #mo=9           #Commented out by Benoit on June 14
131
  #day=2
132
  #year=2010
133
  datelabel=format(ISOdate(year,mo,day),"%b %d, %Y")
134
  
135
  #
136
  #  Step 1 - 10 year monthly averages
137
  #
138
  
139
  library(raster)
140
  #old<-getwd()
141
  #setwd("c:/data/benoit/data_Oregon_stations_Brian_04242012")
142
  #l=list.files(pattern="mean_month.*rescaled.tif")
143
  l=list.files(pattern="mean_month.*rescaled.rst")
144
  molst<-stack(l)
145
  #setwd(old)
146
  molst=molst-273.16  #K->C
147
  idx <- seq(as.Date('2010-01-15'), as.Date('2010-12-15'), 'month')
148
  molst <- setZ(molst, idx)
149
  layerNames(molst) <- month.abb
150
  themolst<-raster(molst,mo)
151
  plot(themolst)
152
  #
153
  # Step 2 - Weather station means across same days
154
  #
155
  # ??? which years & what quality flags???
156
  #select ghcn.id, lat,lon, elev, month, avg(value/10) as "TMax", count(*) as "NumDays" from ghcn, stations where ghcn.id in (select id from stations where state=='OR') and ghcn.id==stations.id and value<>-9999 and year>=2000 and  element=='TMAX' group by stations.id, month;
157
  #below table from above SQL query
158
  #dst<-read.csv('/data/benoit/data_oregon_stations_brian_04242012/station_means.csv',h=T)
159
  
160
  
161
  ##Added by Benoit ######
162
  date1<-ISOdate(data3$year,data3$month,data3$day) #Creating a date object from 3 separate columns
163
  date2<-as.POSIXlt(as.Date(date1))
164
  data3$date<-date2
165
  d<-subset(data3,year>=2000 & mflag=="0" )
166
  d1<-aggregate(value~station+month, data=d, mean)  #Calculate monthly mean for every station in OR
167
  id<-as.data.frame(unique(d1$station))
168
  
169
  dst<-merge(d1, stat_loc, by.x="station", by.y="STAT_ID")
170
  #This renames only one column of the data.frame
171
  names(dst)[3]<-c("TMax")
172
  dst$TMax<-dst$TMax/10
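  #GHCN-Daily stores temperature values in tenths of degrees C, hence the /10 here and for
  #dailyTmax further below ("stored as 1/10 degree C to allow integer storage").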
173
  #dstjan=dst[dst$month==9,]  #dst contains the monthly averages for tmax for every station over 2000-2010
174
  ##############
175
  
176
  modst=dst[dst$month==mo,]
177
  #
178
  # Step 3 - get LST at stations
179
  #
180
  sta_lola=modst[,c("lon","lat")]
181
  library(rgdal)
182
  proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
183
  lookup<-function(r,lat,lon) {
184
    xy<-project(cbind(lon,lat),proj_str);
185
    cidx<-cellFromXY(r,xy);
186
    return(r[cidx])
187
  }
188
  sta_tmax_from_lst=lookup(themolst,sta_lola$lat,sta_lola$lon)
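  #Equivalent sampling of the raster at the station locations with raster::extract (sketch):
  #  sta_tmax_check<-extract(themolst,project(as.matrix(sta_lola),proj_str))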
189
  #
190
  # Step 4 - bias at stations
191
  #
192
  sta_bias=sta_tmax_from_lst-modst$TMax;
193
  bias_xy=project(as.matrix(sta_lola),proj_str)
194
  # windows()
195
  plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax")
196
  abline(0,1)
197
  #
198
  # Step 5 - interpolate bias
199
  #
200
  # ?? include covariates like elev, distance to coast, cloud frequency, tree height
201
  library(fields)
202
  #windows()
203
  quilt.plot(sta_lola,sta_bias,main="Bias at stations",asp=1)
204
  US(add=T,col="magenta",lwd=2)
205
  #fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige
206
  fitbias<-Krig(bias_xy,sta_bias,theta=1e5) #use TPS or krige
207
  # windows()
208
  surface(fitbias,col=rev(terrain.colors(100)),asp=1,main="Interpolated bias")
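  #theta=1e5 is passed to the covariance model as its range; map units here are metres given the
  #Lambert projection defined above. The fit can be inspected at the stations (sketch):
  #  bias_hat<-predict(fitbias,bias_xy)
  #  summary(sta_bias-bias_hat)    #kriging residuals at the station locations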
209
  #US(add=T,col="magenta",lwd=2)
210
  #
211
  # Step 6 - return to daily station data & calculate delta=daily T-monthly T from stations
212
  #Commented out by Benoit 06/14
213
  # library(RSQLite)
214
  # m<-dbDriver("SQLite")
215
  # con<-dbConnect(m,dbname='c:/data/ghcn_tmintmax.db')
216
  # querystr=paste("select ghcn.id, value as 'dailyTmax' from ghcn where ghcn.id in (select id from stations where state=='OR') and value<>-9999",
217
  #    "and year==",year,"and month==",mo,"and day==",day,
218
  #                "and element=='TMAX' ")
219
  # rs<-dbSendQuery(con,querystr)
220
  # d<-fetch(rs,n=-1)
221
  # dbClearResult(rs)
222
  # dbDisconnect(con)
223
  # 
224
  # d$dailyTmax=d$dailyTmax/10 #stored as 1/10 degree C to allow integer storage
225
  # dmoday=merge(modst,d,by="id")
226
  ##########################Commented out by Benoit
227
  
228
  #added by Benoit 
229
  #d<-ghcn.subsets[[i]]
230
  d<-data_s
231
  names(d)[8]<-c("dailyTmax")
232
  d$dailyTmax=d$dailyTmax/10 #stored as 1/10 degree C to allow integer storage
233
  names(d)[1]<-c("id")
234
  names(modst)[1]<-c("id")
235
  dmoday=merge(modst,d,by="id")  #LOSING DATA HERE!!! from 162 to 146
236
  names(dmoday)[4]<-c("lat")
237
  names(dmoday)[5]<-c("lon")
238
  ###
239
  
240
  # windows()
241
  plot(dailyTmax~TMax,data=dmoday,xlab="Mo Tmax",ylab=paste("Daily for",datelabel),main="across stations in OR")
242
  #
243
  # Step 7 - interpolate delta across space
244
  #
245
  daily_sta_lola=dmoday[,c("lon","lat")] #could be same as before but why assume merge does this - assume not
246
  daily_sta_xy=project(as.matrix(daily_sta_lola),proj_str)
247
  daily_delta=dmoday$dailyTmax-dmoday$TMax
248
  #windows()
249
  quilt.plot(daily_sta_lola,daily_delta,asp=1,main="Station delta for Jan 15")
250
  US(add=T,col="magenta",lwd=2)
251
  #fitdelta<-Tps(daily_sta_xy,daily_delta) #use TPS or krige
252
  fitdelta<-Krig(daily_sta_xy,daily_delta,theta=1e5) #use TPS or krige
253
  # windows()
254
  surface(fitdelta,col=rev(terrain.colors(100)),asp=1,main="Interpolated delta")
255
  #US(add=T,col="magenta",lwd=2)
256
  #
257
  # Step 8 - assemble final answer - T = LST + delta(interpolated) - bias(interpolated)
258
  #
259
  bias_rast=interpolate(themolst,fitbias)
260
  plot(bias_rast,main="Raster bias")
261
  daily_delta_rast=interpolate(themolst,fitdelta)
262
  plot(daily_delta_rast,main="Raster Daily Delta")
263
  tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface??
264
  plot(tmax_predicted,main="Predicted daily")
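  #Quick sanity check of the assembly (sketch): at the merged stations the three terms should
  #roughly cancel back to the observed daily Tmax:
  #  chk_pred<-lookup(tmax_predicted,daily_sta_lola$lat,daily_sta_lola$lon)
  #  summary(chk_pred-dmoday$dailyTmax)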
265
  #
266
  # check
267
  #
268
  sta_pred=lookup(tmax_predicted,data_v$lat,data_v$lon)
269
  #sta_pred=lookup(tmax_predicted,daily_sta_lola$lat,daily_sta_lola$lon)
270
  RMSE<-function(x,y) {return(mean((x-y)^2)^0.5)}
271
  rmse=RMSE(sta_pred,dmoday$dailyTmax)
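  #Note: sta_pred is sampled at the validation points (data_v) while dmoday$dailyTmax comes from the
  #training merge, so the two vectors are not paired; an RMSE matched to the plot below would be
  #RMSE(sta_pred,tmax) with tmax<-data_v$tmax/10 (sketch).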
272
  #plot(sta_pred~dmoday$dailyTmax,xlab=paste("Actual daily for",datelabel),ylab="Pred daily",main=paste("RMSE=",rmse))
273
  tmax<-data_v$tmax/10
274
  plot(sta_pred~tmax,xlab=paste("Actual daily for",datelabel),ylab="Pred daily",main=paste("RMSE=",rmse))
275
  abline(0,1)
276
  resid=sta_pred-dmoday$dailyTmax
277
  quilt.plot(daily_sta_lola,resid)
278
  
279
  ### END OF BRIAN's code
280
  
281
  ### Added by benoit
282
  j=1
283
  results_RMSE[i,1]<- dates[i]    #storing the interpolation dates in the first column
284
  results_RMSE[i,2]<- ns          #number of stations used in the training stage
285
  results_RMSE[i,3]<- "RMSE"
286
  results_RMSE[i,j+3]<- rmse  #Storing RMSE for the model j
287
  
288
  # end of the for loop1
289
  
39 290
}
40
sta_tmax_from_lst=lookup(themolst,sta_lola$lat,sta_lola$lon)
41
#
42
# Step 4 - bias at stations
43
#
44
sta_bias=sta_tmax_from_lst-modst$TMax;
45
bias_xy=project(as.matrix(sta_lola),proj_str)
46
# windows()
47
plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax")
48
abline(0,1)
49
#
50
# Step 5 - interpolate bias
51
#
52
# ?? include covariates like elev, distance to coast, cloud frequency, tree height
53
library(fields)
54
#windows()
55
quilt.plot(sta_lola,sta_bias,main="Bias at stations",asp=1)
56
US(add=T,col="magenta",lwd=2)
57
#fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige
58
fitbias<-Krig(bias_xy,sta_bias,theta=1e5) #use TPS or krige
59
# windows()
60
surface(fitbias,col=rev(terrain.colors(100)),asp=1,main="Interpolated bias")
61
#US(add=T,col="magenta",lwd=2)
62
#
63
# Step 6 - return to daily station data & calculate delta=daily T-monthly T from stations
64
#
65
library(RSQLite)
66
m<-dbDriver("SQLite")
67
con<-dbConnect(m,dbname='c:/data/ghcn_tmintmax.db')
68
querystr=paste("select ghcn.id, value as 'dailyTmax' from ghcn where ghcn.id in (select id from stations where state=='OR') and value<>-9999",
69
   "and year==",year,"and month==",mo,"and day==",day,
70
               "and element=='TMAX' ")
71
rs<-dbSendQuery(con,querystr)
72
d<-fetch(rs,n=-1)
73
dbClearResult(rs)
74
dbDisconnect(con)
75
d$dailyTmax=d$dailyTmax/10 #stored as 1/10 degree C to allow integer storage
76
dmoday=merge(modst,d,by="id")
77
# windows()
78
plot(dailyTmax~TMax,data=dmoday,xlab="Mo Tmax",ylab=paste("Daily for",datelabel),main="across stations in OR")
79
#
80
# Step 7 - interpolate delta across space
81
#
82
daily_sta_lola=dmoday[,c("lon","lat")] #could be same as before but why assume merge does this - assume not
83
daily_sta_xy=project(as.matrix(daily_sta_lola),proj_str)
84
daily_delta=dmoday$dailyTmax-dmoday$TMax
85
#windows()
86
quilt.plot(daily_sta_lola,daily_delta,asp=1,main="Station delta for Jan 15")
87
US(add=T,col="magenta",lwd=2)
88
#fitdelta<-Tps(daily_sta_xy,daily_delta) #use TPS or krige
89
fitdelta<-Krig(daily_sta_xy,daily_delta,theta=1e5) #use TPS or krige
90
# windows()
91
surface(fitdelta,col=rev(terrain.colors(100)),asp=1,main="Interpolated delta")
92
#US(add=T,col="magenta",lwd=2)
93
#
94
# Step 8 - assemble final answer - T = LST + delta(interpolated) - bias(interpolated)
95
#
96
bias_rast=interpolate(themolst,fitbias)
97
plot(bias_rast,main="Raster bias")
98
daily_delta_rast=interpolate(themolst,fitdelta)
99
plot(daily_delta_rast,main="Raster Daily Delta")
100
tmax_predicted=themolst+daily_delta_rast-bias_rast
101
plot(tmax_predicted,main="Predicted daily")
102
#
103
# check
104
#
105
sta_pred=lookup(tmax_predicted,daily_sta_lola$lat,daily_sta_lola$lon)
106
RMSE<-function(x,y) {return(mean((x-y)^2)^0.5)}
107
rmse=RMSE(sta_pred,dmoday$dailyTmax)
108
plot(sta_pred~dmoday$dailyTmax,xlab=paste("Actual daily for",datelabel),ylab="Pred daily",main=paste("RMSE=",rmse))
109
abline(0,1)
110
resid=sta_pred-dmoday$dailyTmax
111
quilt.plot(daily_sta_lola,resid)
291

  
292

  
293
## Plotting and saving diagnostic measures
294

  
295
#Specific diagnostic measures related to the testing datasets
296

  
297
results_table_RMSE<-as.data.frame(results_RMSE)
298
results_table_MAE<-as.data.frame(results_MAE)
299
results_table_ME<-as.data.frame(results_ME)
300
results_table_R2<-as.data.frame(results_R2)
301

  
302
cname<-c("dates","ns","metric","mod1", "mod2","mod3", "mod4", "mod5", "mod6", "mod7","mod8")
303
colnames(results_table_RMSE)<-cname
304

  
305
#tb_diagnostic1<-rbind(results_table_RMSE,results_table_MAE, results_table_ME, results_table_R2)   #
306
tb_diagnostic1<-results_table_RMSE
307

  
308
write.table(tb_diagnostic1, file= paste(path,"/","results_fusion_Assessment_measure1",out_prefix,".txt",sep=""), sep=",")
309

  
310
#### END OF SCRIPT
