Revision 1c15fc49
Added by Adam M. Wilson almost 11 years ago
climate/research/oregon/interpolation/AAG2013_conference_Oregon_interpolation.R | ||
---|---|---|
1 |
#################################### INTERPOLATION TEMPERATURES ####################################### |
|
2 |
############################ AAG 2013 and OREGON transitition script ####################################### |
|
3 |
#This script reads information concerning the Oregon case study to adapt data for the revised |
|
4 |
# interpolation code. |
|
5 |
#Figures and data for the AAG conference are also produced. |
|
6 |
#AUTHOR: Benoit Parmentier # |
|
7 |
#DATE: 04/08/2013 |
|
8 |
#Version: 1 |
|
9 |
#PROJECT: Environmental Layers project # |
|
10 |
################################################################################################# |
|
11 |
|
|
12 |
###Loading R library and packages |
|
13 |
library(gtools) # loading some useful tools |
|
14 |
library(mgcv) # GAM package by Simon Wood |
|
15 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
16 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
17 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
18 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
19 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
20 |
library(raster) # Hijmans et al. package for raster processing |
|
21 |
library(gdata) # various tools with xls reading |
|
22 |
library(rasterVis) |
|
23 |
library(parallel) |
|
24 |
library(maptools) |
|
25 |
library(maps) |
|
26 |
library(reshape) |
|
27 |
library(plotrix) |
|
28 |
|
|
29 |
#### UNCTION USED IN SCRIPT |
|
30 |
|
|
31 |
create_modis_tiles_region<-function(tiles,modis_sp){ |
|
32 |
#This functions returns a subset of tiles from the modis grdi. |
|
33 |
#Arguments: modies grid tile,list of tiles |
|
34 |
#Output: spatial grid data frame of the subset of tiles |
|
35 |
|
|
36 |
h_list<-lapply(tiles,substr,start=2,stop=3) #passing multiple arguments |
|
37 |
v_list<-lapply(tiles,substr,start=5,stop=6) #passing multiple arguments |
|
38 |
selected_tiles<-subset(subset(modis_sp,subset = h %in% as.numeric (h_list) ), |
|
39 |
subset = v %in% as.numeric(v_list)) |
|
40 |
return(selected_tiles) |
|
41 |
} |
|
42 |
|
|
43 |
### Parameters and argument |
|
44 |
|
|
45 |
lc_path<-"/home/layers/data/land-cover/lc-consensus-global" |
|
46 |
infile_modis_grid<-"modis_sinusoidal_grid_world.shp" |
|
47 |
infile_elev<-"/home/layers/data/terrain/dem-cgiar-srtm-1km-tif/srtm_1km.tif" #this is the global file: replace later with the input produced by the DEM team |
|
48 |
infile_canheight<-"Simard_Pinto_3DGlobalVeg_JGR.tif" #Canopy height |
|
49 |
#list_tiles_modis = c('h11v08','h11v07','h12v07','h12v08','h10v07','h10v08') #tile for Venezuel and surrounding area |
|
50 |
list_tiles_modis = c('h08v04','h09v04') |
|
51 |
|
|
52 |
#infile_reg_outline="" #input region outline defined by polygon |
|
53 |
infile_reg_outline= "OR83M_state_outline.shp" |
|
54 |
infile_countries_sinusoidal<-"countries_sinusoidal_world.shp" |
|
55 |
|
|
56 |
#CRS_interp<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"; |
|
57 |
CRS_interp <-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
58 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
|
59 |
out_region_name<-"Oregon_region" #generated on the fly |
|
60 |
out_prefix<-"_OR_04052013" |
|
61 |
ref_rast_name<- "mean_day244_rescaled.rst" #This is the shape file of outline of the study area. #local raster name defining resolution, exent, local projection--. set on the fly?? |
|
62 |
infile_covariates<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script and used in stage 3 and stage 4 |
|
63 |
|
|
64 |
#The names of covariates can be changed...these names should be output/input from covar script!!! |
|
65 |
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC") |
|
66 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12") |
|
67 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
|
68 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
|
69 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
70 |
covar_names<-c(rnames,lc_names,lst_names) |
|
71 |
infile2<-"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station |
|
72 |
|
|
73 |
in_path<- "/home/parmentier/Data/IPLANT_project/Oregon_interpolation/Oregon_covariates" |
|
74 |
|
|
75 |
#c("Oregon", c("h08v04","h09v04","h08v05","h09v05")) |
|
76 |
study_area_list_tiles <- vector("list",6) |
|
77 |
study_area_list_tiles[[1]] <-list("Oregon", c("h08v04","h09v04")) |
|
78 |
study_area_list_tiles[[2]] <-list("Venezuela",c("h10v07", "h10v08", "h11v7", "h11v08", "h12v07", "h12v08")) |
|
79 |
study_area_list_tiles[[3]] <-list("Norway",c("h18v02","h18v03", "h19v02", "h19v03")) |
|
80 |
study_area_list_tiles[[4]] <-list("East_Africa",c("h20v08", "h21v08", "h22v08", "h20v09", "h21v09", "h22v09", "h21v10")) |
|
81 |
study_area_list_tiles[[5]] <-list("South_Africa",c("h19v11", "h20v11", "h19v12", "h20v12")) |
|
82 |
study_area_list_tiles[[6]] <-list("Queensland",c("h31v10", "h31v10", "h32v10", "h30v11", "h31v11")) |
|
83 |
|
|
84 |
####################################################################################### |
|
85 |
########################### BEGIN SCRIPT ###################################### |
|
86 |
|
|
87 |
|
|
88 |
setwd(in_path) |
|
89 |
|
|
90 |
|
|
91 |
####### PART I: Prepare data for figures and for Oregon interpolation ########## |
|
92 |
|
|
93 |
### Read in Venezuela covariate stack |
|
94 |
|
|
95 |
s_raster_Ven<-brick(infile_covariates) #read brick |
|
96 |
names(s_raster_Ven)<-covar_names #assign names |
|
97 |
mm_01_Ven<-subset(s_raster_ven,"mm_01") #select LST January month average |
|
98 |
|
|
99 |
### Read in world map to show stuy areas. |
|
100 |
|
|
101 |
world_sp <- getData("countries") # different resolutions available |
|
102 |
outfile2<-file.path(in_path,paste("word_countries.shp",sep="")) #Name of the file |
|
103 |
writeOGR(world_sp,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
104 |
|
|
105 |
### Read in sinusoidal grid and world countries |
|
106 |
filename<-sub(".shp","",infile_modis_grid) #Removing the extension from file. |
|
107 |
modis_grid<-readOGR(".", filename) #Reading shape file using rgdal library |
|
108 |
|
|
109 |
### Create list ofALL STUDY AREAS/TEST SITES |
|
110 |
|
|
111 |
## Create list of study area regions: |
|
112 |
list_tiles<-lapply(1:length(study_area_list_tiles),function(k) study_area_list_tiles[[k]][[2]]) |
|
113 |
modis_reg_outlines<-lapply(list_tiles,FUN=create_modis_tiles_region,modis_sp=modis_grid) #problem...this does not |
|
114 |
|
|
115 |
#writeOGR(modis_reg_outline,dsn= ".",layer= paste("outline",out_region_name,"_",out_suffix,sep=""), |
|
116 |
# driver="ESRI Shapefile",overwrite_layer="TRUE") |
|
117 |
|
|
118 |
#################################################### |
|
119 |
#Read in GHCND database station locations |
|
120 |
|
|
121 |
dat_stat <- read.fwf(infile2, |
|
122 |
widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
123 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
124 |
coords<- dat_stat[,c('lon','lat')] |
|
125 |
coordinates(dat_stat)<-coords |
|
126 |
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection |
|
127 |
#Save shapefile for later |
|
128 |
outfile1<-file.path(in_path,paste("ghcnd_stations.shp",sep="")) #Name of the file |
|
129 |
writeOGR(dat_stat,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
130 |
|
|
131 |
interp_area <- readOGR(dsn=in_path,sub(".shp","",infile_reg_outline)) |
|
132 |
interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84) # Project from WGS84 to new coord. system |
|
133 |
|
|
134 |
# Spatial query to find relevant stations |
|
135 |
|
|
136 |
inside <- !is.na(over(dat_stat, as(interp_area_WGS84, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
137 |
stat_reg_OR<-dat_stat[inside,] #Selecting stations contained in the current interpolation area |
|
138 |
|
|
139 |
stat_reg_OR <-spTransform(stat_reg_OR,CRS(proj4string(interp_area))) # Project from WGS84 to new coord. system |
|
140 |
|
|
141 |
#Now Venezuela |
|
142 |
interp_area_Ven_WGS84 <-spTransform(modis_reg_outlines[[2]],CRS_locs_WGS84) # Project from WGS84 to new coord. system |
|
143 |
inside <- !is.na(over(dat_stat, as(interp_area_Ven_WGS84, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
144 |
stat_reg_Ven <-dat_stat[inside,] #Selecting stations contained in the current interpolation area |
|
145 |
|
|
146 |
## Get the data in the local projection |
|
147 |
stat_reg_Ven <-spTransform(stat_reg_Ven,CRS(proj4string(modis_reg_outlines[[2]]))) # Project from WGS84 to new coord. system |
|
148 |
|
|
149 |
### READ IN COVARIATES FILES FOR OREGON AND MAKE IT A MULTI-BAND FILE |
|
150 |
|
|
151 |
inlistf<-"list_files_covariates_04032013.txt" |
|
152 |
lines<-read.table(paste(inlistf,sep=""), sep=" ") #Column 1 contains the names of raster files |
|
153 |
inlistvar<-lines[,1] |
|
154 |
inlistvar<-as.character(inlistvar) |
|
155 |
covar_names_OR<-as.character(lines[,2]) #Column two contains short names for covaraites |
|
156 |
|
|
157 |
s_raster_OR<- stack(inlistvar) #Creating a stack of raster images from the list of variables. |
|
158 |
layerNames(s_raster_OR)<-covar_names_OR #Assigning names to the raster layers |
|
159 |
|
|
160 |
aspect<-subset(s_raster_OR,"aspect") #Select layer from stack |
|
161 |
slope<-subset(s_raster_OR,"slope") #Select layer from stack |
|
162 |
distoc<-subset(s_raster_OR,"DISTOC") |
|
163 |
|
|
164 |
N<-cos(aspect*pi/180) |
|
165 |
E<-sin(aspect*pi/180) |
|
166 |
Nw<-sin(slope*pi/180)*cos(aspect*pi/180) #Adding a variable to the dataframe |
|
167 |
Ew<-sin(slope*pi/180)*sin(aspect*pi/180) #Adding variable to the dataframe. |
|
168 |
|
|
169 |
xy <-coordinates(slope) #get x and y projected coordinates... |
|
170 |
xy_latlon<-project(xy, CRS_interp, inv=TRUE) # find lat long for projected coordinats (or pixels...) |
|
171 |
|
|
172 |
x <-init(slope,v="x") |
|
173 |
y <-init(slope,v="y") |
|
174 |
lon<-x |
|
175 |
lat<-y |
|
176 |
lon <-setValues(lon,xy_latlon[,1]) #longitude for every pixel in the processing tile/region |
|
177 |
lat <-setValues(lat,xy_latlon[,2]) #latitude for every pixel in the processing tile/region |
|
178 |
CANHEIGHT<-subset(s_raster_OR,"CANHEIGHT") |
|
179 |
CANHEIGHT[is.na(CANHEIGHT)]<-0 |
|
180 |
|
|
181 |
elev<-subset(s_raster_OR,"elev") |
|
182 |
elev[elev==-9999] <- NA |
|
183 |
|
|
184 |
r<-stack(x,y,lon,lat,N,E,Nw,Ew,elev,slope,aspect,CANHEIGHT,distoc) |
|
185 |
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC") |
|
186 |
s_raster<-r |
|
187 |
#Add landcover layers |
|
188 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10") |
|
189 |
lc_reg_s<-subset(s_raster_OR,lc_names) |
|
190 |
#Should be a function...ok for now?? |
|
191 |
test<-vector("list",nlayers(lc_reg_s)) |
|
192 |
for (k in 1:nlayers(lc_reg_s)){ |
|
193 |
LC<-raster(lc_reg_s,layer=k) #Select layer from stack |
|
194 |
LC[is.na(LC)]<-0 |
|
195 |
test[[k]]<-LC |
|
196 |
} |
|
197 |
#tmp_df<-freq(lc_reg_s,merge=TRUE) #check to see if it worked |
|
198 |
#head(tmp_df) |
|
199 |
lc_reg_s<-stack(test) |
|
200 |
s_raster<-addLayer(s_raster, lc_reg_s) |
|
201 |
|
|
202 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
|
203 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
|
204 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
205 |
lst_mm_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12") |
|
206 |
lst_mm_s<-subset(s_raster_OR,lst_mm_names) |
|
207 |
lst_mm_s <- lst_mm_s - 273.16 |
|
208 |
lst_nobs_names<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
|
209 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
210 |
lst_nobs_s<-subset(s_raster_OR,lst_nobs_names) |
|
211 |
|
|
212 |
s_raster<-addLayer(s_raster, lst_mm_s) |
|
213 |
s_raster<-addLayer(s_raster, lst_nobs_s) |
|
214 |
covar_names<-c(rnames,lc_names,lst_names) |
|
215 |
names(s_raster)<-covar_names |
|
216 |
|
|
217 |
# create mask!!! Should combine with mask of elev |
|
218 |
LC10<-subset(s_raster,"LC10") |
|
219 |
|
|
220 |
LC10_mask<-LC10 |
|
221 |
LC10_mask[is.na(LC10_mask)]<- 0 |
|
222 |
LC10_mask[LC10==100]<- NA |
|
223 |
LC10_mask[LC10_mask<100]<- 1 |
|
224 |
LC10_mask[is.na(LC10_mask)]<- 0 |
|
225 |
mask_land_NA<-LC10_mask |
|
226 |
mask_land_NA[mask_land_NA==0]<-NA |
|
227 |
|
|
228 |
|
|
229 |
##### SAVE AS MULTIBAND...make this a function... |
|
230 |
|
|
231 |
#list of files... |
|
232 |
file_format<-".tif" |
|
233 |
NA_val<- -9999 |
|
234 |
band_order<- "BSQ" |
|
235 |
var<-"TMAX" |
|
236 |
data_name<-paste("covariates_",out_region_name,"_",sep="") |
|
237 |
raster_name<-paste(data_name,var,"_",out_prefix,file_format, sep="") |
|
238 |
#writeRaster(s_raster, filename=raster_name,NAflag=-999,bylayer=FALSE,bandorder="BSQ",overwrite=TRUE) #Writing the data in a raster file format... |
|
239 |
#if mask |
|
240 |
#stat_val<- extract(s_raster, ghcn3) #Extracting values from the raster stack for every point location in coords data frame. |
|
241 |
s_raster_m<-mask(s_raster,mask_land_NA,filename=raster_name, |
|
242 |
overwrite=TRUE,NAflag=NA_val,bylayer=FALSE,bandorder=band_order) |
|
243 |
#if no mask |
|
244 |
#writeRaster(s_raster, filename=raster_name,NAflag=-999,bylayer=FALSE,bandorder="BSQ",overwrite=TRUE) #Writing the data in a raster file format... |
|
245 |
|
|
246 |
############# PART II: PRODUCE FIGURES ####### |
|
247 |
|
|
248 |
### CREATE FIGURE TEST SITES |
|
249 |
|
|
250 |
dat_stat_sinusoidal <- spTransform(dat_stat,CRS(proj4string(modis_grid))) |
|
251 |
world_sinusoidal <- readOGR(dsn=".",sub(".shp","",infile_countries_sinusoidal)) |
|
252 |
|
|
253 |
png(paste("Study_area_modis_grid",out_prefix,".png",sep="")) |
|
254 |
plot(world_sinusoidal) |
|
255 |
plot(dat_stat_sinusoidal,cex=0.2,pch=16,col=c("blue"),add=TRUE) |
|
256 |
plot(modis_grid,add=TRUE) |
|
257 |
for (k in 1:length(modis_reg_outlines)){ |
|
258 |
plot(modis_reg_outlines[[k]],border=c("red"),lwd=2.5,add=TRUE) |
|
259 |
} |
|
260 |
title("Study area for temperature and precipitation predictions") |
|
261 |
#legend |
|
262 |
dev.off() |
|
263 |
|
|
264 |
### CREATE FIGURE MEAN DAILY AND MEAN MONTHLY: AAG 2013 #### |
|
265 |
|
|
266 |
lst_md<-raster(ref_rast_name) |
|
267 |
lst_mm_09<-subset(s_raster,"mm_09") |
|
268 |
plot(stack(lst_md,lst_mm_09)) |
|
269 |
|
|
270 |
lst_md<-raster("mean_day001_rescaled.rst") |
|
271 |
lst_md<- lst_md - 273.16 |
|
272 |
lst_mm_01<-subset(s_raster,"mm_01") |
|
273 |
|
|
274 |
png(filename=paste("Comparison_daily_monthly_mean_lst",out_prefix,".png",sep=""),width=960,height=480) |
|
275 |
par(mfrow=c(1,2)) |
|
276 |
plot(lst_md) |
|
277 |
plot(interp_area,add=TRUE) |
|
278 |
title("Mean January 1") |
|
279 |
plot(lst_mm_01) |
|
280 |
plot(interp_area,add=TRUE) |
|
281 |
title("Mean for monht of January") |
|
282 |
dev.off() |
|
283 |
|
|
284 |
### CREATE FIGURE NUMBER OF STATIONS PER SITE |
|
285 |
|
|
286 |
png(paste("stations_for_Venezuela_Oregon_areas",out_prefix,".png",sep=""),,width=960,height=480) |
|
287 |
par(mfrow=c(1,2)) |
|
288 |
#Oregon data |
|
289 |
plot(lst_mm_01) |
|
290 |
plot(interp_area,add=TRUE) |
|
291 |
plot(stat_reg_OR,add=TRUE) |
|
292 |
title("Stations located in Oregon from GHNCD") |
|
293 |
plot(mm_01_Ven) |
|
294 |
plot(modis_reg_outlines[[2]],add=TRUE) |
|
295 |
plot(stat_reg_Ven,add=TRUE) |
|
296 |
title("Stations located in Venezuela from GHNCD") |
|
297 |
dev.off() |
|
298 |
|
|
299 |
### CREATE FIGURE NUMBER OF STATIONS PER SITE AND SPECIFIC MONTH... |
|
300 |
|
|
301 |
#png(paste("stations_for_Venezuela_Oregon_areas_per_month",out_prefix,".png",sep="")) |
|
302 |
#par(mfrow=c(1,2)) |
|
303 |
#plot(interp_area_WGS84) |
|
304 |
#plot(stat_reg_OR,add=TRUE) |
|
305 |
#plot(modis_reg_outlines[[2]]) |
|
306 |
#plot(stat_reg_Ven,add=TRUE) |
|
307 |
#dev.off() |
|
308 |
|
|
309 |
############ PART III: SCREENING OF COVARIATES ############# |
|
310 |
|
|
311 |
### SCREENING FUNCTION for covariate stack and GHNCD data base to add later in the functions |
|
312 |
|
|
313 |
#Screen for extreme values": this needs more thought, min and max val vary with regions |
|
314 |
#min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets) |
|
315 |
#r1[r1 < (min_val)]<-NA |
|
316 |
#s_raster<-addLayer(s_raster,LST) #Adding current month |
|
317 |
|
|
318 |
nel<-12 |
|
319 |
#tab_range<-data.frame(varname=character(nel),varterm=character(nel),vmin=numeric(nel),vmax=numeric(nel)) |
|
320 |
val_range<-vector("list",nel) #list of one row data.frame |
|
321 |
val_rst<-vector("list",nel) #list of one row data.frame |
|
322 |
val_range[[1]]<-data.frame(varname="lon",vmin=-180,vmax=180) |
|
323 |
val_range[[2]]<-data.frame(varname="lat",vmin=-90,vmax=90) |
|
324 |
val_range[[3]]<-data.frame(varname="N",vmin=-1,vmax=1) |
|
325 |
val_range[[4]]<-data.frame(varname="E",vmin=-1,vmax=1) |
|
326 |
val_range[[5]]<-data.frame(varname="N_w",vmin=-1,vmax=1) |
|
327 |
val_range[[6]]<-data.frame(varname="E_w",vmin=-1,vmax=1) |
|
328 |
val_range[[7]]<-data.frame(varname="elev",vmin=0,vmax=6000) |
|
329 |
val_range[[8]]<-data.frame(varname="slope",varterm="slope",vmin=0,vmax=90) |
|
330 |
val_range[[9]]<-data.frame(varname="aspect",varterm="aspect",vmin=0,vmax=360) |
|
331 |
val_range[[10]]<-data.frame(varname="DISTOC",vmin=-0,vmax=10000000) |
|
332 |
val_range[[11]]<-data.frame(varname="CANHEIGHT",vmin=0,vmax=255) |
|
333 |
val_range[[12]]<-data.frame(varname="LC1",vmin=0,vmax=100) |
|
334 |
val_range[[13]]<-data.frame(varname="LC3",vmin=0,vmax=100) |
|
335 |
val_range[[14]]<-data.frame(varname="mm_01",vmin=-15,vmax=50) |
|
336 |
val_range[[15]]<-data.frame(varname="mm_02",vmin=-15,vmax=50) |
|
337 |
val_range[[16]]<-data.frame(varname="mm_03",vmin=-15,vmax=50) |
|
338 |
val_range[[17]]<-data.frame(varname="mm_04",vmin=-15,vmax=50) |
|
339 |
val_range[[18]]<-data.frame(varname="mm_05",vmin=-15,vmax=50) |
|
340 |
val_range[[19]]<-data.frame(varname="mm_06",vmin=-15,vmax=50) |
|
341 |
val_range[[20]]<-data.frame(varname="mm_07",vmin=-15,vmax=50) |
|
342 |
val_range[[21]]<-data.frame(varname="mm_08",vmin=-15,vmax=50) |
|
343 |
val_range[[22]]<-data.frame(varname="mm_09",vmin=-15,vmax=50) |
|
344 |
val_range[[23]]<-data.frame(varname="mm_10",vmin=-15,vmax=50) |
|
345 |
val_range[[24]]<-data.frame(varname="mm_11",vmin=-15,vmax=50) |
|
346 |
val_range[[25]]<-data.frame(varname="mm_12",vmin=-15,vmax=50) |
|
347 |
|
|
348 |
tab_range<-do.call(rbind,val_range) |
|
349 |
|
|
350 |
#pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with the current month for instance mm12 |
|
351 |
#ELEV_SRTM<-raster(s_raster,pos) |
|
352 |
|
|
353 |
screening_val_covariates_fun<-function(tab_range,r_stack){ |
|
354 |
#Screening for raster stack |
|
355 |
# |
|
356 |
for (k in 1:nrow(tab_range)){ |
|
357 |
avl<-c(-Inf,tab_range$vmin[k],NA, tab_range$vmax[k],+Inf,NA) #This creates a input vector...val 1 are -9999, 2 neg, 3 positive |
|
358 |
rclmat<-matrix(avl,ncol=3,byrow=TRUE) |
|
359 |
#s_raster_r<-raster(r_stack,match(tab_range$varterm[k],names(r_stack))) #select relevant layer from stack |
|
360 |
s_raster_r<-raster(r_stack,match(tab_range$varname[k],names(r_stack))) |
|
361 |
s_raster_r<-reclass(s_raster_r,rclmat) #now reclass values |
|
362 |
r_stack<-dropLayer(r_stack,"N") |
|
363 |
names(s_raster_r)<-tab_range$varname[k] #Loss of layer names when using reclass |
|
364 |
val_rst[[k]]<-s_raster_r |
|
365 |
} |
|
366 |
s_rst_m<-stack(val_rst) #This a raster stack with valid range of values |
|
367 |
r_stack<-addLayer(r_stack,s_rst_m) #add back layers that were screened out |
|
368 |
return(r_stack) |
|
369 |
} |
|
370 |
|
|
371 |
#### ADD SCREENING FUNCTION FOR GHCND extracted!!! |
|
372 |
|
|
373 |
#Remove NA for LC and CANHEIGHT |
|
374 |
#ghcn$LC1[is.na(ghcn$LC1)]<-0 |
|
375 |
#ghcn$LC3[is.na(ghcn$LC3)]<-0 |
|
376 |
#ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0 |
|
377 |
#ghcn$LC4[is.na(ghcn$LC4)]<-0 |
|
378 |
#ghcn$LC6[is.na(ghcn$LC6)]<-0 |
|
379 |
##ghcn$ELEV_SRTM[ghcn$ELEV_SRTM==-9999]<-NA |
|
380 |
#dst<-subset(dst,dst$TMax>-15 & dst$TMax<40) |
|
381 |
#dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows |
climate/research/oregon/interpolation/Database_stations_covariates_processing_function.R | ||
---|---|---|
1 |
################## Data preparation for interpolation ####################################### |
|
2 |
############################ Extraction of station data ########################################## |
|
3 |
|
|
4 |
|
|
5 |
database_covariates_preparation<-function(list_param_prep){ |
|
6 |
#This function performs queries on the Postgres ghcnd database for stations matching the |
|
7 |
#interpolation area. It requires 11 inputs: |
|
8 |
# 1) db.name : Postgres database name containing the meteorological stations |
|
9 |
# 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" |
|
10 |
# 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included |
|
11 |
# 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included |
|
12 |
# 5) infile_reg_outline: region outline as a shape file - used in the interpolation stage too |
|
13 |
# 6) infile_ghncd_data: ghcnd stations locations as a textfile name with lat-long fields |
|
14 |
# 7) infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection |
|
15 |
# 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations |
|
16 |
# 9) in_path: input path for covariates data and other files, this is also the output? |
|
17 |
# 10) out_path: output path created in master script |
|
18 |
# 11) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick) |
|
19 |
# 12) qc_flags_stations: flag used to screen out at this stage only two values... |
|
20 |
# 13) out_prefix: output suffix added to output names--it is the same in the interpolation script |
|
21 |
# |
|
22 |
#The output is a list of four shapefile names produced by the function: |
|
23 |
#1) loc_stations: locations of stations as shapefile in EPSG 4326 |
|
24 |
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected) |
|
25 |
#3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag |
|
26 |
#4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
27 |
#5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag |
|
28 |
#6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected) |
|
29 |
|
|
30 |
#AUTHOR: Benoit Parmentier |
|
31 |
#DATE: 05/21/2013 |
|
32 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
|
33 |
#Comments and TODO |
|
34 |
#-Add buffer option... |
|
35 |
#-Add screening for value predicted: var |
|
36 |
################################################################################################## |
|
37 |
|
|
38 |
###Loading R library and packages: should it be read in before??? |
|
39 |
|
|
40 |
library(RPostgreSQL) |
|
41 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
42 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
43 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
44 |
library(rgeos) |
|
45 |
library(rgdal) |
|
46 |
library(raster) |
|
47 |
library(rasterVis) |
|
48 |
library(maps) |
|
49 |
library(maptools) |
|
50 |
|
|
51 |
### Functions used in the script |
|
52 |
|
|
53 |
format_s <-function(s_ID){ |
|
54 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
55 |
# Argument 1: vector of station ID |
|
56 |
# Return: character of station ID |
|
57 |
tx2<-s_ID |
|
58 |
tx2<-as.character(tx2) |
|
59 |
stat_list<-tx2 |
|
60 |
temp<-shQuote(stat_list) |
|
61 |
t<-paste(temp, collapse= " ") |
|
62 |
t1<-gsub(" ", ",",t) |
|
63 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
64 |
return(sf_ID) |
|
65 |
} |
|
66 |
|
|
67 |
#parsing input arguments |
|
68 |
|
|
69 |
db.name <- list_param_prep$db.name #name of the Postgres database |
|
70 |
var <- list_param_prep$var #name of the variables to keep: TMIN, TMAX or PRCP |
|
71 |
year_start <-list_param_prep$range_years[1] #"2010" #starting year for the query (included) |
|
72 |
year_end <-list_param_prep$range_years[2] #"2011" #end year for the query (excluded) |
|
73 |
year_start_clim <-list_param_prep$range_years_clim[1] #right bound not included in the range!! starting year for monthly query to calculate clime |
|
74 |
year_end_clim <-list_param_prep$range_years_clim[2] #right bound not included in the range!! starting year for monthly query to calculate clime |
|
75 |
infile_reg_outline<- list_param_prep$infile_reg_outline #This is the shape file of outline of the study area #It is an input/output of the covariate script |
|
76 |
infile_ghncd_data<-list_param_prep$infile_ghncd_data #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
|
77 |
infile_covariates<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
|
78 |
CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier |
|
79 |
in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
|
80 |
out_path <- list_param_prep$out_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
|
81 |
covar_names<-list_param_prep$covar_names # names should be written in the tif file!!! |
|
82 |
qc_flags_stations <- list_param_prep$qc_flags_stations #flags allowed for the query from the GHCND?? |
|
83 |
out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
|
84 |
|
|
85 |
## working directory is the same for input and output for this function |
|
86 |
#setwd(in_path) |
|
87 |
setwd(out_path) |
|
88 |
##### STEP 1: Select station in the study area |
|
89 |
|
|
90 |
filename<-sub(".shp","",infile_reg_outline) #Removing the extension from file. |
|
91 |
interp_area <- readOGR(dsn=dirname(filename),basename(filename)) |
|
92 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
|
93 |
|
|
94 |
#Read in GHCND database station locations |
|
95 |
dat_stat <- read.fwf(infile_ghncd_data, |
|
96 |
widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
97 |
colnames(dat_stat)<-c("STAT_ID","latitude","longitude","elev","state","name","GSNF","HCNF","WMOID") |
|
98 |
coords<- dat_stat[,c('longitude','latitude')] |
|
99 |
coordinates(dat_stat)<-coords |
|
100 |
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection |
|
101 |
#proj4string(dat_stat)<-CRS_interp |
|
102 |
interp_area_WGS84 <-spTransform(interp_area,CRS_locs_WGS84) # Project from WGS84 to new coord. system |
|
103 |
|
|
104 |
# Spatial query to find relevant stations |
|
105 |
|
|
106 |
inside <- !is.na(over(dat_stat, as(interp_area_WGS84, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
107 |
stat_reg<-dat_stat[inside,] #Selecting stations contained in the current interpolation area |
|
108 |
|
|
109 |
#### |
|
110 |
##TODO: Add buffer option? |
|
111 |
#### |
|
112 |
|
|
113 |
#### STEP 2: Connecting to the database and query for relevant data |
|
114 |
|
|
115 |
drv <- dbDriver("PostgreSQL") |
|
116 |
db <- dbConnect(drv, dbname=db.name) |
|
117 |
|
|
118 |
time1<-proc.time() #Start stop watch |
|
119 |
list_s<-format_s(stat_reg$STAT_ID) |
|
120 |
data2<-dbGetQuery(db, paste("SELECT * |
|
121 |
FROM ghcn |
|
122 |
WHERE element=",shQuote(var), |
|
123 |
"AND year>=",year_start, |
|
124 |
"AND year<",year_end, |
|
125 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
126 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
127 |
time_minutes<-time_duration[3]/60 |
|
128 |
dbDisconnect(db) |
|
129 |
|
|
130 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
|
131 |
|
|
132 |
#Transform the subset data frame in a spatial data frame and reproject |
|
133 |
data_reg<-data_table #Make a copy of the data frame |
|
134 |
coords<- data_reg[c('longitude','latitude')] #Define coordinates in a data frame: clean up here!! |
|
135 |
coordinates(data_reg)<-coords #Assign coordinates to the data frame |
|
136 |
proj4string(data_reg)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
137 |
data_reg<-spTransform(data_reg,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
138 |
|
|
139 |
data_d <-data_reg #data_d: daily data containing the query without screening |
|
140 |
#data_reg <-subset(data_d,mflag=="0" | mflag=="S") #should be input arguments!! |
|
141 |
#Transform the query to be depending on the number of flags |
|
142 |
|
|
143 |
data_reg <-subset(data_d, mflag %in% qc_flags_stations) #screening using flags |
|
144 |
#data_reg2 <-subset(data_d,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) #screening using flags |
|
145 |
|
|
146 |
################################################################## |
|
147 |
### STEP 3: Save results and outuput in textfile and a shape file |
|
148 |
#browser() |
|
149 |
#Save shape files of the locations of meteorological stations in the study area |
|
150 |
#outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep="")) |
|
151 |
outfile1<-file.path(out_path,paste("stations","_",out_prefix,".shp",sep="")) |
|
152 |
writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
153 |
#writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
154 |
|
|
155 |
outfile2<-file.path(out_path,paste("ghcn_data_query_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
156 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
157 |
writeOGR(data_d,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
158 |
|
|
159 |
outfile3<-file.path(out_path,paste("ghcn_data_",var,"_",year_start,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
160 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
161 |
writeOGR(data_reg,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
162 |
|
|
163 |
################################################################### |
|
164 |
### STEP 4: Extract values at stations from covariates stack of raster images |
|
165 |
#Eventually this step may be skipped if the covariates information is stored in the database... |
|
166 |
|
|
167 |
#s_raster<-stack(file.path(in_path,infile_covariates)) #read in the data stack |
|
168 |
s_raster<-brick(infile_covariates) #read in the data stack |
|
169 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
170 |
stat_val<- extract(s_raster, data_reg) #Extracting values from the raster stack for every point location in coords data frame. |
|
171 |
#stat_val_test<- extract(s_raster, data_reg,def=TRUE) |
|
172 |
|
|
173 |
#create a shape file and data_frame with names |
|
174 |
|
|
175 |
data_RST<-as.data.frame(stat_val) #This creates a data frame with the values extracted |
|
176 |
data_RST_SDF<-cbind(data_reg,data_RST) |
|
177 |
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe |
|
178 |
CRS_reg<-proj4string(data_reg) |
|
179 |
proj4string(data_RST_SDF)<-CRS_reg #Need to assign coordinates... |
|
180 |
|
|
181 |
#Creating a date column |
|
182 |
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column |
|
183 |
date2<-gsub("-","",as.character(as.Date(date1))) |
|
184 |
data_RST_SDF$date<-date2 #Date format (year,month,day) is the following: "20100627" |
|
185 |
|
|
186 |
#This allows to change only one name of the data.frame |
|
187 |
pos<-match("value",names(data_RST_SDF)) #Find column with name "value" |
|
188 |
if (var=="TMAX"){ |
|
189 |
#names(data_RST_SDF)[pos]<-c("TMax") |
|
190 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
|
191 |
} |
|
192 |
|
|
193 |
if (var=="TMIN"){ |
|
194 |
#names(data_RST_SDF)[pos]<-c("TMin") |
|
195 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
|
196 |
} |
|
197 |
|
|
198 |
#write out a new shapefile (including .prj component) |
|
199 |
outfile4<-file.path(out_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep="")) #Name of the file |
|
200 |
writeOGR(data_RST_SDF,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
201 |
|
|
202 |
############################################################### |
|
203 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
|
204 |
|
|
205 |
drv <- dbDriver("PostgreSQL") |
|
206 |
db <- dbConnect(drv, dbname=db.name) |
|
207 |
|
|
208 |
#year_start_clim: set at the start of the script |
|
209 |
time1<-proc.time() #Start stop watch |
|
210 |
list_s<-format_s(stat_reg$STAT_ID) |
|
211 |
data_m<-dbGetQuery(db, paste("SELECT * |
|
212 |
FROM ghcn |
|
213 |
WHERE element=",shQuote(var), |
|
214 |
"AND year>=",year_start_clim, |
|
215 |
"AND year<",year_end_clim, |
|
216 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
217 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
218 |
time_minutes<-time_duration[3]/60 |
|
219 |
dbDisconnect(db) |
|
220 |
#Clean out this section!! |
|
221 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
222 |
date2<-as.POSIXlt(as.Date(date1)) |
|
223 |
data_m$date<-date2 |
|
224 |
#Save the query data here... |
|
225 |
data_m<-merge(data_m, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
226 |
#Extracting covariates from stack for the monthly dataset... |
|
227 |
coords<- data_m[c('longitude','latitude')] #Define coordinates in a data frame |
|
228 |
coordinates(data_m)<-coords #Assign coordinates to the data frame |
|
229 |
proj4string(data_m)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
230 |
data_m<-spTransform(data_m,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
231 |
outfile5<-file.path(out_path,paste("monthly_query_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
232 |
writeOGR(data_m,dsn= dirname(outfile5),layer= sub(".shp","",basename(outfile5)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
233 |
|
|
234 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
|
235 |
|
|
236 |
#d<-subset(data_m,mflag==qc_flags_stations[1] | mflag==qc_flags_stations[2]) |
|
237 |
d<-subset(data_m,mflag %in% qc_flags_stations) |
|
238 |
|
|
239 |
#Add screening here ...May need some screeing??? i.e. range of temp and elevation... |
|
240 |
|
|
241 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
242 |
#d2<-aggregate(value~station+month, data=d, length) #Calculate monthly mean for every station in OR |
|
243 |
is_not_na_fun<-function(x) sum(!is.na(x)) #count the number of available observation |
|
244 |
d2<-aggregate(value~station+month, data=d, is_not_na_fun) #Calculate monthly mean for every station in OR |
|
245 |
#names(d2)[names(d2)=="value"] <-"nobs_station" |
|
246 |
d1$nobs_station<-d2$value |
|
247 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
248 |
|
|
249 |
#This allows to change only one name of the data.frame |
|
250 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
251 |
if (var=="TMAX"){ |
|
252 |
names(dst)[pos]<-c("TMax") #renaming the column "value" extracted from the Postgres database |
|
253 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
254 |
} |
|
255 |
|
|
256 |
if (var=="TMIN"){ |
|
257 |
names(dst)[pos]<-c("TMin") |
|
258 |
dst$TMin<-dst$TMin/10 #TMin is the average min temp for monthy data |
|
259 |
} |
|
260 |
|
|
261 |
#Extracting covariates from stack for the monthly dataset... |
|
262 |
#names(dst)[5:6] <-c('latitude','longitude') |
|
263 |
coords<- dst[c('longitude','latitude')] #Define coordinates in a data frame |
|
264 |
|
|
265 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
266 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
267 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
268 |
|
|
269 |
stations_val<-extract(s_raster,dst_month,df=TRUE) #extraction of the information at station location in a data frame |
|
270 |
#dst_extract<-spCbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
271 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
272 |
dst<-dst_extract #problem!!! two column named elev!!! use elev_s?? |
|
273 |
|
|
274 |
#browser() |
|
275 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
276 |
index<-!is.na(coords$x) #remove if NA, may need to revisit at a later stage |
|
277 |
dst<-dst[index,] |
|
278 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
279 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
280 |
proj4string(dst)<-CRS_interp #Assign coordinates reference system in PROJ4 format |
|
281 |
|
|
282 |
### ADD SCREENING HERE BEFORE WRITING OUT DATA |
|
283 |
#Covariates ok since screening done in covariate script |
|
284 |
#screening on var i.e. value, TMIN, TMAX... |
|
285 |
|
|
286 |
#### |
|
287 |
|
|
288 |
#### |
|
289 |
#write out a new shapefile (including .prj component) |
|
290 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
291 |
outfile6<-file.path(out_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end_clim,out_prefix,".shp",sep="")) #Name of the file |
|
292 |
writeOGR(dst,dsn= dirname(outfile6),layer= sub(".shp","",basename(outfile6)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
293 |
|
|
294 |
### list of outputs return |
|
295 |
|
|
296 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4,outfile5,outfile6) |
|
297 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_query_ghcn_data","daily_covar_ghcn_data","monthly_query_ghcn_data","monthly_covar_ghcn_data") |
|
298 |
save(outfiles_obj,file= file.path(out_path,paste("met_stations_outfiles_obj_",interpolation_method,"_", out_prefix,".RData",sep=""))) |
|
299 |
|
|
300 |
return(outfiles_obj) |
|
301 |
|
|
302 |
#END OF FUNCTION # |
|
303 |
} |
|
304 |
|
|
305 |
########## END OF SCRIPT ########## |
climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing.R | ||
---|---|---|
1 |
################## Data preparation for interpolation ####################################### |
|
2 |
############################ Extraction of station data ########################################## |
|
3 |
|
|
4 |
|
|
5 |
database_covaratiates_preparation<-function(list_param_prep){ |
|
6 |
#This function performs queries on the Postgres ghcnd database for stations matching the |
|
7 |
#interpolation area. It requires 11 inputs: |
|
8 |
# 1) db.name : Postgres database name containing the meteorological stations |
|
9 |
# 2) var: the variable of interest - "TMAX","TMIN" or "PRCP" |
|
10 |
# 3) range_years: range of records used in the daily interpolation, note that upper bound year is not included |
|
11 |
# 4) range_years_clim: range of records used in the monthly climatology interpolation, note that upper bound is not included |
|
12 |
# 5) infile1: region outline as a shape file - used in the interpolation stage too |
|
13 |
# 6) infile2: ghcnd stations locations as a textfile name with lat-long fields |
|
14 |
# 7) infile_covarariates: tif file of raser covariates for the interpolation area: it should have a local projection |
|
15 |
# 8) CRS_locs_WGS84: longlat EPSG 4326 used as coordinates reference system (proj4)for stations locations |
|
16 |
# 9) in_path: input path for covariates data and other files, this is also the output? |
|
17 |
# 10) covar_names: names of covariates used for the interpolation --may be removed later? (should be stored in the brick) |
|
18 |
# 11) out_prefix: output suffix added to output names--it is the same in the interpolation script |
|
19 |
# |
|
20 |
#The output is a list of four shapefile names produced by the function: |
|
21 |
#1) loc_stations: locations of stations as shapefile in EPSG 4326 |
|
22 |
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected) |
|
23 |
#3) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
24 |
#4) monthly_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected) |
|
25 |
|
|
26 |
#AUTHOR: Benoit Parmentier |
|
27 |
#DATE: 03/01/2013 |
|
28 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
|
29 |
#Comments and TODO |
|
30 |
#-Add buffer option... |
|
31 |
#-Add output path argument option |
|
32 |
#-Add qc flag options |
|
33 |
################################################################################################## |
|
34 |
|
|
35 |
###Loading R library and packages: should it be read in before??? |
|
36 |
|
|
37 |
library(RPostgreSQL) |
|
38 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
39 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
40 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
41 |
library(rgeos) |
|
42 |
library(rgdal) |
|
43 |
library(raster) |
|
44 |
library(rasterVis) |
|
45 |
|
|
46 |
### Functions used in the script |
|
47 |
|
|
48 |
format_s <-function(s_ID){ |
|
49 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
50 |
# Argument 1: vector of station ID |
|
51 |
# Return: character of station ID |
|
52 |
tx2<-s_ID |
|
53 |
tx2<-as.character(tx2) |
|
54 |
stat_list<-tx2 |
|
55 |
temp<-shQuote(stat_list) |
|
56 |
t<-paste(temp, collapse= " ") |
|
57 |
t1<-gsub(" ", ",",t) |
|
58 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
59 |
return(sf_ID) |
|
60 |
} |
|
61 |
|
|
62 |
#parsing input arguments |
|
63 |
|
|
64 |
db.name <- list_param_prep$db.name #name of the Postgres database |
|
65 |
var <- list_param_prep$var #name of the variables to keep: TMIN, TMAX or PRCP |
|
66 |
year_start <-list_param_prep$range_years[1] #"2010" #starting year for the query (included) |
|
67 |
year_end <-list_param_prep$range_years[2] #"2011" #end year for the query (excluded) |
|
68 |
year_start_clim <-list_param_prep$range_years_clim[1] #right bound not included in the range!! starting year for monthly query to calculate clime |
|
69 |
infile1<- list_param_prep$infile1 #This is the shape file of outline of the study area #It is an input/output of the covariate script |
|
70 |
infile2<-list_param_prep$infile2 #"/home/layers/data/climate/ghcn/v2.92-upd-2012052822/ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
|
71 |
infile3<-list_param_prep$infile_covariates #"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
|
72 |
CRS_locs_WGS84<-list_param_prep$CRS_locs_WGS84 #Station coords WGS84: same as earlier |
|
73 |
in_path <- list_param_prep$in_path #CRS_locs_WGS84"/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
|
74 |
out_prefix<-list_param_prep$out_prefix #"_365d_GAM_fus5_all_lstd_03012013" #User defined output prefix |
|
75 |
#qc_flags<-list_param_prep$qc_flags flags allowed for the query from the GHCND?? |
|
76 |
covar_names<-list_param_prep$covar_names # names should be written in the tif file!!! |
|
77 |
|
|
78 |
## working directory is the same for input and output for this function |
|
79 |
setwd(in_path) |
|
80 |
|
|
81 |
##### STEP 1: Select station in the study area |
|
82 |
|
|
83 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
84 |
interp_area <- readOGR(".",filename) |
|
85 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
|
86 |
|
|
87 |
#Read in GHCND database station locations |
|
88 |
dat_stat <- read.fwf(infile2, |
|
89 |
widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
90 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
91 |
coords<- dat_stat[,c('lon','lat')] |
|
92 |
coordinates(dat_stat)<-coords |
|
93 |
proj4string(dat_stat)<-CRS_locs_WGS84 #this is the WGS84 projection |
|
94 |
#proj4string(dat_stat)<-CRS_interp |
|
95 |
dat_stat2<-spTransform(dat_stat,CRS(CRS_interp)) # Project from WGS84 to new coord. system |
|
96 |
|
|
97 |
# Spatial query to find relevant stations |
|
98 |
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
99 |
stat_reg<-dat_stat2[inside,] #Selecting stations contained in the current interpolation area |
|
100 |
|
|
101 |
#### |
|
102 |
##TODO: Add buffer option? |
|
103 |
#### |
|
104 |
|
|
105 |
#### STEP 2: Connecting to the database and query for relevant data |
|
106 |
|
|
107 |
drv <- dbDriver("PostgreSQL") |
|
108 |
db <- dbConnect(drv, dbname=db.name) |
|
109 |
|
|
110 |
time1<-proc.time() #Start stop watch |
|
111 |
list_s<-format_s(stat_reg$STAT_ID) |
|
112 |
data2<-dbGetQuery(db, paste("SELECT * |
|
113 |
FROM ghcn |
|
114 |
WHERE element=",shQuote(var), |
|
115 |
"AND year>=",year_start, |
|
116 |
"AND year<",year_end, |
|
117 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
118 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
119 |
time_minutes<-time_duration[3]/60 |
|
120 |
dbDisconnect(db) |
|
121 |
### |
|
122 |
#Add month query and averages here... |
|
123 |
### |
|
124 |
|
|
125 |
#data2 contains only 46 stations for Venezueal area?? |
|
126 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
|
127 |
|
|
128 |
#Transform the subset data frame in a spatial data frame and reproject |
|
129 |
data_reg<-data_table #Make a copy of the data frame |
|
130 |
coords<- data_reg[c('lon','lat')] #Define coordinates in a data frame: clean up here!! |
|
131 |
#Wrong label...it is in fact projected... |
|
132 |
coordinates(data_reg)<-coords #Assign coordinates to the data frame |
|
133 |
#proj4string(data3)<-locs_coord #Assign coordinates reference system in PROJ4 format |
|
134 |
proj4string(data_reg)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
135 |
data_reg<-spTransform(data_reg,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
136 |
|
|
137 |
################################################################## |
|
138 |
### STEP 3: Save results and outuput in textfile and a shape file |
|
139 |
#browser() |
|
140 |
#Save shape files of the locations of meteorological stations in the study area |
|
141 |
outfile1<-file.path(in_path,paste("stations","_",out_prefix,".shp",sep="")) |
|
142 |
writeOGR(stat_reg,dsn= dirname(outfile1),layer= sub(".shp","",basename(outfile1)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
143 |
#writeOGR(dst,dsn= ".",layer= sub(".shp","",outfile4), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
144 |
|
|
145 |
outfile2<-file.path(in_path,paste("ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
146 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
147 |
writeOGR(data_reg,dsn= dirname(outfile2),layer= sub(".shp","",basename(outfile2)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
148 |
|
|
149 |
################################################################### |
|
150 |
### STEP 4: Extract values at stations from covariates stack of raster images |
|
151 |
#Eventually this step may be skipped if the covariates information is stored in the database... |
|
152 |
|
|
153 |
s_raster<-stack(infile3) #read in the data stack |
|
154 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
155 |
stat_val<- extract(s_raster, data_reg) #Extracting values from the raster stack for every point location in coords data frame. |
|
156 |
|
|
157 |
#create a shape file and data_frame with names |
|
158 |
|
|
159 |
data_RST<-as.data.frame(stat_val) #This creates a data frame with the values extracted |
|
160 |
data_RST_SDF<-cbind(data_reg,data_RST) |
|
161 |
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe |
|
162 |
CRS_reg<-proj4string(data_reg) |
|
163 |
proj4string(data_RST_SDF)<-CRS_reg #Need to assign coordinates... |
|
164 |
|
|
165 |
#Creating a date column |
|
166 |
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column |
|
167 |
date2<-gsub("-","",as.character(as.Date(date1))) |
|
168 |
data_RST_SDF$date<-date2 #Date format (year,month,day) is the following: "20100627" |
|
169 |
|
|
170 |
#This allows to change only one name of the data.frame |
|
171 |
pos<-match("value",names(data_RST_SDF)) #Find column with name "value" |
|
172 |
if (var=="TMAX"){ |
|
173 |
#names(data_RST_SDF)[pos]<-c("TMax") |
|
174 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
|
175 |
} |
|
176 |
|
|
177 |
#write out a new shapefile (including .prj component) |
|
178 |
outfile3<-file.path(in_path,paste("daily_covariates_ghcn_data_",var,"_",range_years[1],"_",range_years[2],out_prefix,".shp",sep="")) #Name of the file |
|
179 |
writeOGR(data_RST_SDF,dsn= dirname(outfile3),layer= sub(".shp","",basename(outfile3)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
180 |
|
|
181 |
############################################################### |
|
182 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
|
183 |
|
|
184 |
drv <- dbDriver("PostgreSQL") |
|
185 |
db <- dbConnect(drv, dbname=db.name) |
|
186 |
|
|
187 |
#year_start_clim: set at the start of the script |
|
188 |
year_end<-2011 |
|
189 |
time1<-proc.time() #Start stop watch |
|
190 |
list_s<-format_s(stat_reg$STAT_ID) |
|
191 |
data_m<-dbGetQuery(db, paste("SELECT * |
|
192 |
FROM ghcn |
|
193 |
WHERE element=",shQuote(var), |
|
194 |
"AND year>=",year_start_clim, |
|
195 |
"AND year<",year_end, |
|
196 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
197 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
198 |
time_minutes<-time_duration[3]/60 |
|
199 |
dbDisconnect(db) |
|
200 |
#Clean out this section!! |
|
201 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
202 |
date2<-as.POSIXlt(as.Date(date1)) |
|
203 |
data_m$date<-date2 |
|
204 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
|
205 |
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
206 |
d<-subset(data_m,mflag=="0" | mflag=="S") #should be input arguments!! |
|
207 |
#May need some screeing??? i.e. range of temp and elevation... |
|
208 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
209 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
210 |
|
|
211 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
212 |
|
|
213 |
#This allows to change only one name of the data.frame |
|
214 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
215 |
if (var=="TMAX"){ |
|
216 |
names(dst)[pos]<-c("TMax") |
|
217 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
218 |
} |
|
219 |
|
|
220 |
#Extracting covariates from stack for the monthly dataset... |
|
221 |
coords<- dst[c('lon','lat')] #Define coordinates in a data frame |
|
222 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
223 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
224 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
225 |
|
|
226 |
stations_val<-extract(s_raster,dst_month) #extraction of the infomration at station location |
|
227 |
stations_val<-as.data.frame(stations_val) |
|
228 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
229 |
dst<-dst_extract |
|
230 |
|
|
231 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
232 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
233 |
proj4string(dst)<-projection(s_raster) #Assign coordinates reference system in PROJ4 format |
|
234 |
|
|
235 |
#### |
|
236 |
#write out a new shapefile (including .prj component) |
|
237 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
238 |
outfile4<-file.path(in_path,paste("monthly_covariates_ghcn_data_",var,"_",year_start_clim,"_",year_end,out_prefix,".shp",sep="")) #Name of the file |
|
239 |
writeOGR(dst,dsn= dirname(outfile4),layer= sub(".shp","",basename(outfile4)), driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
240 |
|
|
241 |
### list of outputs return |
|
242 |
|
|
243 |
outfiles_obj<-list(outfile1,outfile2,outfile3,outfile4) |
|
244 |
names(outfiles_obj)<- c("loc_stations","loc_stations_ghcn","daily_covar_ghcn_data","monthly_covar_ghcn_data") |
|
245 |
return(outfiles_obj) |
|
246 |
|
|
247 |
#END OF FUNCTION # |
|
248 |
} |
|
249 |
|
|
250 |
##### END OF SCRIPT ########## |
climate/research/oregon/interpolation/Database_stations_extraction_raster_covariates_processing_region.R | ||
---|---|---|
1 |
################## Data preparation for interpolation ####################################### |
|
2 |
############################ Extraction of station data ########################################## |
|
3 |
#This script perform queries on the Postgres database ghcn for stations matching the |
|
4 |
#interpolation area. It requires the following inputs: |
|
5 |
# 1)the text file ofGHCND stations from NCDC matching the database version release |
|
6 |
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84 # |
|
7 |
# 3)a new coordinate system can be provided as an argument |
|
8 |
# 4)the variable of interest: "TMAX","TMIN" or "PRCP" |
|
9 |
# 5)the location of raser covariate stack. |
|
10 |
#The outputs are text files and a shape file of a time subset of the database |
|
11 |
#AUTHOR: Benoit Parmentier |
|
12 |
#DATE: 02/08/2013 |
|
13 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- |
|
14 |
#Comments and TODO |
|
15 |
#-Add buffer option... |
|
16 |
#-Add calculation of monthly mean... |
|
17 |
################################################################################################## |
|
18 |
|
|
19 |
###Loading R library and packages |
|
20 |
|
|
21 |
library(RPostgreSQL) |
|
22 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
23 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
24 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
25 |
library(rgeos) |
|
26 |
library(rgdal) |
|
27 |
library(raster) |
|
28 |
library(rasterVis) |
|
29 |
|
|
30 |
### Parameters and arguments |
|
31 |
|
|
32 |
db.name <- "ghcn" #name of the Postgres database |
|
33 |
var <- "TMAX" #name of the variables to keep: TMIN, TMAX or PRCP |
|
34 |
year_start<-"2010" #starting year for the query (included) |
|
35 |
year_end<-"2011" #end year for the query (excluded) |
|
36 |
infile1<- "outline_venezuela_region__VE_01292013.shp" #This is the shape file of outline of the study area. #It is projected alreaday |
|
37 |
infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
|
38 |
infile3<-"covariates__venezuela_region__VE_01292013.tif" #this is an output from covariate script |
|
39 |
|
|
40 |
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs" |
|
41 |
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") |
|
42 |
CRS_locs_WGS84<-"+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0" |
|
43 |
##Paths to inputs and output |
|
44 |
in_path <- "/home/parmentier/Data/benoit_test" |
|
45 |
in_path <- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/input_data/" |
|
46 |
out_path<- "/home/parmentier/Data/IPLANT_project/Venezuela_interpolation/Venezuela_01142013/output_data/" |
|
47 |
ghcnd_path<- "/home/layers/data/climate/ghcn/v2.92-upd-2012052822" |
|
48 |
setwd(in_path) |
|
49 |
out_suffix<-"y2010_2010_VE_02082013" #User defined output prefix |
|
50 |
out_region_name<-"_venezuela_region" |
|
51 |
#out_suffix<-"_VE_01292013" |
|
52 |
|
|
53 |
### Functions used in the script |
|
54 |
|
|
55 |
format_s <-function(s_ID){ |
|
56 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
57 |
# Argument 1: vector of station ID |
|
58 |
# Return: character of station ID |
|
59 |
tx2<-s_ID |
|
60 |
tx2<-as.character(tx2) |
|
61 |
stat_list<-tx2 |
|
62 |
temp<-shQuote(stat_list) |
|
63 |
t<-paste(temp, collapse= " ") |
|
64 |
t1<-gsub(" ", ",",t) |
|
65 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
66 |
return(sf_ID) |
|
67 |
} |
|
68 |
|
|
69 |
############ BEGIN: START OF THE SCRIPT ################# |
|
70 |
|
|
71 |
##### STEP 1: Select station in the study area |
|
72 |
|
|
73 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
74 |
interp_area <- readOGR(".",filename) |
|
75 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
|
76 |
|
|
77 |
dat_stat <- read.fwf(file.path(ghcnd_path,"ghcnd-stations.txt"), widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
78 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
79 |
coords<- dat_stat[,c('lon','lat')] |
|
80 |
coordinates(dat_stat)<-coords |
|
81 |
proj4string(dat_stat)<-locs_coord #this is the WGS84 projection |
|
82 |
#proj4string(dat_stat)<-CRS_interp |
|
83 |
dat_stat2<-spTransform(dat_stat,CRS(new_proj)) # Project from WGS84 to new coord. system |
|
84 |
|
|
85 |
# Spatial query to find relevant stations |
|
86 |
inside <- !is.na(over(dat_stat2, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
87 |
stat_reg<-dat_stat2[inside,] #Selecting stations contained in the current interpolation area |
|
88 |
|
|
89 |
#Quick visualization of station locations |
|
90 |
plot(interp_area, axes =TRUE) |
|
91 |
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE) |
|
92 |
#plot(data3,pch=1,col="blue",cex=3,add=TRUE) |
|
93 |
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6) |
|
94 |
#only 357 station for Venezuela?? |
|
95 |
|
|
96 |
#### |
|
97 |
##Add buffer option? |
|
98 |
#### |
|
99 |
|
|
100 |
#### STEP 2: Connecting to the database and query for relevant data |
|
101 |
|
|
102 |
drv <- dbDriver("PostgreSQL") |
|
103 |
db <- dbConnect(drv, dbname=db.name) |
|
104 |
|
|
105 |
time1<-proc.time() #Start stop watch |
|
106 |
list_s<-format_s(stat_reg$STAT_ID) |
|
107 |
data2<-dbGetQuery(db, paste("SELECT * |
|
108 |
FROM ghcn |
|
109 |
WHERE element=",shQuote(var), |
|
110 |
"AND year>=",year_start, |
|
111 |
"AND year<",year_end, |
|
112 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
113 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
114 |
time_minutes<-time_duration[3]/60 |
|
115 |
|
|
116 |
### |
|
117 |
#Add month query and averages here... |
|
118 |
### |
|
119 |
|
|
120 |
#data2 contains only 46 stations for Venezueal area?? |
|
121 |
data_table<-merge(data2,as.data.frame(stat_reg), by.x = "station", by.y = "STAT_ID") |
|
122 |
|
|
123 |
#Transform the subset data frame in a spatial data frame and reproject |
|
124 |
data_reg<-data_table #Make a copy of the data frame |
|
125 |
coords<- data_reg[c('lon','lat')] #Define coordinates in a data frame: clean up here!! |
|
126 |
#Wrong label...it is in fact projected... |
|
127 |
coordinates(data_reg)<-coords #Assign coordinates to the data frame |
|
128 |
#proj4string(data3)<-locs_coord #Assign coordinates reference system in PROJ4 format |
|
129 |
proj4string(data_reg)<-locs_coord #Assign coordinates reference system in PROJ4 format |
|
130 |
data_reg<-spTransform(data_reg,CRS(new_proj)) #Project from WGS84 to new coord. system |
|
131 |
|
|
132 |
plot(interp_area, axes =TRUE) |
|
133 |
plot(stat_reg, pch=1, col="red", cex= 0.7, add=TRUE) |
|
134 |
plot(data_reg,pch=2,col="blue",cex=2,add=TRUE) |
|
135 |
|
|
136 |
################################################################## |
|
137 |
### STEP 3: Save results and outuput in textfile and a shape file |
|
138 |
|
|
139 |
#Save a textfile of the locations of meteorological stations in the study area |
|
140 |
write.table(as.data.frame(stat_reg), file=file.path(in_path,paste("stations",out_region_name,"_", |
|
141 |
out_suffix,".txt",sep="")),sep=",") |
|
142 |
outfile<-paste("stations",out_region_name,"_", |
|
143 |
out_suffix,sep="") |
|
144 |
writeOGR(stat_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
145 |
|
|
146 |
outfile<-paste("ghcn_data_",var,out_suffix,sep="") #Name of the file |
|
147 |
#writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
148 |
writeOGR(data_reg,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
149 |
|
|
150 |
################################################################### |
|
151 |
### STEP 4: Extract values at stations from covariates stack of raster images |
|
152 |
#Eventually this step may be skipped if the covariates information is stored in the database... |
|
153 |
|
|
154 |
#The names of covariates can be changed... |
|
155 |
rnames<-c("x","y","lon","lat","N","E","N_w","E_w","elev","slope","aspect","CANHEIGHT","DISTOC") |
|
156 |
lc_names<-c("LC1","LC2","LC3","LC4","LC5","LC6","LC7","LC8","LC9","LC10","LC11","LC12") |
|
157 |
lst_names<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12", |
|
158 |
"nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08", |
|
159 |
"nobs_09","nobs_10","nobs_11","nobs_12") |
|
160 |
|
|
161 |
covar_names<-c(rnames,lc_names,lst_names) |
|
162 |
|
|
163 |
s_raster<-stack(infile3) #read in the data stack |
|
164 |
names(s_raster)<-covar_names #Assigning names to the raster layers: making sure it is included in the extraction |
|
165 |
stat_val<- extract(s_raster, data_reg) #Extracting values from the raster stack for every point location in coords data frame. |
|
166 |
|
|
167 |
#create a shape file and data_frame with names |
|
168 |
|
|
169 |
data_RST<-as.data.frame(stat_val) #This creates a data frame with the values extracted |
|
170 |
data_RST_SDF<-cbind(data_reg,data_RST) |
|
171 |
coordinates(data_RST_SDF)<-coordinates(data_reg) #Transforming data_RST_SDF into a spatial point dataframe |
|
172 |
CRS_reg<-proj4string(data_reg) |
|
173 |
proj4string(data_RST_SDF)<-CRS_reg #Need to assign coordinates... |
|
174 |
|
|
175 |
#Creating a date column |
|
176 |
date1<-ISOdate(data_RST_SDF$year,data_RST_SDF$month,data_RST_SDF$day) #Creating a date object from 3 separate column |
|
177 |
date2<-gsub("-","",as.character(as.Date(date1))) |
|
178 |
data_RST_SDF$date<-date2 #Date format (year,month,day) is the following: "20100627" |
|
179 |
|
|
180 |
#This allows to change only one name of the data.frame |
|
181 |
pos<-match("value",names(data_RST_SDF)) #Find column with name "value" |
|
182 |
if (var=="TMAX"){ |
|
183 |
#names(data_RST_SDF)[pos]<-c("TMax") |
|
184 |
data_RST_SDF$value<-data_RST_SDF$value/10 #TMax is the average max temp for monthy data |
|
185 |
} |
|
186 |
|
|
187 |
#write out a new shapefile (including .prj component) |
|
188 |
outfile<-paste("daily_covariates_ghcn_data_",var,out_suffix,sep="") #Name of the file |
|
189 |
writeOGR(data_RST_SDF,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
190 |
|
|
191 |
############################################################### |
|
192 |
######## STEP 5: Preparing monthly averages from the ProstGres database |
|
193 |
|
|
194 |
drv <- dbDriver("PostgreSQL") |
|
195 |
db <- dbConnect(drv, dbname=db.name) |
|
196 |
|
|
197 |
year_start<-2000 |
|
198 |
year_end<-2011 |
|
199 |
time1<-proc.time() #Start stop watch |
|
200 |
list_s<-format_s(stat_reg$STAT_ID) |
|
201 |
data_m<-dbGetQuery(db, paste("SELECT * |
|
202 |
FROM ghcn |
|
203 |
WHERE element=",shQuote(var), |
|
204 |
"AND year>=",year_start, |
|
205 |
"AND year<",year_end, |
|
206 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
207 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
208 |
time_minutes<-time_duration[3]/60 |
|
209 |
|
|
210 |
# do this work outside of (before) this function |
|
211 |
# to avoid making a copy of the data frame inside the function call |
|
212 |
date1<-ISOdate(data_m$year,data_m$month,data_m$day) #Creating a date object from 3 separate column |
|
213 |
date2<-as.POSIXlt(as.Date(date1)) |
|
214 |
data_m$date<-date2 |
|
215 |
#In Venezuela and other regions where there are not many stations...mflag==S should be added..see Durenne etal.2010. |
|
216 |
#d<-subset(data_m,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
217 |
d<-subset(data_m,mflag=="0" | mflag=="S") |
|
218 |
#May need some screeing??? i.e. range of temp and elevation... |
|
219 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
220 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
221 |
|
|
222 |
dst<-merge(d1, stat_reg, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
223 |
|
|
224 |
#This allows to change only one name of the data.frame |
|
225 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
226 |
if (var=="TMAX"){ |
|
227 |
names(dst)[pos]<-c("TMax") |
|
228 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
229 |
} |
|
230 |
#dstjan=dst[dst$month==9,] #dst contains the monthly averages for tmax for every station over 2000-2010 |
|
231 |
|
|
232 |
#Extracting covariates from stack for the monthly dataset... |
|
233 |
coords<- dst[c('lon','lat')] #Define coordinates in a data frame |
|
234 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
235 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
236 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
237 |
|
|
238 |
stations_val<-extract(s_raster,dst_month) #extraction of the infomration at station location |
|
239 |
stations_val<-as.data.frame(stations_val) |
|
240 |
dst_extract<-cbind(dst_month,stations_val) #this is in sinusoidal from the raster stack |
|
241 |
dst<-dst_extract |
|
242 |
|
|
243 |
coords<- dst[c('x','y')] #Define coordinates in a data frame, this is the local x,y |
|
244 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
245 |
proj4string(dst)<-projection(s_raster) #Assign coordinates reference system in PROJ4 format |
|
246 |
|
|
247 |
#### |
|
248 |
#write out a new shapefile (including .prj component) |
|
249 |
outfile<-paste("monthly_covariates_ghcn_data_",var,out_suffix,sep="") #Name of the file |
|
250 |
dst$OID<-1:nrow(dst) #need a unique ID? |
|
251 |
writeOGR(dst,dsn= ".",layer= outfile, driver="ESRI Shapefile",overwrite_layer=TRUE) |
|
252 |
|
|
253 |
##### END OF SCRIPT ########## |
climate/research/oregon/interpolation/GAM_CAI_analysis_raster_prediction_multisampling.R | ||
---|---|---|
1 |
######################################## METHOD COMPARISON ####################################### |
|
2 |
############################ Constant sampling for GAM CAI method ##################################### |
|
3 |
#This script interpolates tmax values using MODIS LST and GHCND station data # |
|
4 |
#interpolation area. It requires the text file of stations and a shape file of the study area. # |
|
5 |
#Note that the projection for both GHCND and study area is lonlat WGS84. # |
|
6 |
#Method is assedsed using constant sampling with variation of validation sample with different # |
|
7 |
#hold out proportions. # |
|
8 |
#AUTHOR: Benoit Parmentier # |
|
9 |
#DATE: 12/27/2012 # |
|
10 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491-- # |
|
11 |
################################################################################################### |
|
12 |
|
|
13 |
###Loading R library and packages |
|
14 |
library(gtools) # loading some useful tools |
|
15 |
library(mgcv) # GAM package by Simon Wood |
|
16 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
17 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
18 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
19 |
library(gstat) # Kriging and co-kriging by Pebesma et al. |
|
20 |
library(fields) # NCAR Spatial Interpolation methods such as kriging, splines |
|
21 |
library(raster) # Hijmans et al. package for raster processing |
|
22 |
library(rasterVis) |
|
23 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
24 |
library(reshape) |
|
25 |
|
|
26 |
### Parameters and argument |
|
27 |
|
|
28 |
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp" #GHCN shapefile containing variables for modeling 2010 |
|
29 |
#infile2<-"list_10_dates_04212012.txt" #List of 10 dates for the regression |
|
30 |
infile2<-"list_365_dates_04212012.txt" |
|
31 |
infile3<-"LST_dates_var_names.txt" #LST dates name |
|
32 |
infile4<-"models_interpolation_05142012.txt" #Interpolation model names |
|
33 |
infile5<-"mean_day244_rescaled.rst" #Raster or grid for the locations of predictions |
|
34 |
#infile6<-"lst_climatology.txt" |
|
35 |
infile6<-"LST_files_monthly_climatology.txt" |
|
36 |
inlistf<-"list_files_05032012.txt" #Stack of images containing the Covariates |
|
37 |
|
|
38 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" #Atlas location |
|
39 |
setwd(path) |
|
40 |
|
|
41 |
#Station location for the study area |
|
42 |
stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE) |
|
43 |
#GHCN Database for 1980-2010 for study area (OR) |
|
44 |
data3<-read.table(paste(path,"/","ghcn_data_TMAXy1980_2010_OR_0602012.txt",sep=""),sep=",", header=TRUE) |
|
45 |
|
|
46 |
nmodels<-9 #number of models running |
|
47 |
y_var_name<-"dailyTmax" #climate variable interpolated |
|
48 |
climgam<-1 #if 1, then GAM is run on the climatology rather than the daily deviation surface... |
|
49 |
predval<-1 #if 1, produce raster prediction |
|
50 |
prop<-0.3 #Proportion of testing retained for validation |
|
51 |
|
|
52 |
seed_number<- 100 #Seed number for random sampling, if seed_number<0, no seed number is used.. |
|
53 |
#out_prefix<-"_365d_GAM_CAI2_const_10222012_" #User defined output prefix |
|
54 |
#out_prefix<-"_365d_GAM_CAI2_const_all_lstd_10272012" #User defined output prefix |
|
55 |
out_prefix<-"_365d_GAM_CAI4_all_12272012" #User defined output prefix |
|
56 |
|
|
57 |
bias_val<-0 #if value 1 then daily training data is used in the bias surface rather than the all monthly stations (added on 07/11/2012) |
|
58 |
bias_prediction<-1 #if value 1 then use GAM for the BIAS prediction otherwise GAM direct reprediction for y_var (daily tmax) |
|
59 |
nb_sample<-1 #number of time random sampling must be repeated for every hold out proportion |
|
60 |
prop_min<-0.3 #if prop_min=prop_max and step=0 then predicitons are done for the number of dates... |
|
61 |
prop_max<-0.3 |
|
62 |
step<-0 |
|
63 |
constant<-0 #if value 1 then use the same sample used in the first date for interpolation over the set of dates |
|
64 |
#projection used in the interpolation of the study area |
|
65 |
CRS_interp<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
66 |
CRS_locs_WGS84<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") #Station coords WGS84 |
|
67 |
|
|
68 |
#This can be entered as textfile or option later...ok for running now on 12/07/2012 |
|
69 |
list_formulas<-vector("list",nmodels) |
|
70 |
|
|
71 |
list_formulas[[1]] <- as.formula("y_var~ s(ELEV_SRTM)", env=.GlobalEnv) |
|
72 |
list_formulas[[2]] <- as.formula("y_var~ s(LST)", env=.GlobalEnv) |
|
73 |
list_formulas[[3]] <- as.formula("y_var~ s(ELEV_SRTM,LST)", env=.GlobalEnv) |
|
74 |
list_formulas[[4]] <- as.formula("y_var~ s(lat)+s(lon)+s(ELEV_SRTM)", env=.GlobalEnv) |
|
75 |
list_formulas[[5]] <- as.formula("y_var~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv) |
|
76 |
list_formulas[[6]] <- as.formula("y_var~ s(lat,lon)+s(ELEV_SRTM)+s(Northness_w,Eastness_w)+s(LST)", env=.GlobalEnv) |
|
77 |
list_formulas[[7]] <- as.formula("y_var~ s(lat,lon)+s(ELEV_SRTM)+s(Northness_w,Eastness_w)+s(LST)+s(LC1)", env=.GlobalEnv) |
|
78 |
list_formulas[[8]] <- as.formula("y_var~ s(lat,lon)+s(ELEV_SRTM)+s(Northness_w,Eastness_w)+s(LST)+s(LC3)", env=.GlobalEnv) |
|
79 |
list_formulas[[9]] <- as.formula("y_var~ s(x_OR83M,y_OR83M)", env=.GlobalEnv) |
|
80 |
|
|
81 |
#source("GAM_CAI_function_multisampling_10252012.R") |
|
82 |
source("GAM_CAI_function_multisampling_12072012.R") |
|
83 |
|
|
84 |
############ START OF THE SCRIPT ################## |
|
85 |
|
|
86 |
###Reading the station data and setting up for models' comparison |
|
87 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
88 |
ghcn<-readOGR(".", filename) #reading shapefile |
|
89 |
|
|
90 |
CRS<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
|
91 |
|
|
92 |
mean_LST<- readGDAL(infile5) #Reading the whole raster in memory. This provides a grid for kriging |
|
93 |
proj4string(mean_LST)<-CRS #Assigning coordinate information to prediction grid. |
|
94 |
|
|
95 |
ghcn <- transform(ghcn,Northness = cos(ASPECT*pi/180)) #Adding a variable to the dataframe |
|
96 |
ghcn <- transform(ghcn,Eastness = sin(ASPECT*pi/180)) #adding variable to the dataframe. |
|
97 |
ghcn <- transform(ghcn,Northness_w = sin(slope*pi/180)*cos(ASPECT*pi/180)) #Adding a variable to the dataframe |
|
98 |
ghcn <- transform(ghcn,Eastness_w = sin(slope*pi/180)*sin(ASPECT*pi/180)) #adding variable to the dataframe. |
|
99 |
|
|
100 |
#Remove NA for LC and CANHEIGHT |
|
101 |
ghcn$LC1[is.na(ghcn$LC1)]<-0 |
|
102 |
ghcn$LC3[is.na(ghcn$LC3)]<-0 |
|
103 |
ghcn$CANHEIGHT[is.na(ghcn$CANHEIGHT)]<-0 |
|
104 |
ghcn$LC4[is.na(ghcn$LC4)]<-0 |
|
105 |
ghcn$LC6[is.na(ghcn$LC6)]<-0 |
|
106 |
|
|
107 |
#Use file.path for to construct pathfor independent os platform? !!! |
|
108 |
dates<-readLines(file.path(path,infile2)) |
|
109 |
#dates <-readLines(paste(path,"/",infile2, sep="")) |
|
110 |
LST_dates <-readLines(paste(path,"/",infile3, sep="")) |
|
111 |
models <-readLines(paste(path,"/",infile4, sep="")) |
|
112 |
|
|
113 |
##Extracting the variables values from the raster files |
|
114 |
|
|
115 |
lines<-read.table(paste(path,"/",inlistf,sep=""), sep=" ") #Column 1 contains the names of raster files |
|
116 |
inlistvar<-lines[,1] |
|
117 |
inlistvar<-paste(path,"/",as.character(inlistvar),sep="") |
|
118 |
covar_names<-as.character(lines[,2]) #Column two contains short names for covaraites |
|
119 |
|
|
120 |
s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables. |
|
121 |
layerNames(s_raster)<-covar_names #Assigning names to the raster layers |
|
122 |
projection(s_raster)<-CRS |
|
123 |
|
|
124 |
#stat_val<- extract(s_raster, ghcn3) #Extracting values from the raster stack for every point location in coords data frame. |
|
125 |
pos<-match("ASPECT",layerNames(s_raster)) #Find column with name "value" |
|
126 |
r1<-raster(s_raster,layer=pos) #Select layer from stack |
|
127 |
pos<-match("slope",layerNames(s_raster)) #Find column with name "value" |
|
128 |
r2<-raster(s_raster,layer=pos) #Select layer from stack |
|
129 |
N<-cos(r1*pi/180) |
|
130 |
E<-sin(r1*pi/180) |
|
131 |
Nw<-sin(r2*pi/180)*cos(r1*pi/180) #Adding a variable to the dataframe |
|
132 |
Ew<-sin(r2*pi/180)*sin(r1*pi/180) #Adding variable to the dataframe. |
|
133 |
|
|
134 |
pos<-match("LC1",layerNames(s_raster)) #Find column with name "value" |
|
135 |
LC1<-raster(s_raster,layer=pos) #Select layer from stack |
|
136 |
s_raster<-dropLayer(s_raster,pos) |
|
137 |
LC1[is.na(LC1)]<-0 #NA must be set to zero. |
|
138 |
pos<-match("LC3",layerNames(s_raster)) #Find column with name "value" |
|
139 |
LC3<-raster(s_raster,layer=pos) #Select layer from stack |
|
140 |
s_raster<-dropLayer(s_raster,pos) |
|
141 |
LC3[is.na(LC3)]<-0 |
|
142 |
|
|
143 |
#Modification added to account for other land cover |
|
144 |
|
|
145 |
pos<-match("LC4",layerNames(s_raster)) #Find column with name "value" |
|
146 |
LC4<-raster(s_raster,layer=pos) #Select layer from stack |
|
147 |
s_raster<-dropLayer(s_raster,pos) |
|
148 |
LC4[is.na(LC4)]<-0 |
|
149 |
|
|
150 |
pos<-match("LC6",layerNames(s_raster)) #Find column with name "value" |
|
151 |
LC6<-raster(s_raster,layer=pos) #Select layer from stack |
|
152 |
s_raster<-dropLayer(s_raster,pos) |
|
153 |
LC6[is.na(LC6)]<-0 |
|
154 |
|
|
155 |
LC_s<-stack(LC1,LC3,LC4,LC6) |
|
156 |
layerNames(LC_s)<-c("LC1_forest","LC3_grass","LC4_crop","LC6_urban") |
|
157 |
#plot(LC_s) |
|
158 |
|
|
159 |
pos<-match("CANHEIGHT",layerNames(s_raster)) #Find column with name "value" |
|
160 |
CANHEIGHT<-raster(s_raster,layer=pos) #Select layer from stack |
|
161 |
s_raster<-dropLayer(s_raster,pos) |
|
162 |
CANHEIGHT[is.na(CANHEIGHT)]<-0 |
|
163 |
pos<-match("ELEV_SRTM",layerNames(s_raster)) #Find column with name "ELEV_SRTM" |
|
164 |
ELEV_SRTM<-raster(s_raster,layer=pos) #Select layer from stack on 10/30 |
|
165 |
s_raster<-dropLayer(s_raster,pos) |
|
166 |
ELEV_SRTM[ELEV_SRTM <0]<-NA |
|
167 |
|
|
168 |
xy<-coordinates(r1) #get x and y projected coordinates... |
|
169 |
xy_latlon<-project(xy, CRS, inv=TRUE) # find lat long for projected coordinats (or pixels...) |
|
170 |
lon<-raster(xy_latlon) #Transform a matrix into a raster object ncol=ncol(r1), nrow=nrow(r1)) |
|
171 |
ncol(lon)<-ncol(r1) |
|
172 |
nrow(lon)<-nrow(r1) |
|
173 |
extent(lon)<-extent(r1) |
|
174 |
projection(lon)<-CRS #At this stage this is still an empty raster with 536 nrow and 745 ncell |
|
175 |
lat<-lon |
|
176 |
values(lon)<-xy_latlon[,1] |
|
177 |
values(lat)<-xy_latlon[,2] |
|
178 |
|
|
179 |
r<-stack(N,E,Nw,Ew,lon,lat,LC1,LC3,LC4,LC6, CANHEIGHT,ELEV_SRTM) |
|
180 |
rnames<-c("Northness","Eastness","Northness_w","Eastness_w", "lon","lat","LC1","LC3","LC4","LC6","CANHEIGHT","ELEV_SRTM") |
|
181 |
layerNames(r)<-rnames |
|
182 |
s_raster<-addLayer(s_raster, r) |
|
183 |
|
|
184 |
#s_sgdf<-as(s_raster,"SpatialGridDataFrame") #Conversion to spatial grid data frame |
|
185 |
|
|
186 |
####### Preparing LST stack of climatology... |
|
187 |
|
|
188 |
#l=list.files(pattern="mean_month.*rescaled.rst") |
|
189 |
l <-readLines(paste(path,"/",infile6, sep="")) |
|
190 |
molst<-stack(l) #Creating a raster stack... |
|
191 |
#setwd(old) |
|
192 |
molst<-molst-273.16 #K->C #LST stack of monthly average... |
|
193 |
idx <- seq(as.Date('2010-01-15'), as.Date('2010-12-15'), 'month') |
|
194 |
molst <- setZ(molst, idx) |
|
195 |
layerNames(molst) <- month.abb |
|
196 |
|
|
197 |
###### Preparing tables for model assessment: specific diagnostic/metrics |
|
198 |
|
|
199 |
#Model assessment: specific diagnostics/metrics |
|
200 |
results_m1<- matrix(1,1,nmodels+3) |
|
201 |
results_m2<- matrix(1,1,nmodels+3) |
|
202 |
results_m3<- matrix(1,1,nmodels+3) |
|
203 |
#results_RMSE_f<- matrix(1,length(models)+3) |
|
204 |
|
|
205 |
#Model assessment: general diagnostic/metrics |
|
206 |
results_RMSE <- matrix(1,1,nmodels+4) |
|
207 |
results_MAE <- matrix(1,1,nmodels+4) |
|
208 |
results_ME <- matrix(1,1,nmodels+4) #There are 8+1 models |
|
209 |
results_R2 <- matrix(1,1,nmodels+4) #Coef. of determination for the validation dataset |
|
210 |
|
|
211 |
results_RMSE_f<- matrix(1,1,nmodels+4) #RMSE fit, RMSE for the training dataset |
|
212 |
results_MAE_f <- matrix(1,1,nmodels+4) |
|
213 |
results_R2_f<-matrix(1,1,nmodels+4) |
|
214 |
######## Preparing monthly averages from the ProstGres database |
|
215 |
|
|
216 |
# do this work outside of (before) this function |
|
217 |
# to avoid making a copy of the data frame inside the function call |
|
218 |
date1<-ISOdate(data3$year,data3$month,data3$day) #Creating a date object from 3 separate column |
|
219 |
date2<-as.POSIXlt(as.Date(date1)) |
|
220 |
data3$date<-date2 |
|
221 |
d<-subset(data3,year>=2000 & mflag=="0" ) #Selecting dataset 2000-2010 with good quality: 193 stations |
|
222 |
#May need some screeing??? i.e. range of temp and elevation... |
|
223 |
d1<-aggregate(value~station+month, data=d, mean) #Calculate monthly mean for every station in OR |
|
224 |
id<-as.data.frame(unique(d1$station)) #Unique station in OR for year 2000-2010: 193 but 7 loss of monthly avg |
|
225 |
|
|
226 |
dst<-merge(d1, stat_loc, by.x="station", by.y="STAT_ID") #Inner join all columns are retained |
|
227 |
|
|
228 |
#This allows to change only one name of the data.frame |
|
229 |
pos<-match("value",names(dst)) #Find column with name "value" |
|
230 |
names(dst)[pos]<-c("TMax") |
|
231 |
dst$TMax<-dst$TMax/10 #TMax is the average max temp for monthy data |
|
232 |
#dstjan=dst[dst$month==9,] #dst contains the monthly averages for tmax for every station over 2000-2010 |
|
233 |
|
|
234 |
#Extracting covariates from stack for the monthly dataset... |
|
235 |
coords<- dst[c('lon','lat')] #Define coordinates in a data frame |
|
236 |
coordinates(dst)<-coords #Assign coordinates to the data frame |
|
237 |
proj4string(dst)<-CRS_locs_WGS84 #Assign coordinates reference system in PROJ4 format |
|
238 |
dst_month<-spTransform(dst,CRS(CRS_interp)) #Project from WGS84 to new coord. system |
|
239 |
|
|
240 |
stations_val<-extract(s_raster,dst_month) #extraction of the infomration at station location |
|
241 |
stations_val<-as.data.frame(stations_val) |
|
242 |
dst_extract<-cbind(dst_month,stations_val) |
|
243 |
dst<-dst_extract |
|
244 |
#Now clean and screen monthly values |
|
245 |
dst_all<-dst |
|
246 |
dst<-subset(dst,dst$TMax>-15 & dst$TMax<40) |
|
247 |
dst<-subset(dst,dst$ELEV_SRTM>0) #This will drop two stations...or 24 rows |
|
248 |
|
|
249 |
######### Preparing daily values for training and testing |
|
250 |
|
|
251 |
#Screening for bad values: value is tmax in this case |
|
252 |
#ghcn$value<-as.numeric(ghcn$value) |
|
253 |
ghcn_all<-ghcn |
|
254 |
ghcn_test<-subset(ghcn,ghcn$value>-150 & ghcn$value<400) |
|
255 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
|
256 |
ghcn<-ghcn_test2 |
|
257 |
#coords<- ghcn[,c('x_OR83M','y_OR83M')] |
|
258 |
|
|
259 |
##Sampling: training and testing sites... |
|
260 |
|
|
261 |
if (seed_number>0) { |
|
262 |
set.seed(seed_number) #Using a seed number allow results based on random number to be compared... |
|
263 |
} |
|
264 |
nel<-length(dates) |
|
265 |
dates_list<-vector("list",nel) #list of one row data.frame |
|
266 |
|
|
267 |
prop_range<-(seq(from=prop_min,to=prop_max,by=step))*100 |
|
268 |
sn<-length(dates)*nb_sample*length(prop_range) |
|
269 |
|
|
270 |
for(i in 1:length(dates)){ |
|
271 |
d_tmp<-rep(dates[i],nb_sample*length(prop_range)) #repeating same date |
|
272 |
s_nb<-rep(1:nb_sample,length(prop_range)) #number of random sample per proportion |
|
273 |
prop_tmp<-sort(rep(prop_range, nb_sample)) |
Also available in: Unified diff
Revert "Merge branch 'ag/interp' of code.nceas.ucsb.edu:environmental-layers into aw/precip"
This reverts commit f9c712987ba814b83b2c2cc058c6c1f9b07933c1, reversing
changes made to f0375becc9fb9e13e55c5b7a48be5c5f98064f87.