Revision 95d521f9
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R | ||
---|---|---|
1 |
################## Data preparation for interpolation ####################################### |
|
2 |
########################### TWO-STAGE REGRESSION ############################################### |
|
3 |
#This script perform queries on the Postgres database ghcn for stations matching the # |
|
4 |
#interpolation area. It requires the text file of stations and a shape file of the study area. # |
|
5 |
#Note that the projection for both GHCND and study area is lonlat WGS84. # |
|
6 |
#AUTHOR: Benoit Parmentier # |
|
7 |
#DATE: 06/02/212 # |
|
8 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- # |
|
9 |
################################################################################################## |
|
10 |
|
|
11 |
###Loading R library and packages |
|
12 |
library(RPostgreSQL) |
|
13 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
|
14 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
|
15 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
|
16 |
|
|
17 |
### Parameters and arguments |
|
18 |
|
|
19 |
db.name <- "ghcn" #name of the Postgres database |
|
20 |
var <- "TMAX" #name of the variables to keep: TMIN, TMAX or PRCP |
|
21 |
year_start<-"2010" #starting year for the query (included) |
|
22 |
year_end<-"2011" #end year for the query (excluded) |
|
23 |
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area. |
|
24 |
infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
|
25 |
|
|
26 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas |
|
27 |
#path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS |
|
28 |
setwd(path) |
|
29 |
out_prefix<-"y2010_2010_OR_0602012" #User defined output prefix |
|
30 |
|
|
31 |
### Functions |
|
32 |
|
|
33 |
format_s <-function(s_ID){ |
|
34 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
35 |
# Argument 1: vector of station ID |
|
36 |
# Return: character of station ID |
|
37 |
tx2<-s_ID |
|
38 |
tx2<-as.character(tx2) |
|
39 |
stat_list<-tx2 |
|
40 |
temp<-shQuote(stat_list) |
|
41 |
t<-paste(temp, collapse= " ") |
|
42 |
t1<-gsub(" ", ",",t) |
|
43 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
44 |
return(sf_ID) |
|
45 |
} |
|
46 |
|
|
47 |
############ START OF THE SCRIPT ################# |
|
48 |
|
|
49 |
##### STEP 1: Select station in Oregon |
|
50 |
|
|
51 |
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area. |
|
52 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
53 |
interp_area <- readOGR(".",filename) |
|
54 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
|
55 |
|
|
56 |
dat_stat <- read.fwf("ghcnd-stations.txt", widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
57 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
58 |
coords<- dat_stat[,c('lon','lat')] |
|
59 |
coordinates(dat_stat)<-coords |
|
60 |
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") |
|
61 |
#proj4string(dat_stat)<-locs_coord |
|
62 |
proj4string(dat_stat)<-CRS_interp |
|
63 |
|
|
64 |
# Spatial query to find relevant stations |
|
65 |
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
|
66 |
stat_OR<-dat_stat[inside,] #Finding stations contained in the current interpolation area |
|
67 |
|
|
68 |
#Quick visualization of station locations |
|
69 |
plot(interp_area, axes =TRUE) |
|
70 |
plot(stat_OR, pch=1, col="red", cex= 0.7, add=TRUE) |
|
71 |
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6) |
|
72 |
|
|
73 |
#### STEP 2: Connecting to the database and query for relevant data |
|
74 |
|
|
75 |
drv <- dbDriver("PostgreSQL") |
|
76 |
db <- dbConnect(drv, dbname=db.name) |
|
77 |
|
|
78 |
z1<-stat_OR$STAT_ID[999:1002] # Selecting three station to check the query and processing time |
|
79 |
l2<-format_s(z1) |
|
80 |
|
|
81 |
d1<-dbGetQuery(db, paste("SELECT * |
|
82 |
FROM ghcn |
|
83 |
WHERE element=",shQuote(var), |
|
84 |
"AND year>=",year_start, |
|
85 |
"AND year<",year_end, |
|
86 |
"AND station IN ",l2,";",sep="")) #Selecting one station |
|
87 |
|
|
88 |
time1<-proc.time() #Start stop watch |
|
89 |
list_s<-format_s(stat_OR$STAT_ID) |
|
90 |
data<-dbGetQuery(db, paste("SELECT * |
|
91 |
FROM ghcn |
|
92 |
WHERE element=",shQuote(var), |
|
93 |
"AND year>=",year_start, |
|
94 |
"AND year<",year_end, |
|
95 |
"AND station IN ",list_s,";",sep="")) #Selecting one station |
|
96 |
time_duration<-proc.time()-time1 #time for the query? |
|
97 |
|
|
98 |
write.table(data, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",") |
|
99 |
#write.table(d1, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",") |
|
100 |
outfile<-file= paste(path,"/","ghcn_data_",var,out_prefix,".shp",sep="") #Removing extension if it is present |
|
101 |
#writeOGR(data_SDF,".", outfile, driver ="ESRI Shapefile") |
|
102 |
|
|
103 |
##### END OF SCRIPT ########## |
|
104 |
|
|
105 |
# data<-dbGetQuery(db, paste("SELECT * |
|
106 |
# FROM ghcn |
|
107 |
# WHERE element='TMAX' |
|
108 |
# AND station IN ",t1,";",sep="")) #Selecting one station |
Also available in: Unified diff
GHNCD station data selection using Postgres database, initial commit, task #363