Revision 2b88b158
Added by Adam M. Wilson over 12 years ago
climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R | ||
---|---|---|
18 | 18 |
library(sp) # Spatial pacakge with class definition by Bivand et al. |
19 | 19 |
library(spdep) # Spatial pacakge with methods and spatial stat. by Bivand et al. |
20 | 20 |
library(rgdal) # GDAL wrapper for R, spatial utilities |
21 |
library(rgeos) # Polygon buffering and other vector operations |
|
21 | 22 |
|
22 | 23 |
### Parameters and arguments |
23 | 24 |
|
24 | 25 |
db.name <- "ghcn" #name of the Postgres database |
25 |
var <- "TMAX" #name of the variables to keep: TMIN, TMAX or PRCP
|
|
26 |
var <- "PRCP" #name of the variables to keep: TMIN, TMAX or PRCP
|
|
26 | 27 |
year_start<-"2010" #starting year for the query (included) |
27 | 28 |
year_end<-"2011" #end year for the query (excluded) |
28 | 29 |
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area. |
30 |
buffer=100 # size of buffer around shapefile to include in spatial query (in km), set to 0 to use no buffer |
|
29 | 31 |
infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
30 | 32 |
new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
31 | 33 |
|
32 | 34 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas |
33 | 35 |
#path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS |
36 |
|
|
37 |
outpath=path # create different output path because we don't have write access to other's home dirs |
|
38 |
#outpath="/home/wilson/data" |
|
34 | 39 |
setwd(path) |
35 |
out_prefix<-"y2010_2010_OR_0626012" #User defined output prefix |
|
36 |
|
|
37 |
### Functions used in the script |
|
38 |
|
|
39 |
format_s <-function(s_ID){ |
|
40 |
#Format station ID in a vector format/tuple that is used in a psql query. |
|
41 |
# Argument 1: vector of station ID |
|
42 |
# Return: character of station ID |
|
43 |
tx2<-s_ID |
|
44 |
tx2<-as.character(tx2) |
|
45 |
stat_list<-tx2 |
|
46 |
temp<-shQuote(stat_list) |
|
47 |
t<-paste(temp, collapse= " ") |
|
48 |
t1<-gsub(" ", ",",t) |
|
49 |
sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query |
|
50 |
return(sf_ID) |
|
51 |
} |
|
40 |
out_prefix<-"y2010_2010_OR_20110705" #User defined output prefix |
|
41 |
|
|
52 | 42 |
|
53 | 43 |
############ START OF THE SCRIPT ################# |
54 | 44 |
|
45 |
##### Connect to Station database |
|
46 |
drv <- dbDriver("PostgreSQL") |
|
47 |
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m") |
|
48 |
|
|
55 | 49 |
##### STEP 1: Select station in the study area |
56 | 50 |
|
57 | 51 |
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area. |
... | ... | |
59 | 53 |
interp_area <- readOGR(".",filename) |
60 | 54 |
CRS_interp<-proj4string(interp_area) #Storing the coordinate information: geographic coordinates longlat WGS84 |
61 | 55 |
|
62 |
dat_stat <- read.fwf("ghcnd-stations.txt", widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE) |
|
63 |
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID") |
|
64 |
coords<- dat_stat[,c('lon','lat')] |
|
65 |
coordinates(dat_stat)<-coords |
|
66 |
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0") |
|
67 |
#proj4string(dat_stat)<-locs_coord |
|
56 |
##### Buffer shapefile if desired |
|
57 |
## This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions |
|
58 |
if(buffer>0){ #only apply buffer if buffer >0 |
|
59 |
interp_area=gUnionCascaded(interp_area) #dissolve any subparts of roi (if there are islands, lakes, etc.) |
|
60 |
interp_areaC=gCentroid(interp_area) # get centroid of region |
|
61 |
interp_areaB=spTransform( # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84) |
|
62 |
gBuffer( |
|
63 |
spTransform(interp_area, |
|
64 |
CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))), |
|
65 |
width=buffer*1000), # convert buffer (km) to meters |
|
66 |
CRS(CRS_interp)) # reproject back to original projection |
|
67 |
interp_area=interp_areaB # replace original region with buffered region |
|
68 |
} |
|
69 |
|
|
70 |
## get bounding box of study area |
|
71 |
bbox=bbox(interp_area) |
|
72 |
|
|
73 |
### read in station location information from database |
|
74 |
### use the bbox of the region to include only station in rectangular region to speed up overlay |
|
75 |
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude |
|
76 |
FROM stations |
|
77 |
WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2]," |
|
78 |
AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2]," |
|
79 |
;",sep="")) |
|
80 |
coordinates(dat_stat)<-c("longitude","latitude") |
|
68 | 81 |
proj4string(dat_stat)<-CRS_interp |
69 | 82 |
|
83 |
|
|
70 | 84 |
# Spatial query to find relevant stations |
71 | 85 |
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area |
72 | 86 |
stat_OR<-dat_stat[inside,] #Finding stations contained in the current interpolation area |
... | ... | |
78 | 92 |
|
79 | 93 |
#### STEP 2: Connecting to the database and query for relevant data |
80 | 94 |
|
81 |
drv <- dbDriver("PostgreSQL") |
|
82 |
db <- dbConnect(drv, dbname=db.name) |
|
83 |
|
|
84 |
time1<-proc.time() #Start stop watch |
|
85 |
list_s<-format_s(stat_OR$STAT_ID) |
|
86 |
data2<-dbGetQuery(db, paste("SELECT * |
|
87 |
FROM ghcn |
|
88 |
WHERE element=",shQuote(var), |
|
89 |
"AND year>=",year_start, |
|
90 |
"AND year<",year_end, |
|
91 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
92 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
93 |
time_minutes<-time_duration[3]/60 |
|
94 |
|
|
95 |
data_table<-merge(data2,stat_OR, by.x = "station", by.y = "STAT_ID") |
|
96 |
|
|
95 |
## Query to link station location information and observations |
|
96 |
## Concatenate date columns into single field for easy convert to date |
|
97 |
## Divide value by 10 to convert to degrees C and mm |
|
98 |
## Subset to years in year_start -> year_end |
|
99 |
## Drop missing values (-9999) |
|
100 |
## Drop observations that failed quality control (keep only qflag==NA) |
|
101 |
system.time( |
|
102 |
data_table<<-dbGetQuery(db, |
|
103 |
paste("SELECT station,year||'-'||month||'-'||day AS date,value / 10.0 as value,latitude,longitude,elevation |
|
104 |
FROM ghcn, stations |
|
105 |
WHERE station = id |
|
106 |
AND id IN ('",paste(stat_OR$id,collapse="','"),"') |
|
107 |
AND element='",var,"' |
|
108 |
AND year>=",year_start," |
|
109 |
AND year<",year_end," |
|
110 |
AND value<>-9999 |
|
111 |
AND qflag IS NULL |
|
112 |
;",sep="")) |
|
113 |
) ### print used time in seconds - only taking ~30 seconds.... |
|
114 |
|
|
115 |
|
|
97 | 116 |
#Transform the subset data frame in a spatial data frame and reproject |
98 | 117 |
data3<-data_table #Make a copy of the data frame |
99 |
coords<- data3[c('lon.1','lat.1')] #Define coordinates in a data frame
|
|
118 |
coords<- data3[c('longitude','latitude')] #Define coordinates in a data frame
|
|
100 | 119 |
coordinates(data3)<-coords #Assign coordinates to the data frame |
101 | 120 |
proj4string(data3)<-CRS_interp #Assign coordinates reference system in PROJ4 format |
102 | 121 |
data_proj<-spTransform(data3,CRS(new_proj)) #Project from WGS84 to new coord. system |
... | ... | |
104 | 123 |
### STEP 3: Save results and outuput in textfile and a shape file |
105 | 124 |
|
106 | 125 |
#Save a textfile of the locations of meteorological stations in the study area |
107 |
write.table(as.data.frame(stat_OR), file=paste(path,"/","location_study_area",out_prefix,".txt",sep=""),sep=",")
|
|
126 |
write.table(as.data.frame(stat_OR), file=paste(outpath,"/","location_study_area_",out_prefix,".txt",sep=""),sep=",")
|
|
108 | 127 |
|
109 | 128 |
#Save a textfile and shape file of all the subset data |
110 |
write.table(data_table, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",") |
|
129 |
write.table(data_table, file= paste(outpath,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
|
|
111 | 130 |
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="") #Removing extension if it is present |
112 |
outfile<-paste("ghcn_data_",var,out_prefix,sep="") #Name of the file
|
|
131 |
outfile<-paste(outpath,"/ghcn_data_",var,out_prefix,sep="") #Name of the file
|
|
113 | 132 |
writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
114 | 133 |
|
115 | 134 |
##### END OF SCRIPT ########## |
Also available in: Unified diff
Updated GHCN_stations script to:
1) draw station location information from postgres database and perform merge/join query within database rather than r
2) add function to buffer region of interest to include stations outside region (to minimize edge effects)