Project

General

Profile

« Previous | Next » 

Revision 2b88b158

Added by Adam M. Wilson almost 12 years ago

Updated GHCN_stations script to:

1) draw station location information from postgres database and perform merge/join query within database rather than r
2) add function to buffer region of interest to include stations outside region (to minimize edge effects)

View differences:

climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R
18 18
library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
19 19
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
20 20
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
21
library(rgeos)                                          # Polygon buffering and other vector operations
21 22

  
22 23
### Parameters and arguments
23 24

  
24 25
db.name <- "ghcn"                #name of the Postgres database
25
var <- "TMAX"                    #name of the variables to keep: TMIN, TMAX or PRCP
26
var <- "PRCP"                    #name of the variables to keep: TMIN, TMAX or PRCP
26 27
year_start<-"2010"               #starting year for the query (included)
27 28
year_end<-"2011"                 #end year for the query (excluded)
28 29
infile1<- "ORWGS84_state_outline.shp"                     #This is the shape file of outline of the study area.                
30
buffer=100                      #  size of buffer around shapefile to include in spatial query (in km), set to 0 to use no buffer
29 31
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
30 32
new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
31 33

  
32 34
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
33 35
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
36

  
37
outpath=path                                                              # create different output path because we don't have write access to other's home dirs
38
#outpath="/home/wilson/data"
34 39
setwd(path) 
35
out_prefix<-"y2010_2010_OR_0626012"                                                 #User defined output prefix
36

  
37
### Functions used in the script
38

  
39
format_s <-function(s_ID){
40
  #Format station ID in a vector format/tuple that is used in a psql query.
41
  # Argument 1: vector of station ID
42
  # Return: character of station ID
43
  tx2<-s_ID
44
  tx2<-as.character(tx2)
45
  stat_list<-tx2
46
  temp<-shQuote(stat_list)
47
  t<-paste(temp, collapse= " ")
48
  t1<-gsub(" ", ",",t)
49
  sf_ID<-paste("(",t1,")",sep="") #vector containing the station ID to query
50
  return(sf_ID)
51
}
40
out_prefix<-"y2010_2010_OR_20110705"                                                 #User defined output prefix
41

  
52 42

  
53 43
############ START OF THE SCRIPT #################
54 44

  
45
#####  Connect to Station database
46
drv <- dbDriver("PostgreSQL")
47
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
48

  
55 49
##### STEP 1: Select station in the study area
56 50

  
57 51
infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
......
59 53
interp_area <- readOGR(".",filename)
60 54
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
61 55

  
62
dat_stat <- read.fwf("ghcnd-stations.txt", widths = c(11,9,10,7,3,31,4,4,6),fill=TRUE)
63
colnames(dat_stat)<-c("STAT_ID","lat","lon","elev","state","name","GSNF","HCNF","WMOID")
64
coords<- dat_stat[,c('lon','lat')]
65
coordinates(dat_stat)<-coords
66
locs_coord<-CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +towgs84=0,0,0")
67
#proj4string(dat_stat)<-locs_coord
56
#####  Buffer shapefile if desired
57
##     This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions
58
if(buffer>0){  #only apply buffer if buffer >0
59
  interp_area=gUnionCascaded(interp_area)  #dissolve any subparts of roi (if there are islands, lakes, etc.)
60
  interp_areaC=gCentroid(interp_area)       # get centroid of region
61
  interp_areaB=spTransform(                # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
62
    gBuffer(
63
      spTransform(interp_area,
64
                  CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
65
      width=buffer*1000),                  # convert buffer (km) to meters
66
    CRS(CRS_interp))                       # reproject back to original projection
67
  interp_area=interp_areaB                 # replace original region with buffered region
68
}
69

  
70
## get bounding box of study area
71
bbox=bbox(interp_area)
72

  
73
### read in station location information from database
74
### use the bbox of the region to include only station in rectangular region to speed up overlay
75
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
76
                  FROM stations
77
                  WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
78
                  AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
79
                  ;",sep=""))
80
coordinates(dat_stat)<-c("longitude","latitude")
68 81
proj4string(dat_stat)<-CRS_interp
69 82

  
83

  
70 84
# Spatial query to find relevant stations
71 85
inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
72 86
stat_OR<-dat_stat[inside,]                                            #Finding stations contained in the current interpolation area
......
78 92

  
79 93
#### STEP 2: Connecting to the database and query for relevant data 
80 94

  
81
drv <- dbDriver("PostgreSQL")
82
db <- dbConnect(drv, dbname=db.name)
83

  
84
time1<-proc.time()    #Start stop watch
85
list_s<-format_s(stat_OR$STAT_ID)
86
data2<-dbGetQuery(db, paste("SELECT *
87
      FROM ghcn
88
      WHERE element=",shQuote(var),
89
      "AND year>=",year_start,
90
      "AND year<",year_end,
91
      "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
92
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
93
time_minutes<-time_duration[3]/60
94
  
95
data_table<-merge(data2,stat_OR, by.x = "station", by.y = "STAT_ID")
96

  
95
##  Query to link station location information and observations
96
##  Concatenate date columns into single field for easy convert to date
97
##  Divide value by 10 to convert to degrees C and mm
98
##  Subset to years in year_start -> year_end
99
##  Drop missing values (-9999)
100
##  Drop observations that failed quality control (keep only qflag==NA)
101
system.time(
102
  data_table<<-dbGetQuery(db,
103
                          paste("SELECT station,year||'-'||month||'-'||day AS date,value / 10.0 as value,latitude,longitude,elevation
104
                                 FROM ghcn, stations
105
                                 WHERE station = id
106
                                 AND id IN ('",paste(stat_OR$id,collapse="','"),"')
107
                                 AND element='",var,"'
108
                                 AND year>=",year_start,"
109
                                 AND year<",year_end,"
110
                                 AND value<>-9999
111
                                 AND qflag IS NULL
112
                                 ;",sep=""))
113
  )  ### print used time in seconds  - only taking ~30 seconds....
114

  
115
 
97 116
#Transform the subset data frame in a spatial data frame and reproject
98 117
data3<-data_table                               #Make a copy of the data frame
99
coords<- data3[c('lon.1','lat.1')]              #Define coordinates in a data frame
118
coords<- data3[c('longitude','latitude')]              #Define coordinates in a data frame
100 119
coordinates(data3)<-coords                      #Assign coordinates to the data frame
101 120
proj4string(data3)<-CRS_interp                  #Assign coordinates reference system in PROJ4 format
102 121
data_proj<-spTransform(data3,CRS(new_proj))     #Project from WGS84 to new coord. system
......
104 123
### STEP 3: Save results and outuput in textfile and a shape file
105 124

  
106 125
#Save a textfile of the locations of meteorological stations in the study area
107
write.table(as.data.frame(stat_OR), file=paste(path,"/","location_study_area",out_prefix,".txt",sep=""),sep=",")
126
write.table(as.data.frame(stat_OR), file=paste(outpath,"/","location_study_area_",out_prefix,".txt",sep=""),sep=",")
108 127

  
109 128
#Save a textfile and shape file of all the subset data
110
write.table(data_table, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
129
write.table(data_table, file= paste(outpath,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
111 130
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="")   #Removing extension if it is present
112
outfile<-paste("ghcn_data_",var,out_prefix,sep="")         #Name of the file
131
outfile<-paste(outpath,"/ghcn_data_",var,out_prefix,sep="")         #Name of the file
113 132
writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
114 133

  
115 134
##### END OF SCRIPT ##########

Also available in: Unified diff