Revision b8bf4042
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R | ||
---|---|---|
1 | 1 |
################## Data preparation for interpolation ####################################### |
2 |
########################### TWO-STAGE REGRESSION ###############################################
|
|
2 |
############################ Extraction of station data ##########################################
|
|
3 | 3 |
#This script perform queries on the Postgres database ghcn for stations matching the # |
4 |
#interpolation area. It requires the text file of stations and a shape file of the study area. # |
|
5 |
#Note that the projection for both GHCND and study area is lonlat WGS84. # |
|
4 |
#interpolation area. It requires the following inputs: # |
|
5 |
# 1)the text file ofGHCND stations from NCDC matching the database version release # |
|
6 |
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84 # # |
|
7 |
# 3)a new coordinate system can be provided as an argument # |
|
8 |
# 4)the variable of interest: "TMAX","TMIN" or "PRCP" # |
|
9 |
# # |
|
10 |
#The outputs are text files and a shape file of a time subset of the database # |
|
6 | 11 |
#AUTHOR: Benoit Parmentier # |
7 | 12 |
#DATE: 06/02/212 # |
8 | 13 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363-- # |
... | ... | |
22 | 27 |
year_end<-"2011" #end year for the query (excluded) |
23 | 28 |
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area. |
24 | 29 |
infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND |
30 |
new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
25 | 31 |
|
26 | 32 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas |
27 | 33 |
#path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS |
28 | 34 |
setwd(path) |
29 |
out_prefix<-"y2010_2010_OR_0602012" #User defined output prefix
|
|
35 |
out_prefix<-"y2010_2010_OR_0626012" #User defined output prefix
|
|
30 | 36 |
|
31 |
### Functions |
|
37 |
### Functions used in the script
|
|
32 | 38 |
|
33 | 39 |
format_s <-function(s_ID){ |
34 | 40 |
#Format station ID in a vector format/tuple that is used in a psql query. |
... | ... | |
46 | 52 |
|
47 | 53 |
############ START OF THE SCRIPT ################# |
48 | 54 |
|
49 |
##### STEP 1: Select station in Oregon
|
|
55 |
##### STEP 1: Select station in the study area
|
|
50 | 56 |
|
51 | 57 |
infile1<- "ORWGS84_state_outline.shp" #This is the shape file of outline of the study area. |
52 | 58 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
... | ... | |
75 | 81 |
drv <- dbDriver("PostgreSQL") |
76 | 82 |
db <- dbConnect(drv, dbname=db.name) |
77 | 83 |
|
78 |
z1<-stat_OR$STAT_ID[999:1002] # Selecting three station to check the query and processing time |
|
79 |
l2<-format_s(z1) |
|
80 |
|
|
81 |
d1<-dbGetQuery(db, paste("SELECT * |
|
82 |
FROM ghcn |
|
83 |
WHERE element=",shQuote(var), |
|
84 |
"AND year>=",year_start, |
|
85 |
"AND year<",year_end, |
|
86 |
"AND station IN ",l2,";",sep="")) #Selecting one station |
|
87 |
|
|
88 | 84 |
time1<-proc.time() #Start stop watch |
89 | 85 |
list_s<-format_s(stat_OR$STAT_ID) |
90 |
data<-dbGetQuery(db, paste("SELECT * |
|
86 |
data2<-dbGetQuery(db, paste("SELECT *
|
|
91 | 87 |
FROM ghcn |
92 | 88 |
WHERE element=",shQuote(var), |
93 | 89 |
"AND year>=",year_start, |
94 | 90 |
"AND year<",year_end, |
95 |
"AND station IN ",list_s,";",sep="")) #Selecting one station |
|
96 |
time_duration<-proc.time()-time1 #time for the query? |
|
97 |
|
|
98 |
write.table(data, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",") |
|
99 |
#write.table(d1, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",") |
|
100 |
outfile<-file= paste(path,"/","ghcn_data_",var,out_prefix,".shp",sep="") #Removing extension if it is present |
|
101 |
#writeOGR(data_SDF,".", outfile, driver ="ESRI Shapefile") |
|
91 |
"AND station IN ",list_s,";",sep="")) #Selecting station using a SQL query |
|
92 |
time_duration<-proc.time()-time1 #Time for the query may be long given the size of the database |
|
93 |
time_minutes<-time_duration[3]/60 |
|
94 |
|
|
95 |
data_table<-merge(data2,stat_OR, by.x = "station", by.y = "STAT_ID") |
|
96 |
|
|
97 |
#Transform the subset data frame in a spatial data frame and reproject |
|
98 |
data3<-data_table #Make a copy of the data frame |
|
99 |
coords<- data3[c('lon.1','lat.1')] #Define coordinates in a data frame |
|
100 |
coordinates(data3)<-coords #Assign coordinates to the data frame |
|
101 |
proj4string(data3)<-CRS_interp #Assign coordinates reference system in PROJ4 format |
|
102 |
data_proj<-spTransform(data3,CRS(new_proj)) #Project from WGS84 to new coord. system |
|
103 |
|
|
104 |
### STEP 3: Save results and outuput in textfile and a shape file |
|
105 |
|
|
106 |
#Save a textfile of the locations of meteorological stations in the study area |
|
107 |
write.table(as.data.frame(stat_OR), file=paste(path,"/","location_study_area",out_prefix,".txt",sep=""),sep=",") |
|
108 |
|
|
109 |
#Save a textfile and shape file of all the subset data |
|
110 |
write.table(data_table, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",") |
|
111 |
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="") #Removing extension if it is present |
|
112 |
outfile<-paste("ghcn_data_",var,out_prefix,sep="") #Name of the file |
|
113 |
writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension |
|
102 | 114 |
|
103 | 115 |
##### END OF SCRIPT ########## |
104 |
|
|
105 |
# data<-dbGetQuery(db, paste("SELECT * |
|
106 |
# FROM ghcn |
|
107 |
# WHERE element='TMAX' |
|
108 |
# AND station IN ",t1,";",sep="")) #Selecting one station |
Also available in: Unified diff
OR data preparation task#363, modified code for ghcn to create a shapefile