Project

General

Profile

« Previous | Next » 

Revision b8bf4042

Added by Benoit Parmentier over 12 years ago

OR data preparation task#363, modified code for ghcn to create a shapefile

View differences:

climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R
1 1
##################    Data preparation for interpolation   #######################################
2
###########################  TWO-STAGE REGRESSION  ###############################################
2
############################ Extraction of station data ##########################################
3 3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the text file of stations and a shape file of the study area.   #       
5
#Note that the projection for both GHCND and study area is lonlat WGS84.                         #
4
#interpolation area. It requires the following inputs:                                           #
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              #
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #                                                     #       
7
# 3)a new coordinate system can be provided as an argument                                       #
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
9
#                                                                                                #
10
#The outputs are text files and a shape file of a time subset of the database                    #
6 11
#AUTHOR: Benoit Parmentier                                                                       #
7 12
#DATE: 06/02/212                                                                                 #
8 13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
......
22 27
year_end<-"2011"                 #end year for the query (excluded)
23 28
infile1<- "ORWGS84_state_outline.shp"                     #This is the shape file of outline of the study area.                
24 29
infile2<-"ghcnd-stations.txt"                             #This is the textfile of station locations from GHCND
30
new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
25 31

  
26 32
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
27 33
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
28 34
setwd(path) 
29
out_prefix<-"y2010_2010_OR_0602012"                                                 #User defined output prefix
35
out_prefix<-"y2010_2010_OR_0626012"                                                 #User defined output prefix
30 36

  
31
### Functions
37
### Functions used in the script
32 38

  
33 39
format_s <-function(s_ID){
34 40
  #Format station ID in a vector format/tuple that is used in a psql query.
......
46 52

  
47 53
############ START OF THE SCRIPT #################
48 54

  
49
##### STEP 1: Select station in Oregon
55
##### STEP 1: Select station in the study area
50 56

  
51 57
infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
52 58
filename<-sub(".shp","",infile1)             #Removing the extension from file.
......
75 81
drv <- dbDriver("PostgreSQL")
76 82
db <- dbConnect(drv, dbname=db.name)
77 83

  
78
z1<-stat_OR$STAT_ID[999:1002] # Selecting three station to check the query and processing time
79
l2<-format_s(z1)  
80

  
81
d1<-dbGetQuery(db, paste("SELECT *
82
      FROM ghcn
83
      WHERE element=",shQuote(var),
84
      "AND year>=",year_start,
85
      "AND year<",year_end,
86
      "AND station IN ",l2,";",sep=""))  #Selecting one station
87

  
88 84
time1<-proc.time()    #Start stop watch
89 85
list_s<-format_s(stat_OR$STAT_ID)
90
data<-dbGetQuery(db, paste("SELECT *
86
data2<-dbGetQuery(db, paste("SELECT *
91 87
      FROM ghcn
92 88
      WHERE element=",shQuote(var),
93 89
      "AND year>=",year_start,
94 90
      "AND year<",year_end,
95
      "AND station IN ",list_s,";",sep=""))  #Selecting one station 
96
time_duration<-proc.time()-time1 #time for the query?
97

  
98
write.table(data, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
99
#write.table(d1, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
100
outfile<-file= paste(path,"/","ghcn_data_",var,out_prefix,".shp",sep="")   #Removing extension if it is present
101
#writeOGR(data_SDF,".", outfile, driver ="ESRI Shapefile")
91
      "AND station IN ",list_s,";",sep=""))  #Selecting station using a SQL query
92
time_duration<-proc.time()-time1             #Time for the query may be long given the size of the database
93
time_minutes<-time_duration[3]/60
94
  
95
data_table<-merge(data2,stat_OR, by.x = "station", by.y = "STAT_ID")
96

  
97
#Transform the subset data frame in a spatial data frame and reproject
98
data3<-data_table                               #Make a copy of the data frame
99
coords<- data3[c('lon.1','lat.1')]              #Define coordinates in a data frame
100
coordinates(data3)<-coords                      #Assign coordinates to the data frame
101
proj4string(data3)<-CRS_interp                  #Assign coordinates reference system in PROJ4 format
102
data_proj<-spTransform(data3,CRS(new_proj))     #Project from WGS84 to new coord. system
103

  
104
### STEP 3: Save results and outuput in textfile and a shape file
105

  
106
#Save a textfile of the locations of meteorological stations in the study area
107
write.table(as.data.frame(stat_OR), file=paste(path,"/","location_study_area",out_prefix,".txt",sep=""),sep=",")
108

  
109
#Save a textfile and shape file of all the subset data
110
write.table(data_table, file= paste(path,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
111
#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="")   #Removing extension if it is present
112
outfile<-paste("ghcn_data_",var,out_prefix,sep="")         #Name of the file
113
writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
102 114

  
103 115
##### END OF SCRIPT ##########
104

  
105
# data<-dbGetQuery(db, paste("SELECT *
106
#       FROM ghcn
107
#       WHERE element='TMAX'
108
#       AND station IN ",t1,";",sep=""))  #Selecting one station

Also available in: Unified diff