Project

General

Profile

« Previous | Next » 

Revision 0924578a

Added by Adam Wilson about 12 years ago

Added script to evaluate climatic stationarity (Task #479)

View differences:

climate/research/LST_landcover_exploration.R
23 23

  
24 24

  
25 25
### copy lulc data to litoria
26
setwd("data/lulc")
27
system("scp atlas:/home/parmentier/data_Oregon_stations/W_Layer* .")
26
#setwd("data/lulc")
27
#system("scp atlas:/home/parmentier/data_Oregon_stations/W_Layer* .")
28 28

  
29 29

  
30 30
setwd("/home/adamw/acrobates/projects/interp")       
......
43 43
d2[,c("lon","lat")]=coordinates(st2)[match(d2$id,st2$id),]
44 44
d2$elev=st2$elev[match(d2$id,st2$id)]
45 45
d2$month=format(d2$date,"%m")
46
#d2$value=d2$value/10 #convert to mm
46
d2$value=d2$value/10 #convert to mm
47 47

  
48 48

  
49 49
## load topographical data
50
topo=brick(as.list(list.files("data/topography",pattern="rst$",full=T)))
50
topo=brick(as.list(list.files("data/regions/oregon/topo",pattern="SRTM.*rst$",full=T)))
51 51
topo=calc(topo,function(x) ifelse(x<0,NA,x))
52 52
names(topo)=c("aspect","dem","slope")
53 53
colnames(topo@data@values)=c("aspect","dem","slope")
......
81 81

  
82 82

  
83 83
### load the lulc data as a brick
84
lulc=brick(as.list(list.files("data/lulc",pattern="rst$",full=T)))
84
lulc=brick(as.list(list.files("data/regions/oregon/lulc",pattern="rst$",full=T)))
85 85
#projection(lulc)=
86 86
#plot(lulc)
87 87

  
......
102 102
lulc=calc(lulc,function(x) ifelse(is.na(x),0,x))
103 103
projection(lulc)=projs
104 104

  
105
### reclass/sum classes
106
ShrubGrass=subset(lulc,"Shrub")+subset(lulc,"Grass");layerNames(ShrubGrass)="ShrubGrass"
107
Other=subset(lulc,"Mosaic")+subset(lulc,"Barren")+subset(lulc,"Snow")+subset(lulc,"Wetland");layerNames(Other)="Other"
108
lulc2=stack(subset(lulc,"Forest"),subset(lulc,"Urban"),subset(lulc,"Crop"),Other,ShrubGrass)
109

  
105 110
### load the LST data
106
lst=brick(as.list(list.files("data/lst",pattern="rescaled.rst$",full=T)[c(4:12,1:3)]))
111
lst=brick(as.list(list.files("data/regions/oregon/lst",pattern="rescaled.rst$",full=T)[c(4:12,1:3)]))
107 112
lst=lst-273.15
108 113
colnames(lst@data@values)=format(as.Date(paste("2000-",as.numeric(gsub("[a-z]|[A-Z]|[_]|83","",layerNames(lst))),"-15",sep="")),"%b")
109 114
layerNames(lst)=format(as.Date(paste("2000-",as.numeric(gsub("[a-z]|[A-Z]|[_]|83","",layerNames(lst))),"-15",sep="")),"%b")
......
112 117

  
113 118
######################################
114 119
## compare LULC with station data
115
st2=SpatialPointsDataFrame(st2,data=cbind.data.frame(st2@data,demb=extract(demb,st2),extract(topo,st2),extract(topo2,st2),extract(lulc,st2),extract(lst,st2)))
116
stlulc=extract(lulc,st2) #overlay stations and LULC values
120
st2=st2[!is.na(extract(demb,st2)),]
121
st2=SpatialPointsDataFrame(st2,data=cbind.data.frame(st2@data,demb=extract(demb,st2),extract(topo,st2),extract(topo2,st2),extract(lulc2,st2,buffer=1500,fun=mean),extract(lst,st2)))
122
stlulc=extract(lulc2,st2,buffer=1500,fun=mean) #overlay stations and LULC values
117 123
st2$lulc=do.call(c,lapply(apply(stlulc,1,function(x) which.max(x)),function(x) ifelse(is.null(names(x)),NA,names(x))))
118 124

  
119 125

  
126
###  add MODIS metric to station data for month corresponding to that date
127
### reshape for easy merging
128
sdata.ul=melt(st2@data,id.vars=c("id","lat","lon","Forest","ShrubGrass","Crop","Urban","Other","lulc"),measure.vars=format(as.Date(paste("2000-",1:12,"-15",sep="")),"%b"))
129

  
120 130
### generate sample of points to speed processing
121 131
n=10000/length(unique(demb))
122 132
n2=30  #number of knots
......
174 184
#ms1[ms1$parm%in%c("x","y","dem"),c("Q2.5","Q50","Q97.5")]=ms1[ms1$parm%in%c("x","y","dem"),c("Q2.5","Q50","Q97.5")]
175 185

  
176 186

  
187
#######################################################################
188
#### look at interaction of tmax~lst*lulc using monthly means
189
### add monthly data to sdata table by matching unique station_month ids.
190
d2$month=as.numeric(format(d2$date,"%m"))
191
### get monthly means and sd's
192
dm=melt(cast(d2,id+lon+lat+elev~month,value="value",fun.aggregate=mean,na.rm=T),id.vars=c("id","lon","lat","elev"));colnames(dm)[grep("value",colnames(dm))]="mean"
193
ds=melt(cast(d2,id+lon+lat+elev~month,value="value",fun.aggregate=sd,na.rm=T),id.vars=c("id","lon","lat","elev"))  #sd of tmax
194
dn=melt(cast(d2,id+lon+lat+elev~month,value="value",fun.aggregate=length),id.vars=c("id","lon","lat","elev"))  #number of observations
195
dm$sd=ds$value
196
dm$n=dn$value[match(paste(dm$month,dm$id),paste(dn$month,dn$id))]/max(dn$value)  # % complete record
197

  
198
#get lulc classes
199
lcs=layerNames(lulc2)
200

  
201
dm$lst=sdata.ul$value[match(paste(dm$id,format(as.Date(paste("2000-",dm$month,"-15",sep=""),"%Y-%m-%d"),"%b"),sep="_"),paste(sdata.ul$id,sdata.ul$variable,sep="_"))]
202
dm[,lcs]=sdata.ul[match(dm$id,sdata.ul$id),lcs]
203
dm=dm[!is.na(dm$ShrubGrass),]
204
dm$class=lcs[apply(dm[,lcs],1,which.max)]
205
## update month names
206
dm$m2=format(as.Date(paste("2000-",dm$month,"-15",sep="")),"%B")
207
dm$m2=factor(as.character(dm$m2),levels=format(as.Date(paste("2000-",1:12,"-15",sep="")),"%B"),ordered=T)
208

  
209
xyplot(mean~lst|m2,groups=class,data=dm,panel=function(x,y,subscripts,groups){  #+cut(dm$elev,breaks=quantile(dm$elev,seq(0,1,len=4)),labels=c("low","medium","high"))
210
  dt=dm[subscripts,]
211
  #panel.segments(dt$lst,dt$mean-dt$sd,dt$lst,dt$mean+dt$sd,groups=groups,lwd=.5,col="#C1CDCD")
212
  panel.xyplot(dt$lst,dt$mean,groups=groups,subscripts=subscripts,type=c("p","r"),cex=0.5)
213
  panel.abline(0,1,col="black",lwd=2)
214
},par.settings = list(superpose.symbol = list(pch=1:6,col=c("lightgreen","darkgreen","grey","brown","red"))),
215
       auto.key=list(space="right"),scales=list(relation="free"),
216
       sub="Each point represents a monthly mean (climatology) for a single station \n Points are colored by LULC class with largest % \n Heavy black line is y=x",main="Monthly Mean LST and Monthly Mean Tmax",
217
       ylab="Mean Monthly Tmax (C)",xlab="Mean Monthly LST")
218

  
219

  
220
mods=data.frame(
221
  form=c(
222
    "mean~lst+elev",
223
    "mean~lst+elev+ShrubGrass+Urban+Crop+Other",
224
    "mean~lst+elev+lst*ShrubGrass+lst*Urban+lst*Crop+lst*Other"
225
    ),
226
  type=c("lst","intercept","interact"),
227
  stringsAsFactors=F)
228
mods2=expand.grid(form=mods$form,month=1:12)
229
mods2$type=mods$type[match(mods2$form,mods$form)]
230

  
231

  
232
#summary(lm(mods$form[4],data=dm,weight=dm$n))
233

  
234
ms=lapply(1:nrow(mods2),function(i) {
235
  m=lm(as.formula(as.character(mods2$form[i])),data=dm[dm$month==mods2$month[i],],weight=dm$n[dm$month==mods2$month[i]])
236
  return(list(model=m,
237
              res=data.frame(
238
                Formula=as.character(mods2$form[i]),
239
                Month=mods2$month[i],
240
                type=mods2$type[i],
241
                AIC=AIC(m),
242
                R2=summary(m)$r.squared)))
243
})
244

  
245
### identify lowest AIC per month
246
ms1=do.call(rbind.data.frame,lapply(ms,function(m) m$res))
247
aicw= cast(ms1,Month~type,value="AIC")
248
aicwt=as.data.frame(t(apply(aicw[,-1],1,function(x) ifelse(x==min(x),"Minimum",ifelse((x-min(x))<7,"NS Minimum","NS")))));colnames(aicwt)=colnames(aicw)[-1];aicwt$Month=aicw$Month
249
aic=melt(aicwt,id.vars="Month");colnames(aic)=c("Month","type","minAIC")
250
aic$minAIC=factor(aic$minAIC,ordered=F)
251

  
252
xyplot(AIC~as.factor(Month),groups=Formula,data=ms1,type=c("p","l"),pch=16,auto.key=list(space="top"),main="Model Comparison across months",
253
       par.settings = list(superpose.symbol = list(pch=16,cex=1)),xlab="Month")
254

  
255

  
256
ms2=lapply(ms,function(m) m$model)
257

  
258
mi=rep(c(1:12),each=3)  #month indices
259

  
260
fs=do.call(rbind.data.frame,lapply(1:12,function(i){
261
  it=which(mi==i)
262
  x=anova(ms2[[it[1]]],ms2[[it[2]]],ms2[[it[3]]])
263
  fs=c(
264
    paste(as.character(formula(ms2[[it[1]]]))[c(2,1,3)],collapse=" "),
265
    paste(as.character(formula(ms2[[it[2]]]))[c(2,1,3)],collapse=" "),
266
    paste(as.character(formula(ms2[[it[3]]]))[c(2,1,3)],collapse=" "))
267
  data.frame(month=rep(i,3),model=fs,p=as.data.frame(x)[,6],sig=ifelse(as.data.frame(x)[,6]<0.05,T,F))
268
}))
269

  
270
table(fs$sig,fs$model)
271
which(fs$sig)
272

  
177 273
### load oregon boundary for comparison
178 274
roi=spTransform(as(readOGR("data/regions/Test_sites/Oregon.shp","Oregon"),"SpatialLines"),projs)
179 275

  
......
184 280
### Summary plots of covariates
185 281
## LULC
186 282
at=seq(0.1,100,length=100)
187
levelplot(lulc,at=at,col.regions=bgyr(length(at)),
283
levelplot(lulc2,at=at,col.regions=bgyr(length(at)),
188 284
          main="Land Cover Classes",sub="Sub-pixel %")+
189 285
  layer(sp.lines(roi, lwd=1.2, col='black'))
190 286

  
climate/research/climate_stationarity.R
1
##################    Data preparation for interpolation   #######################################
2
############################ Extraction of station data ##########################################
3
#This script perform queries on the Postgres database ghcn for stations matching the             #
4
#interpolation area. It requires the following inputs:                                           #
5
# 1)the text file ofGHCND  stations from NCDC matching the database version release              #
6
# 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #                                                     #       
7
# 3)a new coordinate system can be provided as an argument                                       #
8
# 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
9
#                                                                                                #
10
#The outputs are text files and a shape file of a time subset of the database                    #
11
#AUTHOR: Benoit Parmentier                                                                       #
12
#DATE: 06/02/212                                                                                 #
13
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
14
##################################################################################################
15

  
16
###Loading R library and packages   
17
library(RPostgreSQL)
18
library(sp)                                             # Spatial pacakge with class definition by Bivand et al.
19
library(spdep)                                          # Spatial pacakge with methods and spatial stat. by Bivand et al.
20
library(rgdal)                                          # GDAL wrapper for R, spatial utilities
21
library(rgeos)                                          # Polygon buffering and other vector operations
22
library(reshape)
23

  
24
### Parameters and arguments
25

  
26
db.name <- "ghcn"                #name of the Postgres database
27
var <- c("TMAX","TMIN","PRCP")                    #name of the variables to keep: TMIN, TMAX or PRCP
28
year_start<-"1970"               #starting year for the query (included)
29
year_end<-"2011"                 #end year for the query (excluded)
30

  
31
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
32
#path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
33

  
34
outpath=path                                                              # create different output path because we don't have write access to other's home dirs
35
setwd(path) 
36
out_prefix<-"stationarity"                                                 #User defined output prefix
37
buffer=100
38

  
39
#for Adam
40
outpath="/home/wilson/data/"
41

  
42

  
43
############ START OF THE SCRIPT #################
44

  
45
#####  Connect to Station database
46
drv <- dbDriver("PostgreSQL")
47
db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
48

  
49
##### STEP 1: Select station in the study area
50

  
51
infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
52
filename<-sub(".shp","",infile1)             #Removing the extension from file.
53
interp_area <- readOGR(".",filename)
54
CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
55

  
56
#####  Buffer shapefile if desired
57
##     This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stiching regions
58
if(buffer>0){  #only apply buffer if buffer >0
59
  interp_area=gUnionCascaded(interp_area)  #dissolve any subparts of roi (if there are islands, lakes, etc.)
60
  interp_areaC=gCentroid(interp_area)       # get centroid of region
61
  interp_areaB=spTransform(                # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
62
    gBuffer(
63
      spTransform(interp_area,
64
                  CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
65
      width=buffer*1000),                  # convert buffer (km) to meters
66
    CRS(CRS_interp))                       # reproject back to original projection
67
#  interp_area=interp_areaB                 # replace original region with buffered region
68
}
69

  
70
## get bounding box of study area
71
bbox=bbox(interp_areab)
72

  
73
### read in station location information from database
74
### use the bbox of the region to include only station in rectangular region to speed up overlay
75
dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
76
                  FROM stations
77
                  WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
78
                  AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
79
                  ;",sep=""))
80
coordinates(dat_stat)<-c("longitude","latitude")
81
proj4string(dat_stat)<-CRS_interp
82

  
83

  
84
# Spatial query to find relevant stations
85
inside <- !is.na(over(dat_stat, as(interp_areab, "SpatialPolygons")))  #Finding stations contained in the current interpolation area
86
stat_roi<-dat_stat[inside,]                                            #Finding stations contained in the current interpolation area
87
#stat_roi<-spTransform(stat_roi,CRS(new_proj))         # Project from WGS84 to new coord. system
88

  
89
#Quick visualization of station locations
90
plot(interp_area, axes =TRUE)
91
plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
92
#legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
93

  
94

  
95
#################################################################
96
### STEP 2: generate monthly means for climate-aided interpolation
97
##  Query to link station location information and observations
98
##  Concatenate date columns into single field for easy convert to date
99
##  Divide value by 10 to convert to degrees C and mm
100
##  Subset to years in year_start -> year_end
101
##  Drop missing values (-9999)
102
##  Drop observations that failed quality control (keep only qflag==NA)
103

  
104
### first extract average daily values by month.  
105
system.time(
106
           d<<-dbGetQuery(db,  # create dm object (data monthly)
107
                          paste("SELECT station,month,element,count30,value30,count10,value10,latitude,longitude,elevation
108
                                 FROM 
109
                                          (SELECT station,month,element,count(value) as count30,avg(value)/10.0 as value30,latitude,longitude,elevation
110
                                           FROM ghcn, stations
111
                                           WHERE station = id
112
                                           AND id IN ('",paste(stat_roi$id,collapse="','"),"')
113
                                           AND element IN ('",paste(var,collapse="','"),"')
114
                                           AND year>=",1970,"
115
                                           AND year<",2000,"
116
                                           AND value<>-9999
117
                                           AND qflag IS NULL
118
                                           GROUP BY station, month,latitude,longitude,elevation,element
119
                                           ) as a30
120
                                  INNER JOIN 
121
                                           (SELECT station,month,element,count(value) as count10,avg(value)/10.0 as value10
122
                                           FROM ghcn, stations
123
                                           WHERE station = id
124
                                           AND id IN ('",paste(stat_roi$id,collapse="','"),"')
125
                                           AND element IN ('",paste(var,collapse="','"),"')
126
                                           AND year>=",2000,"
127
                                           AND year<",2010,"
128
                                           AND value<>-9999
129
                                           AND qflag IS NULL
130
                                           GROUP BY station, month,element
131
                                           ) as a10
132
                                      USING (station,element,month)
133
                                 ;",sep=""))
134
            )  ### print used time in seconds  ~ 10 minutes
135

  
136
save(d,file=paste(outpath,"stationarity.Rdata"))
137

  
138
#####################################################################33
139
#### Explore it
140
load(paste(outpath,"stationarity.Rdata"))
141

  
142
## subset by # of observations?
143
thresh=.75 #threshold % to keep
144
d$keep=d$count30/900>thresh&d$count10/300>thresh
145
table(d$keep)
146

  
147
## create month factor
148
d$monthname=factor(d$month,labels=format(as.Date(paste(2000,1:12,1,sep="-")),"%B"),ordered=T)
149
### start PDF
150
pdf(paste(outpath,"ClimateStationarity.pdf",sep=""),width=11,height=8.5)
151

  
152
library(latticeExtra)
153
#combineLimits(useOuterStrips(xyplot(value10~value30|monthname+element,data=d[d$keep,],scales=list(relation="free",rot=0),cex=.5,pch=16,
154
#                      ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
155
#                      main="Comparison of Mean Daily Values",asp=1)))+
156
#  layer(panel.abline(0,1,col="red"))+
157
#  layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=.5,pos=2))
158

  
159
for(v in unique(d$element)){
160
print(xyplot(value10~value30|monthname,data=d[d$keep&d$element==v,],scales=list(relation="free",rot=0),cex=.5,pch=16,
161
                      ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
162
                      main=paste("Comparison of Mean Daily Values for",v),asp=1)+
163
  layer(panel.abline(0,1,col="red"))+
164
  layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=1,pos=2)))
165
}
166

  
167
## look at deviances
168
d$dif=d$value10-d$value30
169

  
170
trellis.par.set(superpose.symbol = list(col=c("blue","grey","green","red"),cex=.5,pch=16))
171

  
172
 for(v in unique(d$element)){
173
    print(xyplot(latitude~longitude|monthname,group=cut(dif,quantile(d$dif[d$keep&d$element==v],seq(0,1,len=5))),
174
       data=d[d$keep&d$element==v,],auto.key=list(space="right"),
175
                 main=paste("Current-Past anomolies for",v," (2000-2010 Daily Means Minus 1970-2000 Daily Means)"),
176
                 sub="Positive values indicate stations that were warmer/wetter in 2000-2010 than 1970-2000")+
177
          layer(sp.lines(as(interp_area,"SpatialLines"),col="black")))
178
}
179

  
180
dev.off()
climate/research/oregon/interpolation/Extraction_raster_covariates_study_area.R
71 71
sdata.u@data=cbind.data.frame(sdata.u@data,extract(subset(covar,subset=which(getZ(covar)!="00")), sdata.u))      #Extracting values from the raster stack for every point 
72 72
sdata.u=sdata.u@data #drop the spatial-ness
73 73

  
74
###  add MODIS metric to station data for month corresponding to that date
74 75
### reshape for easy merging
75 76
sdata.ul=melt(sdata.u,id.vars=c("station","latitude","longitude","x","y"))
76 77
sdata.ul[,c("metric","type","month")]=do.call(rbind.data.frame,strsplit(as.character(sdata.ul$variable),"_"))
climate/research/oregon/interpolation/GAM.R
79 79
    "value ~ s(CLD_mean) + elev + ns + ew",
80 80
    "value ~ s(COT_mean) + elev + ns + ew",
81 81
    "value ~ s(CER_P20um) + elev + ns + ew",
82
    "value ~ s(CER_mean) + elev + ns + ew"
82
    "value ~ s(CER_mean) + elev + ns + ew",
83
    "value ~ s(CLD_mean) + s(CER_P20um) + elev + ns + ew",
84
    "value ~ s(COT_mean) + s(CLD_mean) + s(CER_P20um) + elev + ns + ew"
83 85
                                        #    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
84 86
#    "value ~ s(x_OR83M,y_OR83M,CER_P20um) +s(x_OR83M,y_OR83M,CLD_mean) + elev + ns + ew",
85 87
#    "value ~ s(x_OR83M,y_OR83M) + s(CER_P20um,CLD_mean) + elev + ns + ew",
......
128 130
ghcn.subsets <-lapply(dates, function(d) subset(ghcn@data, date==d)) #this creates a list of 10 subset data
129 131
  
130 132
  results=do.call(rbind.data.frame,                   # Collect the results in a single data.frame
131
   lapply(1:length(dates),function(i,savemodel=T,saveFullPrediction=T,scale=F,verbose=T) {            # loop over dates
133
   lapply(1:length(dates),function(i,savemodel=F,saveFullPrediction=F,scale=F,verbose=T) {            # loop over dates
132 134
     if(verbose)      print(paste("Starting Date:",dates[i]))
133 135
     date<-dates[i]                                  # get date
134 136
     month<-strftime(date, "%m")                     # get month

Also available in: Unified diff