###################################################################################
### R code to interpolate monthly climatologies


## connect to server of choice
#system("ssh litoria")
#R

library(sp)
#library(spgrass6)
library(rgdal)
library(reshape)
library(ncdf4)
#library(geosphere)
#library(rgeos)
library(multicore)
library(raster)
library(rasterVis)
library(lattice)
library(latticeExtra)
#library(rgl)
#library(hdf5)
#library(heR.Misc)
#library(car)
library(mgcv)
library(sampling)

X11.options(type="Xlib")
ncores=20 #number of threads to use

setwd("/home/adamw/acrobates/projects/interp")

## get MODLAND tile information
tb=read.table("http://landweb.nascom.nasa.gov/developers/sn_tiles/sn_bound_10deg.txt",skip=6,nrows=648,header=T)
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
save(tb,file="modlandTiles.Rdata")
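## example (sketch): look up the bounding box of a single tile from the table above
# tb[tb$tile=="h09v04",c("lon_min","lon_max","lat_min","lat_max")]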

#tile="h11v08" #can move this to submit script if needed
tile="h21v09" #Kenya
tile="h09v04" #oregon
tiles=c("h11v08","h09v04")

psin=CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs")

tile_bb=tb[tb$tile==tile,] ## identify tile of interest
roi_ll=extent(tile_bb$lon_min,tile_bb$lon_max,tile_bb$lat_min,tile_bb$lat_max)

dmod06="data/modis/mod06/summary"
outdir=paste("data/tiles/",tile,"/",sep="")


##########################
#### Organize the data
months=seq(as.Date("2000-01-15"),as.Date("2000-12-15"),by="month")

getmod06<-function(variable,month=NA){
  d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc$",sep=""),full=T),varname=toupper(variable))
  if(!is.na(month)) {
    d=subset(d,subset=month)
    names(d)=variable
  }
  if(is.na(month)){
    d=setZ(d,format(as.Date(d@z$Date),"%m"),name="time")
    layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m"))
  }
  projection(d)=psin
  return(d)
}
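## example usage (sketch): pull a single month of one variable, or the full 12-layer climatology
# cer_jun=getmod06("cer",month=6)   # single layer, named "cer"
# cer_all=getmod06("cer")           # all months, layers named by month abbreviation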

cer=getmod06("cer")
cld=getmod06("cld")
cot=getmod06("cot")
cer20=getmod06("cer20")



pcol=colorRampPalette(c("brown","red","yellow","darkgreen"))

### create data dir for tiled data
ddir=paste("data/tiles/",tile,sep="")
if(!file.exists(ddir)) dir.create(ddir)

## load WorldClim data for comparison (download then uncompress)
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/prec_30s_bil.zip",wait=F)
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/alt_30s_bil.zip",wait=F)

### load WorldClim elevation, crop it to the region of interest, and reproject it to the MOD06 grid
if(!file.exists(paste(ddir,"/dem_",tile,".tif",sep=""))){
  dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T))
  projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
  dem=crop(dem,roi_ll)
  dem[dem>30000]=NA  # mask nodata values
  dem=projectRaster(dem,cer)
  writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff",overwrite=T)
}
dem=raster(paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""))
names(dem)="dem"
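## optional sanity check (sketch): the DEM should now share the MOD06 grid
# compareRaster(dem,cer,extent=TRUE,rowcol=TRUE,crs=TRUE,res=TRUE)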

### get station data, subset to stations in region, and transform to sinusoidal
dm=readOGR(ddir,paste("station_monthly_",tile,"_PRCP",sep=""))
colnames(dm@data)[grep("elevation",colnames(dm@data))]="elev"
colnames(dm@data)[grep("value",colnames(dm@data))]="ppt"

#xyplot(latitude~longitude|month,data=dm@data)

## transform to sinusoidal to overlay with raster data
dm2=spTransform(dm,CRS(projection(cer)))
dm2@data[,c("x","y")]=coordinates(dm2)

## limit data to station-months with at least 300 daily observations (~10 years)
dm2=dm2[dm2$count>300,]
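## optional check (sketch): stations remaining per month after the record-length filter
# table(dm2$month)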

### create monthly raster bricks for prediction, including x and y coordinate rasters
getd<-function(vars=c("cer","cot","cer20","cld"),month){
  pdata=stack(lapply(vars,function(v){getmod06(v,month=month)}))
  ## coordinate rasters (x and y) built from the DEM grid
  x=rasterFromXYZ(coordinates(dem)[,c(1,2,1)])
  y=rasterFromXYZ(coordinates(dem)[,c(1,2,2)])
  pdata=stack(pdata,dem,x,y)
  return(pdata)
}
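## example usage (sketch): build the January prediction stack and inspect its layers
# pjan=getd(month=1)
# names(pjan)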

## Set up models to compare
####################################
#### build table comparing various metrics
models=data.frame(model=c(
#  "ppt~s(y)+ s(x)",
#  "ppt~s(y,x)",
  "lppt~s(y,x)",
  "lppt~s(y,x)+s(dem)",
  "lppt~s(y,x,dem)",
  "lppt~s(y,x,dem)+s(cld)",
  "lppt~s(y,x)+s(dem)+s(cld)",
  "lppt~s(y,x)+s(cld)",
  "lppt~s(y,x)+s(cot)",
  "lppt~s(y,x)+s(dem)+s(cot)",
  "lppt~s(y,x,dem)+s(cot)",
  "lppt~s(y,x)+s(cer20)",
  "lppt~s(y,x)+s(dem)+cld+cot+cer20",
  "lppt~s(y,x,dem)+cld+cot+cer20",
  "lppt~s(y,x)+s(dem)+s(cld,cot,cer20)",
  "lppt~s(y,x)+s(dem)+s(cld)+s(cot)+s(cer20)"))
months=1:12
## add category for each model
models$type="Spatial"
models$type[grepl("cer|cot|cer20",models$model)]="MOD06"
models$type[grepl("cld",models$model)&!grepl("cer|cot|cer20",models$model)]="MOD35"

## build the list of model/month combinations to process
mm=expand.grid(model=models$model,month=months,stringsAsFactors=F)
mm$mid=match(mm$model,models$model)

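## quick check (sketch): mm should have one row per model x month combination
# nrow(mm)==nrow(models)*length(months)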
### Sample validation stations
prop=0.1  # proportion of stations held out in each validation replicate

### function to fit (and optionally predict) a single model/month combination
fitmod<-function(i,prop=0.1,predict=F,nv,...){
  model=as.character(mm$model[i])
  month=mm$month[i]
  mid=mm$mid[i]
  ### extract data
  dr=getd(month=month)
  ## subset data for this month
  dt=dm2[dm2$month==month,]
  ## add log+1 ppt
  dt$lppt=log(dt$ppt+1)
  ## extract for points from dm2
  dr2=subset(dr,subset=grep("^x$|^y$",names(dr),invert=T))
  ds=cbind.data.frame(dt@data,extract(dr2,dt))
  ### flag stations to hold out for validation
  ts2=do.call(rbind.data.frame,lapply(1:nv,function(r) {
    ds$validation=F
    ds$validation[sample(1:nrow(ds),size=as.integer(prop*nrow(ds)))]=T
    ### fit model
    mod<<-try(gam(as.formula(model),data=ds[!ds$validation,]),silent=T)
    ## if fitting fails, return a NULL
    if(class(mod)[[1]]=="try-error") return(NULL)
    ### Model Validation
    vd=ds[ds$validation,] # extract validation points
    y=as.data.frame(predict(mod,ds,se.fit=T)) # make predictions
    if(attr(terms(mod),"variables")[[2]]=="lppt"){ #if modeling log(ppt+1), transform back to real units
      y$fit=exp(y$fit)-1
      y$se.fit=exp(y$se.fit)-1
    }
    ds[,c("pred","pred.se")]=cbind(y$fit,y$se.fit)
    ds$model=model
    ds$modelid=mid
    vlm=lm(pred~ppt,data=ds[ds$validation,]) # lm() to summarize predictive fit
    if(r==nv) ds<<-ds #if on last iteration, save predictions
    ### summarize validation
    s1=summary(mod)
    s2=data.frame(
      model.r2=s1$r.sq,
      model.dev=s1$dev.expl,
      model.aic=AIC(mod),
      valid.rmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T)),
      valid.nrmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T))/mean(vd$ppt+.1,na.rm=T),
      valid.mer=mean(vd$ppt-y$fit[ds$validation],na.rm=T),
      valid.mae=mean(abs(vd$ppt-y$fit[ds$validation]),na.rm=T),
      valid.r2=summary(vlm)$r.squared)
    return(s2)}))
  ## summarize the validation replicates
  if(nrow(ts2)==0) return(NULL)
  s2a=data.frame(mean=apply(ts2,2,mean,na.rm=T))
  s2b=t(apply(ts2,2,quantile,c(0.025,0.975),na.rm=T))
  colnames(s2b)=paste("Q",c(2.5,97.5),sep="")
  s2=cbind.data.frame(tile=tile, model=model, modelid=mid, month=month,
    metric=rownames(s2a),s2a,s2b)
  rownames(s2)=1:nrow(s2)
  ### add some attributes to the object to help ID it later
  attr(mod,"month")=ds$month[1]
  attr(mod,"model")=model
  attr(mod,"modelid")=mid
  ### generate predictions?
  if(predict){
    ## write prediction
    ncfile=paste(outdir,tile,"_pred_",ds$month[1],"_",mid[1],".nc",sep="")
    predict(dr,mod,filename=ncfile,format="CDF",overwrite=T)
    ## add some attributes to the nc file
    ncopath=""
    system(paste(ncopath,"ncrename -v variable,ppt ",ncfile,sep=""))
    system(paste(ncopath,"ncatted -a units,ppt,o,c,\"mm\" ",
      "-a model,ppt,o,c,\"",model,"\" ",
      "-a long_name,ppt,o,c,\"Mean Monthly Precipitation\" ",
      ncfile,sep=""))
    ## create NC file to hold summary data and then append it to the output file
    ## add table for validation metrics
    s2_dim1=ncdim_def("metrics1",units="",vals=1:ncol(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics")
    s2_dim2=ncdim_def("metrics2",units="",vals=1:nrow(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics")
    s2_var=ncvar_def("modelmetrics",units="varies",list(s2_dim1,s2_dim2),missval=-999,longname="Model Fitting and Validation Metrics",
      prec="float")
    ## simplify data table for incorporation into netcdf file
    ds2=ds[,-1];ds2$validation=ifelse(ds2$validation,1,0)
    ds2=as.matrix(ds2)
    data_dim1=ncdim_def("data",units="",vals=1:ncol(ds2),unlim=FALSE,create_dimvar=T,longname="Data used in model fitting")
    data_dim2=ncdim_def("stations",units="",vals=1:nrow(ds2),unlim=FALSE,create_dimvar=T,longname="Stations used in model fitting")
    data_var=ncvar_def("modeldata",units="varies",list(data_dim1,data_dim2),missval=-999,longname="Data used in model fitting",
      prec="float")
    tncf=paste(tempdir(),"/",tile,month,mid,"metrics.nc",sep="") #temp file to hold metric data
    nc_create(filename=tncf,vars=list(data_var,s2_var))
    tnc=nc_open(tncf,write=T)
    # ncvar_put(tnc,s2_var,vals=s2) ## put in the validation metrics
    ncvar_put(tnc,data_var,vals=ds2) ## put in the model-fitting data
    nc_close(tnc) ## close the file
    system(paste(ncopath,"ncks -A ",tncf," ",ncfile,sep=""))
    system(paste(ncopath,"ncatted -a value,modelmetrics,o,c,\"",paste(1:ncol(s2),":",colnames(s2),collapse="\n",sep=""),"\" ",ncfile,sep=""))
    ## also add metrics as variable attributes for easier viewing
    for(c in 1:ncol(s2)) system(paste(ncopath,"ncatted -a ",colnames(s2)[c],",ppt,o,",
      ifelse(class(s2[1,c])!="numeric","c,\"","d,"),s2[1,c],
      ifelse(class(s2[1,c])!="numeric","\" "," "),ncfile,sep=""))
    ## Add time dimension to ease merging files later
    system(paste(ncopath,"ncecat -O -u time ",ncfile," ",ncfile,sep=""))
    cat(paste("
netcdf time {
dimensions:
time = 1 ;
variables:
int time(time) ;
time:units = \"months since 2000-01-01 00:00:00\" ;
time:calendar = \"gregorian\";
time:long_name = \"time of observation\";
data:
time=",as.integer(ds$month[1]-1),";
}"),file=paste(tempdir(),"/time.cdl",sep=""))
    system(paste("ncgen -o ",tempdir(),"/time.nc ",tempdir(),"/time.cdl",sep=""))
    system(paste(ncopath,"ncks -A ",tempdir(),"/time.nc ",ncfile,sep=""))
    ## global attributes
    system(paste(ncopath,"ncatted -a title,global,o,c,\"Interpolated Monthly Precipitation\" ",
      "-a institution,global,o,c,\"Yale University\" ",
      "-a source,global,o,c,\"Interpolated from station data\" ",
      "-a contact,global,o,c,\"adam.wilson@yale.edu\" ",
      ncfile,sep=""))
  }
  print(paste("Finished model ",model," for month",ds$month[1]))
  return(list(model=mod,summary=s2,data=ds))
}
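## quick single-run example (sketch): fit one model/month with a few replicates before launching the full set
# test1=fitmod(1,predict=F,nv=10)
# test1$summary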

mods=lapply(1:nrow(mm),fitmod,predict=F,nv=100)
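## multicore is loaded above; a parallel alternative (sketch, assuming sufficient memory per worker):
# mods=mclapply(1:nrow(mm),fitmod,predict=F,nv=100,mc.cores=ncores)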

## extract summary tables and write them
sums=do.call(rbind.data.frame,lapply(mods,function(m) return(m$summary)))
write.csv(sums,paste(outdir,tile,"_validation.csv",sep=""))
pred=lapply(mods,function(m) return(m$data))
### drop any prediction tables that are NULL or have the wrong number of columns before binding
goodpred=do.call(c,lapply(pred,function(x) !is.null(x)&&ncol(x)==20))
pred=pred[goodpred]
pred=do.call(rbind.data.frame,pred)
pred$tile=tile
write.csv(pred,paste(outdir,tile,"_predictions.csv",sep=""))

### read them back

#sums=do.call(rbind.data.frame,lapply(tiles,function(tile) read.csv(paste("data/tiles/",tile,"/",tile,"_validation.csv",sep=""))))
sums=read.csv(paste(outdir,tile,"_validation.csv",sep=""))
pred=read.csv(paste(outdir,tile,"_predictions.csv",sep=""))

#sums=rbind.data.frame(sums,read.csv(paste("data/tiles/",tiles[1],"/",tiles[1],"_validation.csv",sep="")))
#pred=read.csv(paste(outdir,tiles[1],"_predictions.csv",sep=""))


## reshape summary validation data
#sumsl=melt(sums,id.vars=c("tile","model","modelid","month"))
sum2=cast(sums[,-1],model+modelid~metric,value="mean",fun=function(x) c(median=round(median(x,na.rm=T),2),sd=round(sd(x,na.rm=T),2)));sum2

#by(sum2,tile,function(x) rank(apply(cbind(rank(x$valid.rmse_median),rank(-x$valid.r2_median)),1,mean,na.rm=T)))
#sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T))

## add sorting variables across months
sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T))
sum2[order(sum2$rank),c("model","modelid","valid.r2_median","valid.rmse_median","valid.nrmse_median","rank")] #"valid.mer_mean",
sums$fmodel=factor(sums$model,ordered=T,levels=rev(sum2$model[order(sum2$rank)]))

## add sorting variables for each month
sumsl2=cast(model~month~metric,value="mean",data=sums)
sumsl2[,,"valid.r2"]

#combineLimits(useOuterStrips(xyplot(value~month|model+variable,data=sumsl,scale=list(relation="free"))))
sums2=sums[sums$metric%in%c("valid.r2","valid.rmse"),]


###############################################################
###############################################################
### Draw some plots
library(maptools)
coast=getRgshhsMap(fn="/home/adamw/acrobates/Global/GSHHS_shp/gshhs/gshhs_l.b",
  xlim=c(360+roi_ll@xmin,360+roi_ll@xmax),ylim=c(roi_ll@ymin,roi_ll@ymax),level=1)
coast=as(coast,"SpatialLines")
coast=spTransform(coast,psin)

## build a polygon version of the region of interest and reproject it for overlays
roi=as(roi_ll,"SpatialPolygons")
proj4string(roi)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
roi=spTransform(roi,psin)
roil=as(roi,"SpatialLines")


### quantile function: returns quantile breaks, or (with cut=T) the values binned by those breaks
gq=function(x,n=10,cut=F) {
  if(!cut) return(unique(quantile(x,seq(0,1,len=n+1),na.rm=T)))
  if(cut) return(cut(x,unique(quantile(x,seq(0,1,len=n+1),na.rm=T))))
}
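## example (sketch): gq() for quantile-based breaks or binned values
# gq(dm2$ppt,n=5)        # break points
# gq(dm2$ppt,n=5,cut=T)  # factor of binned values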
bgyr=colorRampPalette(c("brown","yellow","green","darkgreen","blue"))
X11.options(type="cairo")


png(paste("output/ModelComparisonRasters_",tile,"_%02d.png",sep=""),width=3000,height=2000,res=300,bg="transparent",type="cairo-png")

## COT climatologies
title=""  # no panel title
at=unique(seq(0,30,len=100))
p=levelplot(cot,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black'))
print(p)

## all monthly predictions
p1=brick(stack(list.files(outdir,pattern="pred_.*_10.nc$",full=T),varname="ppt"))
projection(p1)=psin
names(p1)=month.name
title=""#"Interpolated Precipitation"
at=unique(quantile(as.matrix(p1),seq(0,1,len=100),na.rm=T))
at=unique(seq(min(p1@data@min),max(p1@data@max),len=100))
p=levelplot(p1,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black'))
print(p)
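## possible follow-up (sketch): total annual precipitation from the 12 monthly prediction layers
# annual=calc(p1,sum)
# levelplot(annual,col.regions=bgyr(100),margin=F)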

## compare two models
p2=brick(stack(list.files(outdir,pattern="pred_3_3.nc|pred_3_12.nc",full=T)[2:1],varname="ppt"))
projection(p2)=psin
names(p2)=c("X+Y+Elevation","X+Y+Elevation+MOD06")
at=unique(seq(min(p2@data@min),max(p2@data@max),len=100))
p=levelplot(p2,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black'))
print(p)
dev.off()

png(paste("output/dem_",tile,".png",sep=""),width=3000,height=3000,res=300,bg="transparent")
at=unique(seq(min(dem@data@min),max(dem@data@max),len=100))
st=unique(coordinates(dm2))
levelplot(dem,col.regions=terrain.colors(100),at=at,margin=F)+
  layer(panel.xyplot(st[,1],st[,2],cex=.5,pch=3,col="black"))
dev.off()

pdf(paste("output/ModelComparison_",tile,".pdf",sep=""),width=11,height=5,useDingbats=F)

bwplot(fmodel~mean|metric,data=sums2,scale=list(x=list(relation="free")),subscripts=T,panel=function(x,y,subscripts){
  panel.abline(v=mean(x))
  types=models$type[match(sort(unique(y)),models$model)]
  panel.bwplot(x,y,fill=ifelse(types=="Spatial","grey",ifelse(types=="MOD35","dodgerblue","orangered")))
  # panel.text(x,jitter(as.numeric(y),factor=.5),labels=as.character(sums2$month[subscripts]),col="grey40",cex=.3)
  panel.xyplot(x,y,pch=16,col="grey48",cex=.5)
},
#strip=strip.custom(factor.levels=c("Validation R^2","Validation RMSE")),
main="Comparison of Validation Metrics",sub="Vertical line indicates overall mean",
key=list(space="right",rect=list(col=c("orangered","dodgerblue","grey")),text=list(c("MOD06","MOD35","Spatial"))))

dev.off()


### illustration of GAM for IBS poster
#mm
i=56
mm[i,]
tm=fitmod(i,predict=F,nv=1)

## partial residual plots
var=4
fv <- predict(tm$model,type="terms") ## get term estimates
v=sub("[)]","",sub("s[(]","",colnames(fv)))[var]
## compute partial residuals for the selected smooth term
prsd1 <- residuals(tm$model,type="working") + fv[,var]

pdf(paste("output/GAMexample.pdf"),width=11,height=6,useDingbats=F)
plot(tm$model,select=var,las=1,rug=F,cex.lab=2,cex.axis=2) ## plot the selected smooth
points(tm$model$model[,v],prsd1,pch=16,cex=.5,col="red")
dev.off()


xyplot(mean~month|metric,groups=model,type="l",data=sums,scale=list(y=list(relation="free")),auto.key=list(space="right"))#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x)))

sums$type=as.factor(models$type[match(sums$model,models$model)])

xyplot(mean~month|metric,groups=model,type="l",data=sums2,panel=function(x,y,subscripts,groups){
  tsums=sums[subscripts,]
  panel.xyplot(x,y,groups=groups,type="l",subscripts=subscripts,col=ifelse(tsums$type=="Spatial","grey20",ifelse(tsums$type=="MOD35","blue","red")))
  panel.segments(tsums$month,tsums$Q2.5,tsums$month,tsums$Q97.5,groups=groups,subscripts=subscripts)
},subscripts=T,scale=list(y=list(relation="free")),
key=list(space="right",text=list(c("Spatial","MOD35","MOD06")),lines=list(col=c("grey20","blue","red"))))

#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x)))


xyplot(pred~ppt|as.factor(month),groups=validation,data=pred,panel=function(x,y,subscripts,groups){
  panel.xyplot(x,y,groups=groups,subscripts=subscripts)
  panel.abline(0,1,col="red",lwd=2)
},scales=list(relation="free"),
main="Predicted vs. Observed mean monthly precipitation at validation stations",
sub="Line is y=x",auto.key=T)

## plot prediction rasters (assumes ncfile points to a prediction netcdf written by fitmod with predict=T)
p=raster(ncfile,varname="ppt")

plot3D(dem, maxpixels=100000,zfac=.5, drape=p, rev=FALSE, adjust=TRUE)  # requires the rgl package (commented out above)


###################################################################
###################################################################

### add some additional variables (note: mod06s is a station-level MOD06 summary table assumed to be loaded separately)
mod06s$month=factor(mod06s$month,labels=format(as.Date(paste("2000",1:12,"15",sep="-")),"%b"))
mod06s$lppt=log(mod06s$ppt)
mod06s$glon=cut(mod06s$lon,gq(mod06s$lon,n=5),include.lowest=T,ordered=T)#gq(mod06s$lon,n=3))
mod06s$glon2=cut(mod06s$lon,breaks=c(-125,-122,-115),labels=c("Coastal","Inland"),include.lowest=T,ordered=T)#gq(mod06s$lon,n=3))
mod06s$gelev=cut(mod06s$elev,breaks=gq(mod06s$elev,n=3),labels=c("Low","Mid","High"),include.lowest=T,ordered=T)
mod06s$gbin=factor(paste(mod06s$gelev,mod06s$glon2,sep="_"),levels=c("Low_Coastal","Mid_Coastal","High_Coastal","Low_Inland","Mid_Inland","High_Inland"),ordered=T)
mod06s$LWP_mean=(2/3)*mod06s$CER_mean*mod06s$COT_mean
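## note (assumption): LWP_mean above follows the common liquid-water-path approximation LWP ~ (2/3)*CER*COT
## quick check (sketch): station counts across the elevation/longitude bins
# table(mod06s$gbin)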

## melt it
mod06sl=melt(mod06s[,!grepl("lppt",colnames(mod06s))],id.vars=c("id","lon","lat","elev","month","ppt","glon","glon2","gelev","gbin"))
levels(mod06sl$variable)=c("Effective Radius (um)","Very Cloudy Days (%)","Cloudy Days (%)","Optical Thickness (%)","Liquid Water Path")

pdf("output/MOD06_summary.pdf",width=11,height=8.5)

# % cloudy maps
title="Cloudiness (% cloudy days)"
at=unique(quantile(as.matrix(cld),seq(0,1,len=100),na.rm=T))
p=levelplot(cld,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black'))
print(p)
#bwplot(cer,main=title,ylab="Cloud Effective Radius (microns)")

# CER maps
title="Cloud Effective Radius (microns)"
at=unique(quantile(as.matrix(cer),seq(0,1,len=100),na.rm=T))
p=levelplot(cer,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black'))
print(p)
#bwplot(cer,main=title,ylab="Cloud Effective Radius (microns)")

# CER20 maps
title="% Days with Cloud Effective Radius > 20 microns"
at=unique(quantile(as.matrix(cer20),seq(0,1,len=100),na.rm=T))
p=levelplot(cer20,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black'))
print(p)
#bwplot(cer,main=title,ylab="Cloud Effective Radius (microns)")

# COT maps
title="Cloud Optical Thickness (%)"
at=unique(quantile(as.matrix(cot),seq(0,1,len=100),na.rm=T))
p=levelplot(cot,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=0.8, col='black'))
print(p)
#bwplot(cot,xlab.top=title,ylab="Cloud Optical Thickness (%)")
dev.off()


### Calculate the slope of the log(ppt)~CER relationship at each station
## note: dapply() is not in the packages loaded above (it may come from the commented-out heR.Misc library)
mod06s.sl=dapply(mod06s,list(id=mod06s$id),function(x){
  lm1=lm(log(x$ppt)~x$CER_mean)
  data.frame(lat=x$lat[1],lon=x$lon[1],elev=x$elev[1],intcpt=coefficients(lm1)[1],cer=coefficients(lm1)[2],r2=summary(lm1)$r.squared)
})
mod06s.sl$cex=gq(mod06s.sl$r2,n=5,cut=T)
mod06s.sl$cer.s=gq(mod06s.sl$cer,n=5,cut=T)

### and plot it on a map
xyplot(lat~lon,group=cer.s,data=mod06s.sl,par.settings = list(superpose.symbol = list(pch =16, col=bgyr(5),cex=1)),auto.key=list(space="right",title="Slope Coefficient"),asp=1,
  main="Slopes of linear regressions {log(ppt)~CloudEffectiveRadius}")+
  layer(sp.lines(roi_geo, lwd=1.2, col='black'))

### look for relationships with longitude
xyplot(cer~lon,group=cut(mod06s.sl$elev,gq(mod06s.sl$elev,n=5)),data=mod06s.sl,
  par.settings = list(superpose.symbol = list(col=bgyr(5),pch=16,cex=1)),auto.key=list(space="right",title="Station Elevation"),
  ylab="Slope of lm(ppt~EffectiveRadius)",xlab="Longitude",main="Precipitation~Effective Radius relationship by longitude")


############################################################
### simple regression to get spatial residuals
m="01"
mod06s2=mod06s#[mod06s$month==m,]

lm1=lm(log(ppt)~CER_mean*month*lon,data=mod06s2); summary(lm1)
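## optional diagnostics (sketch): standard lm residual plots for the simple interaction model
# par(mfrow=c(2,2)); plot(lm1); par(mfrow=c(1,1))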
mod06s2$pred=exp(predict(lm1,mod06s2))
mod06s2$resid=mod06s2$pred-mod06s2$ppt
mod06s2$residg=gq(mod06s2$resid,n=5,cut=T)
mod06s2$presid=mod06s2$resid/mod06s2$ppt

for(l in c(F,T)){
## all months (print() is needed for lattice plots inside a loop)
print(xyplot(pred~ppt,groups=gelev,data=mod06s2,
  par.settings = list(superpose.symbol = list(col=bgyr(3),pch=16,cex=.75)),auto.key=list(space="right",title="Station Elevation"),
  scales=list(log=l),
  ylab="Predicted Mean Monthly Precipitation (mm)",xlab="Observed Mean Monthly Precipitation (mm)",main="Predicted vs. Observed for Simple Model",
  sub="Red line is y=x")+
  layer(panel.abline(0,1,col="red"))
)

## month by month
print(xyplot(pred~ppt|month,groups=gelev,data=mod06s2,
  par.settings = list(superpose.symbol = list(col=bgyr(3),pch=16,cex=.75)),auto.key=list(space="right",title="Station Elevation"),
  scales=list(log=l),
  ylab="Predicted Mean Monthly Precipitation (mm)",xlab="Observed Mean Monthly Precipitation (mm)",main="Predicted vs. Observed for Simple Model",
  sub="Red line is y=x")+
  layer(panel.abline(0,1,col="red"))
)}

## residuals by month
xyplot(lat~lon|month,group=residg,data=mod06s2,
  par.settings = list(superpose.symbol = list(pch =16, col=bgyr(5),cex=.5)),
  auto.key=list(space="right",title="Residuals"),
  main="Spatial plot of monthly residuals")+
  layer(sp.lines(roi_geo, lwd=1.2, col='black'))


dev.off()



load("data/modis/pointsummary.Rdata")

## note: this exploratory section uses objects (e.g. ds, d) from the loaded Rdata and overwrites dm defined above
dsl=melt(ds,id.vars=c("id","date","ppt","lon","lat"),measure.vars=c("Cloud_Water_Path","Cloud_Effective_Radius","Cloud_Optical_Thickness"))

dsl=dsl[!is.nan(dsl$value),]


####
## mean annual precip
dp=d[d$variable=="ppt",]
dp$year=format(dp$date,"%Y")
dm=tapply(dp$value,list(id=dp$id,year=dp$year),sum,na.rm=T)
dms=apply(dm,1,mean,na.rm=T)
dms=data.frame(id=names(dms),ppt=dms/10)

dslm=tapply(dsl$value,list(id=dsl$id,variable=dsl$variable),mean,na.rm=T)
dslm=data.frame(id=rownames(dslm),dslm)

dms=merge(dms,dslm)
dmsl=melt(dms,id.vars=c("id","ppt"))

summary(lm(ppt~Cloud_Effective_Radius,data=dms))
summary(lm(ppt~Cloud_Water_Path,data=dms))
summary(lm(ppt~Cloud_Optical_Thickness,data=dms))
summary(lm(ppt~Cloud_Effective_Radius+Cloud_Water_Path+Cloud_Optical_Thickness,data=dms))


#### draw some plots
#pdf("output/MOD06_summary.pdf",width=11,height=8.5)
png("output/MOD06_summary_%d.png",width=1024,height=780)

## daily data
xyplot(value~ppt/10|variable,data=dsl,
  scales=list(relation="free"),type=c("p","r"),
  pch=16,cex=.5,layout=c(3,1))


densityplot(~value|variable,groups=cut(dsl$ppt,c(0,50,100,500)),data=dsl,auto.key=T,
  scales=list(relation="free"),plot.points=F)

## annual means

xyplot(value~ppt|variable,data=dmsl,
  scales=list(relation="free"),type=c("p","r"),pch=16,cex=0.5,layout=c(3,1),
  xlab="Mean Annual Precipitation (mm)",ylab="Mean value")

densityplot(~value|variable,groups=cut(dmsl$ppt,c(0,50,100,500)),data=dmsl,auto.key=T,
  scales=list(relation="free"),plot.points=F)


## plot some swaths
## note: fs is assumed to be a table of swath file paths from an earlier processing step
nc1=raster(fs$path[3],varname="Cloud_Effective_Radius")
nc2=raster(fs$path[4],varname="Cloud_Effective_Radius")
nc3=raster(fs$path[5],varname="Cloud_Effective_Radius")

nc1[nc1<=0]=NA
nc2[nc2<=0]=NA
nc3[nc3<=0]=NA

plot(roi)
plot(nc3)

plot(nc1,add=T)
plot(nc2,add=T)


dev.off()



####################
#################### OLD JUNK BELOW!
####################

#levelplot(cer)#+points(x,y,data=dm2@data)

### extract MOD06 data for each station
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="")
stcerp=extract(cerp,dm2,fun=mean);colnames(stcerp)=paste("cerp_mean_",as.numeric(format(as.Date(cerp@z$Date),"%m")),sep="")
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="")
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="")
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="")
stdem=extract(dem,dm2)
### generate new design matrix
mod06=cbind.data.frame(station=dm$station,stcer,stcerp,stcot,stcld,stcer20)
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06"
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_"))
mod06l=unique(mod06l)
mod06l=cast(mod06l,station+moment+month~variable,value="mod06")
mod06l=merge(dm2@data,mod06l,by=c("station","month"))
mod06l=mod06l[!is.na(mod06l$cer),]

mod06l=mod06l[order(mod06l$month),]
mod06l$lppt=log(mod06l$ppt+1)

#xyplot(latitude~longitude|as.factor(month),groups=cut(mod06l$ppt,breaks=quantile(mod06l$ppt,seq(0,1,len=10))),data=mod06l,auto.key=T,col=grey(seq(0,1,len=10)),pch=16,cex=.5)
#plot(cer)
|