Revision 165c04fb
Added by Adam Wilson almost 12 years ago
climate/procedures/MOD06_monthlyinterp.R | ||
---|---|---|
1 | 1 |
################################################################################### |
2 |
### R code to aquire and process MOD06_L2 cloud data from the MODIS platform
|
|
2 |
### R code to interpolate monthly climatologies
|
|
3 | 3 |
|
4 | 4 |
|
5 | 5 |
## connect to server of choice |
... | ... | |
7 | 7 |
#R |
8 | 8 |
|
9 | 9 |
library(sp) |
10 |
library(spgrass6) |
|
10 |
#library(spgrass6)
|
|
11 | 11 |
library(rgdal) |
12 | 12 |
library(reshape) |
13 | 13 |
library(ncdf4) |
14 |
library(geosphere) |
|
15 |
library(rgeos) |
|
14 |
#library(geosphere)
|
|
15 |
#library(rgeos)
|
|
16 | 16 |
library(multicore) |
17 | 17 |
library(raster) |
18 |
library(rasterVis) |
|
18 | 19 |
library(lattice) |
19 | 20 |
library(latticeExtra) |
20 |
library(rgl) |
|
21 |
library(hdf5) |
|
22 |
library(rasterVis) |
|
23 |
library(heR.Misc) |
|
24 |
library(car) |
|
21 |
#library(rgl) |
|
22 |
#library(hdf5) |
|
23 |
#library(heR.Misc) |
|
24 |
#library(car) |
|
25 | 25 |
library(mgcv) |
26 | 26 |
library(sampling) |
27 | 27 |
|
... | ... | |
35 | 35 |
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="") |
36 | 36 |
save(tb,file="modlandTiles.Rdata") |
37 | 37 |
|
38 |
tile="h11v08" #can move this to submit script if needed |
|
39 |
#tile="h09v04" #oregon |
|
40 |
|
|
38 |
#tile="h11v08" #can move this to submit script if needed |
|
39 |
tile="h21v09" #Kenya |
|
40 |
tile="h09v04" #oregon |
|
41 |
tiles=c("h11v08","h09v04") |
|
42 |
|
|
41 | 43 |
psin=CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs") |
42 | 44 |
|
43 | 45 |
tile_bb=tb[tb$tile==tile,] ## identify tile of interest |
44 | 46 |
roi_ll=extent(tile_bb$lon_min,tile_bb$lon_max,tile_bb$lat_min,tile_bb$lat_max) |
45 |
#roi=spTransform(roi,psin) |
|
46 |
#roil=as(roi,"SpatialLines") |
|
47 | 47 |
|
48 | 48 |
dmod06="data/modis/mod06/summary" |
49 |
outdir=paste("data/tiles/",tile,"/",sep="") |
|
49 | 50 |
|
50 | 51 |
|
51 | 52 |
########################## |
52 | 53 |
#### Organize the data |
53 | 54 |
months=seq(as.Date("2000-01-15"),as.Date("2000-12-15"),by="month") |
54 | 55 |
|
55 |
getmod06<-function(variable){ |
|
56 |
d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc",sep=""),full=T),varname=toupper(variable)) |
|
57 |
# d=dropLayer(d,1) |
|
56 |
getmod06<-function(variable,month=NA){ |
|
57 |
d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc$",sep=""),full=T),varname=toupper(variable)) |
|
58 |
if(!is.na(month)) { |
|
59 |
d=subset(d,subset=month) |
|
60 |
names(d)=variable |
|
61 |
} |
|
62 |
if(is.na(month)){ |
|
63 |
setZ(d,format(as.Date(d@z$Date),"%m"),name="time") |
|
64 |
layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m")) |
|
65 |
} |
|
58 | 66 |
projection(d)=psin |
59 |
setZ(d,format(as.Date(d@z$Date),"%m"),name="time") |
|
60 |
# d@z=as.Date(d@z$Date) |
|
61 |
layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m")) |
|
62 | 67 |
return(d) |
63 | 68 |
} |
64 | 69 |
|
65 |
# drop #1? |
|
66 |
|
|
67 | 70 |
cer=getmod06("cer") |
68 | 71 |
cld=getmod06("cld") |
69 | 72 |
cot=getmod06("cot") |
70 | 73 |
cer20=getmod06("cer20") |
71 | 74 |
|
75 |
|
|
76 |
|
|
72 | 77 |
pcol=colorRampPalette(c("brown","red","yellow","darkgreen")) |
73 |
#levelplot(cer,col.regions=pcol(20)) |
|
78 |
|
|
79 |
### create data dir for tiled data |
|
80 |
ddir=paste("data/tiles/",tile,sep="") |
|
81 |
if(!file.exists(ddir)) dir.create(ddir) |
|
74 | 82 |
|
75 | 83 |
## load WorldClim data for comparison (download then uncompress) |
76 | 84 |
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/prec_30s_bil.zip",wait=F) |
77 | 85 |
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/alt_30s_bil.zip",wait=F) |
78 | 86 |
|
79 | 87 |
### load WORLDCLIM elevation |
80 |
#dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T)) |
|
81 |
#projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0") |
|
82 |
#dem=crop(dem,roi_ll) |
|
83 |
#dem[dem>60000]=NA |
|
84 |
#dem=projectRaster(dem,cer) |
|
85 |
#writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff") |
|
88 |
if(!file.exists(paste(ddir,"/dem_",tile,".tif",sep=""))){ |
|
89 |
dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T)) |
|
90 |
projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0") |
|
91 |
dem=crop(dem,roi_ll) |
|
92 |
dem[dem>30000]=NA |
|
93 |
dem=projectRaster(dem,cer) |
|
94 |
writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff",overwrite=T) |
|
95 |
} |
|
86 | 96 |
dem=raster(paste("data/tiles/",tile,"/dem_",tile,".tif",sep="")) |
97 |
names(dem)="dem" |
|
87 | 98 |
|
88 | 99 |
### get station data, subset to stations in region, and transform to sinusoidal |
89 |
dm=readOGR(paste("data/tiles/",tile,sep=""),paste("station_monthly_",tile,"_PRCP",sep=""))
|
|
90 |
colnames(dm@data)[grep("elevation",colnames(dm@data))]="dem"
|
|
100 |
dm=readOGR(ddir,paste("station_monthly_",tile,"_PRCP",sep=""))
|
|
101 |
colnames(dm@data)[grep("elevation",colnames(dm@data))]="elev"
|
|
91 | 102 |
colnames(dm@data)[grep("value",colnames(dm@data))]="ppt" |
92 | 103 |
|
93 |
#xyplot(latitude~longitude|month,data=dm@data) |
|
104 |
#xyplot(latitude~longitude|month,data=dm@data) |
|
105 |
|
|
106 |
## transform to sinusoidal to overlay with raster data |
|
94 | 107 |
dm2=spTransform(dm,CRS(projection(cer))) |
95 | 108 |
dm2@data[,c("x","y")]=coordinates(dm2) |
96 | 109 |
|
97 |
### extract MOD06 data for each station |
|
98 |
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="") |
|
99 |
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="") |
|
100 |
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="") |
|
101 |
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="") |
|
102 |
stdem=extract(dem,dm2) |
|
103 |
#mod06=cbind.data.frame(station=dm$station,stcer[,-1],stcot[,-1],stcld[,-1],stcer20[,-1]) |
|
104 |
mod06=cbind.data.frame(station=dm$station,stcer,stcot,stcld,stcer20) |
|
105 |
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06" |
|
106 |
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_")) |
|
107 |
mod06l=unique(mod06l) |
|
108 |
mod06l=cast(mod06l,station+moment+month~variable,value="mod06") |
|
109 |
mod06l=merge(dm2@data,mod06l,by=c("station","month")) |
|
110 |
mod06l=mod06l[!is.na(mod06l$cer),] |
|
111 |
|
|
112 |
mod06l=mod06l[order(mod06l$month),] |
|
110 |
## limit data to station-months with at least 300 daily observations (~10 years) |
|
111 |
dm2=dm2[dm2$count>300,] |
|
113 | 112 |
|
114 |
#xyplot(value~cer|month,data=mod06l,scales=list(relation="free"),pch=16,cex=.5) |
|
115 |
#xyplot(value~cer|station,data=mod06l[mod06l$count>400,],pch=16,cex=.5) |
|
116 |
#xyplot(cot~month,groups=station,data=mod06l,type="l") |
|
113 |
## create coordinate rasters |
|
117 | 114 |
|
118 | 115 |
### create monthly raster bricks for prediction |
119 |
m=2 |
|
120 |
|
|
121 |
pdata=stack( |
|
122 |
subset(cer,subset=m), |
|
123 |
subset(cot,subset=m), |
|
124 |
subset(cer20,subset=m), |
|
125 |
subset(cld,subset=m) |
|
126 |
) |
|
116 |
getd<-function(vars=c("cer","cot","cer20","cld"),month){ |
|
117 |
pdata=stack(lapply(vars,function(v){getmod06(v,month=month)})) |
|
118 |
x=rasterFromXYZ(coordinates(dem)[,c(1,2,1)]) |
|
119 |
y=rasterFromXYZ(coordinates(dem)[,c(1,2,2)]) |
|
120 |
pdata=stack(pdata,dem,x,y) |
|
121 |
return(pdata) |
|
122 |
} |
|
127 | 123 |
|
128 | 124 |
## Set up models to compare |
129 | 125 |
#################################### |
130 | 126 |
#### build table comparing various metrics |
131 |
models=c( |
|
132 |
"ppt~s(y)+ s(x)", |
|
133 |
"ppt~s(y,x)", |
|
134 |
"ppt~s(y,x) + s(dem)", |
|
135 |
"ppt~s(y,x)+s(dem)+cer+cld+cot+cer20", |
|
136 |
"ppt~s(y,x)+s(dem)+s(cer)", |
|
137 |
"ppt~s(y,x)+s(dem)+s(cer20)", |
|
138 |
"ppt~s(y,x)+s(dem)+s(cld)", |
|
139 |
"ppt~s(y,x)+s(dem)+s(cot)", |
|
140 |
"ppt~s(y,x)+s(dem)+s(cer20,cld)", |
|
141 |
"ppt~s(y,x)+s(dem)+s(cer20,cot)", |
|
142 |
"ppt~s(y,x)+s(dem)+s(cer,cld)", |
|
143 |
"ppt~s(dem)+s(cer,cot)") |
|
144 |
months=1:12 |
|
127 |
models=data.frame(model=c( |
|
128 |
# "ppt~s(y)+ s(x)", |
|
129 |
# "ppt~s(y,x)", |
|
130 |
"lppt~s(y,x)", |
|
131 |
"lppt~s(y,x)+s(dem)", |
|
132 |
"lppt~s(y,x,dem)", |
|
133 |
"lppt~s(y,x,dem)+s(cld)", |
|
134 |
"lppt~s(y,x)+s(dem)+s(cld)", |
|
135 |
"lppt~s(y,x)+s(cld)", |
|
136 |
"lppt~s(y,x)+s(cot)", |
|
137 |
"lppt~s(y,x)+s(dem)+s(cot)", |
|
138 |
"lppt~s(y,x,dem)+s(cot)", |
|
139 |
"lppt~s(y,x)+s(cer20)", |
|
140 |
"lppt~s(y,x)+s(dem)+cld+cot+cer20", |
|
141 |
"lppt~s(y,x,dem)+cld+cot+cer20", |
|
142 |
"lppt~s(y,x)+s(dem)+s(cld,cot,cer20)", |
|
143 |
"lppt~s(y,x)+s(dem)+s(cld)+s(cot)+s(cer20)")) |
|
144 |
months=1:12 |
|
145 |
## add category for each model |
|
146 |
models$type="Spatial" |
|
147 |
models$type[grepl("cer|cot|cer20",models$model)]="MOD06" |
|
148 |
models$type[grepl("cld",models$model)&!grepl("cer|cot|cer20",models$model)]="MOD35" |
|
145 | 149 |
|
146 | 150 |
## build the list of models/months to process |
147 |
mm=expand.grid(model=models,month=months) |
|
148 |
mm$model=as.character(mm$model) |
|
149 |
mm$mid=match(mm$model,models) |
|
151 |
mm=expand.grid(model=models$model,months=months,stringsAsFactors=F) |
|
152 |
mm$mid=match(mm$model,models$model) |
|
150 | 153 |
|
151 |
# mod1<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s) |
|
152 |
# mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s) |
|
153 |
# mod3<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s) |
|
154 |
# mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s) |
|
155 |
# mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s) |
|
156 |
# mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s) |
|
157 |
# mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s) |
|
158 |
# mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s) |
|
159 |
|
|
160 | 154 |
### Sample validation stations |
161 | 155 |
prop=0.1 |
162 |
mod06l$validation=F |
|
163 |
mod06l$validation[as.numeric(rownames(strata(mod06l,stratanames="month",size=as.integer(prop*table(mod06l$month)),method="srswor")))]=T |
|
164 | 156 |
|
165 | 157 |
### run it |
166 |
mods=lapply(1:nrow(mm),function(i){ |
|
167 |
mod=try(gam(as.formula(mm$model[i]),data=mod06l[mod06l$month==mm$month[i]&!mod06l$validation,])) |
|
168 |
## add some attributes to the object to help ID it later |
|
169 |
attr(mod,"month")=mm$month[i] |
|
170 |
attr(mod,"model")=mm$model[i] |
|
171 |
attr(mod,"modelid")=mm$mid[i] |
|
172 |
print(paste("Finished model ",mm$model[i]," for month",mm$month[i])) |
|
173 |
return(mod) |
|
174 |
}) |
|
175 |
|
|
176 |
## Get summary stats |
|
177 |
sums=do.call(rbind.data.frame,lapply(mods,function(m,plot=F){ |
|
178 |
if(class(m)=="try-error") { |
|
179 |
return(data.frame(model=attr(m,"model"), |
|
180 |
modelid=attr(m,"modelid"), |
|
181 |
month=attr(m,"month"), |
|
182 |
r2=NA, |
|
183 |
dev=NA, |
|
184 |
aic=NA, |
|
185 |
rmse=NA, |
|
186 |
vr2=NA)) |
|
158 |
fitmod<-function(i,prop=0.1,predict=F, nv,...){ |
|
159 |
model=as.character(mm$model[i]) |
|
160 |
month=mm$month[i] |
|
161 |
mid=mm$mid[i] |
|
162 |
### extract data |
|
163 |
dr=getd(month=month) |
|
164 |
## subset data for this month |
|
165 |
dt=dm2[dm2$month==month,] |
|
166 |
## add log+1 ppt |
|
167 |
dt$lppt=log(dt$ppt+1) |
|
168 |
## extract for points from dm2 |
|
169 |
dr2=subset(dr,subset=grep("^x$|^y$",names(dr),invert=T)) |
|
170 |
ds=cbind.data.frame(dt@data,extract(dr2,dt)) |
|
171 |
### flag stations to hold out for validation |
|
172 |
ts2=do.call(rbind.data.frame,lapply(1:nv,function(r) { |
|
173 |
ds$validation=F |
|
174 |
ds$validation[sample(1:nrow(ds),size=as.integer(prop*nrow(ds)))]=T |
|
175 |
### fit model |
|
176 |
mod<<-try(gam(as.formula(model),data=ds[!ds$validation,]),silent=T) |
|
177 |
## if fitting fails, return a NULL |
|
178 |
if(class(mod)[[1]]=="try-error") return(NULL) |
|
179 |
### Model Validation |
|
180 |
vd=ds[ds$validation,] # extract validation points |
|
181 |
y=as.data.frame(predict(mod,ds,se.fit=T)) # make predictions |
|
182 |
if(attr(terms(mod),"variables")[[2]]=="lppt"){ #if modeling log(ppt+1), transform back to real units |
|
183 |
y$fit=exp(y$fit)-1 |
|
184 |
y$se.fit=exp(y$se.fit)-1 |
|
185 |
} |
|
186 |
ds[,c("pred","pred.se")]=cbind(y$fit,y$se.fit) |
|
187 |
ds$model=model |
|
188 |
ds$modelid=mid |
|
189 |
vlm=lm(pred~ppt,data=ds[ds$validation,]) # lm() to summarize predictive fit |
|
190 |
if(r==nv) ds<<-ds #if on last iteration, save predictions |
|
191 |
### summarize validation |
|
192 |
s1=summary(mod) |
|
193 |
s2=data.frame( |
|
194 |
model.r2=s1$r.sq, |
|
195 |
model.dev=s1$dev.expl, |
|
196 |
model.aic=AIC(mod), |
|
197 |
valid.rmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T)), |
|
198 |
valid.nrmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T))/mean(vd$ppt+.1,na.rm=T), |
|
199 |
valid.mer=mean(vd$ppt-y$fit[ds$validation],na.rm=T), |
|
200 |
valid.mae=mean(abs(vd$ppt-y$fit[ds$validation]),na.rm=T), |
|
201 |
valid.r2=summary(vlm)$r.squared) |
|
202 |
return(s2)})) |
|
203 |
## summarize the gcv datasets |
|
204 |
if(nrow(ts2)==0) return(NULL) |
|
205 |
s2a=data.frame(mean=apply(ts2,2,mean,na.rm=T)) |
|
206 |
s2b=t(apply(ts2,2,quantile,c(0.025,0.975),na.rm=T)) |
|
207 |
colnames(s2b)=paste("Q",c(2.5,97.5),sep="") |
|
208 |
s2=cbind.data.frame(tile=tile, model=model, modelid=mid, month=month, |
|
209 |
metric=rownames(s2a),s2a,s2b) |
|
210 |
rownames(s2)=1:nrow(s2) |
|
211 |
### add some attributes to the object to help ID it later |
|
212 |
attr(mod,"month")=ds$month[1] |
|
213 |
attr(mod,"model")=model |
|
214 |
attr(mod,"modelid")=mid |
|
215 |
### generate predictions? |
|
216 |
if(predict){ |
|
217 |
## write prediction |
|
218 |
ncfile=paste(outdir,tile,"_pred_",ds$month[1],"_",mid[1],".nc",sep="") |
|
219 |
predict(dr,mod,filename=ncfile,format="CDF",overwrite=T) |
|
220 |
## add some attributes to nc file |
|
221 |
ncopath="" |
|
222 |
## add other attributes |
|
223 |
system(paste(ncopath,"ncrename -v variable,ppt ",ncfile,sep="")) |
|
224 |
system(paste(ncopath,"ncatted -a units,ppt,o,c,\"mm\" ", |
|
225 |
"-a model,ppt,o,c,\"",model,"\" ", |
|
226 |
"-a long_name,ppt,o,c,\"Mean Monthly Precipitation\" ", |
|
227 |
ncfile,sep="")) |
|
228 |
## create NC file to hold summary data and then append it to output file |
|
229 |
## add table for validation metrics |
|
230 |
s2_dim1=ncdim_def("metrics1",units="",vals=1:ncol(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics") |
|
231 |
s2_dim2=ncdim_def("metrics2",units="",vals=1:nrow(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics") |
|
232 |
s2_var=ncvar_def("modelmetrics",units="varies",list(s2_dim1,s2_dim2),missval=-999,longname="Model Fitting and Validation Metrics", |
|
233 |
prec="float") |
|
234 |
## simplify data table for incorporation into netcdf file |
|
235 |
ds2=ds[,-1];ds2$validation=ifelse(ds2$validation,1,0) |
|
236 |
ds2=as.matrix(ds2) |
|
237 |
data_dim1=ncdim_def("data",units="",vals=1:ncol(ds2),unlim=FALSE,create_dimvar=T,longname="Data used in model fitting") |
|
238 |
data_dim2=ncdim_def("stations",units="",vals=1:nrow(ds2),unlim=FALSE,create_dimvar=T,longname="Stations used in model fitting") |
|
239 |
data_var=ncvar_def("modeldata",units="varies",list(data_dim1,data_dim2),missval=-999,longname="Data used in model fitting", |
|
240 |
prec="float") |
|
241 |
tncf=paste(tempdir(),"/",tile,month,mid,"metrics.nc",sep="") #temp file to hold metric data |
|
242 |
nc_create(filename=tncf,vars=list(data_var,s2_var)) |
|
243 |
tnc=nc_open(tncf,write=T) |
|
244 |
# ncvar_put(tnc,s2_var,vals=s2) ## put in validatation data |
|
245 |
ncvar_put(tnc,data_var,vals=ds2) ## put in validatation data |
|
246 |
nc_close(tnc) ## close the file |
|
247 |
system(paste(ncopath,"ncks -A ",tncf," ",ncfile,sep="")) |
|
248 |
system(paste(ncopath,"ncatted -a value,modelmetrics,o,c,\"",paste(1:ncol(s2),":",colnames(s2),collapse=" |
|
249 |
",sep=""),"\" ",ncfile,sep="")) |
|
250 |
## also add metrics as variable attributes for easier viewing |
|
251 |
for(c in 1:ncol(s2)) system(paste(ncopath,"ncatted -a ",colnames(s2)[c],",ppt,o,", |
|
252 |
ifelse(class(s2[1,c])!="numeric","c,\"","d,"),s2[1,c], |
|
253 |
ifelse(class(s2[1,c])!="numeric","\" "," "),ncfile,sep="")) |
|
254 |
## Add time dimension to ease merging files later |
|
255 |
system(paste(ncopath,"ncecat -O -u time ",ncfile," ",ncfile,sep="")) |
|
256 |
cat(paste(" |
|
257 |
netcdf time { |
|
258 |
dimensions: |
|
259 |
time = 1 ; |
|
260 |
variables: |
|
261 |
int time(time) ; |
|
262 |
time:units = \"months since 2000-01-01 00:00:00\" ; |
|
263 |
time:calendar = \"gregorian\"; |
|
264 |
time:long_name = \"time of observation\"; |
|
265 |
data: |
|
266 |
time=",as.integer(ds$month[1]-1),"; |
|
267 |
}"),file=paste(tempdir(),"/time.cdl",sep="")) |
|
268 |
system(paste("ncgen -o ",tempdir(),"/time.nc ",tempdir(),"/time.cdl",sep="")) |
|
269 |
system(paste(ncopath,"ncks -A ",tempdir(),"/time.nc ",ncfile,sep="")) |
|
270 |
## global attributes |
|
271 |
system(paste(ncopath,"ncatted -a title,global,o,c,\"Interpolated Monthly Precipitation\" ", |
|
272 |
"-a institution,global,o,c,\"Yale University\" ", |
|
273 |
"-a source,global,o,c,\"Interpolated from station data\" ", |
|
274 |
"-a contact,global,o,c,\"adam.wilson@yale.edu\" ", |
|
275 |
ncfile,sep="")) |
|
187 | 276 |
} |
277 |
print(paste("Finished model ",model," for month",ds$month[1])) |
|
278 |
return(list(model=mod,summary=s2,data=ds)) |
|
279 |
} |
|
188 | 280 |
|
189 |
## make validation predictions |
|
190 |
vd=mod06l[mod06l$validation&mod06l$month==attr(m,"month"),] |
|
191 |
y=predict(m,mod06l[mod06l$validation&mod06l$month==attr(m,"month"),]) |
|
192 |
vlm=lm(y~vd$ppt) |
|
281 |
mods=lapply(1:nrow(mm),fitmod,predict=F,nv=100) |
|
193 | 282 |
|
194 |
## Draw some plots
|
|
195 |
if(plot){
|
|
196 |
## partial residual plots
|
|
197 |
var=2
|
|
198 |
fv <- predict(m,type="terms") ## get term estimates
|
|
199 |
## compute partial residuals for first smooth...
|
|
200 |
prsd1 <- residuals(m,type="working") + fv[,var]
|
|
201 |
plot(m,select=var) ## plot first smooth
|
|
202 |
points(mod06l$cot[mod06l$month==mm$month[i]&!mod06l$validation],prsd1,pch=16,col="red")
|
|
203 |
}
|
|
283 |
## extract summary tables and write them
|
|
284 |
sums=do.call(rbind.data.frame,lapply(mods,function(m) return(m$summary)))
|
|
285 |
write.csv(sums,paste(outdir,tile,"_validation.csv",sep=""))
|
|
286 |
pred=lapply(mods,function(m) return(m$data))
|
|
287 |
### messy error handling to remove tables with incorrect number of columns...
|
|
288 |
nullpred=which(!do.call(c,lapply(pred,function(x) ncol(x)==20&!is.null(x))))
|
|
289 |
pred=pred[nullpred]
|
|
290 |
pred=do.call(rbind.data.frame,pred)
|
|
291 |
pred$tile=tile
|
|
292 |
write.csv(pred,paste(outdir,tile,"_predictions.csv",sep=""))
|
|
204 | 293 |
|
205 |
## summarize validation |
|
206 |
s1=summary(m) |
|
207 |
data.frame( |
|
208 |
model=attr(m,"model"), |
|
209 |
modelid=attr(m,"modelid"), |
|
210 |
month=attr(m,"month"), |
|
211 |
r2=s1$r.sq, |
|
212 |
dev=s1$dev.expl, |
|
213 |
aic=AIC(m), |
|
214 |
rmse=sqrt(mean((vd$ppt-predict(vlm,vd))^2)), |
|
215 |
vr2=summary(vlm)$r.squared) |
|
216 |
})) |
|
294 |
### read them back |
|
217 | 295 |
|
218 |
### Summary Figures |
|
296 |
#sums=do.call(rbind.data.frame,lapply(tiles,function(tile) read.csv(paste("data/tiles/",tile,"/",tile,"_validation.csv",sep="")))) |
|
297 |
sums=read.csv(paste(outdir,tile,"_validation.csv",sep="")) |
|
298 |
pred=read.csv(paste(outdir,tile,"_predictions.csv",sep="")) |
|
219 | 299 |
|
220 |
sumsl=melt(sums,id.vars=c("model","modelid","month")) |
|
300 |
#sums=rbind.data.frame(sums,read.csv(paste("data/tiles/",tiles[1],"/",tiles[1],"_validation.csv",sep=""))) |
|
301 |
#pred=read.csv(paste(outdir,tiles[1],"_predictions.csv",sep="")) |
|
221 | 302 |
|
222 |
sum2=cast(sumsl,model~variable, fun=function(x) c(mean=mean(x,na.rm=T),sd=sd(x,na.rm=T))) |
|
303 |
|
|
304 |
## reshape summary validation data |
|
305 |
#sumsl=melt(sums,id.vars=c("tile","model","modelid","month")) |
|
306 |
sum2=cast(sums[,-1],model+modelid~metric,value="mean",fun=function(x) c(median=round(median(x,na.rm=T),2),sd=round(sd(x,na.rm=T),2)));sum2 |
|
307 |
|
|
308 |
#by(sum2,tile,function(x) rank(apply(cbind(rank(x$valid.rmse_median),rank(-x$valid.r2_median)),1,mean,na.rm=T))) |
|
309 |
#sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T)) |
|
310 |
|
|
311 |
|
|
312 |
## add sorting variables across months |
|
313 |
sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T)) |
|
314 |
sum2[order(sum2$rank),c("model","modelid","valid.r2_median","valid.rmse_median","valid.nrmse_median","rank"),] #"valid.mer_mean", |
|
315 |
sums$fmodel=factor(sums$model,ordered=T,levels=rev(sum2$model[order(sum2$rank)])) |
|
316 |
|
|
317 |
## add sorting variables for each month |
|
318 |
sumsl2=cast(model~month~metric,value="mean",data=sums) |
|
319 |
sumsl2[,,"valid.r2"] |
|
223 | 320 |
|
224 | 321 |
#combineLimits(useOuterStrips(xyplot(value~month|model+variable,data=sumsl,scale=list(relation="free")))) |
225 |
bwplot(model~value|variable,data=sumsl,scale=list(x=list(relation="free"))) |
|
322 |
sums2=sums[sums$metric%in%c("valid.r2","valid.rmse"),] |
|
323 |
|
|
324 |
|
|
226 | 325 |
|
227 |
xyplot(ppt~cld|station,groups=month,data=mod06l,cex=.5,pch=16) |
|
326 |
############################################################### |
|
327 |
############################################################### |
|
328 |
### Draw some plots |
|
329 |
library(maptools) |
|
330 |
coast=getRgshhsMap(fn="/home/adamw/acrobates/Global/GSHHS_shp/gshhs/gshhs_l.b", |
|
331 |
xlim=c(360+roi_ll@xmin,360+roi_ll@xmax),ylim=c(roi_ll@ymin,roi_ll@ymax),level=1) |
|
332 |
coast=as(coast,"SpatialLines") |
|
333 |
coast=spTransform(coast,psin) |
|
228 | 334 |
|
229 |
round(cor(mod06l[,c("ppt","dem","cer","cer20","cld","cot")]),2) |
|
335 |
roi=spTransform(roi,psin) |
|
336 |
roil=as(roi,"SpatialLines") |
|
230 | 337 |
|
231 | 338 |
|
232 |
### draw some plots
|
|
339 |
### quantile function
|
|
233 | 340 |
gq=function(x,n=10,cut=F) { |
234 | 341 |
if(!cut) return(unique(quantile(x,seq(0,1,len=n+1),na.rm=T))) |
235 | 342 |
if(cut) return(cut(x,unique(quantile(x,seq(0,1,len=n+1),na.rm=T)))) |
236 | 343 |
} |
344 |
bgyr=colorRampPalette(c("brown","yellow","green","darkgreen","blue")) |
|
345 |
X11.options(type="cairo") |
|
346 |
|
|
347 |
|
|
348 |
png(paste("output/ModelComparisonRasters_",tile,"_%02d.png",sep=""),width=3000,height=2000,res=300,bg="transparent",type="cairo-png") |
|
349 |
|
|
350 |
## COT climatologies |
|
351 |
at=unique(seq(0,30,len=100)) |
|
352 |
p=levelplot(cot,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black')) |
|
353 |
print(p) |
|
354 |
|
|
355 |
## all monthly predictions |
|
356 |
p1=brick(stack(list.files(outdir,pattern="pred_.*_10.nc$",full=T),varname="ppt")) |
|
357 |
projection(p1)=psin |
|
358 |
names(p1)=month.name |
|
359 |
title=""#"Interpolated Precipitation" |
|
360 |
at=unique(quantile(as.matrix(p1),seq(0,1,len=100),na.rm=T)) |
|
361 |
at=unique(seq(min(p1@data@min),max(p1@data@max),len=100)) |
|
362 |
p=levelplot(p1,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black')) |
|
363 |
print(p) |
|
364 |
|
|
365 |
## compare two models |
|
366 |
p2=brick(stack(list.files(outdir,pattern="pred_3_3.nc|pred_3_12.nc",full=T)[2:1],varname="ppt")) |
|
367 |
projection(p2)=psin |
|
368 |
names(p2)=c("X+Y+Elevation","X+Y+Elevation+MOD06") |
|
369 |
at=unique(seq(min(p2@data@min),max(p2@data@max),len=100)) |
|
370 |
p=levelplot(p2,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black')) |
|
371 |
print(p) |
|
372 |
dev.off() |
|
373 |
|
|
374 |
png(paste("output/dem_",tile,".png",sep=""),width=3000,height=3000,res=300,bg="transparent") |
|
375 |
at=unique(seq(min(dem@data@min),max(dem@data@max),len=100)) |
|
376 |
st=unique(coordinates(dm2)) |
|
377 |
levelplot(dem,col.regions=terrain.colors(100),at=at,margin=F)+ |
|
378 |
layer(panel.xyplot(st[,1],st[,2],cex=.5,pch=3,col="black")) |
|
379 |
dev.off() |
|
380 |
|
|
381 |
pdf(paste("output/ModelComparison_",tile,".pdf",sep=""),width=11,height=5,useDingbats=F) |
|
382 |
|
|
383 |
bwplot(fmodel~mean|metric,data=sums2,scale=list(x=list(relation="free")),subscripts=T,panel=function(x,y,subscripts){ |
|
384 |
panel.abline(v=mean(x)) |
|
385 |
types=models$type[match(sort(unique(y)),models$model)] |
|
386 |
panel.bwplot(x,y,fill=ifelse(types=="Spatial","grey",ifelse(types=="MOD35","dodgerblue","orangered"))) |
|
387 |
# panel.text(x,jitter(as.numeric(y),factor=.5),labels=as.character(sums2$month[subscripts]),col="grey40",cex=.3) |
|
388 |
panel.xyplot(x,y,pch=16,col="grey48",cex=.5) |
|
389 |
}, |
|
390 |
#strip=strip.custom(factor.levels=c("Validation R^2","Validation RMSE")), |
|
391 |
main="Comparison of Validation Metrics",sub="Vertical line indicates overall mean", |
|
392 |
key=list(space="right",rect=list(col=c("orangered","dodgerblue","grey")),text=list(c("MOD06","MOD35","Spatial")))) |
|
393 |
|
|
394 |
dev.off() |
|
395 |
|
|
396 |
|
|
397 |
### illustration of GAM for IBS poster |
|
398 |
#mm |
|
399 |
i=56 |
|
400 |
mm[i,] |
|
401 |
tm=fitmod(i,predict=F,nv=1) |
|
402 |
|
|
403 |
## partial residual plots |
|
404 |
var=4 |
|
405 |
fv <- predict(tm$mod,type="terms") ## get term estimates |
|
406 |
v=sub("[)]","",sub("s[(]","",colnames(fv)))[var] |
|
407 |
## compute partial residuals for first smooth... |
|
408 |
prsd1 <- residuals(tm$mod,type="working") + fv[,var] |
|
409 |
|
|
410 |
pdf(paste("output/GAMexample.pdf"),width=11,height=6,useDingbats=F) |
|
411 |
plot(tm$mod,select=var,las=1,rug=F,cex.lab=2,cex.axis=2) ## plot first smooth |
|
412 |
points(tm$mod$model[,v],prsd1,pch=16,cex=.5,col="red") |
|
413 |
dev.off() |
|
414 |
|
|
415 |
|
|
416 |
xyplot(mean~month|metric,groups=model,type="l",data=sums,scale=list(y=list(relation="free")),auto.key=list(space="right"))#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x))) |
|
417 |
|
|
418 |
sums$type=as.factor(models$type[match(sums$model,models$model)]) |
|
419 |
|
|
420 |
xyplot(mean~month|metric,groups=model,type="l",data=sums2,panel=function(x,y,subscripts,groups){ |
|
421 |
tsums=sums[subscripts,] |
|
422 |
panel.xyplot(x,y,groups=groups,type="l",subscripts=subscripts,col=ifelse(tsums$type=="Spatial","grey20",ifelse(tsums$type=="MOD35","blue","red"))) |
|
423 |
panel.segments(tsums$month,tsums$Q2.5,tsums$month,tsums$Q97.5,groups=groups,subscripts=subscripts) |
|
424 |
},subscripts=T,scale=list(y=list(relation="free")), |
|
425 |
key=list(space="right",text=list(c("Spatial","MOD35","MOD06")),lines=list(col=c("grey20","blue","red")))) |
|
426 |
|
|
427 |
#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x))) |
|
428 |
|
|
429 |
|
|
430 |
xyplot(pred~ppt|as.factor(month),groups=validation,data=pred,panel=function(x,y,subscripts,groups){ |
|
431 |
panel.xyplot(x,y,groups=groups,subscripts=subscripts) |
|
432 |
panel.abline(0,1,col="red",lwd=2) |
|
433 |
},scales=list(relation="free"), |
|
434 |
main="Predicted vs. Observed mean monthly precipitation at validation stations", |
|
435 |
sub="Line is y=x",auto.key=T) |
|
436 |
|
|
437 |
## plot prediction rasters |
|
438 |
|
|
439 |
p=raster(ncfile,varname="ppt") |
|
440 |
|
|
441 |
plot3D(dem, maxpixels=100000,zfac=.5, drape=p, rev=FALSE, adjust=TRUE) |
|
442 |
|
|
443 |
|
|
444 |
|
|
445 |
################################################################### |
|
446 |
################################################################### |
|
237 | 447 |
|
238 | 448 |
### add some additional variables |
239 | 449 |
mod06s$month=factor(mod06s$month,labels=format(as.Date(paste("2000",1:12,"15",sep="-")),"%b")) |
... | ... | |
248 | 458 |
mod06sl=melt(mod06s[,!grepl("lppt",colnames(mod06s))],id.vars=c("id","lon","lat","elev","month","ppt","glon","glon2","gelev","gbin")) |
249 | 459 |
levels(mod06sl$variable)=c("Effective Radius (um)","Very Cloudy Days (%)","Cloudy Days (%)","Optical Thickness (%)","Liquid Water Path") |
250 | 460 |
|
251 |
################################################################### |
|
252 |
################################################################### |
|
253 |
|
|
254 |
bgyr=colorRampPalette(c("blue","green","yellow","red","purple")) |
|
255 |
|
|
256 |
X11.options(type="cairo") |
|
257 | 461 |
pdf("output/MOD06_summary.pdf",width=11,height=8.5) |
258 | 462 |
|
259 | 463 |
# % cloudy maps |
... | ... | |
430 | 634 |
dev.off() |
431 | 635 |
|
432 | 636 |
|
637 |
|
|
638 |
|
|
639 |
|
|
640 |
|
|
641 |
|
|
642 |
|
|
643 |
#################### |
|
644 |
#################### OLD JUNK BELOW! |
|
645 |
#################### |
|
646 |
|
|
647 |
|
|
648 |
#levelplot(cer)#+points(x,y,data=dm2@data) |
|
649 |
|
|
650 |
### extract MOD06 data for each station |
|
651 |
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="") |
|
652 |
stcerp=extract(cerp,dm2,fun=mean);colnames(stcerp)=paste("cerp_mean_",as.numeric(format(as.Date(cerp@z$Date),"%m")),sep="") |
|
653 |
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="") |
|
654 |
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="") |
|
655 |
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="") |
|
656 |
stdem=extract(dem,dm2) |
|
657 |
### generate new design matrix |
|
658 |
mod06=cbind.data.frame(station=dm$station,stcer,stcerp,stcot,stcld,stcer20) |
|
659 |
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06" |
|
660 |
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_")) |
|
661 |
mod06l=unique(mod06l) |
|
662 |
mod06l=cast(mod06l,station+moment+month~variable,value="mod06") |
|
663 |
mod06l=merge(dm2@data,mod06l,by=c("station","month")) |
|
664 |
mod06l=mod06l[!is.na(mod06l$cer),] |
|
665 |
|
|
666 |
mod06l=mod06l[order(mod06l$month),] |
|
667 |
mod06l$lppt=log(mod06l$ppt+1) |
|
668 |
|
|
669 |
#xyplot(latitude~longitude|as.factor(month),groups=cut(mod06l$ppt,breaks=quantile(mod06l$ppt,seq(0,1,len=10))),data=mod06l,auto.key=T,col=grey(seq(0,1,len=10)),pch=16,cex=.5) |
|
670 |
#plot(cer) |
Also available in: Unified diff
Merging with Pleiades copies