1 |
1 |
###################################################################################
|
2 |
|
### R code to aquire and process MOD06_L2 cloud data from the MODIS platform
|
|
2 |
### R code to interpolate monthly climatologies
|
3 |
3 |
|
4 |
4 |
|
5 |
5 |
## connect to server of choice
|
... | ... | |
7 |
7 |
#R
|
8 |
8 |
|
9 |
9 |
library(sp)
|
10 |
|
library(spgrass6)
|
|
10 |
#library(spgrass6)
|
11 |
11 |
library(rgdal)
|
12 |
12 |
library(reshape)
|
13 |
13 |
library(ncdf4)
|
14 |
|
library(geosphere)
|
15 |
|
library(rgeos)
|
|
14 |
#library(geosphere)
|
|
15 |
#library(rgeos)
|
16 |
16 |
library(multicore)
|
17 |
17 |
library(raster)
|
|
18 |
library(rasterVis)
|
18 |
19 |
library(lattice)
|
19 |
20 |
library(latticeExtra)
|
20 |
|
library(rgl)
|
21 |
|
library(hdf5)
|
22 |
|
library(rasterVis)
|
23 |
|
library(heR.Misc)
|
24 |
|
library(car)
|
|
21 |
#library(rgl)
|
|
22 |
#library(hdf5)
|
|
23 |
#library(heR.Misc)
|
|
24 |
#library(car)
|
25 |
25 |
library(mgcv)
|
26 |
26 |
library(sampling)
|
27 |
27 |
|
... | ... | |
35 |
35 |
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
|
36 |
36 |
save(tb,file="modlandTiles.Rdata")
|
37 |
37 |
|
38 |
|
tile="h11v08" #can move this to submit script if needed
|
39 |
|
#tile="h09v04" #oregon
|
40 |
|
|
|
38 |
#tile="h11v08" #can move this to submit script if needed
|
|
39 |
tile="h21v09" #Kenya
|
|
40 |
tile="h09v04" #oregon
|
|
41 |
tiles=c("h11v08","h09v04")
|
|
42 |
|
41 |
43 |
psin=CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs")
|
42 |
44 |
|
43 |
45 |
tile_bb=tb[tb$tile==tile,] ## identify tile of interest
|
44 |
46 |
roi_ll=extent(tile_bb$lon_min,tile_bb$lon_max,tile_bb$lat_min,tile_bb$lat_max)
|
45 |
|
#roi=spTransform(roi,psin)
|
46 |
|
#roil=as(roi,"SpatialLines")
|
47 |
47 |
|
48 |
48 |
dmod06="data/modis/mod06/summary"
|
|
49 |
outdir=paste("data/tiles/",tile,"/",sep="")
|
49 |
50 |
|
50 |
51 |
|
51 |
52 |
##########################
|
52 |
53 |
#### Organize the data
|
53 |
54 |
months=seq(as.Date("2000-01-15"),as.Date("2000-12-15"),by="month")
|
54 |
55 |
|
55 |
|
getmod06<-function(variable){
|
56 |
|
d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc",sep=""),full=T),varname=toupper(variable))
|
57 |
|
# d=dropLayer(d,1)
|
|
56 |
getmod06<-function(variable,month=NA){
|
|
57 |
d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc$",sep=""),full=T),varname=toupper(variable))
|
|
58 |
if(!is.na(month)) {
|
|
59 |
d=subset(d,subset=month)
|
|
60 |
names(d)=variable
|
|
61 |
}
|
|
62 |
if(is.na(month)){
|
|
63 |
setZ(d,format(as.Date(d@z$Date),"%m"),name="time")
|
|
64 |
layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m"))
|
|
65 |
}
|
58 |
66 |
projection(d)=psin
|
59 |
|
setZ(d,format(as.Date(d@z$Date),"%m"),name="time")
|
60 |
|
# d@z=as.Date(d@z$Date)
|
61 |
|
layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m"))
|
62 |
67 |
return(d)
|
63 |
68 |
}
|
64 |
69 |
|
65 |
|
# drop #1?
|
66 |
|
|
67 |
70 |
cer=getmod06("cer")
|
68 |
71 |
cld=getmod06("cld")
|
69 |
72 |
cot=getmod06("cot")
|
70 |
73 |
cer20=getmod06("cer20")
|
71 |
74 |
|
|
75 |
|
|
76 |
|
72 |
77 |
pcol=colorRampPalette(c("brown","red","yellow","darkgreen"))
|
73 |
|
#levelplot(cer,col.regions=pcol(20))
|
|
78 |
|
|
79 |
### create data dir for tiled data
|
|
80 |
ddir=paste("data/tiles/",tile,sep="")
|
|
81 |
if(!file.exists(ddir)) dir.create(ddir)
|
74 |
82 |
|
75 |
83 |
## load WorldClim data for comparison (download then uncompress)
|
76 |
84 |
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/prec_30s_bil.zip",wait=F)
|
77 |
85 |
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/alt_30s_bil.zip",wait=F)
|
78 |
86 |
|
79 |
87 |
### load WORLDCLIM elevation
|
80 |
|
#dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T))
|
81 |
|
#projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
|
82 |
|
#dem=crop(dem,roi_ll)
|
83 |
|
#dem[dem>60000]=NA
|
84 |
|
#dem=projectRaster(dem,cer)
|
85 |
|
#writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff")
|
|
88 |
if(!file.exists(paste(ddir,"/dem_",tile,".tif",sep=""))){
|
|
89 |
dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T))
|
|
90 |
projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
|
|
91 |
dem=crop(dem,roi_ll)
|
|
92 |
dem[dem>30000]=NA
|
|
93 |
dem=projectRaster(dem,cer)
|
|
94 |
writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff",overwrite=T)
|
|
95 |
}
|
86 |
96 |
dem=raster(paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""))
|
|
97 |
names(dem)="dem"
|
87 |
98 |
|
88 |
99 |
### get station data, subset to stations in region, and transform to sinusoidal
|
89 |
|
dm=readOGR(paste("data/tiles/",tile,sep=""),paste("station_monthly_",tile,"_PRCP",sep=""))
|
90 |
|
colnames(dm@data)[grep("elevation",colnames(dm@data))]="dem"
|
|
100 |
dm=readOGR(ddir,paste("station_monthly_",tile,"_PRCP",sep=""))
|
|
101 |
colnames(dm@data)[grep("elevation",colnames(dm@data))]="elev"
|
91 |
102 |
colnames(dm@data)[grep("value",colnames(dm@data))]="ppt"
|
92 |
103 |
|
93 |
|
#xyplot(latitude~longitude|month,data=dm@data)
|
|
104 |
#xyplot(latitude~longitude|month,data=dm@data)
|
|
105 |
|
|
106 |
## transform to sinusoidal to overlay with raster data
|
94 |
107 |
dm2=spTransform(dm,CRS(projection(cer)))
|
95 |
108 |
dm2@data[,c("x","y")]=coordinates(dm2)
|
96 |
109 |
|
97 |
|
### extract MOD06 data for each station
|
98 |
|
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="")
|
99 |
|
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="")
|
100 |
|
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="")
|
101 |
|
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="")
|
102 |
|
stdem=extract(dem,dm2)
|
103 |
|
#mod06=cbind.data.frame(station=dm$station,stcer[,-1],stcot[,-1],stcld[,-1],stcer20[,-1])
|
104 |
|
mod06=cbind.data.frame(station=dm$station,stcer,stcot,stcld,stcer20)
|
105 |
|
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06"
|
106 |
|
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_"))
|
107 |
|
mod06l=unique(mod06l)
|
108 |
|
mod06l=cast(mod06l,station+moment+month~variable,value="mod06")
|
109 |
|
mod06l=merge(dm2@data,mod06l,by=c("station","month"))
|
110 |
|
mod06l=mod06l[!is.na(mod06l$cer),]
|
111 |
|
|
112 |
|
mod06l=mod06l[order(mod06l$month),]
|
|
110 |
## limit data to station-months with at least 300 daily observations (~10 years)
|
|
111 |
dm2=dm2[dm2$count>300,]
|
113 |
112 |
|
114 |
|
#xyplot(value~cer|month,data=mod06l,scales=list(relation="free"),pch=16,cex=.5)
|
115 |
|
#xyplot(value~cer|station,data=mod06l[mod06l$count>400,],pch=16,cex=.5)
|
116 |
|
#xyplot(cot~month,groups=station,data=mod06l,type="l")
|
|
113 |
## create coordinate rasters
|
117 |
114 |
|
118 |
115 |
### create monthly raster bricks for prediction
|
119 |
|
m=2
|
120 |
|
|
121 |
|
pdata=stack(
|
122 |
|
subset(cer,subset=m),
|
123 |
|
subset(cot,subset=m),
|
124 |
|
subset(cer20,subset=m),
|
125 |
|
subset(cld,subset=m)
|
126 |
|
)
|
|
116 |
getd<-function(vars=c("cer","cot","cer20","cld"),month){
|
|
117 |
pdata=stack(lapply(vars,function(v){getmod06(v,month=month)}))
|
|
118 |
x=rasterFromXYZ(coordinates(dem)[,c(1,2,1)])
|
|
119 |
y=rasterFromXYZ(coordinates(dem)[,c(1,2,2)])
|
|
120 |
pdata=stack(pdata,dem,x,y)
|
|
121 |
return(pdata)
|
|
122 |
}
|
127 |
123 |
|
128 |
124 |
## Set up models to compare
|
129 |
125 |
####################################
|
130 |
126 |
#### build table comparing various metrics
|
131 |
|
models=c(
|
132 |
|
"ppt~s(y)+ s(x)",
|
133 |
|
"ppt~s(y,x)",
|
134 |
|
"ppt~s(y,x) + s(dem)",
|
135 |
|
"ppt~s(y,x)+s(dem)+cer+cld+cot+cer20",
|
136 |
|
"ppt~s(y,x)+s(dem)+s(cer)",
|
137 |
|
"ppt~s(y,x)+s(dem)+s(cer20)",
|
138 |
|
"ppt~s(y,x)+s(dem)+s(cld)",
|
139 |
|
"ppt~s(y,x)+s(dem)+s(cot)",
|
140 |
|
"ppt~s(y,x)+s(dem)+s(cer20,cld)",
|
141 |
|
"ppt~s(y,x)+s(dem)+s(cer20,cot)",
|
142 |
|
"ppt~s(y,x)+s(dem)+s(cer,cld)",
|
143 |
|
"ppt~s(dem)+s(cer,cot)")
|
144 |
|
months=1:12
|
|
127 |
models=data.frame(model=c(
|
|
128 |
# "ppt~s(y)+ s(x)",
|
|
129 |
# "ppt~s(y,x)",
|
|
130 |
"lppt~s(y,x)",
|
|
131 |
"lppt~s(y,x)+s(dem)",
|
|
132 |
"lppt~s(y,x,dem)",
|
|
133 |
"lppt~s(y,x,dem)+s(cld)",
|
|
134 |
"lppt~s(y,x)+s(dem)+s(cld)",
|
|
135 |
"lppt~s(y,x)+s(cld)",
|
|
136 |
"lppt~s(y,x)+s(cot)",
|
|
137 |
"lppt~s(y,x)+s(dem)+s(cot)",
|
|
138 |
"lppt~s(y,x,dem)+s(cot)",
|
|
139 |
"lppt~s(y,x)+s(cer20)",
|
|
140 |
"lppt~s(y,x)+s(dem)+cld+cot+cer20",
|
|
141 |
"lppt~s(y,x,dem)+cld+cot+cer20",
|
|
142 |
"lppt~s(y,x)+s(dem)+s(cld,cot,cer20)",
|
|
143 |
"lppt~s(y,x)+s(dem)+s(cld)+s(cot)+s(cer20)"))
|
|
144 |
months=1:12
|
|
145 |
## add category for each model
|
|
146 |
models$type="Spatial"
|
|
147 |
models$type[grepl("cer|cot|cer20",models$model)]="MOD06"
|
|
148 |
models$type[grepl("cld",models$model)&!grepl("cer|cot|cer20",models$model)]="MOD35"
|
145 |
149 |
|
146 |
150 |
## build the list of models/months to process
|
147 |
|
mm=expand.grid(model=models,month=months)
|
148 |
|
mm$model=as.character(mm$model)
|
149 |
|
mm$mid=match(mm$model,models)
|
|
151 |
mm=expand.grid(model=models$model,months=months,stringsAsFactors=F)
|
|
152 |
mm$mid=match(mm$model,models$model)
|
150 |
153 |
|
151 |
|
# mod1<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
|
152 |
|
# mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
|
153 |
|
# mod3<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
|
154 |
|
# mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s)
|
155 |
|
# mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)
|
156 |
|
# mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s)
|
157 |
|
# mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s)
|
158 |
|
# mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s)
|
159 |
|
|
160 |
154 |
### Sample validation stations
|
161 |
155 |
prop=0.1
|
162 |
|
mod06l$validation=F
|
163 |
|
mod06l$validation[as.numeric(rownames(strata(mod06l,stratanames="month",size=as.integer(prop*table(mod06l$month)),method="srswor")))]=T
|
164 |
156 |
|
165 |
157 |
### run it
|
166 |
|
mods=lapply(1:nrow(mm),function(i){
|
167 |
|
mod=try(gam(as.formula(mm$model[i]),data=mod06l[mod06l$month==mm$month[i]&!mod06l$validation,]))
|
168 |
|
## add some attributes to the object to help ID it later
|
169 |
|
attr(mod,"month")=mm$month[i]
|
170 |
|
attr(mod,"model")=mm$model[i]
|
171 |
|
attr(mod,"modelid")=mm$mid[i]
|
172 |
|
print(paste("Finished model ",mm$model[i]," for month",mm$month[i]))
|
173 |
|
return(mod)
|
174 |
|
})
|
175 |
|
|
176 |
|
## Get summary stats
|
177 |
|
sums=do.call(rbind.data.frame,lapply(mods,function(m,plot=F){
|
178 |
|
if(class(m)=="try-error") {
|
179 |
|
return(data.frame(model=attr(m,"model"),
|
180 |
|
modelid=attr(m,"modelid"),
|
181 |
|
month=attr(m,"month"),
|
182 |
|
r2=NA,
|
183 |
|
dev=NA,
|
184 |
|
aic=NA,
|
185 |
|
rmse=NA,
|
186 |
|
vr2=NA))
|
|
158 |
fitmod<-function(i,prop=0.1,predict=F, nv,...){
|
|
159 |
model=as.character(mm$model[i])
|
|
160 |
month=mm$month[i]
|
|
161 |
mid=mm$mid[i]
|
|
162 |
### extract data
|
|
163 |
dr=getd(month=month)
|
|
164 |
## subset data for this month
|
|
165 |
dt=dm2[dm2$month==month,]
|
|
166 |
## add log+1 ppt
|
|
167 |
dt$lppt=log(dt$ppt+1)
|
|
168 |
## extract for points from dm2
|
|
169 |
dr2=subset(dr,subset=grep("^x$|^y$",names(dr),invert=T))
|
|
170 |
ds=cbind.data.frame(dt@data,extract(dr2,dt))
|
|
171 |
### flag stations to hold out for validation
|
|
172 |
ts2=do.call(rbind.data.frame,lapply(1:nv,function(r) {
|
|
173 |
ds$validation=F
|
|
174 |
ds$validation[sample(1:nrow(ds),size=as.integer(prop*nrow(ds)))]=T
|
|
175 |
### fit model
|
|
176 |
mod<<-try(gam(as.formula(model),data=ds[!ds$validation,]),silent=T)
|
|
177 |
## if fitting fails, return a NULL
|
|
178 |
if(class(mod)[[1]]=="try-error") return(NULL)
|
|
179 |
### Model Validation
|
|
180 |
vd=ds[ds$validation,] # extract validation points
|
|
181 |
y=as.data.frame(predict(mod,ds,se.fit=T)) # make predictions
|
|
182 |
if(attr(terms(mod),"variables")[[2]]=="lppt"){ #if modeling log(ppt+1), transform back to real units
|
|
183 |
y$fit=exp(y$fit)-1
|
|
184 |
y$se.fit=exp(y$se.fit)-1
|
|
185 |
}
|
|
186 |
ds[,c("pred","pred.se")]=cbind(y$fit,y$se.fit)
|
|
187 |
ds$model=model
|
|
188 |
ds$modelid=mid
|
|
189 |
vlm=lm(pred~ppt,data=ds[ds$validation,]) # lm() to summarize predictive fit
|
|
190 |
if(r==nv) ds<<-ds #if on last iteration, save predictions
|
|
191 |
### summarize validation
|
|
192 |
s1=summary(mod)
|
|
193 |
s2=data.frame(
|
|
194 |
model.r2=s1$r.sq,
|
|
195 |
model.dev=s1$dev.expl,
|
|
196 |
model.aic=AIC(mod),
|
|
197 |
valid.rmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T)),
|
|
198 |
valid.nrmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T))/mean(vd$ppt+.1,na.rm=T),
|
|
199 |
valid.mer=mean(vd$ppt-y$fit[ds$validation],na.rm=T),
|
|
200 |
valid.mae=mean(abs(vd$ppt-y$fit[ds$validation]),na.rm=T),
|
|
201 |
valid.r2=summary(vlm)$r.squared)
|
|
202 |
return(s2)}))
|
|
203 |
## summarize the gcv datasets
|
|
204 |
if(nrow(ts2)==0) return(NULL)
|
|
205 |
s2a=data.frame(mean=apply(ts2,2,mean,na.rm=T))
|
|
206 |
s2b=t(apply(ts2,2,quantile,c(0.025,0.975),na.rm=T))
|
|
207 |
colnames(s2b)=paste("Q",c(2.5,97.5),sep="")
|
|
208 |
s2=cbind.data.frame(tile=tile, model=model, modelid=mid, month=month,
|
|
209 |
metric=rownames(s2a),s2a,s2b)
|
|
210 |
rownames(s2)=1:nrow(s2)
|
|
211 |
### add some attributes to the object to help ID it later
|
|
212 |
attr(mod,"month")=ds$month[1]
|
|
213 |
attr(mod,"model")=model
|
|
214 |
attr(mod,"modelid")=mid
|
|
215 |
### generate predictions?
|
|
216 |
if(predict){
|
|
217 |
## write prediction
|
|
218 |
ncfile=paste(outdir,tile,"_pred_",ds$month[1],"_",mid[1],".nc",sep="")
|
|
219 |
predict(dr,mod,filename=ncfile,format="CDF",overwrite=T)
|
|
220 |
## add some attributes to nc file
|
|
221 |
ncopath=""
|
|
222 |
## add other attributes
|
|
223 |
system(paste(ncopath,"ncrename -v variable,ppt ",ncfile,sep=""))
|
|
224 |
system(paste(ncopath,"ncatted -a units,ppt,o,c,\"mm\" ",
|
|
225 |
"-a model,ppt,o,c,\"",model,"\" ",
|
|
226 |
"-a long_name,ppt,o,c,\"Mean Monthly Precipitation\" ",
|
|
227 |
ncfile,sep=""))
|
|
228 |
## create NC file to hold summary data and then append it to output file
|
|
229 |
## add table for validation metrics
|
|
230 |
s2_dim1=ncdim_def("metrics1",units="",vals=1:ncol(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics")
|
|
231 |
s2_dim2=ncdim_def("metrics2",units="",vals=1:nrow(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics")
|
|
232 |
s2_var=ncvar_def("modelmetrics",units="varies",list(s2_dim1,s2_dim2),missval=-999,longname="Model Fitting and Validation Metrics",
|
|
233 |
prec="float")
|
|
234 |
## simplify data table for incorporation into netcdf file
|
|
235 |
ds2=ds[,-1];ds2$validation=ifelse(ds2$validation,1,0)
|
|
236 |
ds2=as.matrix(ds2)
|
|
237 |
data_dim1=ncdim_def("data",units="",vals=1:ncol(ds2),unlim=FALSE,create_dimvar=T,longname="Data used in model fitting")
|
|
238 |
data_dim2=ncdim_def("stations",units="",vals=1:nrow(ds2),unlim=FALSE,create_dimvar=T,longname="Stations used in model fitting")
|
|
239 |
data_var=ncvar_def("modeldata",units="varies",list(data_dim1,data_dim2),missval=-999,longname="Data used in model fitting",
|
|
240 |
prec="float")
|
|
241 |
tncf=paste(tempdir(),"/",tile,month,mid,"metrics.nc",sep="") #temp file to hold metric data
|
|
242 |
nc_create(filename=tncf,vars=list(data_var,s2_var))
|
|
243 |
tnc=nc_open(tncf,write=T)
|
|
244 |
# ncvar_put(tnc,s2_var,vals=s2) ## put in validatation data
|
|
245 |
ncvar_put(tnc,data_var,vals=ds2) ## put in validatation data
|
|
246 |
nc_close(tnc) ## close the file
|
|
247 |
system(paste(ncopath,"ncks -A ",tncf," ",ncfile,sep=""))
|
|
248 |
system(paste(ncopath,"ncatted -a value,modelmetrics,o,c,\"",paste(1:ncol(s2),":",colnames(s2),collapse="
|
|
249 |
",sep=""),"\" ",ncfile,sep=""))
|
|
250 |
## also add metrics as variable attributes for easier viewing
|
|
251 |
for(c in 1:ncol(s2)) system(paste(ncopath,"ncatted -a ",colnames(s2)[c],",ppt,o,",
|
|
252 |
ifelse(class(s2[1,c])!="numeric","c,\"","d,"),s2[1,c],
|
|
253 |
ifelse(class(s2[1,c])!="numeric","\" "," "),ncfile,sep=""))
|
|
254 |
## Add time dimension to ease merging files later
|
|
255 |
system(paste(ncopath,"ncecat -O -u time ",ncfile," ",ncfile,sep=""))
|
|
256 |
cat(paste("
|
|
257 |
netcdf time {
|
|
258 |
dimensions:
|
|
259 |
time = 1 ;
|
|
260 |
variables:
|
|
261 |
int time(time) ;
|
|
262 |
time:units = \"months since 2000-01-01 00:00:00\" ;
|
|
263 |
time:calendar = \"gregorian\";
|
|
264 |
time:long_name = \"time of observation\";
|
|
265 |
data:
|
|
266 |
time=",as.integer(ds$month[1]-1),";
|
|
267 |
}"),file=paste(tempdir(),"/time.cdl",sep=""))
|
|
268 |
system(paste("ncgen -o ",tempdir(),"/time.nc ",tempdir(),"/time.cdl",sep=""))
|
|
269 |
system(paste(ncopath,"ncks -A ",tempdir(),"/time.nc ",ncfile,sep=""))
|
|
270 |
## global attributes
|
|
271 |
system(paste(ncopath,"ncatted -a title,global,o,c,\"Interpolated Monthly Precipitation\" ",
|
|
272 |
"-a institution,global,o,c,\"Yale University\" ",
|
|
273 |
"-a source,global,o,c,\"Interpolated from station data\" ",
|
|
274 |
"-a contact,global,o,c,\"adam.wilson@yale.edu\" ",
|
|
275 |
ncfile,sep=""))
|
187 |
276 |
}
|
|
277 |
print(paste("Finished model ",model," for month",ds$month[1]))
|
|
278 |
return(list(model=mod,summary=s2,data=ds))
|
|
279 |
}
|
188 |
280 |
|
189 |
|
## make validation predictions
|
190 |
|
vd=mod06l[mod06l$validation&mod06l$month==attr(m,"month"),]
|
191 |
|
y=predict(m,mod06l[mod06l$validation&mod06l$month==attr(m,"month"),])
|
192 |
|
vlm=lm(y~vd$ppt)
|
|
281 |
mods=lapply(1:nrow(mm),fitmod,predict=F,nv=100)
|
193 |
282 |
|
194 |
|
## Draw some plots
|
195 |
|
if(plot){
|
196 |
|
## partial residual plots
|
197 |
|
var=2
|
198 |
|
fv <- predict(m,type="terms") ## get term estimates
|
199 |
|
## compute partial residuals for first smooth...
|
200 |
|
prsd1 <- residuals(m,type="working") + fv[,var]
|
201 |
|
plot(m,select=var) ## plot first smooth
|
202 |
|
points(mod06l$cot[mod06l$month==mm$month[i]&!mod06l$validation],prsd1,pch=16,col="red")
|
203 |
|
}
|
|
283 |
## extract summary tables and write them
|
|
284 |
sums=do.call(rbind.data.frame,lapply(mods,function(m) return(m$summary)))
|
|
285 |
write.csv(sums,paste(outdir,tile,"_validation.csv",sep=""))
|
|
286 |
pred=lapply(mods,function(m) return(m$data))
|
|
287 |
### messy error handling to remove tables with incorrect number of columns...
|
|
288 |
nullpred=which(!do.call(c,lapply(pred,function(x) ncol(x)==20&!is.null(x))))
|
|
289 |
pred=pred[nullpred]
|
|
290 |
pred=do.call(rbind.data.frame,pred)
|
|
291 |
pred$tile=tile
|
|
292 |
write.csv(pred,paste(outdir,tile,"_predictions.csv",sep=""))
|
204 |
293 |
|
205 |
|
## summarize validation
|
206 |
|
s1=summary(m)
|
207 |
|
data.frame(
|
208 |
|
model=attr(m,"model"),
|
209 |
|
modelid=attr(m,"modelid"),
|
210 |
|
month=attr(m,"month"),
|
211 |
|
r2=s1$r.sq,
|
212 |
|
dev=s1$dev.expl,
|
213 |
|
aic=AIC(m),
|
214 |
|
rmse=sqrt(mean((vd$ppt-predict(vlm,vd))^2)),
|
215 |
|
vr2=summary(vlm)$r.squared)
|
216 |
|
}))
|
|
294 |
### read them back
|
217 |
295 |
|
218 |
|
### Summary Figures
|
|
296 |
#sums=do.call(rbind.data.frame,lapply(tiles,function(tile) read.csv(paste("data/tiles/",tile,"/",tile,"_validation.csv",sep=""))))
|
|
297 |
sums=read.csv(paste(outdir,tile,"_validation.csv",sep=""))
|
|
298 |
pred=read.csv(paste(outdir,tile,"_predictions.csv",sep=""))
|
219 |
299 |
|
220 |
|
sumsl=melt(sums,id.vars=c("model","modelid","month"))
|
|
300 |
#sums=rbind.data.frame(sums,read.csv(paste("data/tiles/",tiles[1],"/",tiles[1],"_validation.csv",sep="")))
|
|
301 |
#pred=read.csv(paste(outdir,tiles[1],"_predictions.csv",sep=""))
|
221 |
302 |
|
222 |
|
sum2=cast(sumsl,model~variable, fun=function(x) c(mean=mean(x,na.rm=T),sd=sd(x,na.rm=T)))
|
|
303 |
|
|
304 |
## reshape summary validation data
|
|
305 |
#sumsl=melt(sums,id.vars=c("tile","model","modelid","month"))
|
|
306 |
sum2=cast(sums[,-1],model+modelid~metric,value="mean",fun=function(x) c(median=round(median(x,na.rm=T),2),sd=round(sd(x,na.rm=T),2)));sum2
|
|
307 |
|
|
308 |
#by(sum2,tile,function(x) rank(apply(cbind(rank(x$valid.rmse_median),rank(-x$valid.r2_median)),1,mean,na.rm=T)))
|
|
309 |
#sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T))
|
|
310 |
|
|
311 |
|
|
312 |
## add sorting variables across months
|
|
313 |
sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T))
|
|
314 |
sum2[order(sum2$rank),c("model","modelid","valid.r2_median","valid.rmse_median","valid.nrmse_median","rank"),] #"valid.mer_mean",
|
|
315 |
sums$fmodel=factor(sums$model,ordered=T,levels=rev(sum2$model[order(sum2$rank)]))
|
|
316 |
|
|
317 |
## add sorting variables for each month
|
|
318 |
sumsl2=cast(model~month~metric,value="mean",data=sums)
|
|
319 |
sumsl2[,,"valid.r2"]
|
223 |
320 |
|
224 |
321 |
#combineLimits(useOuterStrips(xyplot(value~month|model+variable,data=sumsl,scale=list(relation="free"))))
|
225 |
|
bwplot(model~value|variable,data=sumsl,scale=list(x=list(relation="free")))
|
|
322 |
sums2=sums[sums$metric%in%c("valid.r2","valid.rmse"),]
|
|
323 |
|
|
324 |
|
226 |
325 |
|
227 |
|
xyplot(ppt~cld|station,groups=month,data=mod06l,cex=.5,pch=16)
|
|
326 |
###############################################################
|
|
327 |
###############################################################
|
|
328 |
### Draw some plots
|
|
329 |
library(maptools)
|
|
330 |
coast=getRgshhsMap(fn="/home/adamw/acrobates/Global/GSHHS_shp/gshhs/gshhs_l.b",
|
|
331 |
xlim=c(360+roi_ll@xmin,360+roi_ll@xmax),ylim=c(roi_ll@ymin,roi_ll@ymax),level=1)
|
|
332 |
coast=as(coast,"SpatialLines")
|
|
333 |
coast=spTransform(coast,psin)
|
228 |
334 |
|
229 |
|
round(cor(mod06l[,c("ppt","dem","cer","cer20","cld","cot")]),2)
|
|
335 |
roi=spTransform(roi,psin)
|
|
336 |
roil=as(roi,"SpatialLines")
|
230 |
337 |
|
231 |
338 |
|
232 |
|
### draw some plots
|
|
339 |
### quantile function
|
233 |
340 |
gq=function(x,n=10,cut=F) {
|
234 |
341 |
if(!cut) return(unique(quantile(x,seq(0,1,len=n+1),na.rm=T)))
|
235 |
342 |
if(cut) return(cut(x,unique(quantile(x,seq(0,1,len=n+1),na.rm=T))))
|
236 |
343 |
}
|
|
344 |
bgyr=colorRampPalette(c("brown","yellow","green","darkgreen","blue"))
|
|
345 |
X11.options(type="cairo")
|
|
346 |
|
|
347 |
|
|
348 |
png(paste("output/ModelComparisonRasters_",tile,"_%02d.png",sep=""),width=3000,height=2000,res=300,bg="transparent",type="cairo-png")
|
|
349 |
|
|
350 |
## COT climatologies
|
|
351 |
at=unique(seq(0,30,len=100))
|
|
352 |
p=levelplot(cot,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black'))
|
|
353 |
print(p)
|
|
354 |
|
|
355 |
## all monthly predictions
|
|
356 |
p1=brick(stack(list.files(outdir,pattern="pred_.*_10.nc$",full=T),varname="ppt"))
|
|
357 |
projection(p1)=psin
|
|
358 |
names(p1)=month.name
|
|
359 |
title=""#"Interpolated Precipitation"
|
|
360 |
at=unique(quantile(as.matrix(p1),seq(0,1,len=100),na.rm=T))
|
|
361 |
at=unique(seq(min(p1@data@min),max(p1@data@max),len=100))
|
|
362 |
p=levelplot(p1,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black'))
|
|
363 |
print(p)
|
|
364 |
|
|
365 |
## compare two models
|
|
366 |
p2=brick(stack(list.files(outdir,pattern="pred_3_3.nc|pred_3_12.nc",full=T)[2:1],varname="ppt"))
|
|
367 |
projection(p2)=psin
|
|
368 |
names(p2)=c("X+Y+Elevation","X+Y+Elevation+MOD06")
|
|
369 |
at=unique(seq(min(p2@data@min),max(p2@data@max),len=100))
|
|
370 |
p=levelplot(p2,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black'))
|
|
371 |
print(p)
|
|
372 |
dev.off()
|
|
373 |
|
|
374 |
png(paste("output/dem_",tile,".png",sep=""),width=3000,height=3000,res=300,bg="transparent")
|
|
375 |
at=unique(seq(min(dem@data@min),max(dem@data@max),len=100))
|
|
376 |
st=unique(coordinates(dm2))
|
|
377 |
levelplot(dem,col.regions=terrain.colors(100),at=at,margin=F)+
|
|
378 |
layer(panel.xyplot(st[,1],st[,2],cex=.5,pch=3,col="black"))
|
|
379 |
dev.off()
|
|
380 |
|
|
381 |
pdf(paste("output/ModelComparison_",tile,".pdf",sep=""),width=11,height=5,useDingbats=F)
|
|
382 |
|
|
383 |
bwplot(fmodel~mean|metric,data=sums2,scale=list(x=list(relation="free")),subscripts=T,panel=function(x,y,subscripts){
|
|
384 |
panel.abline(v=mean(x))
|
|
385 |
types=models$type[match(sort(unique(y)),models$model)]
|
|
386 |
panel.bwplot(x,y,fill=ifelse(types=="Spatial","grey",ifelse(types=="MOD35","dodgerblue","orangered")))
|
|
387 |
# panel.text(x,jitter(as.numeric(y),factor=.5),labels=as.character(sums2$month[subscripts]),col="grey40",cex=.3)
|
|
388 |
panel.xyplot(x,y,pch=16,col="grey48",cex=.5)
|
|
389 |
},
|
|
390 |
#strip=strip.custom(factor.levels=c("Validation R^2","Validation RMSE")),
|
|
391 |
main="Comparison of Validation Metrics",sub="Vertical line indicates overall mean",
|
|
392 |
key=list(space="right",rect=list(col=c("orangered","dodgerblue","grey")),text=list(c("MOD06","MOD35","Spatial"))))
|
|
393 |
|
|
394 |
dev.off()
|
|
395 |
|
|
396 |
|
|
397 |
### illustration of GAM for IBS poster
|
|
398 |
#mm
|
|
399 |
i=56
|
|
400 |
mm[i,]
|
|
401 |
tm=fitmod(i,predict=F,nv=1)
|
|
402 |
|
|
403 |
## partial residual plots
|
|
404 |
var=4
|
|
405 |
fv <- predict(tm$mod,type="terms") ## get term estimates
|
|
406 |
v=sub("[)]","",sub("s[(]","",colnames(fv)))[var]
|
|
407 |
## compute partial residuals for first smooth...
|
|
408 |
prsd1 <- residuals(tm$mod,type="working") + fv[,var]
|
|
409 |
|
|
410 |
pdf(paste("output/GAMexample.pdf"),width=11,height=6,useDingbats=F)
|
|
411 |
plot(tm$mod,select=var,las=1,rug=F,cex.lab=2,cex.axis=2) ## plot first smooth
|
|
412 |
points(tm$mod$model[,v],prsd1,pch=16,cex=.5,col="red")
|
|
413 |
dev.off()
|
|
414 |
|
|
415 |
|
|
416 |
xyplot(mean~month|metric,groups=model,type="l",data=sums,scale=list(y=list(relation="free")),auto.key=list(space="right"))#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x)))
|
|
417 |
|
|
418 |
sums$type=as.factor(models$type[match(sums$model,models$model)])
|
|
419 |
|
|
420 |
xyplot(mean~month|metric,groups=model,type="l",data=sums2,panel=function(x,y,subscripts,groups){
|
|
421 |
tsums=sums[subscripts,]
|
|
422 |
panel.xyplot(x,y,groups=groups,type="l",subscripts=subscripts,col=ifelse(tsums$type=="Spatial","grey20",ifelse(tsums$type=="MOD35","blue","red")))
|
|
423 |
panel.segments(tsums$month,tsums$Q2.5,tsums$month,tsums$Q97.5,groups=groups,subscripts=subscripts)
|
|
424 |
},subscripts=T,scale=list(y=list(relation="free")),
|
|
425 |
key=list(space="right",text=list(c("Spatial","MOD35","MOD06")),lines=list(col=c("grey20","blue","red"))))
|
|
426 |
|
|
427 |
#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x)))
|
|
428 |
|
|
429 |
|
|
430 |
xyplot(pred~ppt|as.factor(month),groups=validation,data=pred,panel=function(x,y,subscripts,groups){
|
|
431 |
panel.xyplot(x,y,groups=groups,subscripts=subscripts)
|
|
432 |
panel.abline(0,1,col="red",lwd=2)
|
|
433 |
},scales=list(relation="free"),
|
|
434 |
main="Predicted vs. Observed mean monthly precipitation at validation stations",
|
|
435 |
sub="Line is y=x",auto.key=T)
|
|
436 |
|
|
437 |
## plot prediction rasters
|
|
438 |
|
|
439 |
p=raster(ncfile,varname="ppt")
|
|
440 |
|
|
441 |
plot3D(dem, maxpixels=100000,zfac=.5, drape=p, rev=FALSE, adjust=TRUE)
|
|
442 |
|
|
443 |
|
|
444 |
|
|
445 |
###################################################################
|
|
446 |
###################################################################
|
237 |
447 |
|
238 |
448 |
### add some additional variables
|
239 |
449 |
mod06s$month=factor(mod06s$month,labels=format(as.Date(paste("2000",1:12,"15",sep="-")),"%b"))
|
... | ... | |
248 |
458 |
mod06sl=melt(mod06s[,!grepl("lppt",colnames(mod06s))],id.vars=c("id","lon","lat","elev","month","ppt","glon","glon2","gelev","gbin"))
|
249 |
459 |
levels(mod06sl$variable)=c("Effective Radius (um)","Very Cloudy Days (%)","Cloudy Days (%)","Optical Thickness (%)","Liquid Water Path")
|
250 |
460 |
|
251 |
|
###################################################################
|
252 |
|
###################################################################
|
253 |
|
|
254 |
|
bgyr=colorRampPalette(c("blue","green","yellow","red","purple"))
|
255 |
|
|
256 |
|
X11.options(type="cairo")
|
257 |
461 |
pdf("output/MOD06_summary.pdf",width=11,height=8.5)
|
258 |
462 |
|
259 |
463 |
# % cloudy maps
|
... | ... | |
430 |
634 |
dev.off()
|
431 |
635 |
|
432 |
636 |
|
|
637 |
|
|
638 |
|
|
639 |
|
|
640 |
|
|
641 |
|
|
642 |
|
|
643 |
####################
|
|
644 |
#################### OLD JUNK BELOW!
|
|
645 |
####################
|
|
646 |
|
|
647 |
|
|
648 |
#levelplot(cer)#+points(x,y,data=dm2@data)
|
|
649 |
|
|
650 |
### extract MOD06 data for each station
|
|
651 |
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="")
|
|
652 |
stcerp=extract(cerp,dm2,fun=mean);colnames(stcerp)=paste("cerp_mean_",as.numeric(format(as.Date(cerp@z$Date),"%m")),sep="")
|
|
653 |
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="")
|
|
654 |
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="")
|
|
655 |
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="")
|
|
656 |
stdem=extract(dem,dm2)
|
|
657 |
### generate new design matrix
|
|
658 |
mod06=cbind.data.frame(station=dm$station,stcer,stcerp,stcot,stcld,stcer20)
|
|
659 |
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06"
|
|
660 |
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_"))
|
|
661 |
mod06l=unique(mod06l)
|
|
662 |
mod06l=cast(mod06l,station+moment+month~variable,value="mod06")
|
|
663 |
mod06l=merge(dm2@data,mod06l,by=c("station","month"))
|
|
664 |
mod06l=mod06l[!is.na(mod06l$cer),]
|
|
665 |
|
|
666 |
mod06l=mod06l[order(mod06l$month),]
|
|
667 |
mod06l$lppt=log(mod06l$ppt+1)
|
|
668 |
|
|
669 |
#xyplot(latitude~longitude|as.factor(month),groups=cut(mod06l$ppt,breaks=quantile(mod06l$ppt,seq(0,1,len=10))),data=mod06l,auto.key=T,col=grey(seq(0,1,len=10)),pch=16,cex=.5)
|
|
670 |
#plot(cer)
|
Merging with Pleiades copies