Project

General

Profile

« Previous | Next » 

Revision 165c04fb

Added by Adam Wilson over 11 years ago

Merging with Pleiades copies

View differences:

climate/procedures/GHCND_stations_extraction_study_area.R
33 33
new_proj<-"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
34 34
#tile="h11v08" 
35 35
tile="h09v04"
36
tile="h21v09"
36 37

  
37 38
#out_prefix<-"20121212"                                                 #User defined output prefix
38 39

  
climate/procedures/MOD06_L2_process.r
26 26
     }
27 27

  
28 28

  
29
date=opta$date  #date="20030301"
29
date=opta$date  #date="20101225"
30 30
tile=opta$tile #tile="h11v08"
31 31
verbose=opta$verbose  #print out extensive information for debugging?
32 32
outdir=paste("daily/",tile,"/",sep="")  #directory for separate daily files
......
42 42
require(raster)
43 43
library(rgdal)
44 44
require(spgrass6)
45
require(RSQLite)
45
#require(RSQLite)
46 46

  
47 47

  
48 48
## specify some working directories
......
115 115
  ## write the gridded file and save the log including the pid of the parent process
116 116
#  log=system(paste("( /nobackupp1/awilso10/software/heg/bin/swtif -p ",tempdir(),"/",basename(file),"_MODparms.txt -d ; echo $$)",sep=""),intern=T)
117 117
  log=system(paste("( /nobackupp1/awilso10/software/heg/bin/swtif -p ",tempdir(),"/",basename(file),"_MODparms.txt -d ; echo $$)",sep=""),intern=T,ignore.stderr=T)
118
  log=system(paste("(sudo MRTDATADIR=/usr/local/heg/data PGSHOME=/usr/local/heg/TOOLKIT_MTD PWD=/home/adamw /usr/local/heg/bin/swtif -p ",tempdir(),"/",basename(file),"_MODparms.txt -d )",sep=""))
119

  
118 120
  ## clean up temporary files in working directory
119 121
  file.remove(list.files(pattern=
120 122
              paste("filetable.temp_",
......
147 149

  
148 150
  ## set up temporary grass instance for this PID
149 151
  initGRASS(gisBase="/u/armichae/pr/grass-6.4.2/",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
152
  initGRASS(gisBase="/usr/lib/grass64/",SG=td,gisDbase=tf,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
150 153
  system(paste("g.proj -c proj4=\"",projection(td),"\"",sep=""),ignore.stdout=T,ignore.stderr=T)
151 154

  
152 155
  ## Define region by importing one MOD11A1 raster.
153 156
  print("Import one MOD11A1 raster to define grid")
154
  execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""),
155
            output="modisgrid",flags=c("quiet","overwrite","o"))
156
  system("g.region rast=modisgrid save=roi --overwrite",ignore.stdout=T,ignore.stderr=T)
157

  
157
  execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",gridfile,"\":MODIS_Grid_Daily_1km_LST:Night_view_angl",sep=""),            output="modisgrid",flags=c("quiet","overwrite","o"))
158
  execGRASS("r.in.gdal",input=paste("NETCDF:\"",gridfile,"\":CER",sep=""),            output="modisgrid",flags=c("o"))
159
  system("g.region rast=modisgrid.1 save=roi --overwrite",ignore.stdout=T,ignore.stderr=T)
158 160
  ## Identify which files to process
159 161
  tfs=fs$file[fs$dateid==date]
160 162
  ## drop swaths that did not produce an output file (typically due to not overlapping the ROI)
161
  tfs=tfs[tfs%in%list.files(tempdir())]
163
  tfs=tfs[tfs%in%list.files(tempdir(),pattern="hdf$")]
162 164
  nfs=length(tfs)
163 165

  
164 166
  ## loop through scenes and process QA flags
......
167 169
     ## Cloud Mask
168 170
     execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Cloud_Mask_1km_0",sep=""),
169 171
              output=paste("CM1_",i,sep=""),flags=c("overwrite","o")) ; print("")
170
    ## extract cloudy and 'confidently clear' pixels
171 172
    system(paste("r.mapcalc <<EOF
172
                CM_cloud_",i," =  ((CM1_",i," / 2^0) % 2) == 1  &&  ((CM1_",i," / 2^1) % 2^2) == 0 
173
                CM_clear_",i," =  ((CM1_",i," / 2^0) % 2) == 1  &&  ((CM1_",i," / 2^1) % 2^2) == 3 
173
                CM_determined_",i," =  ((CM1_",i," / 2^0) % 2) 
174
                CM_state_",i," =  ((CM1_",i," / 2^1) % 2^2)
175
EOF",sep=""))
176

  
177
     ## extract cloudy and 'confidently clear' pixels
178
    system(paste("r.mapcalc <<EOF
179
                CM_cloud_",i," =  (((CM1_",i," / 2^0) % 2) == 1)  &&  (((CM1_",i," / 2^1) % 2^2) == 0 )
180
                CM_clear_",i," =  ((CM1_",i," / 2^0) % 2) == 1  &&  ( ((CM1_",i," / 2^1) % 2^2) > 2  )
174 181
EOF",sep=""))
175 182
    ## QA
176 183
     execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",file,"\":mod06:Quality_Assurance_1km_0",sep=""),
......
178 185
   ## QA_CER
179 186
   system(paste("r.mapcalc <<EOF
180 187
                 QA_COT_",i,"=   ((QA_",i," / 2^0) % 2^1 )==1
181
                 QA_COT2_",i,"=  ((QA_",i," / 2^1) % 2^2 )==3
188
                 QA_COT2_",i,"=  ((QA_",i," / 2^1) % 2^2 )>=2
182 189
                 QA_COT3_",i,"=  ((QA_",i," / 2^3) % 2^2 )==0
183 190
                 QA_CER_",i,"=   ((QA_",i," / 2^5) % 2^1 )==1
184
                 QA_CER2_",i,"=  ((QA_",i," / 2^6) % 2^2 )==3
191
                 QA_CER2_",i,"=  ((QA_",i," / 2^6) % 2^2 )>=2
185 192
EOF",sep="")) 
186 193
#                 QA_CWP_",i,"=   ((QA_",i," / 2^8) % 2^1 )==1
187 194
#                 QA_CWP2_",i,"=  ((QA_",i," / 2^9) % 2^2 )==3
climate/procedures/MOD06_monthlyinterp.R
1 1
###################################################################################
2
###  R code to aquire and process MOD06_L2 cloud data from the MODIS platform
2
###  R code to interpolate monthly climatologies 
3 3

  
4 4

  
5 5
## connect to server of choice
......
7 7
#R
8 8

  
9 9
library(sp)
10
library(spgrass6)
10
#library(spgrass6)
11 11
library(rgdal)
12 12
library(reshape)
13 13
library(ncdf4)
14
library(geosphere)
15
library(rgeos)
14
#library(geosphere)
15
#library(rgeos)
16 16
library(multicore)
17 17
library(raster)
18
library(rasterVis)
18 19
library(lattice)
19 20
library(latticeExtra)
20
library(rgl)
21
library(hdf5)
22
library(rasterVis)
23
library(heR.Misc)
24
library(car)
21
#library(rgl)
22
#library(hdf5)
23
#library(heR.Misc)
24
#library(car)
25 25
library(mgcv)
26 26
library(sampling)
27 27

  
......
35 35
tb$tile=paste("h",sprintf("%02d",tb$ih),"v",sprintf("%02d",tb$iv),sep="")
36 36
save(tb,file="modlandTiles.Rdata")
37 37

  
38
tile="h11v08"   #can move this to submit script if needed
39
#tile="h09v04"   #oregon
40

  
38
#tile="h11v08"   #can move this to submit script if needed
39
tile="h21v09"  #Kenya
40
tile="h09v04"   #oregon
41
tiles=c("h11v08","h09v04")
42
  
41 43
psin=CRS("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs")
42 44

  
43 45
tile_bb=tb[tb$tile==tile,] ## identify tile of interest
44 46
roi_ll=extent(tile_bb$lon_min,tile_bb$lon_max,tile_bb$lat_min,tile_bb$lat_max) 
45
#roi=spTransform(roi,psin)
46
#roil=as(roi,"SpatialLines")
47 47

  
48 48
dmod06="data/modis/mod06/summary"
49
outdir=paste("data/tiles/",tile,"/",sep="")
49 50

  
50 51

  
51 52
##########################
52 53
#### Organize the data
53 54
months=seq(as.Date("2000-01-15"),as.Date("2000-12-15"),by="month")
54 55

  
55
getmod06<-function(variable){
56
  d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc",sep=""),full=T),varname=toupper(variable))
57
#  d=dropLayer(d,1)
56
getmod06<-function(variable,month=NA){
57
  d=brick(list.files(dmod06,pattern=paste("MOD06_",tile,".nc$",sep=""),full=T),varname=toupper(variable))
58
  if(!is.na(month)) {
59
    d=subset(d,subset=month)
60
    names(d)=variable
61
  }
62
  if(is.na(month)){
63
    setZ(d,format(as.Date(d@z$Date),"%m"),name="time")
64
    layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m"))
65
  }
58 66
  projection(d)=psin
59
  setZ(d,format(as.Date(d@z$Date),"%m"),name="time")
60
#  d@z=as.Date(d@z$Date)
61
  layerNames(d) <- as.character(format(as.Date(d@z$Date),"%b")) #paste(variable,format(as.Date(d@z$Date),"%m"))
62 67
  return(d)
63 68
}
64 69

  
65
# drop #1?
66

  
67 70
cer=getmod06("cer")
68 71
cld=getmod06("cld")
69 72
cot=getmod06("cot")
70 73
cer20=getmod06("cer20")
71 74

  
75

  
76

  
72 77
pcol=colorRampPalette(c("brown","red","yellow","darkgreen"))
73
#levelplot(cer,col.regions=pcol(20))
78

  
79
### create data dir for tiled data
80
ddir=paste("data/tiles/",tile,sep="")
81
if(!file.exists(ddir)) dir.create(ddir)
74 82

  
75 83
## load WorldClim data for comparison (download then uncompress)
76 84
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/prec_30s_bil.zip",wait=F)
77 85
#system("wget -P data/worldclim/ http://biogeo.ucdavis.edu/data/climate/worldclim/1_4/grid/cur/alt_30s_bil.zip",wait=F)
78 86

  
79 87
### load WORLDCLIM elevation 
80
#dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T))
81
#projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
82
#dem=crop(dem,roi_ll)
83
#dem[dem>60000]=NA
84
#dem=projectRaster(dem,cer)
85
#writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff")
88
if(!file.exists(paste(ddir,"/dem_",tile,".tif",sep=""))){
89
  dem=raster(list.files("data/worldclim/alt_30s_bil/",pattern="bil$",full=T))
90
  projection(dem)=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0")
91
  dem=crop(dem,roi_ll)
92
  dem[dem>30000]=NA
93
  dem=projectRaster(dem,cer)
94
  writeRaster(dem,file=paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""),format="GTiff",overwrite=T)
95
}
86 96
dem=raster(paste("data/tiles/",tile,"/dem_",tile,".tif",sep=""))
97
names(dem)="dem"
87 98

  
88 99
### get station data, subset to stations in region, and transform to sinusoidal
89
dm=readOGR(paste("data/tiles/",tile,sep=""),paste("station_monthly_",tile,"_PRCP",sep=""))
90
colnames(dm@data)[grep("elevation",colnames(dm@data))]="dem"
100
dm=readOGR(ddir,paste("station_monthly_",tile,"_PRCP",sep=""))
101
colnames(dm@data)[grep("elevation",colnames(dm@data))]="elev"
91 102
colnames(dm@data)[grep("value",colnames(dm@data))]="ppt"
92 103

  
93
                                        #xyplot(latitude~longitude|month,data=dm@data)
104
#xyplot(latitude~longitude|month,data=dm@data)
105

  
106
## transform to sinusoidal to overlay with raster data
94 107
dm2=spTransform(dm,CRS(projection(cer)))
95 108
dm2@data[,c("x","y")]=coordinates(dm2)
96 109

  
97
### extract MOD06 data for each station
98
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="")
99
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="")
100
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="")
101
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="")
102
stdem=extract(dem,dm2)
103
#mod06=cbind.data.frame(station=dm$station,stcer[,-1],stcot[,-1],stcld[,-1],stcer20[,-1])
104
mod06=cbind.data.frame(station=dm$station,stcer,stcot,stcld,stcer20)
105
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06"
106
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_"))
107
mod06l=unique(mod06l)
108
mod06l=cast(mod06l,station+moment+month~variable,value="mod06")
109
mod06l=merge(dm2@data,mod06l,by=c("station","month"))
110
mod06l=mod06l[!is.na(mod06l$cer),]
111

  
112
mod06l=mod06l[order(mod06l$month),]
110
## limit data to station-months with at least 300 daily observations (~10 years)
111
dm2=dm2[dm2$count>300,]
113 112

  
114
#xyplot(value~cer|month,data=mod06l,scales=list(relation="free"),pch=16,cex=.5)
115
#xyplot(value~cer|station,data=mod06l[mod06l$count>400,],pch=16,cex=.5)
116
#xyplot(cot~month,groups=station,data=mod06l,type="l")
113
## create coordinate rasters
117 114

  
118 115
### create monthly raster bricks for prediction
119
m=2
120

  
121
pdata=stack(
122
  subset(cer,subset=m),
123
  subset(cot,subset=m),
124
  subset(cer20,subset=m),
125
  subset(cld,subset=m)
126
  )
116
getd<-function(vars=c("cer","cot","cer20","cld"),month){
117
  pdata=stack(lapply(vars,function(v){getmod06(v,month=month)}))
118
  x=rasterFromXYZ(coordinates(dem)[,c(1,2,1)])
119
  y=rasterFromXYZ(coordinates(dem)[,c(1,2,2)])
120
  pdata=stack(pdata,dem,x,y)
121
  return(pdata)
122
}
127 123

  
128 124
## Set up models to compare
129 125
####################################
130 126
#### build table comparing various metrics
131
models=c(
132
  "ppt~s(y)+ s(x)",
133
  "ppt~s(y,x)",
134
  "ppt~s(y,x) + s(dem)",
135
  "ppt~s(y,x)+s(dem)+cer+cld+cot+cer20",
136
  "ppt~s(y,x)+s(dem)+s(cer)",
137
  "ppt~s(y,x)+s(dem)+s(cer20)",
138
  "ppt~s(y,x)+s(dem)+s(cld)",
139
  "ppt~s(y,x)+s(dem)+s(cot)",
140
  "ppt~s(y,x)+s(dem)+s(cer20,cld)",
141
  "ppt~s(y,x)+s(dem)+s(cer20,cot)",
142
  "ppt~s(y,x)+s(dem)+s(cer,cld)",
143
  "ppt~s(dem)+s(cer,cot)")
144
months=1:12
127
models=data.frame(model=c(
128
#  "ppt~s(y)+ s(x)",
129
#  "ppt~s(y,x)",
130
  "lppt~s(y,x)",
131
  "lppt~s(y,x)+s(dem)",
132
  "lppt~s(y,x,dem)",
133
  "lppt~s(y,x,dem)+s(cld)",
134
  "lppt~s(y,x)+s(dem)+s(cld)",
135
  "lppt~s(y,x)+s(cld)",
136
  "lppt~s(y,x)+s(cot)",
137
  "lppt~s(y,x)+s(dem)+s(cot)",
138
  "lppt~s(y,x,dem)+s(cot)",
139
  "lppt~s(y,x)+s(cer20)",
140
  "lppt~s(y,x)+s(dem)+cld+cot+cer20",
141
  "lppt~s(y,x,dem)+cld+cot+cer20",
142
  "lppt~s(y,x)+s(dem)+s(cld,cot,cer20)",
143
  "lppt~s(y,x)+s(dem)+s(cld)+s(cot)+s(cer20)"))
144
  months=1:12
145
## add category for each model
146
models$type="Spatial"
147
models$type[grepl("cer|cot|cer20",models$model)]="MOD06"
148
models$type[grepl("cld",models$model)&!grepl("cer|cot|cer20",models$model)]="MOD35"
145 149

  
146 150
## build the list of models/months to process
147
mm=expand.grid(model=models,month=months)
148
mm$model=as.character(mm$model)
149
mm$mid=match(mm$model,models)
151
mm=expand.grid(model=models$model,months=months,stringsAsFactors=F)
152
mm$mid=match(mm$model,models$model)
150 153
  
151
#  mod1<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM), data=data_s)
152
#  mod2<- gam(tmax~ s(lat,lon,ELEV_SRTM), data=data_s)
153
#  mod3<- gam(tmax~ s(lat) + s (lon) + s (ELEV_SRTM) +  s (Northness)+ s (Eastness) + s(DISTOC), data=data_s)
154
#  mod4<- gam(tmax~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST), data=data_s)
155
#  mod5<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST), data=data_s)
156
#  mod6<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1), data=data_s)
157
#  mod7<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3), data=data_s)
158
#  mod8<- gam(tmax~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1), data=data_s)
159

  
160 154
### Sample validation stations
161 155
prop=0.1
162
mod06l$validation=F
163
mod06l$validation[as.numeric(rownames(strata(mod06l,stratanames="month",size=as.integer(prop*table(mod06l$month)),method="srswor")))]=T
164 156

  
165 157
### run it
166
mods=lapply(1:nrow(mm),function(i){
167
    mod=try(gam(as.formula(mm$model[i]),data=mod06l[mod06l$month==mm$month[i]&!mod06l$validation,]))
168
  ## add some attributes to the object to help ID it later
169
  attr(mod,"month")=mm$month[i]
170
  attr(mod,"model")=mm$model[i]
171
  attr(mod,"modelid")=mm$mid[i]
172
  print(paste("Finished model ",mm$model[i]," for month",mm$month[i]))
173
  return(mod)
174
})
175

  
176
## Get summary stats
177
sums=do.call(rbind.data.frame,lapply(mods,function(m,plot=F){
178
  if(class(m)=="try-error") {
179
    return(data.frame(model=attr(m,"model"),
180
                      modelid=attr(m,"modelid"),
181
                      month=attr(m,"month"),
182
                      r2=NA,
183
                      dev=NA,
184
                      aic=NA,
185
                      rmse=NA,
186
                      vr2=NA))
158
fitmod<-function(i,prop=0.1,predict=F, nv,...){
159
  model=as.character(mm$model[i])
160
  month=mm$month[i]
161
  mid=mm$mid[i]
162
### extract data
163
  dr=getd(month=month)
164
  ##  subset data for this month
165
  dt=dm2[dm2$month==month,]
166
  ## add log+1 ppt
167
  dt$lppt=log(dt$ppt+1)
168
  ## extract for points from dm2
169
  dr2=subset(dr,subset=grep("^x$|^y$",names(dr),invert=T))
170
  ds=cbind.data.frame(dt@data,extract(dr2,dt))
171
### flag stations to hold out for validation
172
  ts2=do.call(rbind.data.frame,lapply(1:nv,function(r) {
173
    ds$validation=F
174
    ds$validation[sample(1:nrow(ds),size=as.integer(prop*nrow(ds)))]=T
175
### fit model
176
    mod<<-try(gam(as.formula(model),data=ds[!ds$validation,]),silent=T)
177
      ## if fitting fails, return a NULL
178
      if(class(mod)[[1]]=="try-error")  return(NULL)
179
### Model Validation
180
      vd=ds[ds$validation,]  # extract validation points
181
      y=as.data.frame(predict(mod,ds,se.fit=T))      # make predictions
182
      if(attr(terms(mod),"variables")[[2]]=="lppt"){  #if modeling log(ppt+1), transform back to real units
183
        y$fit=exp(y$fit)-1
184
        y$se.fit=exp(y$se.fit)-1
185
      }
186
      ds[,c("pred","pred.se")]=cbind(y$fit,y$se.fit)
187
      ds$model=model
188
      ds$modelid=mid
189
      vlm=lm(pred~ppt,data=ds[ds$validation,])       # lm() to summarize predictive fit
190
    if(r==nv) ds<<-ds  #if on last iteration, save predictions
191
### summarize validation
192
      s1=summary(mod)
193
      s2=data.frame(
194
        model.r2=s1$r.sq,
195
        model.dev=s1$dev.expl,
196
        model.aic=AIC(mod),
197
        valid.rmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T)),
198
        valid.nrmse=sqrt(mean((vd$ppt-y$fit[ds$validation])^2,na.rm=T))/mean(vd$ppt+.1,na.rm=T),
199
        valid.mer=mean(vd$ppt-y$fit[ds$validation],na.rm=T),
200
        valid.mae=mean(abs(vd$ppt-y$fit[ds$validation]),na.rm=T),
201
        valid.r2=summary(vlm)$r.squared)
202
      return(s2)}))
203
  ## summarize the gcv datasets
204
  if(nrow(ts2)==0) return(NULL)
205
  s2a=data.frame(mean=apply(ts2,2,mean,na.rm=T))
206
  s2b=t(apply(ts2,2,quantile,c(0.025,0.975),na.rm=T))
207
  colnames(s2b)=paste("Q",c(2.5,97.5),sep="")
208
  s2=cbind.data.frame(tile=tile, model=model, modelid=mid, month=month,
209
    metric=rownames(s2a),s2a,s2b)
210
  rownames(s2)=1:nrow(s2)
211
### add some attributes to the object to help ID it later
212
      attr(mod,"month")=ds$month[1]
213
      attr(mod,"model")=model
214
      attr(mod,"modelid")=mid
215
### generate predictions?
216
  if(predict){
217
    ## write prediction
218
    ncfile=paste(outdir,tile,"_pred_",ds$month[1],"_",mid[1],".nc",sep="")
219
    predict(dr,mod,filename=ncfile,format="CDF",overwrite=T)
220
    ## add some attributes to nc file
221
    ncopath=""
222
    ## add other attributes
223
    system(paste(ncopath,"ncrename -v variable,ppt ",ncfile,sep=""))
224
    system(paste(ncopath,"ncatted -a units,ppt,o,c,\"mm\" ",
225
                 "-a model,ppt,o,c,\"",model,"\" ",
226
                 "-a long_name,ppt,o,c,\"Mean Monthly Precipitation\" ",
227
                 ncfile,sep=""))
228
    ## create NC file to hold summary data and then append it to output file
229
    ## add table for validation metrics
230
    s2_dim1=ncdim_def("metrics1",units="",vals=1:ncol(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics")
231
    s2_dim2=ncdim_def("metrics2",units="",vals=1:nrow(s2),unlim=FALSE,create_dimvar=T,longname="Model Fitting and Validation Metrics")
232
    s2_var=ncvar_def("modelmetrics",units="varies",list(s2_dim1,s2_dim2),missval=-999,longname="Model Fitting and Validation Metrics",
233
      prec="float")
234
    ## simplify data table for incorporation into netcdf file
235
    ds2=ds[,-1];ds2$validation=ifelse(ds2$validation,1,0)
236
    ds2=as.matrix(ds2)
237
    data_dim1=ncdim_def("data",units="",vals=1:ncol(ds2),unlim=FALSE,create_dimvar=T,longname="Data used in model fitting")
238
    data_dim2=ncdim_def("stations",units="",vals=1:nrow(ds2),unlim=FALSE,create_dimvar=T,longname="Stations used in model fitting")
239
    data_var=ncvar_def("modeldata",units="varies",list(data_dim1,data_dim2),missval=-999,longname="Data used in model fitting",
240
      prec="float")
241
    tncf=paste(tempdir(),"/",tile,month,mid,"metrics.nc",sep="") #temp file to hold metric data
242
    nc_create(filename=tncf,vars=list(data_var,s2_var))
243
    tnc=nc_open(tncf,write=T)
244
#    ncvar_put(tnc,s2_var,vals=s2)  ## put in validatation data
245
    ncvar_put(tnc,data_var,vals=ds2)  ## put in validatation data
246
    nc_close(tnc)  ## close the file
247
  system(paste(ncopath,"ncks -A ",tncf," ",ncfile,sep=""))
248
  system(paste(ncopath,"ncatted -a value,modelmetrics,o,c,\"",paste(1:ncol(s2),":",colnames(s2),collapse="
249
",sep=""),"\" ",ncfile,sep=""))
250
  ## also add metrics as variable attributes for easier viewing
251
  for(c in 1:ncol(s2))  system(paste(ncopath,"ncatted -a ",colnames(s2)[c],",ppt,o,",
252
                                    ifelse(class(s2[1,c])!="numeric","c,\"","d,"),s2[1,c],
253
                                    ifelse(class(s2[1,c])!="numeric","\" "," "),ncfile,sep=""))
254
  ## Add time dimension to ease merging files later
255
  system(paste(ncopath,"ncecat -O -u time ",ncfile," ",ncfile,sep=""))
256
  cat(paste("
257
    netcdf time {
258
      dimensions:
259
        time = 1 ;
260
      variables:
261
        int time(time) ;
262
      time:units = \"months since 2000-01-01 00:00:00\" ;
263
      time:calendar = \"gregorian\";
264
      time:long_name = \"time of observation\"; 
265
    data:
266
      time=",as.integer(ds$month[1]-1),";
267
    }"),file=paste(tempdir(),"/time.cdl",sep=""))
268
  system(paste("ncgen -o ",tempdir(),"/time.nc ",tempdir(),"/time.cdl",sep=""))
269
  system(paste(ncopath,"ncks -A ",tempdir(),"/time.nc ",ncfile,sep=""))
270
  ## global attributes
271
  system(paste(ncopath,"ncatted -a title,global,o,c,\"Interpolated Monthly Precipitation\" ",
272
               "-a institution,global,o,c,\"Yale University\" ",
273
               "-a source,global,o,c,\"Interpolated from station data\" ",
274
               "-a contact,global,o,c,\"adam.wilson@yale.edu\" ",
275
               ncfile,sep=""))
187 276
  }
277
  print(paste("Finished model ",model," for month",ds$month[1]))
278
  return(list(model=mod,summary=s2,data=ds))
279
}
188 280

  
189
  ## make validation predictions
190
  vd=mod06l[mod06l$validation&mod06l$month==attr(m,"month"),]
191
  y=predict(m,mod06l[mod06l$validation&mod06l$month==attr(m,"month"),])
192
  vlm=lm(y~vd$ppt)
281
mods=lapply(1:nrow(mm),fitmod,predict=F,nv=100)
193 282

  
194
    ## Draw some plots
195
  if(plot){
196
    ## partial residual plots
197
    var=2
198
    fv <- predict(m,type="terms") ## get term estimates
199
    ## compute partial residuals for first smooth...
200
    prsd1 <- residuals(m,type="working") + fv[,var]
201
    plot(m,select=var) ## plot first smooth
202
    points(mod06l$cot[mod06l$month==mm$month[i]&!mod06l$validation],prsd1,pch=16,col="red")
203
  }
283
## extract summary tables and write them
284
sums=do.call(rbind.data.frame,lapply(mods,function(m) return(m$summary)))
285
write.csv(sums,paste(outdir,tile,"_validation.csv",sep=""))
286
pred=lapply(mods,function(m) return(m$data))
287
### messy error handling to remove tables with incorrect number of columns...
288
nullpred=which(!do.call(c,lapply(pred,function(x) ncol(x)==20&!is.null(x))))
289
pred=pred[nullpred]
290
pred=do.call(rbind.data.frame,pred)
291
pred$tile=tile
292
write.csv(pred,paste(outdir,tile,"_predictions.csv",sep=""))
204 293

  
205
  ## summarize validation
206
  s1=summary(m)
207
  data.frame(
208
             model=attr(m,"model"),
209
             modelid=attr(m,"modelid"),
210
             month=attr(m,"month"),
211
             r2=s1$r.sq,
212
             dev=s1$dev.expl,
213
             aic=AIC(m),
214
             rmse=sqrt(mean((vd$ppt-predict(vlm,vd))^2)),
215
             vr2=summary(vlm)$r.squared)
216
}))
294
### read them back
217 295

  
218
### Summary Figures
296
#sums=do.call(rbind.data.frame,lapply(tiles,function(tile) read.csv(paste("data/tiles/",tile,"/",tile,"_validation.csv",sep=""))))
297
sums=read.csv(paste(outdir,tile,"_validation.csv",sep=""))
298
pred=read.csv(paste(outdir,tile,"_predictions.csv",sep=""))
219 299

  
220
sumsl=melt(sums,id.vars=c("model","modelid","month"))
300
#sums=rbind.data.frame(sums,read.csv(paste("data/tiles/",tiles[1],"/",tiles[1],"_validation.csv",sep="")))
301
#pred=read.csv(paste(outdir,tiles[1],"_predictions.csv",sep=""))
221 302

  
222
sum2=cast(sumsl,model~variable, fun=function(x) c(mean=mean(x,na.rm=T),sd=sd(x,na.rm=T)))
303

  
304
## reshape summary validation data
305
#sumsl=melt(sums,id.vars=c("tile","model","modelid","month"))
306
sum2=cast(sums[,-1],model+modelid~metric,value="mean",fun=function(x) c(median=round(median(x,na.rm=T),2),sd=round(sd(x,na.rm=T),2)));sum2
307

  
308
#by(sum2,tile,function(x) rank(apply(cbind(rank(x$valid.rmse_median),rank(-x$valid.r2_median)),1,mean,na.rm=T)))
309
#sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T))
310

  
311

  
312
## add sorting variables across months
313
sum2$rank=rank(apply(cbind(rank(sum2$valid.rmse_median),rank(-sum2$valid.r2_median)),1,mean,na.rm=T))
314
sum2[order(sum2$rank),c("model","modelid","valid.r2_median","valid.rmse_median","valid.nrmse_median","rank"),] #"valid.mer_mean",
315
sums$fmodel=factor(sums$model,ordered=T,levels=rev(sum2$model[order(sum2$rank)]))
316

  
317
## add sorting variables for each month
318
sumsl2=cast(model~month~metric,value="mean",data=sums)
319
sumsl2[,,"valid.r2"]
223 320

  
224 321
#combineLimits(useOuterStrips(xyplot(value~month|model+variable,data=sumsl,scale=list(relation="free"))))
225
bwplot(model~value|variable,data=sumsl,scale=list(x=list(relation="free")))
322
sums2=sums[sums$metric%in%c("valid.r2","valid.rmse"),]
323

  
324

  
226 325

  
227
xyplot(ppt~cld|station,groups=month,data=mod06l,cex=.5,pch=16)
326
###############################################################
327
###############################################################
328
### Draw some plots
329
library(maptools)
330
coast=getRgshhsMap(fn="/home/adamw/acrobates/Global/GSHHS_shp/gshhs/gshhs_l.b",
331
  xlim=c(360+roi_ll@xmin,360+roi_ll@xmax),ylim=c(roi_ll@ymin,roi_ll@ymax),level=1)
332
coast=as(coast,"SpatialLines")
333
coast=spTransform(coast,psin)
228 334

  
229
round(cor(mod06l[,c("ppt","dem","cer","cer20","cld","cot")]),2)
335
roi=spTransform(roi,psin)
336
roil=as(roi,"SpatialLines")
230 337

  
231 338

  
232
### draw some plots
339
### quantile function
233 340
gq=function(x,n=10,cut=F) {
234 341
  if(!cut) return(unique(quantile(x,seq(0,1,len=n+1),na.rm=T)))
235 342
  if(cut)  return(cut(x,unique(quantile(x,seq(0,1,len=n+1),na.rm=T))))
236 343
}
344
bgyr=colorRampPalette(c("brown","yellow","green","darkgreen","blue"))
345
X11.options(type="cairo")
346

  
347

  
348
png(paste("output/ModelComparisonRasters_",tile,"_%02d.png",sep=""),width=3000,height=2000,res=300,bg="transparent",type="cairo-png")
349

  
350
## COT climatologies
351
at=unique(seq(0,30,len=100))
352
p=levelplot(cot,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black'))
353
print(p)
354

  
355
## all monthly predictions
356
p1=brick(stack(list.files(outdir,pattern="pred_.*_10.nc$",full=T),varname="ppt"))
357
projection(p1)=psin
358
names(p1)=month.name
359
title=""#"Interpolated Precipitation"
360
at=unique(quantile(as.matrix(p1),seq(0,1,len=100),na.rm=T))
361
at=unique(seq(min(p1@data@min),max(p1@data@max),len=100))
362
p=levelplot(p1,xlab.top=title,at=at,col.regions=bgyr(length(at)))#+layer(sp.lines(roil, lwd=1.2, col='black'))
363
print(p)
364

  
365
## compare two models
366
p2=brick(stack(list.files(outdir,pattern="pred_3_3.nc|pred_3_12.nc",full=T)[2:1],varname="ppt"))
367
projection(p2)=psin
368
names(p2)=c("X+Y+Elevation","X+Y+Elevation+MOD06")
369
at=unique(seq(min(p2@data@min),max(p2@data@max),len=100))
370
p=levelplot(p2,xlab.top=title,at=at,col.regions=bgyr(length(at)))+layer(sp.lines(coast, lwd=1.2, col='black'))
371
print(p)
372
dev.off()
373

  
374
png(paste("output/dem_",tile,".png",sep=""),width=3000,height=3000,res=300,bg="transparent")
375
at=unique(seq(min(dem@data@min),max(dem@data@max),len=100))
376
st=unique(coordinates(dm2))
377
levelplot(dem,col.regions=terrain.colors(100),at=at,margin=F)+
378
  layer(panel.xyplot(st[,1],st[,2],cex=.5,pch=3,col="black"))
379
dev.off()
380

  
381
pdf(paste("output/ModelComparison_",tile,".pdf",sep=""),width=11,height=5,useDingbats=F)
382

  
383
bwplot(fmodel~mean|metric,data=sums2,scale=list(x=list(relation="free")),subscripts=T,panel=function(x,y,subscripts){
384
  panel.abline(v=mean(x))
385
  types=models$type[match(sort(unique(y)),models$model)]
386
  panel.bwplot(x,y,fill=ifelse(types=="Spatial","grey",ifelse(types=="MOD35","dodgerblue","orangered")))
387
  #  panel.text(x,jitter(as.numeric(y),factor=.5),labels=as.character(sums2$month[subscripts]),col="grey40",cex=.3)
388
  panel.xyplot(x,y,pch=16,col="grey48",cex=.5)
389
},
390
                                        #strip=strip.custom(factor.levels=c("Validation R^2","Validation RMSE")),
391
       main="Comparison of Validation Metrics",sub="Vertical line indicates overall mean",
392
       key=list(space="right",rect=list(col=c("orangered","dodgerblue","grey")),text=list(c("MOD06","MOD35","Spatial"))))
393

  
394
dev.off()
395

  
396

  
397
### illustration of GAM for IBS poster
398
#mm
399
i=56
400
mm[i,]
401
tm=fitmod(i,predict=F,nv=1)
402

  
403
    ## partial residual plots
404
var=4
405
    fv <- predict(tm$mod,type="terms") ## get term estimates
406
    v=sub("[)]","",sub("s[(]","",colnames(fv)))[var]
407
    ## compute partial residuals for first smooth...
408
    prsd1 <- residuals(tm$mod,type="working") + fv[,var]
409

  
410
pdf(paste("output/GAMexample.pdf"),width=11,height=6,useDingbats=F)
411
plot(tm$mod,select=var,las=1,rug=F,cex.lab=2,cex.axis=2) ## plot first smooth
412
    points(tm$mod$model[,v],prsd1,pch=16,cex=.5,col="red")
413
dev.off()
414

  
415

  
416
xyplot(mean~month|metric,groups=model,type="l",data=sums,scale=list(y=list(relation="free")),auto.key=list(space="right"))#+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x)))
417

  
418
sums$type=as.factor(models$type[match(sums$model,models$model)])
419

  
420
xyplot(mean~month|metric,groups=model,type="l",data=sums2,panel=function(x,y,subscripts,groups){
421
  tsums=sums[subscripts,]
422
  panel.xyplot(x,y,groups=groups,type="l",subscripts=subscripts,col=ifelse(tsums$type=="Spatial","grey20",ifelse(tsums$type=="MOD35","blue","red")))
423
  panel.segments(tsums$month,tsums$Q2.5,tsums$month,tsums$Q97.5,groups=groups,subscripts=subscripts)
424
},subscripts=T,scale=list(y=list(relation="free")),
425
       key=list(space="right",text=list(c("Spatial","MOD35","MOD06")),lines=list(col=c("grey20","blue","red"))))
426

  
427
                                        #+layer(panel.xyplot(x,y,pch=16,col="grey"),under=T)+layer(panel.abline(v=mean(x)))
428

  
429

  
430
xyplot(pred~ppt|as.factor(month),groups=validation,data=pred,panel=function(x,y,subscripts,groups){
431
  panel.xyplot(x,y,groups=groups,subscripts=subscripts)
432
  panel.abline(0,1,col="red",lwd=2)
433
},scales=list(relation="free"),
434
       main="Predicted vs. Observed mean monthly precipitation at validation stations",
435
       sub="Line is y=x",auto.key=T)
436

  
437
## plot prediction rasters
438

  
439
p=raster(ncfile,varname="ppt")
440

  
441
plot3D(dem, maxpixels=100000,zfac=.5, drape=p, rev=FALSE, adjust=TRUE)
442

  
443

  
444

  
445
###################################################################
446
###################################################################
237 447

  
238 448
### add some additional variables
239 449
mod06s$month=factor(mod06s$month,labels=format(as.Date(paste("2000",1:12,"15",sep="-")),"%b"))
......
248 458
mod06sl=melt(mod06s[,!grepl("lppt",colnames(mod06s))],id.vars=c("id","lon","lat","elev","month","ppt","glon","glon2","gelev","gbin"))
249 459
levels(mod06sl$variable)=c("Effective Radius (um)","Very Cloudy Days (%)","Cloudy Days (%)","Optical Thickness (%)","Liquid Water Path")
250 460

  
251
###################################################################
252
###################################################################
253

  
254
bgyr=colorRampPalette(c("blue","green","yellow","red","purple"))
255

  
256
X11.options(type="cairo")
257 461
pdf("output/MOD06_summary.pdf",width=11,height=8.5)
258 462

  
259 463
# % cloudy maps
......
430 634
dev.off()
431 635

  
432 636

  
637

  
638

  
639

  
640

  
641

  
642

  
643
####################
644
####################  OLD JUNK BELOW!
645
####################
646

  
647

  
648
#levelplot(cer)#+points(x,y,data=dm2@data)
649

  
650
### extract MOD06 data for each station
651
stcer=extract(cer,dm2,fun=mean);colnames(stcer)=paste("cer_mean_",as.numeric(format(as.Date(cer@z$Date),"%m")),sep="")
652
stcerp=extract(cerp,dm2,fun=mean);colnames(stcerp)=paste("cerp_mean_",as.numeric(format(as.Date(cerp@z$Date),"%m")),sep="")
653
stcer20=extract(cer20,dm2,fun=mean);colnames(stcer20)=paste("cer20_mean_",as.numeric(format(as.Date(cer20@z$Date),"%m")),sep="")
654
stcot=extract(cot,dm2);colnames(stcot)=paste("cot_mean_",as.numeric(format(as.Date(cot@z$Date),"%m")),sep="")
655
stcld=extract(cld,dm2);colnames(stcld)=paste("cld_mean_",as.numeric(format(as.Date(cld@z$Date),"%m")),sep="")
656
stdem=extract(dem,dm2)
657
### generate new design matrix
658
mod06=cbind.data.frame(station=dm$station,stcer,stcerp,stcot,stcld,stcer20)
659
mod06l=melt(mod06,id.vars=c("station"));colnames(mod06l)[grep("value",colnames(mod06l))]="mod06"
660
mod06l[,c("variable","moment","month")]=do.call(rbind,strsplit(as.character(mod06l$variable),"_"))
661
mod06l=unique(mod06l)
662
mod06l=cast(mod06l,station+moment+month~variable,value="mod06")
663
mod06l=merge(dm2@data,mod06l,by=c("station","month"))
664
mod06l=mod06l[!is.na(mod06l$cer),]
665

  
666
mod06l=mod06l[order(mod06l$month),]
667
mod06l$lppt=log(mod06l$ppt+1)
668

  
669
#xyplot(latitude~longitude|as.factor(month),groups=cut(mod06l$ppt,breaks=quantile(mod06l$ppt,seq(0,1,len=10))),data=mod06l,auto.key=T,col=grey(seq(0,1,len=10)),pch=16,cex=.5)
670
#plot(cer)

Also available in: Unified diff