Project

General

Profile

« Previous | Next » 

Revision f07e0e66

Added by Adam Wilson about 11 years ago

udpates to validation and addition of initial biodiversity script

View differences:

climate/procedures/MOD09_Biodiversity.R
1
#### Assessing relevance of MODIS cloud data for SDM
2
library(dismo)
3

  
4
##  Species
5
sp=c("Spondias radlkoferi","Tapirira mexicana","Lozanella enantiophylla")
6
spl=strsplit(sp," ")
7

  
8
tsp=spl[[3]]
9
td=gbif(genus=tsp[1],species=tsp[2],geo=TRUE, sp=FALSE, removeZeros=FALSE, download=TRUE, getAlt=TRUE, start=1, end=NULL, feedback=3)
10

  
11

  
12
td
13

  
climate/procedures/MOD09_CloudFigures.R
7 7
library(rasterVis)
8 8
library(latticeExtra)
9 9
library(xtable)
10
library(texreg)
10 11
library(reshape)
11 12
library(caTools)
12 13

  
14

  
13 15
## read in data
14
cld=read.csv("data/NDP026D/cld.csv")
16
#cld=read.csv("data/NDP026D/cld.csv")
15 17
cldm=read.csv("data/NDP026D/cldm.csv")
16
cldy=read.csv("data/NDP026D/cldy.csv")
17
clda=read.csv("data/NDP026D/clda.csv")
18
#cldy=read.csv("data/NDP026D/cldy.csv")
19
#clda=read.csv("data/NDP026D/clda.csv")
18 20
st=read.csv("data/NDP026D/stations.csv")
19 21

  
20 22

  
......
24 26
IGBP$class=rownames(IGBP);rownames(IGBP)=1:nrow(IGBP)
25 27

  
26 28
## month factors
27
cld$month2=factor(cld$month,labels=month.name)
29
#cld$month2=factor(cld$month,labels=month.name)
28 30
cldm$month2=factor(cldm$month,labels=month.name)
29 31

  
30 32
coordinates(st)=c("lon","lat")
31 33
projection(st)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
32 34

  
33 35
##make spatial object
34
cldms=cldm
35
coordinates(cldms)=c("lon","lat")
36
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
37

  
38
##make spatial object
39
cldys=cldy
40
coordinates(cldys)=c("lon","lat")
41
projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
36
#cldys=cldy
37
#coordinates(cldys)=c("lon","lat")
38
#projection(cldys)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
42 39

  
43 40
#### Evaluate MOD35 Cloud data
44 41
mod09=brick("data/cloud_monthly.nc")
42
mod09s=brick("data/cloud_yseasmean.nc",varname="CF");names(mod09s)=c("DJF","MAM","JJA","SON")
45 43
mod09c=brick("data/cloud_ymonmean.nc",varname="CF");names(mod09c)=month.name
46
mod09a=brick("data/cloud_mean.nc",varname="CF_annual")#;names(mod09c)=month.name
44
mod09a=brick("data/cloud_mean.nc",varname="CF");names(mod09a)="Mean Annual Cloud Frequency"
47 45

  
48
mod09min=raster("data/cloud_min.nc",varname="CFmin")
49
mod09max=raster("data/cloud_max.nc",varname="CFmax")
50
mod09sd=raster("data/cloud_std.nc",varname="CFsd")
46
mod09min=brick("data/cloud_min.nc",varname="CFmin")
47
mod09max=brick("data/cloud_max.nc",varname="CFmax")
48
mod09sd=brick("data/cloud_std.nc",varname="CFsd")
51 49
#mod09mean=raster("data/mod09_clim_mac.nc")
52 50
#names(mod09d)=c("Mean","Minimum","Maximum","Standard Deviation")
53 51

  
54 52
#plot(mod09a,layers=1,margin=F,maxpixels=100)
55 53

  
56 54
## calculated differences
57
cldm$dif=cldm$mod09-cldm$cld
58
clda$dif=clda$mod09-clda$cld
55
cldm$difm=cldm$mod09-cldm$cld_all
56
cldm$difs=cldm$mod09sd+cldm$cldsd_all
57

  
58
#clda$dif=clda$mod09-clda$cld
59 59

  
60 60
## read in global coasts for nice plotting
61 61
library(maptools)
62
library(rgdal)
63

  
64
#coast=getRgshhsMap("/mnt/data/jetzlab/Data/environ/global/gshhg/gshhs_h.b", xlim = NULL, ylim = NULL, level = 4) 
65
land=readShapePoly("/mnt/data/jetzlab/Data/environ/global/gshhg/GSHHS_shp/c/GSHHS_c_L1.shp",force_ring=TRUE)
66
projection(land)="+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
67
CP <- as(extent(-180, 180, -60, 84), "SpatialPolygons")
68
proj4string(CP) <- CRS(proj4string(land))
69
coast=as(land[land$area>50,],"SpatialLines")
70
## Clip the map
71
land <- gIntersection(land, CP, byid=F)
72
coast <- gIntersection(coast, CP, byid=F)
73

  
74

  
75
#### get biome data
76
##make spatial object
77
cldms=cldm
78
coordinates(cldms)=c("lon","lat")
79
projection(cldms)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
62 80

  
63
data(wrld_simpl)
64
coast <- unionSpatialPolygons(wrld_simpl, rep("land",nrow(wrld_simpl)), threshold=5)
65
coast=as(coast,"SpatialLines")
81
if(!file.exists("data/teow/biomes.shp")){
82
    teow=readOGR("/mnt/data/jetzlab/Data/environ/global/teow/official/","wwf_terr_ecos")
83
    teow=teow[teow$BIOME<90,]
84
    biome=unionSpatialPolygons(teow,teow$BIOME, threshold=5)
85
    biomeid=read.csv("/mnt/data/jetzlab/Data/environ/global/teow/official/biome.csv",stringsAsFactors=F)
86
    biome=SpatialPolygonsDataFrame(biome,data=biomeid)
87
    writeOGR(biome,"data/teow","biomes",driver="ESRI Shapefile")
88
}
89
biome=readOGR("data/teow/","biomes")
90
projection(biome)=projection(st)
91
st$biome=over(st,biome,returnList=F)$BiomeID
92

  
93
cldm$biome=st$biome[match(cldm$StaID,st$id)]
94

  
95
## get stratified sample of points from biomes for illustration
96
if(!file.exists("output/biomesamplepoints.csv")){
97
    n_biomesamples=1000
98
    library(multicore)
99
    biomesample=do.call(rbind.data.frame,mclapply(1:length(biome),function(i)
100
        data.frame(biome=biome$BiomeID[i],coordinates(spsample(biome[i,],n=n_biomesamples,type="stratified",nsig=2)))))
101
    write.csv(biomesample,"output/biomesamplepoints.csv",row.names=F)
102
}
103
biomesample=read.csv("output/biomesamplepoints.csv")
104
coordinates(biomesample)=c("x1","x2")
66 105

  
106
#biomesample=extract(biomesample,mod09c)
67 107

  
68 108
## Figures
69 109
n=100
......
72 112
cols=colr(n)
73 113

  
74 114

  
75
pdf("output/Figures.pdf",width=11,height=8.5)
115
#pdf("output/Figures.pdf",width=11,height=8.5)
116
png("output/Figures_%03d.png",width=5000,height=4000,res=600,pointsize=36,bg="transparent")
76 117

  
118
res=1e5
119
greg=list(ylim=c(-60,84),xlim=c(-180,180))
120
    
77 121
## Figure 1: 4-panel summaries
78 122
#- Annual average
79 123
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
80
  margin=F,maxpixels=1e6,ylab="Latitude",xlab="Longitude",useRaster=T)+
124
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
81 125
  layer(sp.lines(coast,col="black"),under=F)
82
#- Monthly minimum
83
#- Monthly maximum
84
#- STDEV or Min-Max
85
p_mac=levelplot(mod09a,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),xlab="",ylab="",main=names(regs)[r],useRaster=T)
86
p_min=levelplot(mod09min,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
87
p_max=levelplot(mod09max,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
88
p_sd=levelplot(mod09sd,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
89
p3=c("Mean Cloud Frequency (%)"=p_mac,"Max Cloud Frequency (%)"=p_max,"Min Cloud Frequency (%)"=p_min,"Cloud Frequency Variability (SD)"=p_sd,x.same=T,y.same=T,merge.legends=T,layout=c(2,2))
90
print(p3)
91

  
92

  
93
### maps of mod09 and NDP
94
## map of stations
95
p_mac=levelplot(mod09a,col.regions=colr(100),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
96
  margin=F,maxpixels=1e6,ylab="Latitude",xlab="Longitude",useRaster=T)+
126
## Mean annual with validation stations
127
levelplot(mod09a,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
128
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
97 129
  layer(panel.xyplot(lon,lat,pch=16,cex=.3,col="black"),data=data.frame(coordinates(st)))+
98 130
  layer(sp.lines(coast,col="black"),under=F)
99 131

  
100
p_mace=xyplot(lat~dif,data=cldm[cldm$lat>-60,],panel=function(x,y,subscripts=T){
101
  x2=x[order(y)]
102
  y2=y[order(y)]
103
  win=8000
104
  Q50=runquantile(x2,win,probs=c(.25,.5,.75))
105
  ## polygon
106
  panel.polygon(c(Q50[,1],rev(Q50[,3])),c(y2,rev(y2)),type="l",col=grey(.8))
107
### hist
108
  n=150
109
  xbins=seq(-70,50,len=n)
110
  ybins=seq(-60,90,len=n)
111
  tb=melt(as.matrix(table(
112
    x=cut(x,xbins,labels=xbins[-1]),
113
    y=cut(y,ybins,labels=ybins[-1]))))
114
  qat=unique(tb$value)
115
  print(qat)
116
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
117
###
118
#  panel.xyplot(x,y,pch=16,cex=.1,col=)
119
#  cuts=cut(y,lats,labels=lats[-10])
120
#  qs=do.call(rbind,tapply(x,cuts,quantile,c(.25,.50,.75),na.rm=T))
121
  colnames(qs)=c("Q25","Q50","Q75")
122
  panel.lines(Q50[,1],y2,type="l",col=grey(.5))
123
  panel.lines(Q50[,2],y2,type="l",col="black")
124
  panel.lines(Q50[,3],y2,type="l",col=grey(.5))
125
},asp=1,xlab="Difference (MOD09-Observed)")+layer(panel.abline(v=0,lty="dashed",col="red"))
126

  
127
print(p_mac,position=c(0,0,.75,1),more=T)
128
print(p_mace,position=c(0.75,0,1,1))
129

  
130

  
131
p1=c("MODIS Cloud Frequency and NDP-026D Validation Stations"=p_mac,"Difference (MOD09-NDP026D)"=p_mace,x.same=F,y.same=T,layout=c(2,1))
132
resizePanels(p1,w=c(.75,.25))
133

  
134
quantile(cldm$dif,seq(0,1,len=6),na.rm=T)
135
at=c(-70,-50,-25,-10,-5,0,5,10,25,50,70)
136
bwr=colorRampPalette(c("blue","grey","red"))
137
xyplot(lat~lon|month2,data=cldm,groups=cut(cldm$dif,at),
138
       par.settings=list(superpose.symbol=list(col=bwr(length(at)-1))),pch=16,cex=.25,
139
       auto.key=list(space="right",title="Difference\n(MOD09-NDP026D)",cex.title=1),asp=1,
140
       main="NDP-026D Cloud Climatology Stations",ylab="Latitude",xlab="Longitude")+
141
  layer(sp.lines(coast,col="black",lwd=.1),under=F)
132
## Seasonal Means
133
levelplot(mod09s,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=2),
134
  margin=F,maxpixels=res,ylab="",xlab="",useRaster=T,ylim=greg$ylim)+
135
  layer(sp.lines(coast,col="black"),under=F)
142 136

  
137
## Monthly Means
138
levelplot(mod09c,col.regions=colr(n),cuts=100,at=seq(0,100,len=100),colorkey=list(space="bottom",adj=1),
139
  margin=F,maxpixels=res,ylab="Latitude",xlab="Longitude",useRaster=T,ylim=greg$ylim)+
140
  layer(sp.lines(coast,col="black"),under=F)
143 141

  
142
#- Monthly minimum
143
#- Monthly maximum
144
#- STDEV or Min-Max
145
#p_mac=levelplot(mod09a,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),xlab="",ylab="",useRaster=T)+
146
#      layer(sp.lines(coast,col="black"),under=F)
147
#p_min=levelplot(mod09min,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
148
#p_max=levelplot(mod09max,col.regions=colr(n),cuts=99,margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
149
#p_sd=levelplot(mod09sd,col.regions=colr(n),cuts=99,at=seq(0,100,length=100),margin=F,maxpixels=1e5,colorkey=list(space="bottom",height=.75),useRaster=T)
150
#p3=c("Mean Cloud Frequency (%)"=p_mac,"Max Cloud Frequency (%)"=p_max,"Min Cloud Frequency (%)"=p_min,"Cloud Frequency Variability (SD)"=p_sd,x.same=T,y.same=T,merge.legends=T,layout=c(2,2))
151
#print(p3)
152

  
153
bgr=function(x,n=100,br=0,c1=c("darkblue","blue","grey"),c2=c("grey","red","purple")){
154
    at=unique(c(seq(min(x,na.rm=T),max(x,na.rm=T),len=n)))
155
    bg=colorRampPalette(c1)
156
    gr=colorRampPalette(c2)
157
    return(list(at=at,col=c(bg(sum(at<br)),gr(sum(at>=br)))))
158
}
159

  
160
colat=bgr(cldm$difm)
161
phist=histogram(cldm$difm,breaks=colat$at,border=NA,col=colat$col,xlim=c(-50,40),type="count",xlab="Difference (MOD09-NDP026D)")#,seq(0,1,len=6),na.rm=T)
162
pmap=xyplot(lat~lon|month2,data=cldm,groups=cut(cldm$difm,rev(colat$at)),
163
       par.settings=list(superpose.symbol=list(col=colat$col)),pch=16,cex=.25,
164
       auto.key=F,#list(space="right",title="Difference\n(MOD09-NDP026D)",cex.title=1),asp=1,
165
       ylab="Latitude",xlab="Longitude")+
166
  layer(sp.lines(coast,col="black",lwd=.1),under=F)
167
print(phist,position=c(0,.75,1,1),more=T)
168
print(pmap,position=c(0,0,1,.78))
144 169

  
145 170
### heatmap of mod09 vs. NDP for all months
146
hmcols=colorRampPalette(c("grey","blue","red"))
171
hmcols=colorRampPalette(c("grey","blue","red","purple"))
147 172
#hmcols=colorRampPalette(c(grey(.8),grey(.3),grey(.2)))
148
tr=c(0,120)
173
tr=c(0,80)
149 174
colkey <- draw.colorkey(list(col = hmcols(tr[2]), at = tr[1]:tr[2],height=.25))
150 175

  
151
xyplot(cld~mod09,data=cld[cld$Nobs>10,],panel=function(x,y,subscripts){
152
  n=150
153
  bins=seq(0,100,len=n)
154
  tb=melt(as.matrix(table(
155
    x=cut(x,bins,labels=bins[-1]),
156
    y=cut(y,bins,labels=bins[-1]))))
157
  qat=tr[1]:tr[2]#unique(tb$value)
158
  print(qat)
159
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=subscripts)
160
  },asp=1,scales=list(at=seq(0,100,len=6)),ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
161
       legend= list(right = list(fun = colkey,title="Station Count")))+
162
  layer(panel.abline(0,1,col="black",lwd=2))
163
#  layer(panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=22,digits=2,col="blue"), style = 1)
164

  
165

  
166
xyplot(cld~mod09|month2,data=cld[cld$Nobs>50,],panel=function(x,y,subscripts){
176
xyplot(cld_all~mod09|month2,data=cldm,panel=function(x,y,subscripts){
167 177
  n=50
168 178
  bins=seq(0,100,len=n)
169 179
  tb=melt(as.matrix(table(
170 180
    x=cut(x,bins,labels=bins[-1]),
171 181
    y=cut(y,bins,labels=bins[-1]))))
172 182
  qat=unique(tb$value)
173
  qat=0:78
183
  print(max(qat))
174 184
  qat=tr[1]:tr[2]#unique(tb$value)
175 185
  panel.levelplot(tb$x,tb$y,tb$value,at=qat,col.regions=c("transparent",hmcols(length(qat))),subscripts=1:nrow(tb))
176
  panel.abline(0,1,col="black",lwd=2)
186
#  panel.abline(0,1,col="black",lwd=2)
187
  panel.abline(lm(y ~ x),col="black",lwd=2)
177 188
#  panel.ablineq(lm(y ~ x), r.sq = TRUE,at = 0.6,pos=1, offset=0,digits=2,col="blue")
178 189
  panel.text(70,10,bquote(paste(R^2,"=",.(round(summary(lm(y ~ x))$r.squared,2)))),cex=1.2)
179 190
},asp=1,scales=list(at=seq(0,100,len=6),useRaster=T,colorkey=list(width=.5,title="Number of Stations")),
180 191
          ylab="NDP Mean Cloud Amount (%)",xlab="MOD09 Cloud Frequency (%)",
181
              legend= list(right = list(fun = colkey)))+ layer(panel.abline(0,1,col="black",lwd=2))
192
              legend= list(right = list(fun = colkey)))#+ layer(panel.abline(0,1,col="black",lwd=2))
182 193

  
183 194

  
184 195
## Monthly Climatologies
185
for(i in 1:2){
186
 p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+
187
  layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+
188
  layer(panel.lines(1:100,predict(lm(y~x+I(x^2)),newdata=data.frame(x=1:100)),col="blue"))+
189
  layer(panel.abline(0,1,col="red"))
190
    if(i==2){
191
     p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
192
     p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T) 
193
       }
194
print(p1)
195
}
196
## for(i in 1:2){
197
##  p1=xyplot(cld~mod09|month2,data=cldm,cex=.2,pch=16,subscripts=T,ylab="NDP Mean Cloud Amount",xlab="MOD09 Cloud Frequency (%)")+
198
##   layer(panel.lines(1:100,predict(lm(y~x),newdata=data.frame(x=1:100)),col="green"))+
199
##   layer(panel.lines(1:100,predict(lm(y~x+I(x^2)),newdata=data.frame(x=1:100)),col="blue"))+
200
##   layer(panel.abline(0,1,col="red"))
201
##     if(i==2){
202
##      p1=p1+layer(panel.segments(mod09[subscripts],cld[subscripts]-cldsd[subscripts],mod09[subscripts],cld[subscripts]+cldsd[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T)
203
##      p1=p1+layer(panel.segments(mod09[subscripts]-mod09sd[subscripts],cld[subscripts],mod09[subscripts]+mod09sd[subscripts],cld[subscripts],subscripts=subscripts,col="grey"),data=cldm,under=T,magicdots=T) 
204
##        }
205
## print(p1)
206
## }
207

  
208
bwplot(lulcc~difm,data=cldm,horiz=T,xlab="Difference (MOD09-Observed)",varwidth=T,notch=T)+layer(panel.abline(v=0))
196 209

  
197
bwplot(lulcc~dif,data=cldm,horiz=T,xlab="Difference (MOD09-Observed)",varwidth=T,notch=T)+layer(panel.abline(v=0))
210
#library(BayesFactor)
211
#coplot(difm~month2|lulcc,data=cldm[!is.na(cldm$dif)&!is.na(cldm$lulcc),],panel = panel.smooth)
198 212

  
199 213

  
200 214
dev.off()
201 215

  
202 216

  
203
summary(lm(cld~mod09,data=cld))
217

  
218
## Validation table construction
219

  
220
summary(lm(cld_all~mod09+lat,data=cldm))
221

  
222
               
223
## assess latitude bias
224
cldm$abslat=abs(cldm$lat)
225
cldm$absdif=abs(cldm$difm)
226

  
227
###################################################################
228
### validation by biome
229
bs=biome$BiomeID
230
mod_bs=lapply(bs,function(bs) lm(cld_all~mod09,data=cldm[cldm$biome==bs,]))
231
names(mod_bs)=biome$Biome
232

  
233
screenreg(mod_bs,digits=2,single.row=F,custom.model.names=names(mod_bs))
234

  
235

  
236
#lm_all=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld),])
237
lm_mod=lm(cld~mod09+biome,data=cldm)
238

  
239
screenreg(lm_mod,digits=2,single.row=F)
240

  
241
####################################################################
242
## assess temporal stability
243

  
244
## spatialy subset data to stations at least 10km apart
245
st2=remove.duplicates(st,zero=10)
246

  
247
## Subset data
248
## drop missing observations
249
cldm.t=cldm[!is.na(cldm$cld_all)&!is.na(cldm$mod09)&!is.na(cldm$biome),]
250
cldm.t=cldm.t[cldm.t$lat>=-60,]
251
#  make sure all stations have all mod09 data
252
stdrop=names(which(tapply(cldm.t$month,cldm.t$StaID,length)!=12))
253
cldm.t=cldm.t[!cldm.t$StaID%in%stdrop,]
254
# Keep only stations at least 10km apart 
255
cldm.t=cldm.t[cldm.t$StaID%in%st2$id,]
256
## Subset to only some months, if desired
257
#cldm.t=cldm.t[cldm.t$month%in%1:3,]
258

  
259

  
260
## Select Knots
261
knots=spsample(land,500,type="regular")
262

  
263
                                        #  reshape data
264
m.cld=cast(cldm.t,StaID+lat+lon+biome~month,value="cld_all");colnames(m.cld)[-(1:4)]=paste("cld.",colnames(m.cld)[-(1:4)],sep="")
265
m.mod09=cast(cldm.t,StaID~month,value="mod09");colnames(m.mod09)[-1]=paste("mod09.",colnames(m.mod09)[-1],sep="")
266
mdata=cbind(m.cld,m.mod09)
267

  
268
## cast to 
269
coords <- as.matrix(m.cld[,c("lon","lat")])#as.matrix(ne.temp[,c("UTMX", "UTMY")]/1000)
270
max.d <- max(iDist(coords))
271

  
272
##make symbolic model formula statement for each month
273
mods <- lapply(paste(paste(paste("cld.",1:N.t,sep=''),paste("mod09.",1:N.t,sep=''),sep='~'),"",sep=""), as.formula)
274

  
275
tlm=model.matrix(lm(mods[[1]],data=mdata))
276

  
277
N.t <- ncol(m.mod09)-1 ##number of months
278
n <- nrow(m.cld) ##number of observation per months
279
p <- ncol(tlm) #number of regression parameters in each month
280

  
281
starting <- list("beta"=rep(0,N.t*p), "phi"=rep(3/(0.5*max.d), N.t),
282
                 "sigma.sq"=rep(2,N.t), "tau.sq"=rep(1, N.t),
283
                 "sigma.eta"=diag(rep(0.01, p)))
284
tuning <- list("phi"=rep(5, N.t))
285

  
286
priors <- list("beta.0.Norm"=list(rep(0,p), diag(1000,p)),
287
               "phi.Unif"=list(rep(3/(0.9*max.d), N.t), rep(3/(0.05*max.d), N.t)),
288
               "sigma.sq.IG"=list(rep(2,N.t), rep(10,N.t)),
289
               "tau.sq.IG"=list(rep(2,N.t), rep(5,N.t)),
290
               "sigma.eta.IW"=list(2, diag(0.001,p)))
291
cov.model <- "exponential"
292

  
293
## Run the model
294
n.samples <- 100
295
m.1=spDynLM(mods,data=mdata,coords=coords,knots=coordinates(knots),n.samples=n.samples,starting=starting,tuning=tuning,priors=priors,cov.model=cov.model,get.fitted=T,n.report=25)
296

  
297
## summarize
298
burn.in <- floor(0.75*n.samples)
299
quant <- function(x){quantile(x, prob=c(0.5, 0.025, 0.975))}
300
beta <- apply(m.1$p.beta.samples[burn.in:n.samples,], 2, quant)
301
beta.0 <- beta[,grep("Intercept", colnames(beta))]
302
beta.1 <- beta[,grep("mod09", colnames(beta))]
303

  
304

  
305

  
306

  
307

  
308

  
309

  
310

  
311
library(texreg)
312
 extract.lm <- function(model) {
313
     s <- summary(model)
314
     names <- rownames(s$coef)
315
     co <- s$coef[, 1]
316
     se <- s$coef[, 2]
317
     pval <- s$coef[, 4]
318
     rs <- s$r.squared
319
     n <- nobs(model)
320
     rmse=sqrt(mean((residuals(s)^2)))
321
     gof <- c(rs, rmse, n)
322
     gof.names <- c("R-Squared","RMSE","n")
323
     tr <- createTexreg(coef.names = names, coef = co, se = se, 
324
                        pvalues = pval, gof.names = gof.names, gof = gof)
325
     return(tr)
326
 }
327
setMethod("extract", signature = className("lm", "stats"),definition = extract.lm)
328

  
329
forms=c("cld~mod09+month2+lat")
330
lm_all=lm(cld_all~mod09+lat,data=cldm[!is.na(cldm$cld),])
331

  
332

  
333
### Compare two time periods
334
lm_all1=lm(cld_all~mod09,data=cldm[!is.na(cldm$cld),])
335
lm_mod=lm(cld~mod09,data=cldm)
336
mods=list("1970-2000 complete"=lm_all1,"2000-2009"=lm_mod)
337

  
338
screenreg(mods,digits=2,single.row=T,custom.model.names=names(mods))
339
htmlreg(mods,file = "output/tempstab.doc",
340
        custom.model.names = names(mods),
341
        single.row = T, inline.css = FALSE,doctype = TRUE, html.tag = TRUE, head.tag = TRUE, body.tag = TRUE)
342

  
343

  
344

  
345
abslm=lm(absdif~abslat:I(abslat^2),data=cldm)
346

  
347
plot(absdif~abslat,data=cldm,cex=.25,pch=16)
348
lines(0:90,predict(abslm,newdata=data.frame(abslat=0:90),type="response"),col="red")
349

  
350
bf=anovaBF(dif~lulcc+month2,data=cldm[!is.na(cldm$dif)&!is.na(cldm$lulcc),])
351
ch=posterior(bf, iterations = 1000)
352
summary(bf)
353
plot(bf)
204 354

  
205 355
## explore validation error
206 356
cldm$lulcc=as.factor(IGBP$class[match(cldm$lulc,IGBP$ID)])
207 357

  
208 358
## Table of RMSE's by lulc by month
209
lulcrmsel=ddply(cldm,c("month","lulc"),function(x) c(count=nrow(x),rmse=sqrt(mean((x$mod09-x$cld)^2,na.rm=T))))
210
lulcrmsel=lulcrmsel[!is.na(lulcrmsel$lulc),]
211
lulcrmsel$lulcc=as.factor(IGBP$class[match(lulcrmsel$lulc,IGBP$ID)])
359
lulctl=ddply(cldm,c("month","lulc"),function(x) c(count=nrow(x),rmse=sqrt(mean((x$mod09-x$cld)^2,na.rm=T))))
360
lulctl=lulctl[!is.na(lulctl$lulc),]
361
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)])
362

  
363
lulctl= ddply(cldm,c("lulc"),function(x) c(count=nrow(x),mean=paste(mean(x$difm,na.rm=T)," (",sd(x$difm,na.rm=T),")",sep=""),rmse=sqrt(mean((x$difm)^2,na.rm=T))))
364
lulctl$lulcc=as.factor(IGBP$class[match(lulctl$lulc,IGBP$ID)]
365
print(xtable(lulctl[,c("lulcc","count","mean","rmse")],digits=1),"html")
366

  
212 367

  
213 368
lulcrmse=cast(lulcrmsel,lulcc~month,value="rmse")
214 369
lulcrmse
370

  
371
lulcrmse.q=round(do.call(rbind,apply(lulcrmse,1,function(x) data.frame(Min=min(x,na.rm=T),Mean=mean(x,na.rm=T),Max=max(x,na.rm=T),SD=sd(x,na.rm=T)))),1)#quantile,c(0.025,0.5,.975),na.rm=T)),1)
372
lulcrmse.q=lulcrmse.q[order(lulcrmse.q$Mean,decreasing=T),]
373
lulcrmse.q
374

  
215 375
print(xtable(lulcrmse,digits=1),"html")
216
  
217
levelplot(rmse~lulc*month,data=lulcrmsel,col.regions=heat.colors(20))
376

  
377
bgyr=colorRampPalette(c("blue","green","yellow","red"))
378
levelplot(rmse~month*lulcc,data=lulcrmsel,col.regions=bgyr(1000),at=quantile(lulcrmsel$rmse,seq(0,1,len=100),na.rm=T))
218 379

  
219 380

  
220 381
### Linear models
climate/procedures/NDP-026D.R
49 49
## drop missing values
50 50
cld=cld[,!grepl("Fq|AWP|NC",colnames(cld))]
51 51
cld$Amt[cld$Amt<0]=NA
52
#cld$Fq[cld$Fq<0]=NA
53
#cld$AWP[cld$AWP<0]=NA
54
#cld$NC[cld$NC<0]=NA
55
#cld=cld[cld$Nobs>0,]
52
cld$Amt=cld$Amt/100
53

  
54
## calculate means and sds for full record (1970-2009)
55
Nobsthresh=20 #minimum number of observations to include 
56 56

  
57
## calculate means and sds
58 57
cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
59 58
  data.frame(
60
             month=x$month[1],
61
             StaID=x$StaID[1],
62
             cld=mean(x$cld[x$Nobs>60],na.rm=T),
63
             cldsd=sd(x$cld[x$Nobs>60],na.rm=T))}))
59
      month=x$month[1],
60
      StaID=x$StaID[1],
61
      cld_all=mean(x$Amt[x$Nobs>=Nobsthresh],na.rm=T),  # full record
62
      cldsd_all=sd(x$Amt[x$Nobs>=Nobsthresh],na.rm=T),
63
      cld=mean(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T), #only MODIS epoch
64
      cldsd=sd(x$Amt[x$YR>=2000&x$Nobs>=Nobsthresh],na.rm=T))}))
64 65
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
65 66

  
66 67

  
68

  
67 69
## add the MOD09 data to cld
68 70
#### Evaluate MOD35 Cloud data
69 71
mod09=brick("~/acrobates/adamw/projects/cloud/data/cloud_ymonmean.nc")
......
91 93
})#,mc.cores=3)
92 94

  
93 95
## read it back in
94
mod09st=read.csv("valid.csv",header=F)[,-c(1,2)]
95

  
96
colnames(mod09st)=c(names(mod09)[-1],"id")
97
mod09stl=melt(mod09st,id.vars=c("id","sd"))
96
mod09st=read.csv("valid.csv",header=F)[,-c(1)]
97
colnames(mod09st)=c(names(mod09),"id","type")
98
mod09stl=melt(mod09st,id.vars=c("id","type"))
98 99
mod09stl[,c("year","month")]=do.call(rbind,strsplit(sub("X","",mod09stl$variable),"[.]"))[,1:2]
99 100
mod09stl$value[mod09stl$value<0]=NA
101
mod09stl=cast(mod09stl,id+year+month~type,value="value")
100 102

  
101 103
## add it to cld
102
cldm$mod09=mod09stl$value[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]
104
cldm$mod09=mod09stl$mean[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]
105
cldm$mod09sd=mod09stl$sd[match(paste(cldm$StaID,cldm$month),paste(mod09stl$id,as.numeric(mod09stl$month)))]
103 106

  
104 107

  
105 108
## LULC
......
121 124
cldm$lulc=lulcst$lulc[match(cldm$StaID,lulcst$id)]
122 125
cldm$lulcc=IGBP$class[match(cldm$lulc,IGBP$ID)]
123 126

  
124
## update cld column names
125
colnames(cldm)[grep("Amt",colnames(cldm))]="cld"
126
cldm$cld=cldm$cld/100
127
cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
128

  
129
## calculate means and sds
130
#cldm=do.call(rbind.data.frame,by(cld,list(month=as.factor(cld$month),StaID=as.factor(cld$StaID)),function(x){
131
#  data.frame(
132
#             month=x$month[1],
133
#             lulc=x$lulc[1],
134
#             StaID=x$StaID[1],
135
#             mod09=mean(x$mod09,na.rm=T),
136
#             mod09sd=sd(x$mod09,na.rm=T),
137
#             cld=mean(x$cld[x$Nobs>50],na.rm=T),
138
#             cldsd=sd(x$cld[x$Nobs>50],na.rm=T))}))
139
#cldm[,c("lat","lon")]=coordinates(st)[match(cldm$StaID,st$id),c("lat","lon")]
140

  
141
## means by year
142
#cldy=do.call(rbind.data.frame,by(cld,list(year=as.factor(cld$YR),StaID=as.factor(cld$StaID)),function(x){
143
#  data.frame(
144
#             year=x$YR[1],
145
#             StaID=x$StaID[1],
146
#             lulc=x$lulc[1],
147
#             mod09=mean(x$mod09,na.rm=T),
148
#             mod09sd=sd(x$mod09,na.rm=T),
149
#             cld=mean(x$cld[x$Nobs>50]/100,na.rm=T),
150
#             cldsd=sd(x$cld[x$Nobs>50]/100,na.rm=T))}))
151
#cldy[,c("lat","lon")]=coordinates(st)[match(cldy$StaID,st$id),c("lat","lon")]
152

  
153
## overall mean
154
clda=do.call(rbind.data.frame,by(cldm,list(StaID=as.factor(cldm$StaID)),function(x){
155
  data.frame(
156
             StaID=x$StaID[1],
157
             lulc=x$lulc[1],
158
             mod09=mean(x$mod09,na.rm=T),
159
             mod09sd=sd(x$mod09,na.rm=T),
160
             cld=mean(x$cld,na.rm=T),
161
             cldsd=sd(x$cld,na.rm=T))}))
162
clda[,c("lat","lon")]=coordinates(st)[match(clda$StaID,st$id),c("lat","lon")]
163

  
164 127

  
165 128
## write out the tables
166 129
write.csv(cld,file="cld.csv",row.names=F)
167
#write.csv(cldy,file="cldy.csv",row.names=F)
168 130
write.csv(cldm,file="cldm.csv",row.names=F)
169
write.csv(clda,file="clda.csv",row.names=F)
170 131

  
171 132
#########################################################################
172 133

  
climate/procedures/ee_compile.R
106 106

  
107 107
### merge all the tiles to a single global composite
108 108
#system(paste("ncdump -h ",list.files(tempdir,pattern="mod09.*.nc$",full=T)[10]))
109
system(paste("cdo -O  mergetime -setctomiss,-32768 ",paste(list.files(tempdir,pattern="mod09.*.nc$",full=T),collapse=" ")," data/cloud_monthly.nc"))
109
file.remove("tmp/mod09_2000-01-15.nc")
110
system(paste("cdo -O  mergetime -setrtomiss,-32768,-1 ",paste(list.files(tempdir,pattern="mod09.*.nc$",full=T),collapse=" ")," data/cloud_monthly.nc"))
110 111

  
111 112
#  Overall mean
112
system(paste("cdo -O  -chname,CF,CF_annual -timmean data/cloud_monthly.nc  data/cloud_mean.nc"))
113
system(paste("cdo -O  timmean data/cloud_monthly.nc  data/cloud_mean.nc"))
113 114

  
114 115
### generate the monthly mean and sd
115 116
#system(paste("cdo -P 10 -O merge -ymonmean data/mod09.nc -chname,CF,CF_sd -ymonstd data/mod09.nc data/mod09_clim.nc"))
116 117
system(paste("cdo  -f nc4c -O -ymonmean data/cloud_monthly.nc data/cloud_ymonmean.nc"))
118

  
119

  
120
## Seasonal Means
121
system(paste("cdo  -f nc4c -O -yseasmean data/cloud_monthly.nc data/cloud_yseasmean.nc"))
122
system(paste("cdo  -f nc4c -O -yseasstd data/cloud_monthly.nc data/cloud_yseasstd.nc"))
123

  
117 124
## standard deviations, had to break to limit memory usage
118 125
system(paste("cdo  -f nc4c -O -chname,CF,CF_sd -ymonstd -selmon,1,2,3,4,5,6 data/cloud_monthly.nc data/cloud_ymonsd_1-6.nc"))
119 126
system(paste("cdo  -f nc4c -O -chname,CF,CF_sd -ymonstd -selmon,7,8,9,10,11,12 data/cloud_monthly.nc data/cloud_ymonsd_7-12.nc"))
......
121 128

  
122 129

  
123 130
#if(!file.exists("data/mod09_metrics.nc")) {
124
    system("cdo -f nc4c -chname,CF,CFmin -timmin data/cloud_ymonmean.nc data/cloud_min.nc")
125
    system("cdo -f nc4c -chname,CF,CFmax -timmax data/cloud_ymonmean.nc data/cloud_max.nc")
131
    system("cdo -f nc4c  timmin data/cloud_ymonmean.nc data/cloud_min.nc")
132
    system("cdo -f nc4c  timmax data/cloud_ymonmean.nc data/cloud_max.nc")
126 133
    system("cdo -f nc4c -chname,CF,CFsd -timstd data/cloud_ymonmean.nc data/cloud_std.nc")
127 134
#    system("cdo -f nc2 merge data/mod09_std.nc data/mod09_min.nc data/cloud_max.nc data/cloud_metrics.nc")
128 135
#    system("cdo merge -chname,CF,CFmin -timmin data/cloud_ymonmean.nc -chname,CF,CFmax -timmax data/cloud_ymonmean.nc  -chname,CF,CFsd -timstd data/cloud_ymonmean.nc  data/cloud_metrics.nc")
129 136
#}
130 137

  
131 138

  
139
# Regressions through time by season
140
s=c("DJF","MAM","JJA","SON")
141

  
142
system(paste("cdo  -f nc4c -O regres -selseas,",s[1]," data/cloud_monthly.nc data/slope_",s[1],".nc &",sep=""))
143
system(paste("cdo  -f nc4c -O regres -selseas,",s[2]," data/cloud_monthly.nc data/slope_",s[2],".nc &",sep=""))
144
system(paste("cdo  -f nc4c -O regres -selseas,",s[3]," data/cloud_monthly.nc data/slope_",s[3],".nc &",sep=""))
145
system(paste("cdo  -f nc4c -O regres -selseas,",s[4]," data/cloud_monthly.nc data/slope_",s[4],".nc &",sep=""))
132 146

  
133 147

  
134 148

  

Also available in: Unified diff