/ - Diff - Environment and organisms - NCEAS Projects

« Previous | Next »

Revision 0924578a

Added by Adam Wilson over 12 years ago

ID 0924578a4ba91df092cde27e01df3fea9a712ac6
Parent 858fd2e7
Child 3682f238

Added script to evaluate climatic stationarity (Task #479)

     ### copy lulc data to litoria
     setwd("data/lulc")
     system("scp atlas:/home/parmentier/data_Oregon_stations/W_Layer* .")
     #setwd("data/lulc")
     #system("scp atlas:/home/parmentier/data_Oregon_stations/W_Layer* .")
     setwd("/home/adamw/acrobates/projects/interp")
-...
     d2[,c("lon","lat")]=coordinates(st2)[match(d2$id,st2$id),]
     d2$elev=st2$elev[match(d2$id,st2$id)]
     d2$month=format(d2$date,"%m")
     #d2$value=d2$value/10 #convert to mm
     d2$value=d2$value/10 #convert to mm
     ## load topographical data
     topo=brick(as.list(list.files("data/topography",pattern="rst$",full=T)))
     topo=brick(as.list(list.files("data/regions/oregon/topo",pattern="SRTM.*rst$",full=T)))
     topo=calc(topo,function(x) ifelse(x<0,NA,x))
     names(topo)=c("aspect","dem","slope")
     colnames(topo@data@values)=c("aspect","dem","slope")
-...
     ### load the lulc data as a brick
     lulc=brick(as.list(list.files("data/lulc",pattern="rst$",full=T)))
     lulc=brick(as.list(list.files("data/regions/oregon/lulc",pattern="rst$",full=T)))
     #projection(lulc)=
     #plot(lulc)
-...
     lulc=calc(lulc,function(x) ifelse(is.na(x),0,x))
     projection(lulc)=projs
     ### reclass/sum classes
     ShrubGrass=subset(lulc,"Shrub")+subset(lulc,"Grass");layerNames(ShrubGrass)="ShrubGrass"
     Other=subset(lulc,"Mosaic")+subset(lulc,"Barren")+subset(lulc,"Snow")+subset(lulc,"Wetland");layerNames(Other)="Other"
     lulc2=stack(subset(lulc,"Forest"),subset(lulc,"Urban"),subset(lulc,"Crop"),Other,ShrubGrass)
     ### load the LST data
     lst=brick(as.list(list.files("data/lst",pattern="rescaled.rst$",full=T)[c(4:12,1:3)]))
     lst=brick(as.list(list.files("data/regions/oregon/lst",pattern="rescaled.rst$",full=T)[c(4:12,1:3)]))
     lst=lst-273.15
     colnames(lst@data@values)=format(as.Date(paste("2000-",as.numeric(gsub("[a-z]|[A-Z]|[_]|83","",layerNames(lst))),"-15",sep="")),"%b")
     layerNames(lst)=format(as.Date(paste("2000-",as.numeric(gsub("[a-z]|[A-Z]|[_]|83","",layerNames(lst))),"-15",sep="")),"%b")
-...
     ######################################
     ## compare LULC with station data
     st2=SpatialPointsDataFrame(st2,data=cbind.data.frame(st2@data,demb=extract(demb,st2),extract(topo,st2),extract(topo2,st2),extract(lulc,st2),extract(lst,st2)))
     stlulc=extract(lulc,st2) #overlay stations and LULC values
     st2=st2[!is.na(extract(demb,st2)),]
     st2=SpatialPointsDataFrame(st2,data=cbind.data.frame(st2@data,demb=extract(demb,st2),extract(topo,st2),extract(topo2,st2),extract(lulc2,st2,buffer=1500,fun=mean),extract(lst,st2)))
     stlulc=extract(lulc2,st2,buffer=1500,fun=mean) #overlay stations and LULC values
     st2$lulc=do.call(c,lapply(apply(stlulc,1,function(x) which.max(x)),function(x) ifelse(is.null(names(x)),NA,names(x))))
     ###  add MODIS metric to station data for month corresponding to that date
     ### reshape for easy merging
     sdata.ul=melt(st2@data,id.vars=c("id","lat","lon","Forest","ShrubGrass","Crop","Urban","Other","lulc"),measure.vars=format(as.Date(paste("2000-",1:12,"-15",sep="")),"%b"))
     ### generate sample of points to speed processing
     n=10000/length(unique(demb))
     n2=30  #number of knots
-...
     #ms1[ms1$parm%in%c("x","y","dem"),c("Q2.5","Q50","Q97.5")]=ms1[ms1$parm%in%c("x","y","dem"),c("Q2.5","Q50","Q97.5")]
     #######################################################################
     #### look at interaction of tmax~lst*lulc using monthly means
     ### add monthly data to sdata table by matching unique station_month ids.
     d2$month=as.numeric(format(d2$date,"%m"))
     ### get monthly means and sd's
     dm=melt(cast(d2,id+lon+lat+elev~month,value="value",fun.aggregate=mean,na.rm=T),id.vars=c("id","lon","lat","elev"));colnames(dm)[grep("value",colnames(dm))]="mean"
     ds=melt(cast(d2,id+lon+lat+elev~month,value="value",fun.aggregate=sd,na.rm=T),id.vars=c("id","lon","lat","elev"))  #sd of tmax
     dn=melt(cast(d2,id+lon+lat+elev~month,value="value",fun.aggregate=length),id.vars=c("id","lon","lat","elev"))  #number of observations
     dm$sd=ds$value
     dm$n=dn$value[match(paste(dm$month,dm$id),paste(dn$month,dn$id))]/max(dn$value)  # % complete record
     #get lulc classes
     lcs=layerNames(lulc2)
     dm$lst=sdata.ul$value[match(paste(dm$id,format(as.Date(paste("2000-",dm$month,"-15",sep=""),"%Y-%m-%d"),"%b"),sep="_"),paste(sdata.ul$id,sdata.ul$variable,sep="_"))]
     dm[,lcs]=sdata.ul[match(dm$id,sdata.ul$id),lcs]
     dm=dm[!is.na(dm$ShrubGrass),]
     dm$class=lcs[apply(dm[,lcs],1,which.max)]
     ## update month names
     dm$m2=format(as.Date(paste("2000-",dm$month,"-15",sep="")),"%B")
     dm$m2=factor(as.character(dm$m2),levels=format(as.Date(paste("2000-",1:12,"-15",sep="")),"%B"),ordered=T)
     xyplot(mean~lst|m2,groups=class,data=dm,panel=function(x,y,subscripts,groups){  #+cut(dm$elev,breaks=quantile(dm$elev,seq(0,1,len=4)),labels=c("low","medium","high"))
       dt=dm[subscripts,]
       #panel.segments(dt$lst,dt$mean-dt$sd,dt$lst,dt$mean+dt$sd,groups=groups,lwd=.5,col="#C1CDCD")
       panel.xyplot(dt$lst,dt$mean,groups=groups,subscripts=subscripts,type=c("p","r"),cex=0.5)
       panel.abline(0,1,col="black",lwd=2)
     },par.settings = list(superpose.symbol = list(pch=1:6,col=c("lightgreen","darkgreen","grey","brown","red"))),
            auto.key=list(space="right"),scales=list(relation="free"),
            sub="Each point represents a monthly mean (climatology) for a single station \n Points are colored by LULC class with largest % \n Heavy black line is y=x",main="Monthly Mean LST and Monthly Mean Tmax",
            ylab="Mean Monthly Tmax (C)",xlab="Mean Monthly LST")
     mods=data.frame(
       form=c(
         "mean~lst+elev",
         "mean~lst+elev+ShrubGrass+Urban+Crop+Other",
         "mean~lst+elev+lst*ShrubGrass+lst*Urban+lst*Crop+lst*Other"
         ),
       type=c("lst","intercept","interact"),
       stringsAsFactors=F)
     mods2=expand.grid(form=mods$form,month=1:12)
     mods2$type=mods$type[match(mods2$form,mods$form)]
     #summary(lm(mods$form[4],data=dm,weight=dm$n))
     ms=lapply(1:nrow(mods2),function(i) {
       m=lm(as.formula(as.character(mods2$form[i])),data=dm[dm$month==mods2$month[i],],weight=dm$n[dm$month==mods2$month[i]])
       return(list(model=m,
                   res=data.frame(
                     Formula=as.character(mods2$form[i]),
                     Month=mods2$month[i],
                     type=mods2$type[i],
                     AIC=AIC(m),
                     R2=summary(m)$r.squared)))
     })
     ### identify lowest AIC per month
     ms1=do.call(rbind.data.frame,lapply(ms,function(m) m$res))
     aicw= cast(ms1,Month~type,value="AIC")
     aicwt=as.data.frame(t(apply(aicw[,-1],1,function(x) ifelse(x==min(x),"Minimum",ifelse((x-min(x))<7,"NS Minimum","NS")))));colnames(aicwt)=colnames(aicw)[-1];aicwt$Month=aicw$Month
     aic=melt(aicwt,id.vars="Month");colnames(aic)=c("Month","type","minAIC")
     aic$minAIC=factor(aic$minAIC,ordered=F)
     xyplot(AIC~as.factor(Month),groups=Formula,data=ms1,type=c("p","l"),pch=16,auto.key=list(space="top"),main="Model Comparison across months",
            par.settings = list(superpose.symbol = list(pch=16,cex=1)),xlab="Month")
     ms2=lapply(ms,function(m) m$model)
     mi=rep(c(1:12),each=3)  #month indices
     fs=do.call(rbind.data.frame,lapply(1:12,function(i){
       it=which(mi==i)
       x=anova(ms2[[it[1]]],ms2[[it[2]]],ms2[[it[3]]])
       fs=c(
         paste(as.character(formula(ms2[[it[1]]]))[c(2,1,3)],collapse=" "),
         paste(as.character(formula(ms2[[it[2]]]))[c(2,1,3)],collapse=" "),
         paste(as.character(formula(ms2[[it[3]]]))[c(2,1,3)],collapse=" "))
       data.frame(month=rep(i,3),model=fs,p=as.data.frame(x)[,6],sig=ifelse(as.data.frame(x)[,6]<0.05,T,F))
     }))
     table(fs$sig,fs$model)
     which(fs$sig)
     ### load oregon boundary for comparison
     roi=spTransform(as(readOGR("data/regions/Test_sites/Oregon.shp","Oregon"),"SpatialLines"),projs)
-...
     ### Summary plots of covariates
     ## LULC
     at=seq(0.1,100,length=100)
     levelplot(lulc,at=at,col.regions=bgyr(length(at)),
     levelplot(lulc2,at=at,col.regions=bgyr(length(at)),
               main="Land Cover Classes",sub="Sub-pixel %")+
       layer(sp.lines(roi, lwd=1.2, col='black'))

     ##################    Data preparation for interpolation   #######################################
     ############################ Extraction of station data ##########################################
     #This script performs queries on the Postgres database ghcn for stations matching the            #
     #interpolation area. It requires the following inputs:                                           #
     # 1)the text file of GHCND stations from NCDC matching the database version release              #
     # 2)a shape file of the study area with geographic coordinates: lonlat WGS84                     #
     # 3)a new coordinate system can be provided as an argument                                       #
     # 4)the variable of interest: "TMAX","TMIN" or "PRCP"                                            #
     #                                                                                                #
     #The outputs are text files and a shape file of a time subset of the database                    #
     #AUTHOR: Benoit Parmentier                                                                       #
     #DATE: 06/02/212                                                                                 #
     #PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#363--                                  #
     ##################################################################################################
     ### Loading R libraries and packages
     library(RPostgreSQL)                                    # PostgreSQL driver used for the station database connection
     library(sp)                                             # Spatial package with class definition by Bivand et al.
     library(spdep)                                          # Spatial package with methods and spatial stat. by Bivand et al.
     library(rgdal)                                          # GDAL wrapper for R, spatial utilities
     library(rgeos)                                          # Polygon buffering and other vector operations
     library(reshape)
     ### Parameters and arguments
     ### All settings for the script are plain top-level variables defined here.
     db.name <- "ghcn"                #name of the Postgres database
     var <- c("TMAX","TMIN","PRCP")                    #names of the variables (elements) to keep; all three are queried
     # NOTE(review): year_start and year_end are not referenced by the queries visible
     # further down, which hard-code their own year limits -- confirm before relying on them.
     year_start<-"1970"               #starting year for the query (included)
     year_end<-"2011"                 #end year for the query (excluded)
     path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/"        #Jupiter LOCATION on EOS/Atlas
     #path<-"H:/Data/IPLANT_project/data_Oregon_stations"                      #Jupiter Location on XANDERS
     outpath=path                                                              # default output path (overridden below because we don't have write access to other's home dirs)
     setwd(path)                                                               # the study-area shapefile is later read relative to this directory
     out_prefix<-"stationarity"                                                 #User defined output prefix
     buffer=100                                                                 # buffer around the study area in km (multiplied by 1000 before buffering); values <=0 disable buffering
     #for Adam
     outpath="/home/wilson/data/"                                               # replaces the outpath set above; must end with "/" because file names are appended directly
     ############ START OF THE SCRIPT #################
     #####  Connect to Station database
     ## Opens a connection to the Postgres database named by db.name; `db` is used by every query below.
     drv <- dbDriver("PostgreSQL")
     db <- dbConnect(drv, dbname=db.name)#,options="statement_timeout = 1m")
     ##### STEP 1: Select station in the study area
     infile1<- "ORWGS84_state_outline.shp"        #This is the shape file of outline of the study area.
     ## Strip only a trailing ".shp": the pattern is escaped and anchored so that "." is a literal dot
     ## and a "shp" occurring elsewhere in the name is left alone.
     filename<-sub("\\.shp$","",infile1)          #Removing the extension from file.
     interp_area <- readOGR(".",filename)         #Layer is read from the current working directory
     CRS_interp<-proj4string(interp_area)         #Storing the coordinate information: geographic coordinates longlat WGS84
     #####  Buffer shapefile if desired
     ##     This is done to include stations from outside the region in the interpolation fitting process and reduce edge effects when stitching regions
     ##     `interp_areab` is the polygon used below for the bounding box and for the station overlay:
     ##     the buffered region when buffer>0, otherwise the study area as read from the shapefile.
     ##     It is initialised here so that it exists even when no buffer is requested.
     interp_areab=interp_area
     if(buffer>0){  #only apply buffer if buffer >0
       interp_area=gUnionCascaded(interp_area)  #dissolve any subparts of roi (if there are islands, lakes, etc.)
       interp_areaC=gCentroid(interp_area)       # get centroid of region
       interp_areaB=spTransform(                # buffer roi (transform to azimuthal equidistant with centroid of region for most (?) accurate buffering, add buffer, then transform to WGS84)
         gBuffer(
           spTransform(interp_area,
                       CRS(paste("+proj=aeqd +lat_0=",interp_areaC@coords[2]," +lon_0=",interp_areaC@coords[1]," +ellps=WGS84 +datum=WGS84 +units=m +no_defs ",sep=""))),
           width=buffer*1000),                  # convert buffer (km) to meters
         CRS(CRS_interp))                       # reproject back to original projection
       interp_areab=interp_areaB                # R is case-sensitive: later code reads `interp_areab`, so bind the buffered region to that name
     #  interp_area=interp_areaB                 # replace original region with buffered region
     }
     ## get bounding box of study area
     bbox=bbox(interp_areab)
     ### read in station location information from database
     ### use the bbox of the region to include only station in rectangular region to speed up overlay
     ### (row 2 of bbox supplies the latitude limits and row 1 the longitude limits; column 1 is the lower and column 2 the upper limit)
     dat_stat=dbGetQuery(db, paste("SELECT id,name,latitude,longitude
                       FROM stations
                       WHERE latitude>=",bbox[2,1]," AND latitude<=",bbox[2,2],"
                       AND longitude>=",bbox[1,1]," AND longitude<=",bbox[1,2],"
                       ;",sep=""))
     ## promote the query result to a spatial points object in the study area's coordinate system
     coordinates(dat_stat)<-c("longitude","latitude")
     proj4string(dat_stat)<-CRS_interp
     # Spatial query to find relevant stations
     inside <- !is.na(over(dat_stat, as(interp_areab, "SpatialPolygons")))  #TRUE for stations that fall inside the polygon (over() returns NA for points outside)
     stat_roi<-dat_stat[inside,]                                            #Keep only the stations contained in the current interpolation area
     #stat_roi<-spTransform(stat_roi,CRS(new_proj))         # Project from WGS84 to new coord. system
     #Quick visualization of station locations
     plot(interp_area, axes =TRUE)
     plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
     #legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
     #legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
     #################################################################
     ### STEP 2: generate monthly means for climate-aided interpolation
     ##  A single query links station location information (stations) and observations (ghcn)
     ##  and returns one row per station/month/element holding the count and mean of the
     ##  values for two periods: year>=1970 & year<2000 (count30/value30) and
     ##  year>=2000 & year<2010 (count10/value10).
     ##  Divide value by 10 to convert to degrees C and mm
     ##  NOTE(review): the year limits are hard-coded in the SQL below; year_start and
     ##  year_end are not used by this query.
     ##  Drop missing values (-9999)
     ##  Drop observations that failed quality control (keep only qflag IS NULL)
     ##  The INNER JOIN keeps only station/month/element combinations present in both periods.
     ### first extract average daily values by month.
     system.time(
                d<<-dbGetQuery(db,  # create d (one row per station/month/element); <<- is used because the assignment sits inside system.time()
                               paste("SELECT station,month,element,count30,value30,count10,value10,latitude,longitude,elevation
                                      FROM
                                               (SELECT station,month,element,count(value) as count30,avg(value)/10.0 as value30,latitude,longitude,elevation
                                                FROM ghcn, stations
                                                WHERE station = id
                                                AND id IN ('",paste(stat_roi$id,collapse="','"),"')
                                                AND element IN ('",paste(var,collapse="','"),"')
                                                AND year>=",1970,"
                                                AND year<",2000,"
                                                AND value<>-9999
                                                AND qflag IS NULL
                                                GROUP BY station, month,latitude,longitude,elevation,element
                                                ) as a30
                                       INNER JOIN
                                                (SELECT station,month,element,count(value) as count10,avg(value)/10.0 as value10
                                                FROM ghcn, stations
                                                WHERE station = id
                                                AND id IN ('",paste(stat_roi$id,collapse="','"),"')
                                                AND element IN ('",paste(var,collapse="','"),"')
                                                AND year>=",2000,"
                                                AND year<",2010,"
                                                AND value<>-9999
                                                AND qflag IS NULL
                                                GROUP BY station, month,element
                                                ) as a10
                                           USING (station,element,month)
                                      ;",sep=""))
                 )  ### print used time in seconds  ~ 10 minutes
     ## Cache the query result on disk so the exploration below can be rerun without the database.
     ## sep="" is required: paste() defaults to sep=" ", which would insert a space between
     ## outpath and the file name (the pdf() call below already builds its path with sep="").
     save(d,file=paste(outpath,"stationarity.Rdata",sep=""))
     #####################################################################33
     #### Explore it
     load(paste(outpath,"stationarity.Rdata",sep=""))
     ## subset by # of observations?
     ## A station/month/element row is kept only when both periods are sufficiently complete.
     ## NOTE(review): 900 and 300 are presumably ~30 days x 30 years and ~30 days x 10 years -- confirm.
     thresh=.75 #threshold % to keep
     d$keep=d$count30/900>thresh&d$count10/300>thresh
     table(d$keep)
     ## create month factor
     ## levels are given explicitly so the 12 month names always line up with months 1..12,
     ## even if some month is absent from the data (factor() with labels alone would fail or mislabel).
     d$monthname=factor(as.integer(d$month),levels=1:12,labels=format(as.Date(paste(2000,1:12,1,sep="-")),"%B"),ordered=TRUE)
     ### start PDF
     pdf(paste(outpath,"ClimateStationarity.pdf",sep=""),width=11,height=8.5)
     library(latticeExtra)
     #combineLimits(useOuterStrips(xyplot(value10~value30|monthname+element,data=d[d$keep,],scales=list(relation="free",rot=0),cex=.5,pch=16,
     #                      ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
     #                      main="Comparison of Mean Daily Values",asp=1)))+
     #  layer(panel.abline(0,1,col="red"))+
     #  layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=.5,pos=2))
     ## One page per element: recent-period mean against earlier-period mean, one panel per month,
     ## with the 1:1 line in red and the R^2 of lm(y~x) printed in each panel.
     for(v in unique(d$element)){
       print(xyplot(value10~value30|monthname,data=d[d$keep&d$element==v,],scales=list(relation="free",rot=0),cex=.5,pch=16,
                    ylab="2000-2010 Mean Daily Value",xlab="1970-2000 Mean Daily Value",
                    main=paste("Comparison of Mean Daily Values for",v),asp=1)+
             layer(panel.abline(0,1,col="red"))+
             layer(panel.text(max(x),min(y),paste("R^2=",round(summary(lm(y~x))$r.squared,2)),cex=1,pos=2)))
     }
     ## look at deviances
     ## dif = recent-period mean minus earlier-period mean for each station/month/element
     d$dif=d$value10-d$value30
     trellis.par.set(superpose.symbol = list(col=c("blue","grey","green","red"),cex=.5,pch=16))
     ## One page per element: station locations, one panel per month, grouped by quartile of dif.
     ## include.lowest=TRUE keeps the station with the smallest dif in the first class; without it
     ## cut() returns NA for a value equal to the lowest break and that point is dropped from the map.
     for(v in unique(d$element)){
       print(xyplot(latitude~longitude|monthname,
                    groups=cut(dif,quantile(d$dif[d$keep&d$element==v],seq(0,1,length.out=5)),include.lowest=TRUE),
                    data=d[d$keep&d$element==v,],auto.key=list(space="right"),
                    main=paste("Current-Past anomolies for",v," (2000-2010 Daily Means Minus 1970-2000 Daily Means)"),
                    sub="Positive values indicate stations that were warmer/wetter in 2000-2010 than 1970-2000")+
             layer(sp.lines(as(interp_area,"SpatialLines"),col="black")))
     }
     dev.off()

climate/research/oregon/interpolation/Extraction_raster_covariates_study_area.R
71	71	sdata.u@data=cbind.data.frame(sdata.u@data,extract(subset(covar,subset=which(getZ(covar)!="00")), sdata.u)) #Extracting values from the raster stack for every point
72	72	sdata.u=sdata.u@data #drop the spatial-ness
73	73
	74	### add MODIS metric to station data for month corresponding to that date
74	75	### reshape for easy merging
75	76	sdata.ul=melt(sdata.u,id.vars=c("station","latitude","longitude","x","y"))
76	77	sdata.ul[,c("metric","type","month")]=do.call(rbind.data.frame,strsplit(as.character(sdata.ul$variable),"_"))

         "value ~ s(CLD_mean) + elev + ns + ew",
         "value ~ s(COT_mean) + elev + ns + ew",
         "value ~ s(CER_P20um) + elev + ns + ew",
         "value ~ s(CER_mean) + elev + ns + ew"
         "value ~ s(CER_mean) + elev + ns + ew",
         "value ~ s(CLD_mean) + s(CER_P20um) + elev + ns + ew",
         "value ~ s(COT_mean) + s(CLD_mean) + s(CER_P20um) + elev + ns + ew"
                                             #    "value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
     #    "value ~ s(x_OR83M,y_OR83M,CER_P20um) +s(x_OR83M,y_OR83M,CLD_mean) + elev + ns + ew",
     #    "value ~ s(x_OR83M,y_OR83M) + s(CER_P20um,CLD_mean) + elev + ns + ew",
-...
     ghcn.subsets <-lapply(dates, function(d) subset(ghcn@data, date==d)) #this creates a list of 10 subset data
       results=do.call(rbind.data.frame,                   # Collect the results in a single data.frame
        lapply(1:length(dates),function(i,savemodel=T,saveFullPrediction=T,scale=F,verbose=T) {            # loop over dates
        lapply(1:length(dates),function(i,savemodel=F,saveFullPrediction=F,scale=F,verbose=T) {            # loop over dates
          if(verbose)      print(paste("Starting Date:",dates[i]))
          date<-dates[i]                                  # get date
          month<-strftime(date, "%m")                     # get month

Also available in: Unified diff

Project

General

Profile

Revision 0924578a

Added by Adam Wilson over 12 years ago