Revision 41cf46ba
Added by Adam Wilson over 12 years ago
climate/procedures/MOD06_L2_data_compile.r

@@ -29 +29 @@
 setwd("/home/adamw/personal/projects/interp")
 setwd("/home/adamw/acrobates/projects/interp")
 
+## read in shapefile as Region of Interest (roi)
 roi=readOGR("data/regions/Test_sites/Oregon.shp","Oregon")
 roi=spTransform(roi,CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
 
+### use MODIS tile as ROI instead
+modt=readOGR("/home/adamw/acrobates/Global/modis_sinusoidal","modis_sinusoidal_grid_world")
+tiles=c("H9V4")
+roi=modt[modt$HV%in%tiles,]
+
+## Bounding box of region in lat/lon
+roi_ll=spTransform(roi,CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
+roi_bb=bbox(roi_ll)
+
 ### Downloading data from LAADSWEB
 # subset by geographic area of interest
-# subset: 40-47, -115--125
 
 ## download data from the ftp site. Unfortunately the subset has to be selected using the website and the order downloaded via ftp.
 
@@ -62 +71 @@
 
 ## output ROI
 #get bounding box of region in m
-ge=SpatialPoints(data.frame(lon=c(-125,-115),lat=c(40,47)))
-projection(ge)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
-ge2=spTransform(ge, CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
-
+#ge=SpatialPoints(data.frame(lon=c(-125,-115),lat=c(40,47)))
+#projection(ge)=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
+#ge2=spTransform(ge, CRS(" +proj=sinu +lon_0=0 +x_0=0 +y_0=0"))
 
 ## vars
 vars=as.data.frame(matrix(c(

@@ -89 +97 @@
 
 
 #### Function to generate hegtool parameter file for multi-band HDF-EOS file
-swath2grid=function(i=1,files,vars,outdir,upleft="47 -125",lowright="41 -115"){
+swath2grid=function(i=1,files,vars,outdir,upleft,lowright){
 file=fs$path[i]
 print(paste("Starting file",basename(file)))
 outfile=paste(outdir,"/",fs$file[i],sep="")
-# date=fs$date[1]
-# origin=as.POSIXct("1970-01-01 00:00:00",tz="GMT")
 ### First write the parameter file (careful, heg is very finicky!)
 hdr=paste("NUM_RUNS = ",length(vars),"|MULTI_BAND_HDFEOS:",length(vars),sep="")
 grp=paste("

@@ -109 +115 @@
 SPATIAL_SUBSET_LR_CORNER = ( ",lowright," )
 #RESAMPLING_TYPE =",ifelse(grepl("Flag|Mask|Quality",vars),"NN","CUBIC"),"
 RESAMPLING_TYPE =NN
-OUTPUT_PROJECTION_TYPE = SIN
+OUTPUT_PROJECTION_TYPE = UTM
+UTM_ZONE = 10
+# OUTPUT_PROJECTION_PARAMETERS = ( 6371007.181 0.0 0.0 0.0 0.0 0.0 0.0 0.0 86400.0 0.0 1.0 0.0 0.0 0.0 0.0 )
+# projection parameters from http://landweb.nascom.nasa.gov/cgi-bin/QA_WWW/newPage.cgi?fileName=is_gctp
 ELLIPSOID_CODE = WGS84
-OUTPUT_PROJECTION_PARAMETERS = ( 6371007.181 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 86400.0 0.0 0.0 )
 OUTPUT_TYPE = HDFEOS
 OUTPUT_FILENAME = ",outfile,"
 END
 
 ",sep="")
+
 ## if any remnants from previous runs remain, delete them
 if(length(list.files(tempdir(),pattern=basename(file)))>0)
 file.remove(list.files(tempdir(),pattern=basename(file),full=T))
 ## write it to a file
 cat(c(hdr,grp) , file=paste(tempdir(),"/",basename(file),"_MODparms.txt",sep=""))
-## now run the swath2grid tool - must be run as root (argh!)!
+## now run the swath2grid tool
 ## write the tiff file
 log=system(paste("sudo MRTDATADIR=/usr/local/heg/data ",
 "PGSHOME=/usr/local/heg/TOOLKIT_MTD PWD=/home/adamw /usr/local/heg/bin/swtif -p ",

@@ -135 +144 @@
 fs$complete=fs$file%in%list.files(outdir,pattern="hdf$")
 table(fs$complete)
 
-#### Run the gridding procedure
-
-system("sudo ls")
+## must be run as root to access the data directory! argh!
+## run sudo once here to enter password before continuing
+system('sudo ls')
 
+#### Run the gridding procedure
 mclapply(which(!fs$complete),function(fi){
 swath2grid(fi,vars=vars$variable,files=fs,
 outdir=outdir,
-upleft="47 -125",lowright="40 -115")},
+upleft=paste(roi_bb[2,2],roi_bb[1,1]),
+lowright=paste(roi_bb[2,1],roi_bb[1,2]))},
 mc.cores=24)

@@ -151 +162 @@
 
 #fs$grass=paste(fs$month,fs$year,fs$file,sep="_")
 td=readGDAL(paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""))
-projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 "
+#projection(td)="+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs +datum=WGS84 +ellps=WGS84 "
+projection(td)="+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
 
 ## function to convert binary to decimal to assist in identifying correct values
 b2d=function(x) sum(x * 2^(rev(seq_along(x)) - 1)) #http://tolstoy.newcastle.edu.au/R/e2/help/07/02/10596.html
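As a quick standalone check of the b2d() helper above: it maps a bit vector (most-significant bit first) to its decimal value, which helps match QA bit patterns against the integer cell values. The bit patterns below are illustrative, not actual MOD06 QA masks:

b2d=function(x) sum(x * 2^(rev(seq_along(x)) - 1))
b2d(c(1,0,1))    # 5, i.e. binary 101
b2d(c(0,1,1,0))  # 6, i.e. binary 0110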
@@ -169 +181 @@
 ## temporary objects to test function below
 i=1
 file=paste(outdir,"/",fs$file[1],sep="")
-date=as.Date("2000-03-02")
+date=as.Date("2000-05-23")
 
 
 ### Function to extract various SDSs from a single gridded HDF file and use QA data to throw out 'bad' observations
 loadcloud<-function(date,fs){
-### set up grass session
+### set up unique grass session
 tf=paste(tempdir(),"/grass", Sys.getpid(),"/", sep="")
 
 ## set up tempfile for this PID
 initGRASS(gisBase="/usr/lib/grass64",gisDbase=tf,SG=td,override=T,location="mod06",mapset="PERMANENT",home=tf,pid=Sys.getpid())
-system(paste("g.proj -c proj4=\"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +datum=WGS84 +units=m +no_defs\"",sep=""))
+# system(paste("g.proj -c proj4=\"+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +datum=WGS84 +units=m +no_defs\"",sep=""))
+system(paste("g.proj -c proj4=\"+proj=utm +zone=10 +ellps=WGS84 +datum=WGS84 +units=m +no_defs\"",sep=""))
 
-#system("g.mapset PERMANENT")
+## Define region by importing one raster.
 execGRASS("r.in.gdal",input=paste("HDF4_EOS:EOS_GRID:\"",outdir,"/",fs$file[1],"\":mod06:Cloud_Mask_1km_0",sep=""),
 output="modisgrid",flags=c("quiet","overwrite","o"))
 system("g.region rast=modisgrid save=roi --overwrite")
 system("g.region roi")
 system("g.region -p")
-# getLocationProj()
-
 
 ## Identify which files to process
 tfs=fs$file[fs$date==date]

@@ -253 +264 @@
 # system(paste("r.mapcalc \"CWP_",i,"=if(QA_CWP_",i,"&&QA_CWP2_",i,"&&CWP_",i,">=0,CWP_",i,"*0.009999999776482582,null())\"",sep=""))
 ## set CER to 0 in clear-sky pixels
 # system(paste("r.mapcalc \"CWP2_",i,"=if(CM_clear_",i,"==0,CWP_",i,",0)\"",sep=""))
-
 
 } #end loop through sub-daily files
 

@@ -269 +279 @@
 CLD_daily=max(",paste("if(isnull(CM_cloud_",1:nfs,"),0,CM_cloud_",1:nfs,")",sep="",collapse=","),")
 EOF",sep=""))
 
-#### Write the file to a geotiff
+#### Write the files to a geotiff
 execGRASS("r.out.gdal",input="CER_daily",output=paste(tifdir,"/CER_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
 execGRASS("r.out.gdal",input="COT_daily",output=paste(tifdir,"/COT_",format(date,"%Y%m%d"),".tif",sep=""),nodata=-999,flags=c("quiet"))
 execGRASS("r.out.gdal",input="CLD_daily",output=paste(tifdir,"/CLD_",format(date,"%Y%m%d"),".tif",sep=""),nodata=99,flags=c("quiet"))

@@ -315 +325 @@
 vs=expand.grid(type=unique(fs2$type),month=c("01","02","03","04","05","06","07","08","09","10","11","12"))
 
 ## identify which have been completed
-#done=
-# do.call(rbind,strsplit(list.files(summarydatadir),"_|[.]"))[,3]
-#table(done)
-#tdates=tdates[!done]
+done.vs=unique(do.call(rbind,strsplit(list.files(summarydatadir),"_|[.]"))[,c(1,3)])
+done.vs=paste(vs$type,vs$month,sep="_")%in%paste(done.vs[,1],done.vs[,2],sep="_")
+table(done.vs)
+vs=vs[!done.vs,]
 
+tfs=fs2$path[which(fs2$month==vs$month[i]&fs2$type==vs$type[i])]
+do.call(c,mclapply(2:length(tfs),function(f) identical(extent(raster(tfs[f-1])),extent(raster(tfs[f])))))
+raster(tfs[23])
 
-## process the summaries using the raster package
+## process the monthly summaries using the raster package
 mclapply(1:nrow(vs),function(i){
 print(paste("Loading ",vs$type[i]," for month ",vs$month[i]))
 td=stack(fs2$path[which(fs2$month==vs$month[i]&fs2$type==vs$type[i])])
 print(paste("Processing Metric ",vs$type[i]," for month ",vs$month[i]))
 calc(td,mean,na.rm=T,
 filename=paste(summarydatadir,"/",vs$type[i],"_mean_",vs$month[i],".tif",sep=""),
-format="GTiff")
+format="GTiff",overwrite=T)
 calc(td,sd,na.rm=T,
 filename=paste(summarydatadir,"/",vs$type[i],"_sd_",vs$month[i],".tif",sep=""),
-format="GTiff")
+format="GTiff",overwrite=T)
 if(vs$type[i]%in%c("CER")) {
 ## Calculate number of days with effective radius > 20um, found to be linked to precipitating clouds
 ## (Kobayashi 2007)
 calc(td,function(x) mean(ifelse(x<20,0,1),na.rm=T),
 filename=paste(summarydatadir,"/",vs$type[i],"_P20um_",vs$month[i],".tif",sep=""),
-format="GTiff")
+format="GTiff",overwrite=T)
 # td2[td2<20]=0
 # td2[td2>=20]=1
 # calc(td2,mean,na.rm=T,
 # filename=paste(summarydatadir,"/",vs$type[i],"_P20um_",vs$month[i],".tif",sep=""),
-# format="GTiff")
+# format="GTiff",overwrite=T)
 }
 print(paste("Processing missing data for ",vs$type[i]," for month ",vs$month[i]))
 calc(td,function(i)
 sum(!is.na(i)),filename=paste(summarydatadir,"/",vs$type[i],"_count_",vs$month[i],".tif",sep=""),
-format="GTiff")
+format="GTiff",overwrite=T)
 calc(td,function(i) sum(ifelse(i==0,0,1)),
-filename=paste(summarydatadir,"/",vs$type[i],"_clear_",vs$month[i],".tif",sep=""),format="GTiff")
+filename=paste(summarydatadir,"/",vs$type[i],"_clear_",vs$month[i],".tif",sep=""),format="GTiff",overwrite=T)
 gc()
 }
 )
 
 
+vs[c(10:18,22:24,34:36),]
+
+#### reproject to Oregon state plane for easy comparison with Benoit's data
+dest=raster("data/regions/oregon/lulc/W_Layer1_ClippedTo_OR83M.rst") # choose one to match (projection, resolution, extent)
+projection(dest)=CRS("+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs") #define projection
+tifs=list.files(summarydatadir,pattern="*.tif$",full=T) #get list of files to process
+mclapply(tifs,function(f) projectRaster(raster(f),dest,filename=paste(dirname(f),"/OR03M_",basename(f),sep=""),overwrite=T)) # warp them
climate/research/oregon/interpolation/Extraction_raster_covariates_study_area.R

@@ -19 +19 @@
 library(spdep) # Spatial package with methods and spatial stat. by Bivand et al.
 library(rgdal) # GDAL wrapper for R, spatial utilities
 library(raster) # Raster package for image processing by Hijmans et al.
+library(reshape)
 
 ###Parameters and arguments
 
 path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations" #Path to all datasets on Atlas
+path<-"/home/adamw/acrobates/projects/interp/data/regions/oregon" #Path to all datasets on Atlas
+
 setwd(path) #Setting the working directory
 
-infile1<-"ghcn_data_TMAXy2010_2010_OR_0626012.shp" #Weather station locations with interp. var. (TMAX, TMIN or PRCP)
-infile1<-"/home/wilson/data/ghcn_data_PRCPy2010_OR_20110705.shp" #User defined output prefix
+infile1<-"station_monthly_PRCP_20120705.shp" #Weather station locations with interp. var. (TMAX, TMIN or PRCP)
+
+inlistf<-"covar.txt" #Covariates as list of raster files and output names separated by space
+
+#Name of raster files should come with extension
+### Alternatively, create this list using script: ppt_build_covariatelist.r
 
-inlistf<-"list_files_05032012.txt" #Covariates as list of raster files and output names separated by space
-#Name of raster files should come with extension
-outfile<-'ghcn_or_ppt_covariates_20120705_OR83M.shp' #Name of the new shapefile created with covariates extracted at station locations
-outpath="/home/wilson/data/"
+## a complicating factor here is that we don't have write permission to others' folders, so may need to change things as follows for a personal directory...
+outfile<-'station_monthly_PRCP_covariates_20120705' #Name of the new shapefile created with covariates extracted at station locations
+outpath=path
+#inlistf<-paste(outpath,"/covar.txt",sep="") #Covariates as list of raster files and output names separated by space
 
 #######START OF THE SCRIPT #############
 
 ###Reading the station data
-filename<-sub(".shp","",infile1) #Removing the extension from file.
-ghcn3<-readOGR(dirname(infile1),layer=basename(infile1)) #Reading shape file using rgdal library
+sdata<-readOGR(dirname(infile1),layer=sub(".shp","",basename(infile1))) #Reading shape file using rgdal library
 
 ###Extracting the variable values from the raster files
-lines<-read.table(paste(path,"/",inlistf,sep=""), sep=" ") #Column 1 contains the names of raster files
-inlistvar<-lines[,1]
-inlistvar<-paste(path,"/",as.character(inlistvar),sep="")
-covar_names<-as.character(lines[,2]) #Column two contains short names for covariates
+covarf<-read.table(paste(path,"/",inlistf,sep=""), stringsAsFactors=F) #Column 1 contains the names of raster files
 
-s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables.
-layerNames(s_raster)<-covar_names #Assigning names to the raster layers
-stat_val<- extract(s_raster, ghcn3) #Extracting values from the raster stack for every point location in coords data frame.
+covar<- stack(covarf$files) #Creating a stack of raster images from the list of variables.
 
+### Add slope/aspect transformations
+ns=sin(subset(covar,grep("slope",covarf$files,ig=T))*pi/180)*cos(subset(covar,grep("aspect",covarf$files,ig=T))*pi/180)
+ew=sin(subset(covar,grep("slope",covarf$files,ig=T))*pi/180)*sin(subset(covar,grep("aspect",covarf$files,ig=T))*pi/180)
+covar=stack(covar,ns,ew)
+layerNames(covar)<-c(covarf$var,"ns","ew") #Assigning names to the raster layers
 
-#TODO: Add lon and lat as layers to make easier predictions
-#TODO: subset list to only those used (drop number of obs, etc.)
+## set time attribute to distinguish static and temporal variables
+covar=setZ(covar,z=c(sprintf("%02d",covarf$time),"00","00"),name="month") #assign month indicators (and 00s for static)
 
-#create a shape file and data_frame with names
+## add projection information
+projection(covar)=CRS(projection(sdata))
 
-data_RST<-as.data.frame(stat_val) #This creates a data frame with the values extracted
+#Extracting values from the raster stack for every point location in coords data frame.
+sdata@data=cbind.data.frame(sdata@data,extract(subset(covar,subset=which(getZ(covar)=="00")), sdata))
 
-data_RST_SDF<-cbind(ghcn3,data_RST)
-coordinates(data_RST_SDF)<-coordinates(ghcn3) #Transforming data_RST_SDF into a spatial point dataframe
-CRS<-proj4string(ghcn3)
-proj4string(data_RST_SDF)<-CRS #Need to assign coordinates...
+### get unique station locations
+sdata.u=sdata[!duplicated(sdata@data[,c("station","latitude","longitude")]),c("station","latitude","longitude")]
+sdata.u@data[,c("x","y")]=coordinates(sdata.u)
+sdata.u@data=cbind.data.frame(sdata.u@data,extract(subset(covar,subset=which(getZ(covar)!="00")), sdata.u)) #Extracting values from the raster stack for every point
+sdata.u=sdata.u@data #drop the spatial-ness
 
-#write out a new shapefile (including .prj component)
-outfile<-sub(".shp","",outfile) #Removing extension if it is present
+### reshape for easy merging
+sdata.ul=melt(sdata.u,id.vars=c("station","latitude","longitude","x","y"))
+sdata.ul[,c("metric","type","month")]=do.call(rbind.data.frame,strsplit(as.character(sdata.ul$variable),"_"))
+sdata.ul$metric=paste(sdata.ul$metric,sdata.ul$type,sep="_")
+sdata.ul2=cast(sdata.ul,station+month~metric)
+## create a station_month unique id. If a month column exists (as in monthly data), use it, otherwise extract it from the date column (as in daily data)
+if(!is.null(sdata$month)) stmo=paste(sdata$station,sprintf("%02d",sdata$month),sep="_")
+if(is.null(sdata$month)) stmo=paste(sdata$station,format(as.Date(sdata$date),"%m"),sep="_")
+
+### add monthly data to sdata table by matching unique station_month ids.
+sdata@data[,unique(sdata.ul$metric)]=sdata.ul2[
+match(stmo,paste(sdata.ul2$station,sprintf("%02d",sdata.ul2$month),sep="_")),
+unique(sdata.ul$metric)]
+
+###################################################################
+### save the data
 
 ## save the raster stack for prediction
-writeRaster(s_raster,filename=paste(outpath,"covariates",sep=""))
+writeRaster(covar,format="raster",filename=paste(path,"/covariates",sep=""),overwrite=T)
+## save layer Zs as a separate csv file because they are lost when saving, argh!
+write.table(getZ(covar),paste(path,"/covariates-Z.csv",sep=""),row.names=F,col.names=F,sep=",")
 
 #Save a textfile and shape file of all the subset data
-write.table(as.data.frame(data_RST_SDF),paste(outfile,".txt",sep=""), sep=",")
-writeOGR(data_RST_SDF, paste(outpath,outfile, ".shp", sep=""), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
+#write.table(as.data.frame(data_RST_SDF),paste(outpath,"/",outfile,".txt",sep=""), sep=",")
+writeOGR(sdata, outpath, outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
 
 ##### END OF SCRIPT ##########
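The slope/aspect transformation added above is the usual northness/eastness decomposition, with slope and aspect assumed to be in degrees; a minimal numeric check with made-up values:

slope=c(0,30,30)   # degrees
aspect=c(0,0,90)   # degrees clockwise from north
sin(slope*pi/180)*cos(aspect*pi/180)  # northness: 0.0 0.5 0.0
sin(slope*pi/180)*sin(aspect*pi/180)  # eastness:  0.0 0.0 0.5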
climate/research/oregon/interpolation/GAM.R

@@ -20 +20 @@
 library(rgdal)
 library(multicore) # if installed allows easy parallelization
 library(reshape) # very useful for switching from 'wide' to 'long' data formats
+library(lattice); library(latticeExtra)
+library(raster)
 
 ###Parameters and arguments
 
-infile1<-"ghcn_or_ppt_covariates_20120705_OR83M.shp"
+infile1<-"station_monthly_PRCP_covariates_20120705.shp"
+monthly=T # indicate if these are monthly or daily data
+
 path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations"
-#path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations"
+path<-"/home/adamw/acrobates/projects/interp/data/regions/oregon" #Path to all datasets on Atlas
+
+#path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations"
 #path<-"H:/Data/IPLANT_project/data_Oregon_stations"
-path="/home/wilson/data"
 setwd(path)
-infile2<-"dates_interpolation_03052012.txt" #List of 10 dates for the regression
-prop<-0.3 #Proportion of testing retained for validation
-n_runs<- 30 #Number of runs
-out_prefix<-"_20120705_run01_LST"
-infile3<-"LST_dates_var_names.txt" #List of LST averages.
-infile4<-"models_interpolation_05142012.txt"
+out_prefix<-"_20120713_precip"
+prop=0.7 # proportion of data used for fitting
+
+##################################
+## Load covariate raster brick
+covar=brick(paste(path,"/covariates.gri",sep=""))
+## update layer names (deleted in save)
+covar=setZ(covar,scan(paste(path,"/covariates-Z.csv",sep=""),what="char"),name="month")
 
 #######START OF THE SCRIPT #############
 
 ###Reading the station data and setting up for models' comparison
 filename<-sub(".shp","",infile1) #Removing the extension from file.
 ghcn<-readOGR(".", filename) #reading shapefile
-
-ghcn = transform(ghcn,Northness = cos(ASPECT*pi/180)) #Adding a variable to the dataframe
-ghcn = transform(ghcn,Eastness = sin(ASPECT*pi/180)) #adding variable to the dataframe.
-ghcn = transform(ghcn,Northness_w = sin(slope*pi/180)*cos(ASPECT*pi/180)) #Adding a variable to the dataframe
-ghcn = transform(ghcn,Eastness_w = sin(slope*pi/180)*sin(ASPECT*pi/180)) #adding variable to the dataframe.
+
 #Note that "transform" output is a data.frame not spatial object
 #set.seed(100) #modify this to a seed variable allowing different runs.
 
-dates <-readLines(paste(path,"/",infile2, sep=""))
-LST_dates <-readLines(paste(path,"/",infile3, sep=""))
-models <-readLines(paste(path,"/",infile4, sep=""))
+#dates <-readLines(paste(path,"/",infile2, sep=""))
+#LST_dates <-readLines(paste(path,"/",infile3, sep=""))
+#models <-readLines(paste(path,"/",infile4, sep=""))
 
+if(monthly) ghcn$date=as.Date(paste(2000,ghcn$month,15,sep="-")) # generate dates for monthly (climate) data
+dates=unique(ghcn$date)
 #results <- matrix(1,length(dates),14) #This is a matrix containing the diagnostic measures from the GAM models.
 
-#### Define GAM models
-var="tmax"
 
+#### Define GAM models
 mods=data.frame(formula=c(
-paste(var,"~ s(lat) + s (lon) + s (ELEV_SRTM)",sep=""),
-paste(var,"~ s(lat,lon,ELEV_SRTM)",sep=""),
-paste(var,"~ s(lat) + s (lon) + s (ELEV_SRTM) + s (Northness)+ s (Eastness) + s(DISTOC)",sep=""),
-paste(var,"~ s(lat) + s (lon) + s(ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC) + s(LST)",sep=""),
-paste(var,"~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST)",sep=""),
-paste(var,"~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC1)",sep=""),
-paste(var,"~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST,LC3)",sep=""),
-paste(var,"~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC) + s(LST) + s(LC1)",sep="")
+"value ~ s(x_OR83M,y_OR83M)",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_mean)",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CER_P20um)",
+"value ~ s(x_OR83M,y_OR83M) + elev + ns + ew + s(CER_P20um)",
+"value ~ s(x_OR83M,y_OR83M,CER_P20um) + elev + ns + ew",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc,CER_P20um)+elev + ns + ew",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(CLD_mean)",
+"value ~ s(x_OR83M,y_OR83M) + s(distoc) + elev + ns + ew + s(COT_mean)"
 ),stringsAsFactors=F)
 mods$model=paste("mod",1:nrow(mods),sep="")
 
+## confirm all model terms are in covar raster object for prediction
+terms=gsub("[ ]|s[(]|[)]","", # clean up model terms (remove "s(", etc.)
+unique(do.call(c, # find unique terms
+lapply(mods$formula,function(i) #get terms from all models & split smoothed terms
+unlist(strsplit(attr(terms(as.formula(i)),"term.labels"),split=","))))))
+if(all(terms%in%layerNames(covar)))
+print("All model terms are present in the raster object") else
+warning("Some model terms not present in raster object, prediction may fail for some models")
+
+### define transformation functions
+##Transform response?
+transform=T
+if(transform) {
+trans=function(x) log(x+1)
+itrans=function(x) exp(x)-1
+}
+if(!transform) {
+trans=function(x) x
+itrans=function(x) x
+}
+
 
 ### subset dataset?
-ghcn_all<-ghcn
-ghcn_test<-subset(ghcn,ghcn$tmax>-150 & ghcn$tmax<400)
-ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0)
-ghcn<-ghcn_test2
+#sdata_all<-sdata
+#sdata_test<-subset(sdata,sdata$tmax>-150 & sdata$tmax<400)
+#sdata_test2<-subset(sdata_test,sdata_test$ELEV_SRTM>0)
+#sdata<-sdata_test2
 
 
 ## loop through the dates...
 
-ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of data subsets, one per date
+ghcn.subsets <-lapply(dates, function(d) subset(ghcn@data, date==d)) #this creates a list of data subsets, one per date
 
 results=do.call(rbind.data.frame, # Collect the results in a single data.frame
-mclapply(1:length(dates),function(i) { # loop over dates
-date<-strptime(dates[i], "%Y%m%d") # get date
+lapply(1:length(dates),function(i) { # loop over dates
+date<-dates[i] # get date
 month<-strftime(date, "%m") # get month
-LST_month<-paste("mm_",month,sep="") # get LST_month name
+# LST_month<-paste("mm_",month,sep="") # get LST_month name
+
+## subset full raster brick to include correct month of satellite data
+covarm=subset(covar,subset=which(covar@z$month=="00"|covar@z$month==month))
+## update layer names to match those in ghcn table
+layerNames(covarm)[getZ(covarm)!="00"]=sub(paste("_",month,sep=""),"",layerNames(covarm)[getZ(covarm)!="00"])
 
+## extract subset of data for this day
+tdata=ghcn.subsets[[i]]
 ##Regression part 1: Creating a validation dataset by creating training and testing datasets
-mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
-ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod)
+# mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
+# ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod)
 ##Screening LST values
 ##ghcn.subsets[[i]]<-subset(ghcn.subsets[[i]],ghcn.subsets[[i]]$LST> 258 & ghcn.subsets[[i]]$LST<313)
-n<-nrow(ghcn.subsets[[i]])
+
+## transform the response
+tdata$value<-trans(tdata$value)
+tdata$weights=tdata$count/max(tdata$count)
+
+n<-nrow(tdata)
 ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows
 nv<-n-ns #create a sample for validation with prop of the rows
-ind.training <- sample(nrow(ghcn.subsets[[i]]), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
-ind.testing <- setdiff(1:nrow(ghcn.subsets[[i]]), ind.training)
-data_s <- ghcn.subsets[[i]][ind.training, ]
-data_v <- ghcn.subsets[[i]][ind.testing, ]
+ind.training <- sample(nrow(tdata), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
+ind.testing <- setdiff(1:nrow(tdata), ind.training)
+data_s <- tdata[ind.training, ]
+data_v <- tdata[ind.testing, ]
 
 ## lapply loops through models for the ith day, calculates the validation metrics, and saves them as R objects
 results=do.call(rbind.data.frame,
-lapply(1:nrow(mods),function(m,savemodel=F,saveFullPrediction=T) {
-## run the model
-mod=gam(formula(mods$formula[m]),data=data_s)
+lapply(1:nrow(mods),function(m,savemodel=F,saveFullPrediction=F) {
+## will gam() fail? If so, return NAs instead of crashing
+err=try(gam(formula(mods$formula[m]),data=data_s),silent=T)
+if(length(attr(err,"class"))==1) if(attr(err,"class")=="try-error")
+return(## this table must match the results table below
+data.frame(date=dates[i],month=format(dates[i],"%m"),
+model=mods$model[m],form=mods$formula[m],ns=ns,
+AIC=NA, GCV=NA,DEV=NA,RMSE=NA,R2=NA))
+## run the models
+
+mod=gam(formula(mods$formula[m]),data=data_s,weights=data_s$weights)
 
-##VALIDATION: Prediction checking the results using the testing data########
+##VALIDATION: Prediction checking the results using the testing data ########
 y_mod<- predict(mod, newdata=data_v, se.fit = TRUE) #Using the coeff to predict new values.
-res_mod<- data_v$tmax - y_mod$fit #Residuals for GAM model that resembles the ANUSPLIN interpolation
-
+res_mod<- itrans(data_v$value) - itrans(y_mod$fit) #Residuals
+plot(itrans(y_mod$fit)~itrans(data_v$value),cex=data_v$weights);abline(0,1)
+
 ##Regression part 3: Calculating and storing diagnostic measures
 tresults=data.frame( # build 1-row dataframe for this model-date
 date=dates[i], # interpolation date
+month=format(dates[i],"%m"), # interpolation month
 model=mods$model[m], # model number
+form=mods$formula[m], # model formula
 ns=ns, # number of stations used in the training stage
 AIC=AIC(mod), # AIC
 GCV=mod$gcv.ubre, # GCV
 DEV=mod$deviance, # Deviance
-RMSE=sqrt(sum(res_mod^2)/nv) # RMSE
+RMSE=sqrt(sum(res_mod^2,na.rm=T)/nv), # RMSE
+R2=summary(lm(itrans(y_mod$fit)~itrans(data_v$value)))$r.squared # R^2
 )
-
+
+
 ## Save the model object if desired
-if(savemodel) save(mod,file= paste(path,"/","results_GAM_Model","_",out_prefix,"_",dates[i],".Rdata",sep=""))
+if(savemodel) save(mod,file= paste(path,"/","results_GAM_Model","_",out_prefix,"_",
+dates[i],"_",mods$model[m],".Rdata",sep=""))
 
 ## do the full prediction and save it if desired
 if(saveFullPrediction){
-s_raster=readRaster(filename=paste(path,"covariates.gri"))
-predict(s_raster,mod,filename=paste(outpath,"_",sub("-","",date),"_prediction.tif",sep=""),format="GTiff",progress="text",fun="predict")
-predict(s_raster,mod,filename=paste(outpath,"_",sub("-","",date),"_prediction.se.tif",sep=""),format="GTiff",progress="text",fun="predict.se")
+p1=predict(covarm,mod,progress="text",fun="predict")
+p1=predict(mod,sdata@data,type="response",progress="text",fun="predict")
+
+predict(covarm,mod,type="response",
+filename=paste(outpath,"/",gsub("-","",date),"_",mods$model[m],"_prediction.tif",sep=""),
+format="GTiff",progress="text",fun="predict",overwrite=T)
+predict(covarm,mod,filename=paste(outpath,"/",gsub("-","",date),"_",mods$model[m],"_prediction.se.tif",sep=""),
+format="GTiff",progress="text",fun="predict.se",overwrite=T)
 }
 
 ## print progress

@@ -138 +196 @@
 return(tresults)
 # end of the for loop #2 (nested in loop #1)
 }))
+## identify model with minimum AIC
+results$lowAIC=F
+results$lowAIC[which.min(results$AIC)]=T
+
+## print progress and return the results
 print(paste("Finished Date:",dates[i]))
 return(results)
 }

@@ -147 +210 @@
 
 write.table(results_RMSE_all2, file= paste(path,"/","results_GAM_Assessment",out_prefix,"all.txt",sep=""), sep=",", col.names=TRUE)
 
-resl=melt(results,id=c("run","date","model","ns"))
+resl=melt(results,id=c("date","month","model","form","ns","lowAIC"))
+
+xyplot(value~date|variable,groups=form,data=resl,scales=list(y=list(relation="free")),auto.key=T)
+
+xyplot(form~value|variable,groups=month,data=resl,scales=list(x=list(relation="free")),auto.key=T)
+
+bwplot(form~value|variable,data=resl,scales=list(x=list(relation="free")))
 
-xyplot(value~date|variable,groups=model,data=resl,scales=list(y=list(relation="free")),auto.key=list())
+resl2=cast(resl,date+month+variable~model)
+xyplot(mod5~mod6|variable,groups=month,data=resl2,scales=list(relation="free"))+layer(panel.abline(0,1))
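The log(x+1) response transformation used in the script above comes with an exact inverse, so predictions can be back-transformed before computing residuals; a quick sanity check with illustrative values:

trans=function(x) log(x+1)
itrans=function(x) exp(x)-1
x=c(0,0.5,10,250)               # e.g. mm of precipitation
all.equal(itrans(trans(x)),x)   # TRUE: itrans() exactly inverts trans()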
climate/research/oregon/interpolation/GHCND_stations_extraction_study_area.R

@@ -19 +19 @@
 library(spdep) # Spatial package with methods and spatial stat. by Bivand et al.
 library(rgdal) # GDAL wrapper for R, spatial utilities
 library(rgeos) # Polygon buffering and other vector operations
+library(reshape)
 
 ### Parameters and arguments
 

@@ -31 +32 @@
 infile2<-"ghcnd-stations.txt" #This is the textfile of station locations from GHCND
 new_proj<-"+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
 
+
 path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations/" #Jupiter LOCATION on EOS/Atlas
 #path<-"H:/Data/IPLANT_project/data_Oregon_stations" #Jupiter Location on XANDERS
 
 outpath=path # create different output path because we don't have write access to others' home dirs
 setwd(path)
-out_prefix<-"y2010_2010_OR_20110705" #User defined output prefix
+out_prefix<-"20120705" #User defined output prefix
 
 #for Adam
-#outpath="/home/wilson/data"
-#out_prefix<-"y2010_OR_20110705" #User defined output prefix
+outpath="/home/wilson/data/oregon"
+
 
 ############ START OF THE SCRIPT #################
 

@@ -85 +87 @@
 
 # Spatial query to find relevant stations
 inside <- !is.na(over(dat_stat, as(interp_area, "SpatialPolygons"))) #Finding stations contained in the current interpolation area
-stat_OR<-dat_stat[inside,] #Finding stations contained in the current interpolation area
+stat_roi<-dat_stat[inside,] #Finding stations contained in the current interpolation area
+stat_roi<-spTransform(stat_roi,CRS(new_proj)) # Project from WGS84 to new coord. system
 
 #Quick visualization of station locations
 plot(interp_area, axes =TRUE)
-plot(stat_OR, pch=1, col="red", cex= 0.7, add=TRUE)
+plot(stat_roi, pch=1, col="red", cex= 0.7, add=TRUE)
 #legend("topleft", pch=1,col="red",bty="n",title= "Stations",cex=1.6)
 
 #### STEP 2: Connecting to the database and querying for relevant data

@@ -101 +104 @@
 ## Drop missing values (-9999)
 ## Drop observations that failed quality control (keep only qflag==NA)
 system.time(
-data_table<<-dbGetQuery(db,
+dd<<-dbGetQuery(db, #save object dd (data daily)
 paste("SELECT station,year||'-'||month||'-'||day AS date,value / 10.0 as value,latitude,longitude,elevation
 FROM ghcn, stations
 WHERE station = id
-AND id IN ('",paste(stat_OR$id,collapse="','"),"')
+AND id IN ('",paste(stat_roi$id,collapse="','"),"')
 AND element='",var,"'
 AND year>=",year_start,"
 AND year<",year_end,"

@@ -114 +117 @@
 ;",sep=""))
 ) ### prints elapsed time in seconds - only taking ~30 seconds....
 
-
-#Transform the subset data frame in a spatial data frame and reproject
-data3<-data_table #Make a copy of the data frame
-coords<- data3[c('longitude','latitude')] #Define coordinates in a data frame
-coordinates(data3)<-coords #Assign coordinates to the data frame
-proj4string(data3)<-CRS_interp #Assign coordinates reference system in PROJ4 format
-data_proj<-spTransform(data3,CRS(new_proj)) #Project from WGS84 to new coord. system
+dd=dd[!is.na(dd$value),] #drop any NA values
+
+#################################################################
+### STEP 3: generate monthly means for climate-aided interpolation
+
+### first extract average daily values by month.
+system.time(
+dm<<-dbGetQuery(db, # create dm object (data monthly)
+paste("SELECT station,month,count(value) as count,avg(value)/10.0 as value,latitude,longitude,elevation
+FROM ghcn, stations
+WHERE station = id
+AND id IN ('",paste(stat_roi$id,collapse="','"),"')
+AND element='",var,"'
+AND year>=",2000,"
+AND year<",2020,"
+AND value<>-9999
+AND qflag IS NULL
+GROUP BY station, month,latitude,longitude,elevation
+;",sep=""))
+) ### prints elapsed time in seconds - only taking ~30 seconds....
+
+### drop months with fewer than 75% of the data observations
+### is 75% the right number?
+#dm=dm[dm$count>=(.75*10*30),]
+
+## add the monthly data to the daily table
+dd$month=as.numeric(as.character(format(as.Date(dd$date),"%m")))
+dd$avgvalue=dm$value[match(paste(dd$station,dd$month),paste(dm$station,dm$month))]
+dd$avgcount=dm$count[match(paste(dd$station,dd$month),paste(dm$station,dm$month))]
+
+### Generate log-transformed ratio of daily:monthly ppt and add it to daily data
+if(var=="PRCP"){
+dd$anom=log((1+dd$value)/(1+dd$avgvalue))
+dd$anom[dd$anom==Inf|dd$anom==-Inf]=NA
+}
+
+### Generate temperature anomalies for each daily value
+if(var!="PRCP"){
+dd$anom=dd$value-dd$avgvalue
+}
+
+
+### Transform the daily data frame into a spatial data frame and reproject
+dd_sp<-SpatialPointsDataFrame (dd[,c('longitude','latitude')],data=dd,proj=CRS(CRS_interp))
+dd_sp<-spTransform(dd_sp,CRS(new_proj)) # Project from WGS84 to new coord. system
+
+### Transform the monthly data frame into a spatial data frame and reproject
+dm_sp<-SpatialPointsDataFrame (dm[,c('longitude','latitude')],data=dm,proj=CRS(CRS_interp))
+dm_sp<-spTransform(dm_sp,CRS(new_proj)) # Project from WGS84 to new coord. system
+
+################################################################
+### STEP 4: Save results and output in a textfile and a shape file
 
-### STEP 3: Save results and outuput in textfile and a shape file
+## Save shapefile of station locations
+writeOGR(stat_roi,outpath,paste("station_location_",var,"_",out_prefix,sep=""),driver ="ESRI Shapefile")
 
-#Save a textfile of the locations of meteorological stations in the study area
-write.table(as.data.frame(stat_OR), file=paste(outpath,"/","location_study_area_",out_prefix,".txt",sep=""),sep=",")
+## save shapefile of daily observations
+writeOGR(dd_sp,outpath,paste("/station_daily_",var,"_",out_prefix,sep=""), driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
 
-#Save a textfile and shape file of all the subset data
-write.table(data_table, file= paste(outpath,"/","ghcn_data_",var,out_prefix,".txt",sep=""), sep=",")
-#outfile<-paste(path,"ghcn_data_",var,out_prefix,sep="") #Removing extension if it is present
-outfile<-paste(outpath,"/ghcn_data_",var,out_prefix,sep="") #Name of the file
-writeOGR(data_proj, paste(outfile, "shp", sep="."), outfile, driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
+### write monthly data
+writeOGR(dm_sp, outpath, paste("/station_monthly_",var,"_",out_prefix,sep=""), driver ="ESRI Shapefile") #Note that the layer name is the file name without extension
 
 ##### END OF SCRIPT ##########
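The precipitation anomaly defined above is a log ratio of the daily value to the station's monthly mean, with 1 added to both so that zero rainfall stays finite; a small worked example (values are made up):

daily=c(0,5,20); monthly=rep(5,3)
anom=log((1+daily)/(1+monthly))  # -1.79 0.00 1.25: <0 drier, >0 wetter than the monthly mean
anom[is.infinite(anom)]=NA       # same intent as the Inf guard in the script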
climate/research/oregon/interpolation/covariates.r

@@ -0 +1 @@
+### script to assemble, reproject and align covariate raster datasets.
+
+###Loading R library and packages
+library(sp) # Spatial package with class definitions by Bivand et al.
+library(spdep) # Spatial package with methods and spatial stat. by Bivand et al.
+library(rgdal) # GDAL wrapper for R, spatial utilities
+library(raster) # Raster package for image processing by Hijmans et al.
+
+###Parameters and arguments
+path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations" #Path to all datasets on Atlas
+path<-"/home/adamw/acrobates/projects/interp" #Path to all datasets on Atlas
+
+region="oregon"
+datapath=paste(path,"/data/regions/",region,sep="")
+
+setwd(path) #Setting the working directory
+
+## define working projection
+dproj="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs" #define projection
+
+## load roi polygon
+
+### use MODIS tile as ROI instead
+modt=readOGR("/home/adamw/acrobates/Global/modis_sinusoidal","modis_sinusoidal_grid_world")
+tiles=c("H9V4")
+roi=modt[modt$HV%in%tiles,]
+
+## Bounding box of region in lat/lon
+roi_ll=spTransform(roi,CRS("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"))
+roi_bb=bbox(roi_ll)
+
+## Bounding box of region in lcc
+roi_lcc=bbox(spTransform(roi,CRS(dproj)))
+
+## Get DEM data
+## mount atlas to subset elevation data
+system("sshfs -o idmap='user' wilson@atlas.nceas.ucsb.edu:/ /media/data/atlas")
+## run subset on atlas
+system(paste("ssh atlas \"gdalwarp -r cubic -multi -t_srs \'",dproj,"\' -te ",paste(as.numeric(roi_lcc),collapse=" ")," /home/layers/data/terrain/dem-fused/SRTM_West_S60_N55.tif /home/wilson/data/oregon/topo/dem.tif \"",sep=""))
+## copy to litoria
+file.copy("/media/data/atlas/home/wilson/data/oregon/topo/dem.tif",paste(datapath,"/topo/dem.tif",sep=""),overwrite=T)
+
+
+### Resample fine resolution DEM to generate 1km version
+#GDALinfo(paste(datapath,"/topo/dem.tif",sep=""))
+#dem2=aggregate(raster(paste(datapath,"/topo/dem.tif",sep="")),fact=7)
+
+
+dest=raster("data/lulc/W_Layer1_ClippedTo_OR83M.rst") # choose one to match (projection, resolution, extent)
+projection(dest)=CRS()
+tifs=list.files(summarydatadir,pattern="*.tif$",full=T) #get list of files to process
+mclapply(tifs,function(f) projectRaster(raster(f),dest,filename=paste(dirname(f),"/OR03M_",basename(f),sep=""),overwrite=T)) # warp them
+
+
+#### Topography
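One detail worth noting in the gdalwarp call above: bbox() returns a 2x2 matrix (rows x,y; columns min,max), and because R matrices flatten column-major, as.numeric() yields xmin ymin xmax ymax, which is exactly the order gdalwarp's -te flag expects. A tiny demonstration with a dummy geometry:

library(sp)
pts=SpatialPoints(cbind(x=c(100,200),y=c(10,20)))
bb=bbox(pts)
as.numeric(bb)  # 100 10 200 20 = xmin ymin xmax ymax, the -te argument order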
climate/research/oregon/interpolation/ppt_build_covariatelist.r

@@ -0 +1 @@
+### script to build the list of covariates for use by the "extraction_raster_covariates_study_area.R" script for precipitation
+### should be run while in the environment set up by that script...
+
+### Or build list of covariates to include
+covarf=data.frame(files=c(
+# list.files(pattern="month.*_rescaled.*rst$",full=T), # LST values
+list.files(path,pattern="coord.*rst$",full=T,recursive=T), # coordinates
+list.files(path,pattern="SRTM.*rst$",full=T,recursive=T), # topography
+list.files(path,pattern="W_Layer.*M.rst$",full=T,recursive=T), # LULC
+list.files(path,pattern="DIST.rst$",full=T,recursive=T), # DISTOC
+list.files(path,pattern="W_Global.*M.rst$",full=T,recursive=T), #canopy
+list.files(path,pattern="[_]mean[_]|P20um[_]",full=T,recursive=T) # MOD06
+),stringsAsFactors=F)
+
+covarf$var=c(
+# paste("mm_",c(10:12,1:9),sep=""),
+"x_OR83M","y_OR83M",
+"aspect","elev","slope",
+paste("LC",c(10,1:9),sep=""),
+"distoc","canheight",
+sub("OR03M_","",sub(".tif","",basename(list.files(path,pattern="[_]mean[_]|P20um[_]",recursive=T))))
+)
+
+### add column to indicate temporally varying covariates (monthly data)
+covarf$time=c(rep(0,17),rep(1:12,4)) # vector indicating which of the covar files have a month and which are static (0)
+
+
+## check it
+covarf
+
+
+#write it out
+write.table(covarf,file="covar.txt")
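The time column above assigns one entry per raster file: 0 for static layers and the month number for the monthly MOD06 summaries (17 static files, then four variables x 12 months in this list). A reduced illustration with two static files and two hypothetical monthly variables:

files=c("elev.rst","slope.rst",
paste("CER_mean_",sprintf("%02d",1:12),".tif",sep=""),
paste("CLD_mean_",sprintf("%02d",1:12),".tif",sep=""))
time=c(rep(0,2),rep(1:12,2))  # 0 = static, 1-12 = month of the summary
data.frame(files,time)        # 26 rows, one per file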
Further updates to interpolation procedures, including:
1) bugfix in MOD06_L2_data_compile: root access is needed to run swtif in its current directory (bad design!)
2) switched to UTM output from swtif (from sinusoidal) because it looked like gdal was struggling with integerized sinusoidal
3) many updates to extraction_raster_covariates to simplify the design matrix (only one column for each MOD06 var, with the correct month, etc.)
4) added a script to build the covariate list for precipitation
5) updates to the GAM script to include transformation of the response and full prediction
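To make point 3 concrete: within the GAM loop the covariate brick is subset to a single month and the month suffix is stripped from the layer names, so every month exposes identical column names to the model formulas. A minimal sketch of that renaming step (layer names here are hypothetical):

month="06"
lnames=c("elev","distoc","CER_mean_06","CLD_mean_06")  # static + monthly layers for June
sub(paste("_",month,sep=""),"",lnames)                 # "elev" "distoc" "CER_mean" "CLD_mean"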