Revision 2a0267ad
Added by Benoit Parmentier about 12 years ago
climate/research/oregon/interpolation/methods_comparison_assessment_part3.R | ||
---|---|---|
1 |
##################################### METHODS COMPARISON part 3 ########################################## |
|
2 |
#################################### Spatial Analysis ############################################ |
|
3 |
#This script utilizes the R ojbects created during the interpolation phase. # |
|
4 |
#At this stage the script produces figures of various accuracy metrics and compare methods: # |
|
5 |
#- rank station in terms of residuals and metrics (MAE and RMSE) # |
|
6 |
#- calculate accuary metrics for climtology predictions... # |
|
7 |
#- spatial density of station network and accuracy metric # |
|
8 |
#- visualization of maps of prediction and difference for comparison # |
|
9 |
#AUTHOR: Benoit Parmentier # |
|
10 |
#DATE: 10/23/2012 # |
|
11 |
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#491 -- # |
|
12 |
################################################################################################### |
|
13 |
|
|
14 |
###Loading R library and packages |
|
15 |
library(gtools) # loading some useful tools such as mixedsort |
|
16 |
library(mgcv) # GAM package by Wood 2006 (version 2012) |
|
17 |
library(sp) # Spatial pacakge with class definition by Bivand et al. 2008 |
|
18 |
library(spdep) # Spatial package with methods and spatial stat. by Bivand et al. 2012 |
|
19 |
library(rgdal) # GDAL wrapper for R, spatial utilities (Keitt et al. 2012) |
|
20 |
library(gstat) # Kriging and co-kriging by Pebesma et al. 2004 |
|
21 |
library(automap) # Automated Kriging based on gstat module by Hiemstra et al. 2008 |
|
22 |
library(spgwr) |
|
23 |
library(gpclib) |
|
24 |
library(maptools) |
|
25 |
library(graphics) |
|
26 |
library(parallel) # Urbanek S. and Ripley B., package for multi cores & parralel processing |
|
27 |
library(raster) |
|
28 |
library(rasterVis) |
|
29 |
library(plotrix) #Draw circle on graph |
|
30 |
library(reshape) |
|
31 |
## Functions |
|
32 |
#loading R objects that might have similar names |
|
33 |
load_obj <- function(f) |
|
34 |
{ |
|
35 |
env <- new.env() |
|
36 |
nm <- load(f, env)[1] |
|
37 |
env[[nm]] |
|
38 |
} |
|
39 |
|
|
40 |
###Parameters and arguments |
|
41 |
|
|
42 |
infile1<- "ghcn_or_tmax_covariates_06262012_OR83M.shp" #GHCN shapefile containing variables for modeling 2010 |
|
43 |
#infile2<-"list_10_dates_04212012.txt" #List of 10 dates for the regression |
|
44 |
infile2<-"list_365_dates_04212012.txt" #list of dates |
|
45 |
infile3<-"LST_dates_var_names.txt" #LST dates name |
|
46 |
infile4<-"models_interpolation_05142012.txt" #Interpolation model names |
|
47 |
infile5<-"mean_day244_rescaled.rst" #mean LST for day 244 |
|
48 |
inlistf<-"list_files_05032012.txt" #list of raster images containing the Covariates |
|
49 |
infile6<-"OR83M_state_outline.shp" |
|
50 |
#stat_loc<-read.table(paste(path,"/","location_study_area_OR_0602012.txt",sep=""),sep=",", header=TRUE) |
|
51 |
|
|
52 |
out_prefix<-"methods_09262012_" |
|
53 |
#output path |
|
54 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_07192012_GAM" |
|
55 |
setwd(path) |
|
56 |
|
|
57 |
##### LOAD USEFUL DATA |
|
58 |
|
|
59 |
obj_list<-"list_obj_08262012.txt" #Results of fusion from the run on ATLAS |
|
60 |
path<-"/home/parmentier/Data/IPLANT_project/methods_interpolation_comparison" #Jupiter LOCATION on Atlas for kriging #Jupiter Location on XANDERS |
|
61 |
#path<-"/Users/benoitparmentier/Dropbox/Data/NCEAS/Oregon_covariates/" #Local dropbox folder on Benoit's laptop |
|
62 |
setwd(path) |
|
63 |
proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs"; |
|
64 |
#User defined output prefix |
|
65 |
|
|
66 |
sampling_CAI<-load_obj("results2_CAI_sampling_obj_09132012_365d_GAM_CAI2_multisampling2.RData") |
|
67 |
sampling_fus<-load_obj("results2_fusion_sampling_obj_10d_GAM_fusion_multisamp4_09192012.RData") |
|
68 |
fus_CAI_mod<-load_obj("results2_CAI_Assessment_measure_all_09132012_365d_GAM_CAI2_multisampling2.RData") |
|
69 |
gam_fus_mod1<-load_obj("results2_fusion_Assessment_measure_all_10d_GAM_fusion_multisamp4_09192012.RData") |
|
70 |
|
|
71 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
72 |
ghcn<-readOGR(".", filename) #reading shapefile |
|
73 |
|
|
74 |
#CRS<-proj4string(ghcn) #Storing projection information (ellipsoid, datum,etc.) |
|
75 |
lines<-read.table(paste(path,"/",inlistf,sep=""), sep="") #Column 1 contains the names of raster files |
|
76 |
inlistvar<-lines[,1] |
|
77 |
inlistvar<-paste(path,"/",as.character(inlistvar),sep="") |
|
78 |
covar_names<-as.character(lines[,2]) #Column two contains short names for covaraites |
|
79 |
|
|
80 |
s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables. |
|
81 |
layerNames(s_raster)<-covar_names #Assigning names to the raster layers |
|
82 |
projection(s_raster)<-proj_str |
|
83 |
|
|
84 |
#Create mask using land cover data |
|
85 |
pos<-match("LC10",layerNames(s_raster)) #Find the layer which contains water bodies |
|
86 |
LC10<-subset(s_raster,pos) |
|
87 |
LC10[is.na(LC10)]<-0 #Since NA values are 0, we assign all zero to NA |
|
88 |
#LC10_mask[LC10_mask<100]<-1 |
|
89 |
mask_land<-LC10==100 #All values below 100% water are assigned the value 1, value 0 is "water" |
|
90 |
mask_land_NA<-mask_land |
|
91 |
mask_land_NA[mask_land_NA==0]<-NA #Water bodies are assigned value 1 |
|
92 |
|
|
93 |
data_name<-"mask_land_OR" |
|
94 |
raster_name<-paste(data_name,".rst", sep="") |
|
95 |
writeRaster(mask_land, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI) |
|
96 |
#writeRaster(r2, filename=raster_name,overwrite=TRUE) #Writing the data in a raster file format...(IDRISI) |
|
97 |
|
|
98 |
pos<-match("mm_01",layerNames(s_raster)) |
|
99 |
mm_01<-subset(s_raster,pos) |
|
100 |
mm_01<-mm_01-273.15 |
|
101 |
mm_01<-mask(mm_01,mask_land_NA) #Raster image used as backround |
|
102 |
#mention this is the last... files |
|
103 |
|
|
104 |
################# PART 1: COMPARING RESULTS USING ALL MONTHNLY DATA AND SAMPLED FROM DAILY################### |
|
105 |
######## Visualize data: USING ALL MONTHLY DATA... |
|
106 |
|
|
107 |
file_pat<-glob2rx("GAMCAI_tmax_pred_predicted_mod*_365d_GAM_CAI2_all_lstd_10262012.rst") |
|
108 |
lf_cai_all<-list.files(pattern=file_pat) |
|
109 |
image_file<-grep(file_pat,lf_ck,value=TRUE) # using grep with "value" extracts the matching names |
|
110 |
raster_pred_k<-raster(image_file) |
|
111 |
|
|
112 |
#### CHECKING RESULTS ON 10/28 BY VISUALIZING PREDICTIONS... DO THAT FOR ALL MODELS AND TWO DATES... |
|
113 |
|
|
114 |
date_selected<-"20100103" |
|
115 |
lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion |
|
116 |
lf_cai<-list.files(pattern=paste("*CAI_tmax_pred.*",date_selected,"*.08072012_365d_GAM_CAI2.rst$",sep="")) #Search for files in relation to fusion |
|
117 |
|
|
118 |
lf_cai<-list.files(pattern=paste("GAMCAI_tmax_pred_predicted_mod8_20100103_30_1_365d_GAM_CAI2_all_lstd_10262012.rst",sep="")) |
|
119 |
|
|
120 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM" |
|
121 |
setwd(path) |
|
122 |
tmax_predicted_mod8<-raster("GAM_bias_tmax_predicted_mod8_20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst") |
|
123 |
tmax_bias_mod8<-raster("GAM_bias_predicted_mod8_20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst") |
|
124 |
daily_delta_tmax<-raster("fusion_daily_delta_20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst") |
|
125 |
fusion<-raster("fusion_tmax_predicted_20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst") |
|
126 |
LST<-(raster("mean_month1_rescaled.rst"))-273.16 |
|
127 |
|
|
128 |
tmp_rast<-LST+daily_delta_tmax-tmax_bias_mod8 |
|
129 |
dif_rast<-tmp_rast-tmax_predicted_mod8 |
|
130 |
eq<-tmp_rast==tmax_predicted_mod8 |
|
131 |
plot(tmax_predicted_mod8) |
|
132 |
|
|
133 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" |
|
134 |
setwd(path) |
|
135 |
cai_tmax_mod8_rast<-raster("GAMCAI_tmax_pred_predicted_mod8_20100103_30_1_365d_GAM_CAI2_all_lstd_10262012.rst") |
|
136 |
cai_delta_rast<-raster("CAI_deltaclim_20100103_30_1_365d_GAM_CAI2_all_lstd_10262012.rst") |
|
137 |
cai_clim_rast<-raster("CAI_clim_20100103_30_1_365d_GAM_CAI2_all_lstd_10262012.rst") |
|
138 |
cai_climod8_rast<-raster("GAMCAI_predicted_mod8_20100103_30_1_365d_GAM_CAI2_all_lstd_10262012.rst") |
|
139 |
|
|
140 |
# s.range <- c(min(minValue(diff)), max(maxValue(diff))) |
|
141 |
# col.breaks <- pretty(s.range, n=50) |
|
142 |
# lab.breaks <- pretty(s.range, n=5) |
|
143 |
temp.colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
144 |
# plot(diff, breaks=col.breaks, col=temp.colors(length(col.breaks)-1) |
|
145 |
# |
|
146 |
pred_cai_mod8<-stack(cai_climod8_rast,cai_delta_rast,cai_tmax_mod8_rast) |
|
147 |
layerNames(pred_cai_mod8)<-paste(c("cai_climod8_rast","cai_delta_rast","cai_tmax_mod8_rast"),"20100103",sep=" ") |
|
148 |
pred_fus_mod8<-stack(tmax_bias_mod8,daily_delta_tmax,LST,tmax_predicted_mod8) |
|
149 |
layerNames(pred_fus_mod8)<-paste(c("tmax_bias_mod8","daily_delta_tmax","LST","tmax_predicted_mod8"),"20100103",sep=" ") |
|
150 |
pred_fus_cai_comp<-stack(pred_fus_mod8,pred_cai_mod8) |
|
151 |
|
|
152 |
X11(12,12) |
|
153 |
#for (i in 1:) |
|
154 |
#plot(pred_cai_mod8,col=temp.colors) |
|
155 |
plot(pred_cai_mod8,col=temp.colors(25)) |
|
156 |
#title("prediction for 20100103") |
|
157 |
plot(pred_fus_mod8,col=temp.colors(25)) |
|
158 |
plot(pred_fus_cai_comp,col=temp.colors(25)) |
|
159 |
dev.off |
|
160 |
tmp_rast2<-cai_delta_rast + cai_climod8_rast |
|
161 |
dif2_rast<-tmp_rast2-cai_tmax_mod8_rast |
|
162 |
|
|
163 |
dif2<-cai_rast-tmax_predicted_mod8 #This seems to be the same!!! |
|
164 |
plot(cai_climod8_rast) #THe bias surface is different when compared to the climatology based on mod8 |
|
165 |
plot(tmax_bias_mod8) |
|
166 |
plot(tmax_predicted_mod8) |
|
167 |
plot(cai_tmax_rast) |
|
168 |
|
|
169 |
cellStats(pred_fus_cai_comp) |
|
170 |
|
|
171 |
#t<-"results2_CAI_Assessment_measure_all_08072012_365d_GAM_CAI2.RData" |
|
172 |
|
|
173 |
## Examining data with simplified CAI3 models in comparison to other: 11/03/2012 |
|
174 |
|
|
175 |
oldpath<-getwd() |
|
176 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" |
|
177 |
setwd(path) |
|
178 |
|
|
179 |
date_selected<-"20100103" |
|
180 |
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion |
|
181 |
file_pat<-glob2rx(paste("*tmax_pred*",date_selected,"*_365d_GAM_CAI3_all_10302012.rst",sep="")) #Search for files in relation to fusion |
|
182 |
lf_cai3<-list.files(pattern=file_pat) #Search for files in relation to fusion |
|
183 |
|
|
184 |
cai3_mod<-load_obj("results_mod_obj__365d_GAM_CAI3_all_10302012.RData") |
|
185 |
names(cai3_mod$gam_CAI_mod[[1]]) #check names within object |
|
186 |
formulas<-as.character(cai3_mod$gam_CAI_mod[[1]]$formulas) |
|
187 |
rmse<-(cai3_mod$gam_CAI_mod[[1]]$tb_metrics1) |
|
188 |
nmodels<-6 |
|
189 |
for(i in 4:(nmodels+3)){ # start of the for loop #1 |
|
190 |
rmse[,i]<-as.numeric(as.character(rmse[,i])) |
|
191 |
} |
|
192 |
rmse_val<-as.numeric(c(rmse[1,9],rmse[1,4:8])) |
|
193 |
rmse_val<-format(rmse_val,digits=3) |
|
194 |
rmse_val<-paste("(",rmse_val,")",sep="") |
|
195 |
rname<-c("CAI_Kr",formulas) |
|
196 |
rname<-paste(rname,rmse_val,sep=" ") |
|
197 |
|
|
198 |
rast_cai3<-stack(lf_cai3) |
|
199 |
rast_cai3<-mask(rast_cai3,mask_ELEV_SRTM) |
|
200 |
#s_range <- c(min(minValue(diff)), max(maxValue(diff))) |
|
201 |
s_range<-c(minValue(rast_cai3),maxValue(rast_cai3)) #stack min and max |
|
202 |
s_range<-c(min(s_range),max(s_range)) |
|
203 |
col_breaks <- pretty(s_range, n=50) |
|
204 |
lab_breaks <- pretty(s_range, n=5) |
|
205 |
temp_colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
206 |
par(mfrow=c(2,3)) |
|
207 |
for (k in 1:length(lf_cai3)){ |
|
208 |
cai3_r<-raster(rast_cai3,k) |
|
209 |
plot(cai3_r, breaks=col_breaks, col=temp_colors(length(col_breaks)-1), |
|
210 |
axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k]) |
|
211 |
} |
|
212 |
|
|
213 |
#Wihtout setting range |
|
214 |
s_range<-c(-12,18) |
|
215 |
#col_breaks <- pretty(s_range, n=50) |
|
216 |
#lab_breaks <- pretty(s_range, n=5) |
|
217 |
col_breaks<-49 |
|
218 |
temp_colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
219 |
par(mfrow=c(2,3)) |
|
220 |
for (k in 1:length(lf_cai3)){ |
|
221 |
cai3_r<-raster(rast_cai3,k) |
|
222 |
plot(cai3_r, col=temp_colors(col_breaks), |
|
223 |
,main=rname[k]) |
|
224 |
} |
|
225 |
|
|
226 |
###### PART 2: ################ |
|
227 |
#### EXAMINING LST AND ELEVATION DATASETS TO CHECK FOR EXTREMES AND SELECT SCREENING VALUES.... |
|
228 |
|
|
229 |
# Examining ELEV_SRTM and screening out... |
|
230 |
|
|
231 |
pos<-match("ELEV_SRTM",layerNames(s_raster)) |
|
232 |
ELEV_SRTM<-raster(s_raster,pos) |
|
233 |
head(freq(ELEV_SRTM)) #Show how many pixels are below 0 |
|
234 |
|
|
235 |
# mask out values below zero |
|
236 |
elev<-ELEV_SRTM |
|
237 |
elev[0<elev]<-NA #Remove all negative elevation |
|
238 |
#Show how many were NA... |
|
239 |
|
|
240 |
mask_ELEV_SRTM<-elev >0 |
|
241 |
quartz() |
|
242 |
plot(elev, main="Elevation in Oregon") |
|
243 |
savePlot(paste("elevation_Oregon",out_prefix,".png", sep=""), type="png") |
|
244 |
|
|
245 |
# Examining LST data: extremes values and number of valid observation |
|
246 |
|
|
247 |
l<-list.files(pattern="mean_month.*rescaled.rst") |
|
248 |
ln<-list.files(pattern="mean_month_valid_obs_.*_Sum.rst") #number of observation missing |
|
249 |
l<-mixedsort(l) |
|
250 |
ln<-mixedsort(ln) |
|
251 |
|
|
252 |
#l <-readLines(paste(path,"/",infile6, sep="")) |
|
253 |
molst<-stack(l) #Creating a raster stack... |
|
254 |
nobslst<-stack(ln) #Creating a raster stack... |
|
255 |
|
|
256 |
#setwd(old) |
|
257 |
molst<-molst-273.16 #K->C #LST stack of monthly average... |
|
258 |
idx <- seq(as.Date('2010-01-15'), as.Date('2010-12-15'), 'month') |
|
259 |
molst <- setZ(molst, idx) |
|
260 |
layerNames(molst) <- month.abb |
|
261 |
layerNames(nobslst)<-month.abb |
|
262 |
|
|
263 |
X11(width=18,height=12) |
|
264 |
#PLOT THE STACK AND SAVE IMAGE |
|
265 |
plot(molst,col=temp.colors(25)) |
|
266 |
savePlot("lst_climatology_OR.png",type="png") |
|
267 |
levelplot(molst,col.regions=temp.colors(25)) |
|
268 |
plot(nobslst) |
|
269 |
savePlot("lst_climatology_OR_nobs.png",type="png") |
|
270 |
dev.off) |
|
271 |
|
|
272 |
#note differnces in patternin agricultural areas and |
|
273 |
extremeValues(molst) #not yet implemented... |
|
274 |
min_values<-cellStats(molst,"min") |
|
275 |
max_values<-cellStats(molst,"max") |
|
276 |
mean_values<-cellStats(molst,"mean") |
|
277 |
sd_values<-cellStats(molst,"sd") |
|
278 |
#median_values<-cellStats(molst,"median") Does not extist |
|
279 |
statistics_LST_s<-cbind(min_values,max_values,mean_values,sd_values) #This shows that some values are extremes...especially in October |
|
280 |
LST_stat_data<-as.data.frame(statistics_LST_s) |
|
281 |
names(LST_stat_data)<-c("min","max","mean","sd") |
|
282 |
# Statistics for number of valid observation stack |
|
283 |
min_values<-cellStats(nobslst,"min") |
|
284 |
max_values<-cellStats(nobslst,"max") |
|
285 |
mean_values<-cellStats(nobslst,"mean") |
|
286 |
sd_values<-cellStats(nobslst,"sd") |
|
287 |
statistics_LSTnobs_s<-cbind(min_values,max_values,mean_values,sd_values) #This shows that some values are extremes...especially in October |
|
288 |
LSTnobs_stat_data<-as.data.frame(statistics_LSTnobs_s) |
|
289 |
|
|
290 |
X11(width=12,height=12) |
|
291 |
#Plot statiscs (mean,min,max) for monthly LST images |
|
292 |
plot(1:12,LST_stat_data$mean,type="b",ylim=c(-15,60),col="black",xlab="month",ylab="tmax (degree C)") |
|
293 |
lines(1:12,LST_stat_data$min,type="b",col="blue") |
|
294 |
lines(1:12,LST_stat_data$max,type="b",col="red") |
|
295 |
text(1:12,LST_stat_data$mean,rownames(LST_stat_data),cex=1,pos=2) |
|
296 |
|
|
297 |
legend("topleft",legend=c("min","mean","max"), cex=1.5, col=c("blue","black","red"), |
|
298 |
lty=1) |
|
299 |
title(paste("LST statistics for Oregon", "2010",sep=" ")) |
|
300 |
savePlot("lst_statistics_OR.png",type="png") |
|
301 |
|
|
302 |
#Plot number of valid observations for LST |
|
303 |
plot(1:12,LSTnobs_stat_data$mean,type="b",ylim=c(0,280),col="black",xlab="month",ylab="tmax (degree C)") |
|
304 |
lines(1:12,LSTnobs_stat_data$min,type="b",col="blue") |
|
305 |
lines(1:12,LSTnobs_stat_data$max,type="b",col="red") |
|
306 |
text(1:12,LSTnobs_stat_data$mean,rownames(LSTnobs_stat_data),cex=1,pos=2) |
|
307 |
|
|
308 |
legend("topleft",legend=c("min","mean","max"), cex=1.5, col=c("blue","black","red"), |
|
309 |
lty=1) |
|
310 |
title(paste("LST number of valid observations for Oregon", "2010",sep=" ")) |
|
311 |
savePlot("lst_nobs_OR.png",type="png") |
|
312 |
|
|
313 |
mm_10<-raster(molst,10) |
|
314 |
tab_mm_10<-freq(mm_10) #This shows that there are only 6 pixels with values between -86 and -89.. and 8 pixels with values between 38 and 31 |
|
315 |
head(tab_mm_10) |
|
316 |
#screening maybe good...? |
|
317 |
|
|
318 |
mm_01<-raster(molst,1) |
|
319 |
tab_mm_10<-freq(mm_01) #This shows that there are only 6 pixels with values between -86 and -89.. and 8 pixels with values between 38 and 31 |
|
320 |
#head(tab_mm_10) |
|
321 |
|
|
322 |
mm_07<-raster(molst,7) |
|
323 |
tab_mm_10<-freq(mm_07) #This shows that there are only 6 pixels with values between -86 and -89.. and 8 pixels with values between 38 and 31 |
|
324 |
#head(tab_mm_10) |
|
325 |
|
|
326 |
LST_comp<-stack(mm_01,mm_07) |
|
327 |
|
|
328 |
X11(width=18,height=10) |
|
329 |
#PLOT THE STACK AND SAVE IMAGE |
|
330 |
plot(LST_comp,col=temp.colors(25)) |
|
331 |
#title("LST spatial pattern in January and August") |
|
332 |
savePlot("lst_climatology_OR.png",type="png") |
|
333 |
|
|
334 |
|
|
335 |
###### PART 3: EXAMANING MODEL OUTPUTS USING MOD OBJECTS TO CHECK AND COMPARE RESULTS ############ |
|
336 |
############ LOOKING AT SPECIFIC STATIONS... |
|
337 |
path_data_cai<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" |
|
338 |
path_data_fus<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM" |
|
339 |
setwd(path_data) #output data |
|
340 |
gam_fus<-load_obj(file.path(path_data_fus, |
|
341 |
"results2_fusion_Assessment_measure_all_365d_GAM_fusion_all_lstd_10242012.RData")) |
|
342 |
gam_cai<-load_obj(file.path(path_dat_cai, |
|
343 |
"results2_CAI_Assessment_measure_all_365d_GAM_CAI2_all_lstd_10262012.RData") #This contains all the info |
|
344 |
sampling_obj_cai<-load_obj(file.path(path_data_cai, |
|
345 |
"results2_CAI_sampling_obj_365d_GAM_CAI2_all_lstd_10262012.RData")) |
|
346 |
sampling_date_list<-sampling_obj_cai$sampling_dat$date |
|
347 |
date_selected="20100103" |
|
348 |
|
|
349 |
k<-match(date_selected,sampling_date_list) |
|
350 |
|
|
351 |
fus_d<-gam_fus[[k]] #object for the first date...20100103 |
|
352 |
fus_d$tb_metrics1 #This is a summary for validation metrics...(RMSE and MAE) |
|
353 |
|
|
354 |
cai_d<-gam_cai[[k]] #object for the first date...20100103 |
|
355 |
cai_d$tb_metrics1 #This is a summary for validation metrics...(RMSE and MAE) |
|
356 |
|
|
357 |
names(fus_d$mod_obj) #list models, nine models: mod1, mod2,...,mod9a, mod9b |
|
358 |
names(cai_d$mod_obj) |
|
359 |
|
|
360 |
mod1_f<-fus_d$mod_obj$mod1 # extract model 1 for fusion on |
|
361 |
mod4_f<-fus_d$mod_obj$mod4 |
|
362 |
mod7_f<-fus_d$mod_obj$mod7 |
|
363 |
data_sf<-fus_d$data_s |
|
364 |
data_vf<-fus_d$data_v |
|
365 |
|
|
366 |
plot(mod1_f) #Examining the fit |
|
367 |
plot(mod1) #Examing the fit |
|
368 |
#data_monthf<-fus_d1$data_month |
|
369 |
|
|
370 |
mod1<-cai_d1$mod_obj$mod1 |
|
371 |
mod4<-cai_d1$mod_obj$mod4 |
|
372 |
mod7<-cai_d1$mod_obj$mod7 |
|
373 |
data_s<-cai_d1$data_s |
|
374 |
data_v<-cai_d1$data_v |
|
375 |
data_month<-cai_d1$data_month |
|
376 |
|
|
377 |
#### COMPARE LST AND TMAX averages over the year... ############### |
|
378 |
#### Screening necessary? |
|
379 |
|
|
380 |
LSTD_bias_month<-vector("list",length(gam_cai)) # list to store bias information |
|
381 |
data_month_list<-vector("list",length(gam_cai)) # list to store data_month used for training |
|
382 |
|
|
383 |
for (i in 1:length(gam_cai)){ |
|
384 |
data_month_list[[i]]<-gam_cai[[i]]$data_month |
|
385 |
#LSTD_bias_month[[i]]<-aggregate(LSTD_bias~month,data=data_month_list[[i]],mean) |
|
386 |
} |
|
387 |
|
|
388 |
#tmp<-do.call(rbind,LSTD_bias_month) |
|
389 |
data_month_all<-do.call(rbind,data_month_list) |
|
390 |
|
|
391 |
data_month_all$LST<-data_month_all$LST-273.16 |
|
392 |
LSTD_bias_avgm<-aggregate(LSTD_bias~month,data=data_month_all,mean) |
|
393 |
LSTD_bias_sdm<-aggregate(LSTD_bias~month,data=data_month_all,sd) |
|
394 |
|
|
395 |
LST_avgm<-aggregate(LST~month,data=data_month_all,mean) |
|
396 |
TMax_avgm<-aggregate(TMax~month,data=data_month_all,mean) |
|
397 |
|
|
398 |
#plot(LST_avgm,type="b") |
|
399 |
#lines(TMax_avgm,col="blue",add=T) |
|
400 |
plot(data_month_all$month,data_month_all$LST, xlab="month",ylab="LST at station") |
|
401 |
title(paste("Monthly LST at stations in Oregon", "2010",sep=" ")) |
|
402 |
savePlot(paste("LST_at_stations_per_month_",out_prefix,".png", sep=""), type="png") |
|
403 |
|
|
404 |
tab_freq<-cbind(hist_LST$breaks,hist_LST$counts) |
|
405 |
median(data_month_all$LST,na.rm=T) |
|
406 |
median(data_month_all$TMax,na.rm=T) |
|
407 |
|
|
408 |
statistics_LST_s<-as.data.frame(statistics_LST_s) |
|
409 |
plot(TMax_avgm,type="b",ylim=c(0,35),col="red",xlab="month",ylab="tmax (degree C)") |
|
410 |
lines(1:12,statistics_LST_s$mean_values,type="b",col="blue") |
|
411 |
text(TMax_avgm[,1],TMax_avgm[,2],rownames(statistics_LST_s),cex=0.8,pos=2) |
|
412 |
|
|
413 |
legend("topleft",legend=c("TMax","LST"), cex=1, col=c("red","blue"), |
|
414 |
lty=1) |
|
415 |
title(paste("Monthly mean tmax and LST at stations in Oregon", "2010",sep=" ")) |
|
416 |
savePlot(paste("Temporal_profile_res",id[i],out_prefix,".png", sep=""), type="png") |
|
417 |
|
|
418 |
#Account for forest |
|
419 |
data_month_all_forest<-data_month_all[data_month_all$LC1>=50,] |
|
420 |
data_month_all_grass<-data_month_all[data_month_all$LC3>=10,] |
|
421 |
LST_avgm_forest<-aggregate(LST~month,data=data_month_all_forest,mean) |
|
422 |
LST_avgm_grass<-aggregate(LST~month,data=data_month_all_grass,mean) |
|
423 |
|
|
424 |
plot(TMax_avgm,type="b",ylim=c(-7,42)) |
|
425 |
lines(LST_avgm,col="blue",add=T) |
|
426 |
lines(LST_avgm_forest,col="green",add=T) |
|
427 |
lines(LST_avgm_grass,col="red",add=T) |
|
428 |
legend("topleft",legend=c("TMax","LST","LST_forest","LST_grass"), cex=0.8, col=c("black","blue","green","red"), |
|
429 |
lty=1) |
|
430 |
title(paste("Monthly average tmax for stations in Oregon ", "2010",sep="")) |
|
431 |
|
|
432 |
savePlot(paste("Temporal_profile_res",id[i],out_prefix,".png", sep=""), type="png") |
|
433 |
|
|
434 |
#Check where forest >50 and grass>10 |
|
435 |
#Create mask using land cover data |
|
436 |
pos<-match("LC1",layerNames(s_raster)) #Find the layer which contains water bodies |
|
437 |
LC1<-subset(s_raster,pos) |
|
438 |
LC1[is.na(LC1)]<-0 #Since NA values are 0, we assign all zero to NA |
|
439 |
LC1_50<-LC1>= 50 |
|
440 |
plot(LC1_50) |
|
441 |
pos<-match("LC3",layerNames(s_raster)) #Find the layer which contains water bodies |
|
442 |
LC3<-subset(s_raster,pos) |
|
443 |
LC3[is.na(LC3)]<-0 #Since NA values are 0, we assign all zero to NA |
|
444 |
LC3_10<-LC3>= 10 |
|
445 |
|
|
446 |
LC1_mean <- cellStats(LC3, mean) |
|
447 |
LC1_mean <- cellStats(LC3, mean) |
|
448 |
# |
|
449 |
avl<-c(0,10,1,10,20,2,20,30,3,30,40,4,40,50,5,50,60,6,60,70,7,70,80,8,80,90,9,90,100,10) |
|
450 |
rclmat<-matrix(avl,ncol=3,byrow=TRUE) |
|
451 |
LC3_rec<-reclass(LC3,rclmat) #Loss of layer names when using reclass |
|
452 |
tab_freq<-freq(LC3_rec) |
|
453 |
|
|
454 |
crosstab(LC1_50,LC3_10) #Very little overalp between classes... |
|
455 |
X11(12,12) |
|
456 |
LC_rec<-stack(LC1_50,LC3_10) |
|
457 |
layerNames(LC_rec)<-c("forest: LC1>50%","grass: LC3>10%") |
|
458 |
plot(LC_rec,col=c("black","red")) |
|
459 |
dev.off() |
|
460 |
#legend("topleft",legend=c("fo","LST","LST_forest","LST_grass"), cex=0.8, col=c("black","blue","green","red"), |
|
461 |
#lty=1) |
|
462 |
|
|
463 |
############ PART 4: RESIDUALS ANALYSIS: ranking, plots, focus regions ################## |
|
464 |
############## EXAMINING STATION RESIDUALS ########### |
|
465 |
########### CONSTANT OVER 365 AND SAMPLING OVER 365 |
|
466 |
#Plot daily_deltaclim_rast, bias_rast,add data_s and data_v |
|
467 |
|
|
468 |
# RANK STATION by average or median RMSE |
|
469 |
# Count the number of times a station is in the extremum group of outliers... |
|
470 |
# LOOK at specific date... |
|
471 |
|
|
472 |
#Examine residuals for a spciefic date...Jan, 1 using run of const_all i.e. same training over 365 dates |
|
473 |
path_data_cai<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_CAI" #Change to constant |
|
474 |
path_data_fus<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM" |
|
475 |
setwd(path_data) #output data |
|
476 |
|
|
477 |
#list files that contain raster tmax prediction (maps) for CAI and Fusion |
|
478 |
#lf_c<-"GAM_predicted_mod7_20101231_30_1_365d_GAM_fusion_const_10172012.rst" |
|
479 |
lf_cg<-list.files(pattern="GAM_predicted_mod2_.*_30_1_365d_GAM_fusion_const_10172012.rst$") #changee to constant |
|
480 |
lf_ck<-list.files(pattern="fusion_tmax_predicted_.*_30_1_365d_GAM_fusion_const_10172012.rst$") #Fusion+Kriging |
|
481 |
|
|
482 |
#list files that contain model objects and ratingin-testing information for CAI and Fusion |
|
483 |
fus1_c<-load_obj("results2_fusion_Assessment_measure_all_365d_GAM_fusion_const_10172012.RData") |
|
484 |
#fus1_c<-load_obj("results2_fusion_Assessment_measure_all_365d_GAM_fusion_const_10172012.RData") |
|
485 |
|
|
486 |
gam_fus<-load_obj(file.path(path_data_fus, |
|
487 |
"results2_fusion_Assessment_measure_all_365d_GAM_fusion_all_lstd_10242012.RData")) |
|
488 |
gam_cai<-load_obj(file.path(path_dat_cai, |
|
489 |
"results2_CAI_Assessment_measure_all_365d_GAM_CAI2_all_lstd_10262012.RData") #This contains all the info |
|
490 |
sampling_obj_cai<-load_obj(file.path(path_data_cai, |
|
491 |
"results2_CAI_sampling_obj_365d_GAM_CAI2_all_lstd_10262012.RData")) |
|
492 |
sampling_date_list<-sampling_obj_cai$sampling_dat$date |
|
493 |
|
|
494 |
date_selected<-"20100103" |
|
495 |
|
|
496 |
k<-match(date_selected,sampling_date_list) |
|
497 |
|
|
498 |
data_sf<-gam_fus[[k]]$data_s #object for the first date...20100103 |
|
499 |
data_vf<-gam_fus[[k]]$data_v #object for the first date...20100103 |
|
500 |
data_sc<-gam_fus[[k]]$data_s #object for the first date...20100103 |
|
501 |
data_vc<-gam_fus[[k]]$data_v #object for the first date...20100103 |
|
502 |
|
|
503 |
#Select background image for plotting |
|
504 |
mod_pat<-glob2rx(paste("*",date_selected,"*",sep="")) |
|
505 |
image_file<-grep(mod_pat,lf_ck,value=TRUE) # using grep with "value" extracts the matching names |
|
506 |
raster_pred_k<-raster(image_file) |
|
507 |
image_file<-grep(mod_pat,lf_cg,value=TRUE) # using grep with "value" extracts the matching names |
|
508 |
raster_pred_g<-raster(image_file) |
|
509 |
plot(data_sf,add=TRUE) |
|
510 |
plot(data_vf,pch=1,add=TRUE) |
|
511 |
|
|
512 |
#Create a residual table... |
|
513 |
res_mod9_list<-vector("list",365) |
|
514 |
res_mod2_list<-vector("list",365) |
|
515 |
|
|
516 |
tab_nv<-matrix(NA,365,1) |
|
517 |
for(k in 1:365){ |
|
518 |
tab_nv[k]<-length(fus1_c[[k]]$data_v$res_mod9) |
|
519 |
} |
|
520 |
#note that there might be some variation in the number!!! |
|
521 |
|
|
522 |
for(k in 1:365){ |
|
523 |
res_mod9<-fus1_c[[k]]$data_v$res_mod9 |
|
524 |
res_mod2<-fus1_c[[k]]$data_v$res_mod2 |
|
525 |
res_mod9_list[[k]]<-res_mod9 |
|
526 |
res_mod2_list[[k]]<-res_mod2 |
|
527 |
#subset data frame? or rbind them...and reshape?? think about it |
|
528 |
} |
|
529 |
tab_resmod9<-do.call(rbind,res_mod9_list) |
|
530 |
|
|
531 |
data_v_list<-vector("list",365) |
|
532 |
|
|
533 |
for(k in 1:365){ |
|
534 |
data_v<-as.data.frame(fus1_c[[k]]$data_v) |
|
535 |
data_v<-data_v[,c("id","res_mod9")] |
|
536 |
#subset data frame? or rbind them...and reshape?? think about it |
|
537 |
data_v_list[[k]]<-data_v |
|
538 |
} |
|
539 |
tab_resmod9<-do.call(rbind,data_v_list) |
|
540 |
|
|
541 |
#NOW USE RESHAPE TO CREATE TABLE.... |
|
542 |
|
|
543 |
#updated the analysis |
|
544 |
|
|
545 |
"tmax_predicted*20100103_30_1_365d_GAM_fusion_all_lstd_10242012.rst" |
|
546 |
oldpath<-getwd() |
|
547 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_10242012_GAM" |
|
548 |
setwd(path) |
|
549 |
date_selected<-"20100103" |
|
550 |
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion |
|
551 |
file_pat<-glob2rx(paste("*tmax_pred*",date_selected,"*_365d_GAM_fusion_all_lstd_10242012.rst",sep="")) #Search for files in relation to fusion |
|
552 |
lf_fus1a<-list.files(pattern=file_pat) #Search for files in relation to fusion |
|
553 |
|
|
554 |
rast_fus1a<-stack(lf_fus1a) |
|
555 |
rast_fus1a<-mask(rast_fus1a,mask_ELEV_SRTM) |
|
556 |
|
|
557 |
path<-"/home/parmentier/Data/IPLANT_project/data_Oregon_stations_07192012_GAM" |
|
558 |
setwd(path) |
|
559 |
date_selected<-"20100103" |
|
560 |
#lf_fus<-list.files(pattern=paste("GAM_bias_tmax_predicted_mod8_",date_selected,"_30_1_365d_GAM_fusion_all_lstd_10242012.rst$",sep="")) #Search for files in relation to fusion |
|
561 |
#lf_fus1<-list.files(pattern=paste("*.tmax_predicted.*.",date_selected,".*._365d_GAM_fusion_lstd_10062012.rst$",sep="")) |
|
562 |
file_pat<-glob2rx(paste("*tmax_predicted*",date_selected,"*_365d_GAM_fusion_lstd_10062012.rst",sep="")) #Search for files in relation to fusion |
|
563 |
lf_fus1s<-list.files(pattern=file_pat) #Search for files in relation to fusion |
|
564 |
|
|
565 |
rast_fus1s<-stack(lf_fus1s) |
|
566 |
rast_fus1s<-mask(rast_fus1s,mask_ELEV_SRTM) |
|
567 |
|
|
568 |
s_range<-c(minValue(rast_fusa1),maxValue(rast_fusa1)) #stack min and max |
|
569 |
s_range<-c(min(s_range),max(s_range)) |
|
570 |
col_breaks <- pretty(s_range, n=50) |
|
571 |
lab_breaks <- pretty(s_range, n=5) |
|
572 |
temp_colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
573 |
X11(width=18,height=12) |
|
574 |
par(mfrow=c(3,3)) |
|
575 |
for (k in 1:length(lf_fus1a)){ |
|
576 |
fus1a_r<-raster(rast_fus1a,k) |
|
577 |
plot(fus1a_r, breaks=col_breaks, col=temp_colors(length(col_breaks)-1), |
|
578 |
axis=list(at=lab_breaks, labels=lab_breaks)) |
|
579 |
} |
|
580 |
|
|
581 |
#Wihtout setting range |
|
582 |
s_range<-c(-12,18) |
|
583 |
#col_breaks <- pretty(s_range, n=50) |
|
584 |
#lab_breaks <- pretty(s_range, n=5) |
|
585 |
col_breaks<-49 |
|
586 |
temp_colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
587 |
lab_breaks <- pretty(s_range, n=5) |
|
588 |
X11(width=18,height=12) |
|
589 |
par(mfrow=c(3,3)) |
|
590 |
mod_name<-paste("mod",1:8, sep="") |
|
591 |
rname<-c("FUS_kr",mod_name) |
|
592 |
for (k in 1:length(lf_fus1a)){ |
|
593 |
fus1a_r<-raster(rast_fus1a,k) |
|
594 |
plot(fus1a_r, breaks=col_breaks, col=temp_colors(col_breaks-1), |
|
595 |
axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k]) |
|
596 |
} |
|
597 |
|
|
598 |
#Wihtout setting range |
|
599 |
s_range<-c(-12,23) |
|
600 |
#col_breaks <- pretty(s_range, n=50) |
|
601 |
#lab_breaks <- pretty(s_range, n=5) |
|
602 |
col_breaks<-49 |
|
603 |
temp_colors <- colorRampPalette(c('blue', 'white', 'red')) |
|
604 |
lab_breaks <- pretty(s_range, n=5) |
|
605 |
X11(width=18,height=12) |
|
606 |
par(mfrow=c(3,3)) |
|
607 |
mod_name<-paste("mod",1:8, sep="") |
|
608 |
rname<-c("FUS_kr",mod_name) |
|
609 |
for (k in 1:length(lf_fus1s)){ |
|
610 |
fus1s_r<-raster(rast_fus1s,k) |
|
611 |
plot(fus1s_r, breaks=col_breaks, col=temp_colors(col_breaks-1), |
|
612 |
axis=list(at=lab_breaks, labels=lab_breaks),main=rname[k]) |
|
613 |
} |
|
614 |
|
|
615 |
|
|
616 |
|
|
617 |
|
Also available in: Unified diff
Method comp part3,initial commit
task#491check extremes for LST and ELEV, exploratory analysis Land cover