Project

General

Profile

Download (16 KB) Statistics
| Branch: | Revision:
1
##################    Covariates and stations analyses  #######################################
2
############################ Covariate production for a given tile/region ##########################################
3
#This script examines inputs and outputs from the interpolation step.                             
4
#Part 1: Script produces summary information about stations used in the interpolation
5
#Part 2: Script produces plots of input covariates for study region
6
#AUTHOR: Benoit Parmentier                                                                       
7
#DATE: 04/01/2013                                                                                 
8

    
9
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#???--   
10

    
11
##Comments and TODO:
12
#Add interpolation analyses of number of stations used for every day of the year...  
13

    
14
## Input arguments
15
#The output is a list of four shapefile names produced by the function:
16
#1) loc_stations: locations of stations as shapefile in EPSG 4326
17
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
18
#3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag
19
#4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
20
#5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
21
#6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
22
#7) var
23
#8) range_years
24
#9) range_years_clim
25
#8) infile_covariate: s_raster brick file
26
#9)  covar_names: variable mames
27
#10) raster_prediction
28
#11) world_countries
29
#12) region_outline
30
#13) out_prefix
31

    
32
## Functions used in the script
33

    
34
load_obj <- function(f) 
35
{
36
  env <- new.env()
37
  nm <- load(f, env)[1]  
38
  env[[nm]]
39
}
40

    
41
extract_number_obs<-function(list_param){
42
  #Function to extract the number of observations used in modeling
43
  
44
  method_mod_obj<-list_param$method_mod_obj
45
  
46
  #Change to results_mod_obj[[i]]$data_s to make it less specific!!!!
47
  
48
  #number of observations 
49
  list_nrow_data_s<-lapply(1:length(method_obj),function(k) nrow(method_mod_obj[[k]]$data_s))
50
  list_nrow_data_v<-lapply(1:length(method_obj),function(k) nrow(method_mod_obj[[k]]$data_v))
51
  #lapply(1:length(clim_obj),function(k) nrow(method_mod_obj[[k]]$data_month))
52
  list_nrow_data_month<-lapply(1:length(gamclim_fus_mod),function(k) nrow(gamclim_fus_mod[[k]]$data_month))
53
  
54
  #number of valid observations 
55
  list_nrow_valid_data_s<-lapply(1:length(method_obj),function(k) length(method_mod_obj[[k]]$data_s$[[y_var_name]])
56
  list_nrow_data_v<-lapply(1:length(method_obj),function(k) length(method_mod_obj[[k]]$data_v$[[y_var_name]])
57
  list_nrow_valid_data_month<-lapply(1:length(gamclim_fus_mod),function(k) length(gamclim_fus_mod[[k]]$data_month$y_var))
58
  
59
  c1<-do.call(rbind,list_nrow_data_s)
60
  c2<-do.call(rbind,list_nrow_data_v)
61
  c3<-do.call(rbind,list_nrow_data_month)
62
  
63
  c1v<-do.call(rbind,list_valid_nrow_data_s)
64
  c2v<-do.call(rbind,list_valid_nrow_data_v)
65
  c3v<-do.call(rbind,list_valid_nrow_data_month)
66
  
67
  n_data_s<-cbdind(c1,c1v)
68
  n_data_v<-cbdind(c2,c2v)
69
  n_data_month<-cbdind(c3,c3vv)
70
  list_observations<-list(n_data_s,n_data_v,n_data_month)                         
71
  return(list_observations)
72
}
73

    
74
###########################################################################
75
########################## BEGIN SCRIPT/FUNCTION ##########################
76

    
77

    
78
########################################
79
#### STEP 1: read in data
80

    
81
#Stations in the processing region/study area
82
stat_reg <- readOGR(dsn=dirname(list_outfiles$loc_stations),sub(".shp","",basename(list_outfiles$loc_stations)))
83
#Stations available before screening the data query: ghcn daily data for the year range of interpolation
84
data_d <- readOGR(dsn=dirname(list_outfiles$loc_stations_ghcn),sub(".shp","",basename(list_outfiles$loc_stations_ghcn)))
85
#Stations available after screening the data query: ghcn daily data for the year range of interpolation
86
data_reg <- readOGR(dsn=dirname(list_outfiles$daily_query_ghcn_data),sub(".shp","",basename(list_outfiles$daily_query_ghcn_data)))
87
#Covariates data available after screening:ghcn daily data with covariates for the year range of interpolation
88
data_RST_SDF <-readOGR(dsn=dirname(list_outfiles$daily_covar_ghcn_data),sub(".shp","",basename(list_outfiles$daily_covar_ghcn_data)))
89
#Stations before screening monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
90
data_m <- readOGR(dsn=dirname(list_outfiles$monthly_query_ghcn_data),sub(".shp","",basename(list_outfiles$monthly_query_ghcn_data)))
91
#Stations after screening monthly_query_ghcn_data, extraction of covariates and monthly averages
92
dst<-readOGR(dsn=dirname(list_outfiles$monthly_covar_ghcn_data),sub(".shp","",basename(list_outfiles$monthly_covar_ghcn_data)))
93

    
94
### Load data used in fitting the model at monthly scale...
95
data_month<-gamclim_fus_mod[[1]]$data_month
96
list_data_month<-lapply(1:length(gamclim_fus_mod),function(k) gamclim_fus_mod[[k]]$data_month)
97

    
98
#Loading covariates raster images
99
s_raster<-brick(infile_covariates)
100
names(s_raster)<-covar_names
101
rast_ref<-subset(s_raster,"mm_01") # mean month for January
102

    
103
names(raster_prediction_obj)
104
var<-list_param$var
105

    
106
raster_prediction_obj<-load_obj(list_param$raster_prediction_obj)
107
#method_mod_obj<-raster_prediction_obj$method_mod_obj
108
method_mod_obj<-raster_prediction_obj$gam_fus_mod #change later for any model type
109
#validation_obj<-raster_prediction_obj$validation_obj
110
validation_obj<-raster_prediction_obj$gam_fus_validation_mod #change later for any model type
111
#clim_obj<-raster_prediction_obj$clim_obj
112
clim_obj<-raster_prediction_obj$gamclim_fus_mod #change later for any model type
113

    
114
names(raster_prediction_obj$method_obj[[1]])
115
data_s<-validation_obj[[1]]$data_s
116
summary_data_v<-validation_obj[[1]]$summary_data_v
117
names(validation_obj[[1]])
118
###################################################################
119
######## PART I: Script produces summary information about stations used in the interpolation ########
120

    
121
### Figue 1: stations in the study area/processing tile  
122
png(paste("Total_number_of_stations_in_study_area_",out_prefix,".png", sep=""))
123
plot(rast_ref)
124
plot(stat_reg,add=TRUE)
125
nb_point1<-paste("n_station=",length(stat_reg$STAT_ID))
126
#Add the number of data points on the plot
127
title("Stations located in the study area")
128
legend("topleft",legend=c(nb_point1),bty="n",cex=1.2)
129
dev.off()
130

    
131
### Figue 2: stations in the study area/processing tile: from query without flag screening             
132
png(paste("Stations_for_range_",range_years[1],"_",range_years[2],"_no_screening",out_prefix,".png", sep=""))
133
plot(rast_ref)
134
plot(data_d, add=TRUE)
135
nb_point<-paste("nrow=",nrow(data_d),sep="")
136
nb_point2<-paste("ns_stations=",length(unique(data_d$station)),sep="")
137
#Add the number of data points on the plot
138
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
139
title(paste("Stations available for year ",range_years[1],sep=""))
140
dev.off()
141

    
142
### Figue 3: stations in the study area/processing tile: after screening     
143
png(paste("Stations_for_range_",range_years[1],"_",range_years[2],"_after_screening",out_prefix,".png", sep=""))
144
plot(rast_ref)
145
plot(data_reg,add=TRUE)
146
nb_point<-paste("nrow=",nrow(data_reg),sep="")
147
nb_point2<-paste("ns_stations=",length(unique(data_reg$station)),sep="")
148
#Add the number of data points on the plot
149
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
150
title(paste("Stations available for year ",range_years[1]," after screening",sep=""))
151
#Add the number of data points on the plot
152
dev.off()
153

    
154
### Figue 4: stations in the study area/processing tile: after screening and covar extraction
155
png(paste("Stations_for_range_",range_years[1],"_",range_years[2],"_after_screening",out_prefix,".png", sep=""))
156
plot(rast_ref)
157
plot(data_RST_SDF,add=TRUE)
158
nb_point<-paste("nrow=",nrow(data_RST_SDF),sep="")
159
nb_point2<-paste("ns_stations=",length(unique(data_RST_SDF$station)),sep="")
160
#Add the number of data points on the plot
161
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
162
title(paste("Stations year ",range_years[1]," after screening and covar extraction",sep=""))
163
#Add the number of data points on the plot
164
dev.off()
165

    
166
### Figue 5: stations in the study area/processing tile: monthly query for specified range of years before screening    
167
png(paste("Stations_monthly_for_range_",range_years[1],"_",range_years[2],"_before_screening",out_prefix,".png", sep=""))
168
plot(rast_ref)
169
plot(data_m,add=TRUE)
170
nb_point<-paste("nrow=",nrow(data_m),sep="")
171
nb_point2<-paste("ns_stations=",length(unique(data_m$station)),sep="")
172
#Add the number of data points on the plot
173
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
174
title(paste("Stations ",range_years[1],"-",range_years[2]," after screening and covar extraction",sep=""))
175
#Add the number of data points on the plot
176
dev.off()
177
             
178
### Figue 6: histogram            
179
    
180
#dst$nobs_station
181
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))
182
dst$nobs_station<-dst$nbs_stt
183
hist(dst$nobs_station)
184
dev.off()
185

    
186
### Figure 7: data month
187

    
188
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))
189
par(mfrow=c(3,4))
190
for (j in 1:12){
191

    
192
  data_month<-list_data_month[[j]]
193
  plot(rast_ref)
194
  plot(data_month,add=TRUE)
195
  nb_point<-paste("nrow=",nrow(data_month),sep="")
196
  nb_point2<-paste("ns_stations=",length(unique(data_month$station)),sep="")
197
  nb_point3<-paste("ns_non_na_stations=",length(unique(data_month$y_var)),sep="")
198
  #Add the number of data points on the plot
199
  legend("topleft",legend=c(nb_point,nb_point2,nb_point3),bty="n",cex=0.9)
200
  title(paste("Stations ",range_years_clim[1],"-",range_years_clim[2]," used for modeling on ",sep=""))
201
  #data_NA<-subset(data=data_month,is.na(data_month$y_var))
202
  data_NA<-data_month[is.na(data_month$y_var),]
203
  plot(data_NA,add=TRUE,pch=2,col=c("red"))
204
  legend("topright",legend=c("NA value"),pch=2,col=c("red"),bty="n",cex=0.9)
205
}
206
dev.off()
207

    
208
### Figue 8: LST and TMax
209

    
210
names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
211
LST_s<-subset(s_raster,names_tmp)
212
names(LST_s)<-names_tmp
213
names_tmp<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
214
             "nobs_09","nobs_10","nobs_11","nobs_12")
215
LST_nobs<-subset(s_raster,names_tmp)
216
    
217
list_statistic_values_LST_s<-mclapply(c("min","max","mean","sd"),FUN=cellStats,x=LST_s,mc.preschedule=FALSE,mc.cores = 4)
218
list_statistic_values_LST_nobs<-mclapply(c("min","max","mean","sd"),FUN=cellStats,x=LST_nobs,mc.preschedule=FALSE,mc.cores = 4)
219

    
220
statistics_LST_s<-do.call(cbind,list_statistic_values_LST_s)
221
statistics_LST_nobs<-do.call(cbind,list_statistic_values_LST_nobs)
222
LST_stat_data<-as.data.frame(statistics_LST_s)
223
names(LST_stat_data)<-c("min","max","mean","sd")
224
statistics_LSTnobs_s<-cbind(min_values,max_values,mean_values,sd_values) #This shows that some values are extremes...especially in October
225
LSTnobs_stat_data<-as.data.frame(statistics_LSTnobs_s)
226
names(LSTnobs_stat_data)<-c("min","max","mean","sd")
227

    
228
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))    
229
plot(1:12,LST_stat_data$mean,type="b",ylim=c(-15,70),col="black",xlab="month",ylab="tmax (degree C)")
230
lines(1:12,LST_stat_data$min,type="b",col="blue")
231
lines(1:12,LST_stat_data$max,type="b",col="red")
232
text(1:12,LST_stat_data$mean,rownames(LST_stat_data),cex=1,pos=2)
233
legend("topleft",legend=c("min","mean","max"), cex=0.8, col=c("blue","black","red"),
234
       lty=1,lwd=1.4,bty="n")
235
title(paste("LST statistics for Study area",sep=" "))
236
# savePlot("lst_statistics_OR.png",type="png")
237
dev.off()
238
    
239
#### Fig9...#####
240

    
241
# #Plot number of valid observations for LST
242
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))    
243
plot(1:12,LSTnobs_stat_data$mean,type="b",ylim=c(0,280),col="black",xlab="month",ylab="tmax (degree C)")
244
lines(1:12,LSTnobs_stat_data$min,type="b",col="blue")
245
lines(1:12,LSTnobs_stat_data$max,type="b",col="red")
246
text(1:12,LSTnobs_stat_data$mean,rownames(LSTnobs_stat_data),cex=1,pos=2)
247
legend("topleft",legend=c("min","mean","max"), cex=1.5, col=c("blue","black","red"),lty=1)
248
title(paste("LST number of valid observations for Oregon", "2010",sep=" "))
249
# savePlot("lst_nobs_OR.png",type="png")
250
dev.off()
251
    
252
#### Fig 10...#####
253

    
254
#Use data_month!!!
255

    
256
d_month<-aggregate(TMax~month, data=dst, mean)  #Calculate monthly mean for every station in OR
257
d_month<-aggregate(TMax~month, data=dst, length)  #Calculate monthly mean for every station in OR
258

    
259
plot(d_month,type="l")
260
# plot(data_month$TMax,add=TRUE)
261

    
262
#data_month #-> plot for everymonth
263

    
264
#number of stations
265
## Summarize information for the day: write out textfile...
266

    
267
#Number of station per month
268
#Number of station per day (training, testing,NA)
269
#metrics_v,metrics_s
270
#
271

    
272
# ################
273
# #PART 2: Region Covariate analyses ###
274
# ################
275
# 
276
# # This should be in a separate script to analyze covariates from region.
277
# 
278
# #MAP1:Study area with LC mask and tiles/polygon outline
279
             
280
#LC_mask<-subset(s_raster,"LC12")
281
#LC_mask[LC_mask==100]<-NA
282
#LC_mask <- LC_mask < 100
283
#LC_mask_rec<-LC_mask
284
#LC_mask_rec[is.na(LC_mask_rec)]<-0
285
             
286
#Add proportion covered by study area+ total of image pixels
287
#tmp_tb<-freq(LC_mask_rec)
288
#tmp_tb[2,2]/sum(tmp_tb[,2])*100
289
#png(paste("Study_area_",
290
#         out_prefix,".png", sep=""))
291
#plot(LC_mask_rec,legend=FALSE,col=c("black","red"))
292
#legend("topright",legend=c("Outside","Inside"),title="Study area",
293
#          pt.cex=0.9,fill=c("black","red"),bty="n")
294
#           title("Study area")
295
#             dev.off()
296
             
297
# 
298
# #MAP 2: plotting land cover in the study region:
299
# 
300
# l1<-"LC1,Evergreen/deciduous needleleaf trees"
301
# l2<-"LC2,Evergreen broadleaf trees"
302
# l3<-"LC3,Deciduous broadleaf trees"
303
# l4<-"LC4,Mixed/other trees"
304
# l5<-"LC5,Shrubs"
305
# l6<-"LC6,Herbaceous vegetation"
306
# l7<-"LC7,Cultivated and managed vegetation"
307
# l8<-"LC8,Regularly flooded shrub/herbaceous vegetation"
308
# l9<-"LC9,Urban/built-up"
309
# l10<-"LC10,Snow/ice"
310
# l11<-"LC11,Barren lands/sparse vegetation"
311
# l12<-"LC12,Open water"
312
# lc_names_str<-c(l1,l2,l3,l4,l5,l6,l7,l8,l9,l10,l11,l12)
313
# 
314
# names(lc_reg_s)<-lc_names_str
315
# 
316
# png(paste("LST_TMax_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i], out_prefix,".png", sep=""))
317
# plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax",main=paste("LST vs TMax for",datelabel,sep=" "))
318
# abline(0,1)
319
# nb_point<-paste("n=",length(modst$TMax),sep="")
320
# mean_bias<-paste("LST bigrasas= ",format(mean(modst$LSTD_bias,na.rm=TRUE),digits=3),sep="")
321
# #Add the number of data points on the plot
322
# legend("topleft",legend=c(mean_bias,nb_point),bty="n")
323
# dev.off()
324
# 
325
#
326
### MAP3: Majority land cover for every pixels in the study region
327

    
328
#Add barplot of majority map...
329

    
330
###Map 4: Elevation and LST in January
331
# tmp_s<-stack(LST,elev_1)
332
# png(paste("LST_elev_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i], out_prefix,".png", sep=""))
333
# plot(tmp_s)
334
#
335
###Histogram elevation?
336
#
337
# #Map 5: mean TMIN/TMAX: LST climatology per month
338
#         
339
# names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
340
# LST_s<-subset(s_raster,names_tmp)
341
# names_tmp<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
342
#              "nobs_09","nobs_10","nobs_11","nobs_12")
343
# LST_nobs<-subset(s_raster,names_tmp)
344
# 
345
# #Map 6: number of obs- LST climatology per month
346
#         
347
# names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
348
# LST_s<-subset(s_raster,names_tmp)
349
# names_tmp<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
350
#              "nobs_09","nobs_10","nobs_11","nobs_12")
351
# LST_nobs<-subset(s_raster,names_tmp)
352
# 
353

    
354
# LST_nobs<-mask(LST_nobs,LC_mask,filename="test2.tif")
355
# LST_s<-mask(LST_s,LC_mask,filename="test3.tif")
356
# c("Jan","Feb")
357
# plot(LST_s)
358
# plot(LST_nobs)
359
# 
360
# #Map 7: LST-TMAX/TMIN fit for all 12 months...
361
#
362
#
363
#
364
#### End of script ####
(39-39/41)