1
|
################## Covariates and stations analyses #######################################
|
2
|
############################ Covariate production for a given tile/region ##########################################
|
3
|
#This script examines inputs and outputs from the interpolation step.
|
4
|
#Part 1: Script produces summary information about stations used in the interpolation
|
5
|
#Part 2: Script produces plots of input covariates for study region
|
6
|
#AUTHOR: Benoit Parmentier
|
7
|
#DATE: 04/01/2013
|
8
|
|
9
|
#PROJECT: NCEAS INPLANT: Environment and Organisms --TASK#???--
|
10
|
|
11
|
##Comments and TODO:
|
12
|
#Add interpolation analyses of number of stations used for every day of the year...
|
13
|
|
14
|
## Input arguments
|
15
|
#The output is a list of four shapefile names produced by the function:
|
16
|
#1) loc_stations: locations of stations as shapefile in EPSG 4326
|
17
|
#2) loc_stations_ghcn: ghcn daily data for the year range of interpolation (locally projected)
|
18
|
#3) daily_query_ghcn_data: ghcn daily data from daily query before application of quality flag
|
19
|
#4) daily_covar_ghcn_data: ghcn daily data with covariates for the year range of interpolation (locally projected)
|
20
|
#5) monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
|
21
|
#6) monthly_covar_ghcn_data: ghcn monthly averaged data with covariates for the year range of interpolation (locally projected)
|
22
|
#7) var
|
23
|
#8) range_years
|
24
|
#9) range_years_clim
|
25
|
#8) infile_covariate: s_raster brick file
|
26
|
#9) covar_names: variable mames
|
27
|
#10) raster_prediction
|
28
|
#11) world_countries
|
29
|
#12) region_outline
|
30
|
#13) out_prefix
|
31
|
|
32
|
## Functions used in the script
|
33
|
|
34
|
load_obj <- function(f)
|
35
|
{
|
36
|
env <- new.env()
|
37
|
nm <- load(f, env)[1]
|
38
|
env[[nm]]
|
39
|
}
|
40
|
|
41
|
extract_number_obs<-function(list_param){
|
42
|
#Function to extract the number of observations used in modeling
|
43
|
|
44
|
method_mod_obj<-list_param$method_mod_obj
|
45
|
|
46
|
#Change to results_mod_obj[[i]]$data_s to make it less specific!!!!
|
47
|
|
48
|
#number of observations
|
49
|
list_nrow_data_s<-lapply(1:length(method_obj),function(k) nrow(method_mod_obj[[k]]$data_s))
|
50
|
list_nrow_data_v<-lapply(1:length(method_obj),function(k) nrow(method_mod_obj[[k]]$data_v))
|
51
|
#lapply(1:length(clim_obj),function(k) nrow(method_mod_obj[[k]]$data_month))
|
52
|
list_nrow_data_month<-lapply(1:length(gamclim_fus_mod),function(k) nrow(gamclim_fus_mod[[k]]$data_month))
|
53
|
|
54
|
#number of valid observations
|
55
|
list_nrow_valid_data_s<-lapply(1:length(method_obj),function(k) length(method_mod_obj[[k]]$data_s$[[y_var_name]])
|
56
|
list_nrow_data_v<-lapply(1:length(method_obj),function(k) length(method_mod_obj[[k]]$data_v$[[y_var_name]])
|
57
|
list_nrow_valid_data_month<-lapply(1:length(gamclim_fus_mod),function(k) length(gamclim_fus_mod[[k]]$data_month$y_var))
|
58
|
|
59
|
c1<-do.call(rbind,list_nrow_data_s)
|
60
|
c2<-do.call(rbind,list_nrow_data_v)
|
61
|
c3<-do.call(rbind,list_nrow_data_month)
|
62
|
|
63
|
c1v<-do.call(rbind,list_valid_nrow_data_s)
|
64
|
c2v<-do.call(rbind,list_valid_nrow_data_v)
|
65
|
c3v<-do.call(rbind,list_valid_nrow_data_month)
|
66
|
|
67
|
n_data_s<-cbdind(c1,c1v)
|
68
|
n_data_v<-cbdind(c2,c2v)
|
69
|
n_data_month<-cbdind(c3,c3vv)
|
70
|
list_observations<-list(n_data_s,n_data_v,n_data_month)
|
71
|
return(list_observations)
|
72
|
}
|
73
|
|
74
|
###########################################################################
|
75
|
########################## BEGIN SCRIPT/FUNCTION ##########################
|
76
|
|
77
|
|
78
|
########################################
|
79
|
#### STEP 1: read in data
|
80
|
|
81
|
#Stations in the processing region/study area
|
82
|
stat_reg <- readOGR(dsn=dirname(list_outfiles$loc_stations),sub(".shp","",basename(list_outfiles$loc_stations)))
|
83
|
#Stations available before screening the data query: ghcn daily data for the year range of interpolation
|
84
|
data_d <- readOGR(dsn=dirname(list_outfiles$loc_stations_ghcn),sub(".shp","",basename(list_outfiles$loc_stations_ghcn)))
|
85
|
#Stations available after screening the data query: ghcn daily data for the year range of interpolation
|
86
|
data_reg <- readOGR(dsn=dirname(list_outfiles$daily_query_ghcn_data),sub(".shp","",basename(list_outfiles$daily_query_ghcn_data)))
|
87
|
#Covariates data available after screening:ghcn daily data with covariates for the year range of interpolation
|
88
|
data_RST_SDF <-readOGR(dsn=dirname(list_outfiles$daily_covar_ghcn_data),sub(".shp","",basename(list_outfiles$daily_covar_ghcn_data)))
|
89
|
#Stations before screening monthly_query_ghcn_data: ghcn daily data from monthly query before application of quality flag
|
90
|
data_m <- readOGR(dsn=dirname(list_outfiles$monthly_query_ghcn_data),sub(".shp","",basename(list_outfiles$monthly_query_ghcn_data)))
|
91
|
#Stations after screening monthly_query_ghcn_data, extraction of covariates and monthly averages
|
92
|
dst<-readOGR(dsn=dirname(list_outfiles$monthly_covar_ghcn_data),sub(".shp","",basename(list_outfiles$monthly_covar_ghcn_data)))
|
93
|
|
94
|
### Load data used in fitting the model at monthly scale...
|
95
|
data_month<-gamclim_fus_mod[[1]]$data_month
|
96
|
list_data_month<-lapply(1:length(gamclim_fus_mod),function(k) gamclim_fus_mod[[k]]$data_month)
|
97
|
|
98
|
#Loading covariates raster images
|
99
|
s_raster<-brick(infile_covariates)
|
100
|
names(s_raster)<-covar_names
|
101
|
rast_ref<-subset(s_raster,"mm_01") # mean month for January
|
102
|
|
103
|
names(raster_prediction_obj)
|
104
|
var<-list_param$var
|
105
|
|
106
|
raster_prediction_obj<-load_obj(list_param$raster_prediction_obj)
|
107
|
#method_mod_obj<-raster_prediction_obj$method_mod_obj
|
108
|
method_mod_obj<-raster_prediction_obj$gam_fus_mod #change later for any model type
|
109
|
#validation_obj<-raster_prediction_obj$validation_obj
|
110
|
validation_obj<-raster_prediction_obj$gam_fus_validation_mod #change later for any model type
|
111
|
#clim_obj<-raster_prediction_obj$clim_obj
|
112
|
clim_obj<-raster_prediction_obj$gamclim_fus_mod #change later for any model type
|
113
|
|
114
|
names(raster_prediction_obj$method_obj[[1]])
|
115
|
data_s<-validation_obj[[1]]$data_s
|
116
|
summary_data_v<-validation_obj[[1]]$summary_data_v
|
117
|
names(validation_obj[[1]])
|
118
|
###################################################################
|
119
|
######## PART I: Script produces summary information about stations used in the interpolation ########
|
120
|
|
121
|
### Figue 1: stations in the study area/processing tile
|
122
|
png(paste("Total_number_of_stations_in_study_area_",out_prefix,".png", sep=""))
|
123
|
plot(rast_ref)
|
124
|
plot(stat_reg,add=TRUE)
|
125
|
nb_point1<-paste("n_station=",length(stat_reg$STAT_ID))
|
126
|
#Add the number of data points on the plot
|
127
|
title("Stations located in the study area")
|
128
|
legend("topleft",legend=c(nb_point1),bty="n",cex=1.2)
|
129
|
dev.off()
|
130
|
|
131
|
### Figue 2: stations in the study area/processing tile: from query without flag screening
|
132
|
png(paste("Stations_for_range_",range_years[1],"_",range_years[2],"_no_screening",out_prefix,".png", sep=""))
|
133
|
plot(rast_ref)
|
134
|
plot(data_d, add=TRUE)
|
135
|
nb_point<-paste("nrow=",nrow(data_d),sep="")
|
136
|
nb_point2<-paste("ns_stations=",length(unique(data_d$station)),sep="")
|
137
|
#Add the number of data points on the plot
|
138
|
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
|
139
|
title(paste("Stations available for year ",range_years[1],sep=""))
|
140
|
dev.off()
|
141
|
|
142
|
### Figue 3: stations in the study area/processing tile: after screening
|
143
|
png(paste("Stations_for_range_",range_years[1],"_",range_years[2],"_after_screening",out_prefix,".png", sep=""))
|
144
|
plot(rast_ref)
|
145
|
plot(data_reg,add=TRUE)
|
146
|
nb_point<-paste("nrow=",nrow(data_reg),sep="")
|
147
|
nb_point2<-paste("ns_stations=",length(unique(data_reg$station)),sep="")
|
148
|
#Add the number of data points on the plot
|
149
|
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
|
150
|
title(paste("Stations available for year ",range_years[1]," after screening",sep=""))
|
151
|
#Add the number of data points on the plot
|
152
|
dev.off()
|
153
|
|
154
|
### Figue 4: stations in the study area/processing tile: after screening and covar extraction
|
155
|
png(paste("Stations_for_range_",range_years[1],"_",range_years[2],"_after_screening",out_prefix,".png", sep=""))
|
156
|
plot(rast_ref)
|
157
|
plot(data_RST_SDF,add=TRUE)
|
158
|
nb_point<-paste("nrow=",nrow(data_RST_SDF),sep="")
|
159
|
nb_point2<-paste("ns_stations=",length(unique(data_RST_SDF$station)),sep="")
|
160
|
#Add the number of data points on the plot
|
161
|
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
|
162
|
title(paste("Stations year ",range_years[1]," after screening and covar extraction",sep=""))
|
163
|
#Add the number of data points on the plot
|
164
|
dev.off()
|
165
|
|
166
|
### Figue 5: stations in the study area/processing tile: monthly query for specified range of years before screening
|
167
|
png(paste("Stations_monthly_for_range_",range_years[1],"_",range_years[2],"_before_screening",out_prefix,".png", sep=""))
|
168
|
plot(rast_ref)
|
169
|
plot(data_m,add=TRUE)
|
170
|
nb_point<-paste("nrow=",nrow(data_m),sep="")
|
171
|
nb_point2<-paste("ns_stations=",length(unique(data_m$station)),sep="")
|
172
|
#Add the number of data points on the plot
|
173
|
legend("topleft",legend=c(nb_point,nb_point2),bty="n",cex=1)
|
174
|
title(paste("Stations ",range_years[1],"-",range_years[2]," after screening and covar extraction",sep=""))
|
175
|
#Add the number of data points on the plot
|
176
|
dev.off()
|
177
|
|
178
|
### Figue 6: histogram
|
179
|
|
180
|
#dst$nobs_station
|
181
|
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))
|
182
|
dst$nobs_station<-dst$nbs_stt
|
183
|
hist(dst$nobs_station)
|
184
|
dev.off()
|
185
|
|
186
|
### Figure 7: data month
|
187
|
|
188
|
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))
|
189
|
par(mfrow=c(3,4))
|
190
|
for (j in 1:12){
|
191
|
|
192
|
data_month<-list_data_month[[j]]
|
193
|
plot(rast_ref)
|
194
|
plot(data_month,add=TRUE)
|
195
|
nb_point<-paste("nrow=",nrow(data_month),sep="")
|
196
|
nb_point2<-paste("ns_stations=",length(unique(data_month$station)),sep="")
|
197
|
nb_point3<-paste("ns_non_na_stations=",length(unique(data_month$y_var)),sep="")
|
198
|
#Add the number of data points on the plot
|
199
|
legend("topleft",legend=c(nb_point,nb_point2,nb_point3),bty="n",cex=0.9)
|
200
|
title(paste("Stations ",range_years_clim[1],"-",range_years_clim[2]," used for modeling on ",sep=""))
|
201
|
#data_NA<-subset(data=data_month,is.na(data_month$y_var))
|
202
|
data_NA<-data_month[is.na(data_month$y_var),]
|
203
|
plot(data_NA,add=TRUE,pch=2,col=c("red"))
|
204
|
legend("topright",legend=c("NA value"),pch=2,col=c("red"),bty="n",cex=0.9)
|
205
|
}
|
206
|
dev.off()
|
207
|
|
208
|
### Figue 8: LST and TMax
|
209
|
|
210
|
names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
|
211
|
LST_s<-subset(s_raster,names_tmp)
|
212
|
names(LST_s)<-names_tmp
|
213
|
names_tmp<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
|
214
|
"nobs_09","nobs_10","nobs_11","nobs_12")
|
215
|
LST_nobs<-subset(s_raster,names_tmp)
|
216
|
|
217
|
list_statistic_values_LST_s<-mclapply(c("min","max","mean","sd"),FUN=cellStats,x=LST_s,mc.preschedule=FALSE,mc.cores = 4)
|
218
|
list_statistic_values_LST_nobs<-mclapply(c("min","max","mean","sd"),FUN=cellStats,x=LST_nobs,mc.preschedule=FALSE,mc.cores = 4)
|
219
|
|
220
|
statistics_LST_s<-do.call(cbind,list_statistic_values_LST_s)
|
221
|
statistics_LST_nobs<-do.call(cbind,list_statistic_values_LST_nobs)
|
222
|
LST_stat_data<-as.data.frame(statistics_LST_s)
|
223
|
names(LST_stat_data)<-c("min","max","mean","sd")
|
224
|
statistics_LSTnobs_s<-cbind(min_values,max_values,mean_values,sd_values) #This shows that some values are extremes...especially in October
|
225
|
LSTnobs_stat_data<-as.data.frame(statistics_LSTnobs_s)
|
226
|
names(LSTnobs_stat_data)<-c("min","max","mean","sd")
|
227
|
|
228
|
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))
|
229
|
plot(1:12,LST_stat_data$mean,type="b",ylim=c(-15,70),col="black",xlab="month",ylab="tmax (degree C)")
|
230
|
lines(1:12,LST_stat_data$min,type="b",col="blue")
|
231
|
lines(1:12,LST_stat_data$max,type="b",col="red")
|
232
|
text(1:12,LST_stat_data$mean,rownames(LST_stat_data),cex=1,pos=2)
|
233
|
legend("topleft",legend=c("min","mean","max"), cex=0.8, col=c("blue","black","red"),
|
234
|
lty=1,lwd=1.4,bty="n")
|
235
|
title(paste("LST statistics for Study area",sep=" "))
|
236
|
# savePlot("lst_statistics_OR.png",type="png")
|
237
|
dev.off()
|
238
|
|
239
|
#### Fig9...#####
|
240
|
|
241
|
# #Plot number of valid observations for LST
|
242
|
png(paste("Stations_data_month_modeled_for_range_",range_years_clim[1],"_",range_years_clim[2],out_prefix,".png", sep=""))
|
243
|
plot(1:12,LSTnobs_stat_data$mean,type="b",ylim=c(0,280),col="black",xlab="month",ylab="tmax (degree C)")
|
244
|
lines(1:12,LSTnobs_stat_data$min,type="b",col="blue")
|
245
|
lines(1:12,LSTnobs_stat_data$max,type="b",col="red")
|
246
|
text(1:12,LSTnobs_stat_data$mean,rownames(LSTnobs_stat_data),cex=1,pos=2)
|
247
|
legend("topleft",legend=c("min","mean","max"), cex=1.5, col=c("blue","black","red"),lty=1)
|
248
|
title(paste("LST number of valid observations for Oregon", "2010",sep=" "))
|
249
|
# savePlot("lst_nobs_OR.png",type="png")
|
250
|
dev.off()
|
251
|
|
252
|
#### Fig 10...#####
|
253
|
|
254
|
#Use data_month!!!
|
255
|
|
256
|
d_month<-aggregate(TMax~month, data=dst, mean) #Calculate monthly mean for every station in OR
|
257
|
d_month<-aggregate(TMax~month, data=dst, length) #Calculate monthly mean for every station in OR
|
258
|
|
259
|
plot(d_month,type="l")
|
260
|
# plot(data_month$TMax,add=TRUE)
|
261
|
|
262
|
#data_month #-> plot for everymonth
|
263
|
|
264
|
#number of stations
|
265
|
## Summarize information for the day: write out textfile...
|
266
|
|
267
|
#Number of station per month
|
268
|
#Number of station per day (training, testing,NA)
|
269
|
#metrics_v,metrics_s
|
270
|
#
|
271
|
|
272
|
# ################
|
273
|
# #PART 2: Region Covariate analyses ###
|
274
|
# ################
|
275
|
#
|
276
|
# # This should be in a separate script to analyze covariates from region.
|
277
|
#
|
278
|
# #MAP1:Study area with LC mask and tiles/polygon outline
|
279
|
|
280
|
#LC_mask<-subset(s_raster,"LC12")
|
281
|
#LC_mask[LC_mask==100]<-NA
|
282
|
#LC_mask <- LC_mask < 100
|
283
|
#LC_mask_rec<-LC_mask
|
284
|
#LC_mask_rec[is.na(LC_mask_rec)]<-0
|
285
|
|
286
|
#Add proportion covered by study area+ total of image pixels
|
287
|
#tmp_tb<-freq(LC_mask_rec)
|
288
|
#tmp_tb[2,2]/sum(tmp_tb[,2])*100
|
289
|
#png(paste("Study_area_",
|
290
|
# out_prefix,".png", sep=""))
|
291
|
#plot(LC_mask_rec,legend=FALSE,col=c("black","red"))
|
292
|
#legend("topright",legend=c("Outside","Inside"),title="Study area",
|
293
|
# pt.cex=0.9,fill=c("black","red"),bty="n")
|
294
|
# title("Study area")
|
295
|
# dev.off()
|
296
|
|
297
|
#
|
298
|
# #MAP 2: plotting land cover in the study region:
|
299
|
#
|
300
|
# l1<-"LC1,Evergreen/deciduous needleleaf trees"
|
301
|
# l2<-"LC2,Evergreen broadleaf trees"
|
302
|
# l3<-"LC3,Deciduous broadleaf trees"
|
303
|
# l4<-"LC4,Mixed/other trees"
|
304
|
# l5<-"LC5,Shrubs"
|
305
|
# l6<-"LC6,Herbaceous vegetation"
|
306
|
# l7<-"LC7,Cultivated and managed vegetation"
|
307
|
# l8<-"LC8,Regularly flooded shrub/herbaceous vegetation"
|
308
|
# l9<-"LC9,Urban/built-up"
|
309
|
# l10<-"LC10,Snow/ice"
|
310
|
# l11<-"LC11,Barren lands/sparse vegetation"
|
311
|
# l12<-"LC12,Open water"
|
312
|
# lc_names_str<-c(l1,l2,l3,l4,l5,l6,l7,l8,l9,l10,l11,l12)
|
313
|
#
|
314
|
# names(lc_reg_s)<-lc_names_str
|
315
|
#
|
316
|
# png(paste("LST_TMax_scatterplot_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i], out_prefix,".png", sep=""))
|
317
|
# plot(modst$TMax,sta_tmax_from_lst,xlab="Station mo Tmax",ylab="LST mo Tmax",main=paste("LST vs TMax for",datelabel,sep=" "))
|
318
|
# abline(0,1)
|
319
|
# nb_point<-paste("n=",length(modst$TMax),sep="")
|
320
|
# mean_bias<-paste("LST bigrasas= ",format(mean(modst$LSTD_bias,na.rm=TRUE),digits=3),sep="")
|
321
|
# #Add the number of data points on the plot
|
322
|
# legend("topleft",legend=c(mean_bias,nb_point),bty="n")
|
323
|
# dev.off()
|
324
|
#
|
325
|
#
|
326
|
### MAP3: Majority land cover for every pixels in the study region
|
327
|
|
328
|
#Add barplot of majority map...
|
329
|
|
330
|
###Map 4: Elevation and LST in January
|
331
|
# tmp_s<-stack(LST,elev_1)
|
332
|
# png(paste("LST_elev_",sampling_dat$date[i],"_",sampling_dat$prop[i],"_",sampling_dat$run_samp[i], out_prefix,".png", sep=""))
|
333
|
# plot(tmp_s)
|
334
|
#
|
335
|
###Histogram elevation?
|
336
|
#
|
337
|
# #Map 5: mean TMIN/TMAX: LST climatology per month
|
338
|
#
|
339
|
# names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
|
340
|
# LST_s<-subset(s_raster,names_tmp)
|
341
|
# names_tmp<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
|
342
|
# "nobs_09","nobs_10","nobs_11","nobs_12")
|
343
|
# LST_nobs<-subset(s_raster,names_tmp)
|
344
|
#
|
345
|
# #Map 6: number of obs- LST climatology per month
|
346
|
#
|
347
|
# names_tmp<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")
|
348
|
# LST_s<-subset(s_raster,names_tmp)
|
349
|
# names_tmp<-c("nobs_01","nobs_02","nobs_03","nobs_04","nobs_05","nobs_06","nobs_07","nobs_08",
|
350
|
# "nobs_09","nobs_10","nobs_11","nobs_12")
|
351
|
# LST_nobs<-subset(s_raster,names_tmp)
|
352
|
#
|
353
|
|
354
|
# LST_nobs<-mask(LST_nobs,LC_mask,filename="test2.tif")
|
355
|
# LST_s<-mask(LST_s,LC_mask,filename="test3.tif")
|
356
|
# c("Jan","Feb")
|
357
|
# plot(LST_s)
|
358
|
# plot(LST_nobs)
|
359
|
#
|
360
|
# #Map 7: LST-TMAX/TMIN fit for all 12 months...
|
361
|
#
|
362
|
#
|
363
|
#
|
364
|
#### End of script ####
|