Revision 63d8201d
Added by Benoit Parmentier over 12 years ago
climate/research/oregon/interpolation/GAM_LST_reg.R | ||
---|---|---|
3 | 3 |
#and perform two types of regression: multiple linear model and general additive model (GAM). Note that this program: |
4 | 4 |
#1)assumes that the csv file is in the current working |
5 | 5 |
#2)extract relevant variables from raster images before performing the regressions. |
6 |
#This scripts predicts tmas using GAM and LST derived from MOD11A1.
|
|
6 |
#This scripts predicts tmas xsing GAM and LST derived from MOD11A1.
|
|
7 | 7 |
#Interactions terms are also included and assessed using the RMSE from validation dataset. |
8 | 8 |
#There are 10 dates used for the GAM interpolation. The dates must be provided as a textfile. |
9 | 9 |
#Script created by Benoit Parmentier on April 4, 2012. |
10 | 10 |
|
11 |
###Loading r library and packages # loading the raster package
|
|
11 |
###Loading r library and packages # loading the raster package |
|
12 | 12 |
library(gtools) # loading ... |
13 | 13 |
library(mgcv) |
14 | 14 |
library(sp) |
... | ... | |
17 | 17 |
|
18 | 18 |
###Parameters and arguments |
19 | 19 |
|
20 |
infile1<-"ghcn_or_tmax_b_04032012_OR83M.shp" |
|
21 |
#path<-"C:/Data/Benoit/NCEAS/window_Oregon_data |
|
22 |
path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations" |
|
23 |
setwd(path) |
|
24 |
#infile2<-"dates_interpolation_03012012.txt" # list of 10 dates for the regression |
|
25 |
infile2<-"dates_interpolation_03052012.txt" |
|
20 |
infile1<-"ghcn_or_tmax_b_04142012_OR83M.shp" |
|
21 |
#path<-"/data/computer/parmentier/Data/IPLANT_project/data_Oregon_stations" |
|
22 |
path<-"H:/Data/IPLANT_project/data_Oregon_stations" |
|
23 |
setwd(path) |
|
24 |
infile2<-"dates_interpolation_03052012.txt" #List of 10 dates for the regression |
|
26 | 25 |
prop<-0.3 #Proportion of testing retained for validation |
27 |
out_prefix<-"_04032012_LST_r4"
|
|
26 |
out_prefix<-"_04142012_LST_r4"
|
|
28 | 27 |
infile3<-"LST_dates_var_names.txt" |
29 | 28 |
infile4<-"models_interpolation_04032012b.txt" |
30 | 29 |
|
31 |
|
|
32 | 30 |
#######START OF THE SCRIPT ############# |
33 | 31 |
|
34 | 32 |
###Reading the station data and setting up for models' comparison |
35 |
ghcn<-readOGR(".", "ghcn_or_tmax_b_04032012_OR83M") |
|
36 |
|
|
33 |
filename<-sub(".shp","",infile1) #Removing the extension from file. |
|
34 |
ghcn<-readOGR(".", filename) #reading shapefile |
|
35 |
|
|
37 | 36 |
ghcn = transform(ghcn,Northness = cos(ASPECT)) #Adding a variable to the dataframe |
38 | 37 |
ghcn = transform(ghcn,Eastness = sin(ASPECT)) #adding variable to the dataframe. |
39 |
|
|
40 | 38 |
ghcn = transform(ghcn,Northness_w = sin(slope)*cos(ASPECT)) #Adding a variable to the dataframe |
41 | 39 |
ghcn = transform(ghcn,Eastness_w = sin(slope)*sin(ASPECT)) #adding variable to the dataframe. |
40 |
|
|
42 | 41 |
set.seed(100) |
43 | 42 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
44 | 43 |
LST_dates <-readLines(paste(path,"/",infile3, sep="")) |
... | ... | |
50 | 49 |
results_GCV<- matrix(1,length(dates),length(models)+2) |
51 | 50 |
results_DEV<- matrix(1,length(dates),length(models)+2) |
52 | 51 |
results_RMSE<- matrix(1,length(dates),length(models)+2) |
53 |
|
|
52 |
cor_LST_LC1<-matrix(1,10,1) #correlation LST-LC1 |
|
53 |
cor_LST_LC3<-matrix(1,10,1) #correlation LST-LC3 |
|
54 |
cor_LST_tmax<-matrix(1,10,1) #correlation LST-tmax |
|
54 | 55 |
#Screening for bad values |
55 | 56 |
|
56 | 57 |
ghcn_all<-ghcn |
... | ... | |
58 | 59 |
ghcn_test2<-subset(ghcn_test,ghcn_test$ELEV_SRTM>0) |
59 | 60 |
ghcn<-ghcn_test2 |
60 | 61 |
|
62 |
month_var<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09", "mm_10", "mm_11", "mm_12") |
|
61 | 63 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date==d)) #this creates a list of 10 subsets data |
62 | 64 |
#note that compare to the previous version date_ column was changed to date |
63 | 65 |
|
... | ... | |
65 | 67 |
#Change this into a nested loop, looping through the number of models |
66 | 68 |
|
67 | 69 |
for(i in 1:length(dates)){ # start of the for loop #1 |
68 |
|
|
70 |
date<-strptime(dates[i], "%Y%m%d") |
|
71 |
month<-strftime(date, "%m") |
|
72 |
LST_month<-paste("mm_",month,sep="") |
|
69 | 73 |
###Regression part 1: Creating a validation dataset by creating training and testing datasets |
70 | 74 |
|
71 |
mod <-ghcn.subsets[[i]][,match(LST_dates[i], names(ghcn.subsets[[i]]))]
|
|
75 |
mod <-ghcn.subsets[[i]][,match(LST_month, names(ghcn.subsets[[i]]))]
|
|
72 | 76 |
ghcn.subsets[[i]] = transform(ghcn.subsets[[i]],LST = mod) |
73 | 77 |
#Screening LST values |
74 | 78 |
#ghcn.subsets[[i]]<-subset(ghcn.subsets[[i]],ghcn.subsets[[i]]$LST> 258 & ghcn.subsets[[i]]$LST<313) |
... | ... | |
191 | 195 |
write.table(results_table_AIC, file= paste(path,"/","results_GAM_Assessment",out_prefix,".txt",sep=""),sep=",", append=TRUE) |
192 | 196 |
|
193 | 197 |
#Can also use file connection |
194 |
f<-paste(path,"/","results_GAM_Assessment",out_prefix,"2.txt",sep="")
|
|
198 |
f<-file(paste(path,"/","results_GAM_Assessment",out_prefix,"2.txt",sep=""),"w")
|
|
195 | 199 |
write(results_table_RMSE, file=f) |
196 | 200 |
close(f) |
197 | 201 |
# End of script########## |
198 | 202 |
|
199 | 203 |
#Selecting dates and files based on names |
200 |
#date<-strptime(dates[1], "%Y%m%d") |
|
201 |
#class(date) #This shows the type of the object |
|
202 |
#strftime(t, "%m") #This gives as output the month of the date... |
|
204 |
#cor_LST_LC<-matrix(1,10,1) |
|
205 |
# for(i in 1:length(dates)){ |
|
206 |
# cor_LST_LC1[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC1) |
|
207 |
# } |
|
208 |
# for(i in 1:length(dates)){ |
|
209 |
# cor_LST_LC3[i]<-cor(ghcn.subsets[[i]]$LST,ghcn.subsets[[i]]$LC3) |
|
210 |
# } |
|
203 | 211 |
|
Also available in: Unified diff
GAM LST, used monthly LST average instead of daily, task #361