Revision 7f643c83
Added by Benoit Parmentier over 12 years ago
- ID 7f643c8351a35d326c414446fe32dd007aa645d8
climate/research/oregon/interpolation/Linear_reg.R | ||
---|---|---|
1 |
#This script interpolates station value for the Oregon case study. This program loads the station data from a csv file and perform two types of regression: multiple linear model and general additive model (GAM). Note that this program 1)assumes that the csv file is in the current working 2)extract relevant variables from raster images before performing the regressions. The user must provide the list of raster images |
|
2 |
#in a textile. |
|
3 |
#Created by Benoit Parmentier on February 12, 2012. |
|
1 |
#This script interpolates station value for the Oregon case study. This program loads the station data from a csv file |
|
2 |
#and perform two types of regression: multiple linear model and general additive model (GAM). Note that this program: |
|
3 |
#1)assumes that the csv file is in the current working |
|
4 |
#2)extract relevant variables from raster images before performing the regressions. |
|
5 |
#The user must provide the list of raster images in a textile. |
|
6 |
#Created by Benoit Parmentier on February 15, 2012. |
|
4 | 7 |
|
5 | 8 |
###Loading r library and packages |
6 |
library(raster) # loading the raster package
|
|
7 |
library(gtools) l# loading ...
|
|
9 |
library(raster) # loading the raster package |
|
10 |
library(gtools) l# loading ... |
|
8 | 11 |
library(sp) |
9 | 12 |
library(mgcv) |
10 | 13 |
|
... | ... | |
12 | 15 |
infile1<-"ghcn_or_b_02122012_OR83M.csv" |
13 | 16 |
inlistf<-"list_files.txt" |
14 | 17 |
path<-getwd() |
15 |
#list_var<-list.files() # displaying the files in the current folder which should contain only the relevant raster images. |
|
16 |
#dump("list_var", file="list_var.txt") # Saving the values of list_var in a ascii text file. |
|
17 |
|
|
18 |
#list_var<-list.files() # displaying the files in the current folder which should contain only the relevant raster images. |
|
19 |
#dump("list_var", file="list_var.txt") # Saving the values of list_var in a ascii text file. |
|
18 | 20 |
#inlistvar<-"ghcn_var_02122012.txt" |
19 | 21 |
|
22 |
#######START OF THE SCRIPT ############# |
|
23 |
|
|
20 | 24 |
###Reading the station data |
21 |
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE) #The "paste" function concatenates the path and file name in common string. |
|
25 |
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE) #The "paste" function concatenates the path and file name in common string.
|
|
22 | 26 |
ghcn<-read.csv(infile1) #Use read.table if the input file is space delimited or read.table(infile1, headername=TRUE, sep=',') |
23 |
names(ghcn) #Checking that the columns are correctly labelled.
|
|
27 |
names(ghcn) #Checking that the columns are correctly labelled. |
|
24 | 28 |
|
25 | 29 |
###Extracting the variables values from the raster files |
26 |
coords<- ghcn[,c('x_OR83M','y_OR83M')] #Selecting all rows for the x and y columns and packaging the output in a data frame. |
|
30 |
coords<- ghcn[,c('x_OR83M','y_OR83M')] #Selecting all rows for the x and y columns and packaging the output in a data frame.
|
|
27 | 31 |
lines <-readLines(inlistf) |
28 | 32 |
inlistvar<-paste(path,"/",lines,sep="") |
29 | 33 |
|
30 |
#s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables.
|
|
31 |
#stat_val<- extract(s_raster, coords) #Extracting values from the raster stack for every point location in coords data frame. |
|
34 |
#s_raster<- stack(inlistvar) #Creating a stack of raster images from the list of variables. |
|
35 |
#stat_val<- extract(s_raster, coords) #Extracting values from the raster stack for every point location in coords data frame.
|
|
32 | 36 |
#create a shape file and data_frame with names?? |
33 | 37 |
|
38 |
###Creating a validation dataset by creating training and testing datasets (%30) |
|
39 |
n<-nrow(ghcn1507) |
|
40 |
ns<-n-round(n*0.3) #Create a sample from the data frame with 70% of the rows |
|
41 |
ind.training <- sample(nrow(ghcn1507), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
42 |
ind.testing <- setdiff(1:nrow(ghcn1507), ind.training) |
|
43 |
ghcn1507_s <- ghcn1507[ind.training, ] |
|
44 |
ghcn1507_v <- ghcn1507[ind.testing, ] |
|
45 |
|
|
46 |
############ REGRESSION ############### |
|
34 | 47 |
|
35 | 48 |
###Regression part 1: linear models |
36 | 49 |
lm_PRISM1=lm(tmax~lat+lon+ELEV_SRTM+ASPECT+DISTOC, data=ghcn) |
... | ... | |
59 | 72 |
#GAM plot of partial residuals |
60 | 73 |
gam.check(GAM1) #This will produce basic plots of residuals |
61 | 74 |
|
62 |
###Creating a validation dataset by creating training and testing datasets (%30) |
|
63 |
n<-nrow(ghcn1507) |
|
64 |
ns<-n-round(n*0.3) #Create a sample from the data frame with 70% of the rows |
|
65 |
ind.training <- sample(nrow(ghcn1507), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
66 |
ind.testing <- setdiff(1:nrow(ghcn1507), ind.training) |
|
67 |
ghcn1507_s <- ghcn1507[ind.training, ] |
|
68 |
ghcn1507_v <- ghcn1507[ind.testing, ] |
|
69 |
|
|
70 |
|
|
71 |
#ghcn1507_s<- sample(ghcn1507,size=ns, replace=T) |
|
72 |
ghcn1507_s<- ghcn1507[sample(nrow(ghcn1507), size=ns), ] |
|
73 |
#ghcn1507_v <- ghcn1507$STAT_ID[ !ghcn1507_s$STAT_ID ] #dataset for validation that can be used to predict values. |
|
74 |
|
|
75 |
|
|
76 | 75 |
|
77 | 76 |
##End of script |
78 | 77 |
|
Also available in: Unified diff
updated GAM script for tmax