Revision ec229367
Added by Benoit Parmentier over 12 years ago
- ID ec229367f7438821b4823ffad7fd351ac6d89f5b
climate/research/oregon/interpolation/Linear_reg.R | ||
---|---|---|
22 | 22 |
|
23 | 23 |
###Reading the station data |
24 | 24 |
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE) #The "paste" function concatenates the path and file name into a single string. |
25 |
#ghcn<-read.csv(infile1) #Use read.table if the input file is space delimited, or read.table(infile1, header=TRUE, sep=',') if it is comma delimited. #Checking that the columns are correctly labelled. |
|
26 | 25 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
27 | 26 |
|
28 | 27 |
###Creating a validation dataset by creating training and testing datasets (30%) |
29 |
#ghcn1507 <-subset(ghcn,ghcn$date_== date) |
|
30 |
#for loops |
|
31 |
ddat <- as.list(rep("", length(dates)))
|
|
28 |
|
|
29 |
|
|
30 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date_==d))
|
|
32 | 31 |
|
33 | 32 |
for(i in 1:length(dates)){ |
34 |
#assign(paste("ddat",i,sep="_"),data) |
|
35 |
data_name<-cat("ghcn_",dates[[1]],sep="") |
|
36 |
#data<-as.data.frame(data) |
|
37 |
#ddat[[i]] <- data.frame(subset(ghcn,ghcn$date_==dates[[i]])) |
|
38 |
#data<-subset(ghcn,ghcn$date_==dates[[i]]) |
|
39 |
data<-subset(ghcn,ghcn$date_==is.numeric(dates[[i]]) |
|
33 |
#data_name<-cat("ghcn_",dates[[i]],sep="") |
|
34 |
data_name<-paste("ghcn_",dates[[i]],sep="") |
|
35 |
data<-subset(ghcn,ghcn$date_==dates[[i]]) |
|
40 | 36 |
assign(data_name,data) |
41 |
n<-nrow(data[[i]]) |
|
42 |
ns<-n-round(n*0.3) #Create a sample from the data frame with 70% of the rows |
|
43 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
|
44 |
ind.training <- sample(nrow(data), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
|
45 |
ind.testing <- setdiff(1:nrow(data), ind.training) |
|
46 |
data_s <- data[ind.training, ] |
|
47 |
data_v <- data[ind.testing, ] |
|
48 |
} |
|
37 |
} #This loop creates 2 subsets of ghcn based on dates and reassigns the names using the date |
|
49 | 38 |
|
50 | 39 |
# ############ REGRESSION ############### |
51 | 40 |
# |
Also available in: Unified diff
Streamlining and slight changes to the loop that creates subset datasets for different dates