Revision ec229367
Added by Benoit Parmentier almost 13 years ago
- ID ec229367f7438821b4823ffad7fd351ac6d89f5b
climate/research/oregon/interpolation/Linear_reg.R | ||
22 | 22 |
23 | 23 |
###Reading the station data |
24 | 24 |
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE) #The "paste" function concatenates the path and file name into a single string. |
25 |
#ghcn<-read.csv(infile1) #Use read.table if the input file is space delimited or read.table(infile1, headername=TRUE, sep=',') #Checking that the columns are correctly labelled. |
26 | 25 |
dates <-readLines(paste(path,"/",infile2, sep="")) |
27 | 26 |
28 | 27 |
### Creating a validation dataset by splitting the data into training and testing datasets (30% held out for testing) |
29 |
#ghcn1507 <-subset(ghcn,ghcn$date_== date) |
30 |
#for loops |
31 |
ddat <- as.list(rep("", length(dates)))
28 |
29 |
30 |
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date_==d))
32 | 31 |
33 | 32 |
for(i in 1:length(dates)){ |
34 |
#assign(paste("ddat",i,sep="_"),data) |
35 |
data_name<-cat("ghcn_",dates[[1]],sep="") |
36 |
#data< |
37 |
#ddat[[i]] <- data.frame(subset(ghcn,ghcn$date_==dates[[i]])) |
38 |
#data<-subset(ghcn,ghcn$date_==dates[[i]]) |
39 |
data<-subset(ghcn,ghcn$date_==is.numeric(dates[[i]]) |
33 |
#data_name<-cat("ghcn_",dates[[i]],sep="") |
34 |
data_name<-paste("ghcn_",dates[[i]],sep="") |
35 |
data<-subset(ghcn,ghcn$date_==dates[[i]]) |
40 | 36 |
assign(data_name,data) |
41 |
n<-nrow(data[[i]]) |
42 |
ns<-n-round(n*0.3) #Create a sample from the data frame with 70% of the rows |
43 |
#ns<-n-round(n*prop) #Create a sample from the data frame with 70% of the rows |
44 | <- sample(nrow(data), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly |
45 |
ind.testing <- setdiff(1:nrow(data), |
46 |
data_s <- data[, ] |
47 |
data_v <- data[ind.testing, ] |
48 |
} |
37 |
} #This loop creates 2 subsets of ghcn based on dates and reassigns the names using the date |
49 | 38 |
50 | 39 |
# ############ REGRESSION ############### |
51 | 40 |
# |
Also available in: Unified diff
Streamlining and slight changes to the loop that creates subset datasets for different dates