Project

General

Profile

« Previous | Next » 

Revision ec229367

Added by Benoit Parmentier over 12 years ago

  • ID ec229367f7438821b4823ffad7fd351ac6d89f5b

Streamlining and slight changes to the loop to create subset dataset for different dates

View differences:

climate/research/oregon/interpolation/Linear_reg.R
22 22

  
23 23
###Reading the station data
24 24
ghcn<-read.csv(paste(path,"/",infile1, sep=""), header=TRUE)                            #The "paste" function concatenates the path and file name into a single string. 
25
#ghcn<-read.csv(infile1)                                                                #Use read.table if the input file is space delimited or read.table(infile1, header=TRUE, sep=',')                                                                            #Checking that the columns are correctly labelled.
26 25
dates <-readLines(paste(path,"/",infile2, sep=""))
27 26
                  
28 27
###Creating a validation dataset by creating training and testing datasets (30% of the rows held out for testing)
29
#ghcn1507 <-subset(ghcn,ghcn$date_== date)
30
#for loops
31
ddat <- as.list(rep("", length(dates)))
28

  
29

  
30
ghcn.subsets <-lapply(dates, function(d) subset(ghcn, date_==d))
32 31

  
33 32
for(i in 1:length(dates)){
34
  #assign(paste("ddat",i,sep="_"),data)
35
  data_name<-cat("ghcn_",dates[[1]],sep="")
36
  #data<-as.data.frame(data)
37
  #ddat[[i]] <- data.frame(subset(ghcn,ghcn$date_==dates[[i]]))
38
  #data<-subset(ghcn,ghcn$date_==dates[[i]])
39
  data<-subset(ghcn,ghcn$date_==is.numeric(dates[[i]])
33
  #data_name<-cat("ghcn_",dates[[i]],sep="")
34
  data_name<-paste("ghcn_",dates[[i]],sep="")
35
  data<-subset(ghcn,ghcn$date_==dates[[i]])
40 36
  assign(data_name,data)
41
  n<-nrow(data[[i]])
42
  ns<-n-round(n*0.3)  #Create a sample from the data frame with 70% of the rows
43
  #ns<-n-round(n*prop)  #Create a sample from the data frame with 70% of the rows
44
  ind.training <- sample(nrow(data), size=ns, replace=FALSE) #This selects the index position for 70% of the rows taken randomly
45
  ind.testing <- setdiff(1:nrow(data), ind.training)
46
  data_s <- data[ind.training, ]
47
  data_v <- data[ind.testing, ]
48
  }
37
  }                                                   #This loop creates 2 subsets of ghcn based on dates and reassigns the names using the date
49 38

  
50 39
# ############ REGRESSION ###############
51 40
# 

Also available in: Unified diff