Project

General

Profile

« Previous | Next » 

Revision 0163d0e2

Added by Benoit Parmentier about 12 years ago

GAM Fusion fusion initial reorganization of code to reduce time and predict for Venezuela

View differences:

climate/research/oregon/interpolation/GAM_fusion_analysis_raster_prediction_multisampling.R
21 21
library(spdep)                               # Spatial pacakge with methods and spatial stat. by Bivand et al.
22 22
library(rgdal)                               # GDAL wrapper for R, spatial utilities
23 23
library(gstat)                               # Kriging and co-kriging by Pebesma et al.
24
library(fields)                              # NCAR Spatial Interpolation methods such as kriging, splines
24
library(fields)                             # NCAR Spatial Interpolation methods such as kriging, splines
25 25
library(raster)                              # Hijmans et al. package for raster processing
26 26
library(rasterVis)
27 27
library(parallel)                            # Urbanek S. and Ripley B., package for multi cores & parralel processing
climate/research/oregon/interpolation/GAM_fusion_function_multisampling.R
1
#Function to be used with GAM_fusion_analysis_raster_prediction_mutlisampling.R
2
#runClimFusion<-function(r_stack,data_training,data_testing,data_training){
3

  
4
####
5
#TODO:
6
#Add log file and calculate time and sizes for processes-outputs
7

  
8

  
9
runClimFusion<-function(j){
10
  #Make this a function with multiple argument that can be used by mcmapply??
11
  #This creates clim fusion layers...
12
  
13
  #Functions used in the script
14
  predict_raster_model<-function(in_models,r_stack,out_filename){
15
    #This functions performs predictions on a raster grid given input models.
16
    #Arguments: list of fitted models, raster stack of covariates
17
    #Output: spatial grid data frame of the subset of tiles
18
    #s_sgdf<-as(r_stack,"SpatialGridDataFrame") #Conversion to spatial grid data frame
19
    list_rast_pred<-vector("list",length(in_models))
20
    for (i in 1:length(in_models)){
21
      mod <-in_models[[i]] #accessing GAM model ojbect "j"
22
      raster_name<-out_filename[[i]]
23
      if (inherits(mod,"gam")) {           
24
        #rpred<- predict(mod, newdata=s_sgdf, se.fit = TRUE) #Using the coeff to predict new values.
25
        #rast_pred2<- predict(object=s_raster,model=mod,na.rm=TRUE) #Using the coeff to predict new values.
26
        raster_pred<- predict(object=s_raster,model=mod,na.rm=FALSE) #Using the coeff to predict new values.
27
        names(raster_pred)<-"y_pred"  
28
        writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
29
        print(paste("Interpolation:","mod", j ,sep=" "))
30
        list_rast_pred[[i]]<-raster_name
31
      }
32
    }
33
    if (inherits(mod,"try-error")) {
34
      print(paste("no gam model fitted:",mod[1],sep=" "))
35
    }
36
    return(list_rast_pred)
37
  }
38
  
39
  fit_models<-function(list_formulas,data_training){
40
    #This functions several models and returns model objects.
41
    #Arguments: - list of formulas for GAM models
42
    #           - fitting data in a data.frame or SpatialPointDataFrame
43
    #Output: list of model objects 
44
    list_fitted_models<-vector("list",length(list_formulas))
45
    for (k in 1:length(list_formulas)){
46
      formula<-list_formulas[[k]]
47
      mod<- try(gam(formula, data=data_training))
48
      model_name<-paste("mod",k,sep="")
49
      assign(model_name,mod) 
50
      list_fitted_models[[k]]<-mod
51
    }
52
    return(list_fitted_models) 
53
  }
54
  #Model and response variable can be changed without affecting the script
55
  prop_month<-0 #proportion retained for validation
56
  run_samp<-1
57
  list_formulas<-vector("list",nmodels)
58
  
59
  list_formulas[[1]] <- as.formula("y_var ~ s(elev_1)", env=.GlobalEnv)
60
  list_formulas[[2]] <- as.formula("y_var ~ s(LST)", env=.GlobalEnv)
61
  list_formulas[[3]] <- as.formula("y_var ~ s(elev_1,LST)", env=.GlobalEnv)
62
  list_formulas[[4]] <- as.formula("y_var ~ s(lat) + s(lon)+ s(elev_1)", env=.GlobalEnv)
63
  list_formulas[[5]] <- as.formula("y_var ~ s(lat,lon,elev_1)", env=.GlobalEnv)
64
  list_formulas[[6]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST)", env=.GlobalEnv)
65
  list_formulas[[7]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC2)", env=.GlobalEnv)
66
  list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
67
  list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
68
  lst_avg<-c("mm_01","mm_02","mm_03","mm_04","mm_05","mm_06","mm_07","mm_08","mm_09","mm_10","mm_11","mm_12")  
69
  
70
  data_month<-dst[dst$month==j,] #Subsetting dataset for the relevant month of the date being processed
71
  LST_name<-lst_avg[j] # name of LST month to be matched
72
  data_month$LST<-data_month[[LST_name]]
73
  
74
  #LST bias to model...
75
  data_month$LSTD_bias<-data_month$LST-data_month$TMax
76
  data_month$y_var<-data_month$LSTD_bias #Adding bias as the variable modeled
77
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
78
  cname<-paste("mod",1:length(mod_list),sep="")
79
  names(mod_list)<-cname
80
  #Adding layer LST to the raster stack  
81
  pos<-match("elev",layerNames(s_raster))
82
  layerNames(s_raster)[pos]<-"elev_1"
83
  
84
  pos<-match("LST",names(s_raster)) #Find the position of the layer with name "LST", if not present pos=NA
85
  s_raster<-dropLayer(s_raster,pos)      # If it exists drop layer
86
  LST<-subset(s_raster,LST_name)
87
  names(LST)<-"LST"
88
  #Screen for extreme values": this needs more thought, min and max val vary with regions
89
  #min_val<-(-15+273.16) #if values less than -15C then screen out (note the Kelvin units that will need to be changed later in all datasets)
90
  #r1[r1 < (min_val)]<-NA
91
  s_raster<-addLayer(s_raster,LST)            #Adding current month
92
  
93
  #Now generate file names for the predictions...
94
  list_out_filename<-vector("list",length(in_models))
95
    
96
  for (k in 1:length(list_out_filename)){
97
    data_name<-paste("bias_LST_month_",j,"_mod_",k,"_",prop_month,
98
                     "_",run_samp,sep="")
99
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
100
    list_out_filename[[k]]<-raster_name
101
  }
102

  
103
  #now predict values for raster image...
104
  rast_list<-predict_raster_model(mod_list,s_raster,out_filename)
105
  rast_list<-rast_list[!sapply(rast_list,is.null)] #remove NULL elements in list
106
 
107
  mod_rast<-stack(rast_list)  
108
  rast_clim_list<-vector("list",nlayers(mod_rast))
109
  for (k in 1:nlayers(mod_rast)){
110
    clim_fus_rast<-LST-subset(mod_rast,k)
111
    data_name<-paste("clim_LST_month_",j,"_mod_",k,"_",prop_month,
112
                     "_",run_samp,sep="")
113
    raster_name<-paste("fusion_",data_name,out_prefix,".tif", sep="")
114
    rast_clim_list[[k]]<-raster_name
115
    writeRaster(clim_fus_rast, filename=raster_name,overwrite=TRUE)  #Wri
116
  }
117
  clim_obj<-list(rast_list,rast_clim_list,data_month,mod_list,list_formulas)
118
  names(clim_obj)<-c("bias","clim","data_month","mod","formulas")
119
  return(clim_obj)
120
}
121

  
122
## Run function for kriging...?
123

  
124

  
1 125
runGAMFusion <- function(i) {            # loop over dates
2 126
  
127
  #Function used in the script
128
  
129
  lookup<-function(r,lat,lon) {
130
    #This functions extracts values in a projected raster
131
    #given latitude and longitude vector locations
132
    #Output: matrix with values
133
    xy<-project(cbind(lon,lat),proj_str);
134
    cidx<-cellFromXY(r,xy);
135
    return(r[cidx])
136
  }
137
  
138

  
3 139
  date<-strptime(sampling_dat$date[i], "%Y%m%d")   # interpolation date being processed
4 140
  month<-strftime(date, "%m")          # current month of the date being processed
5 141
  LST_month<-paste("mm_",month,sep="") # name of LST month to be matched
......
62 198
  #sta_lola=modst[,c("lon","lat")] #Extracting locations of stations for the current month..
63 199
  
64 200
  #proj_str="+proj=lcc +lat_1=43 +lat_2=45.5 +lat_0=41.75 +lon_0=-120.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs";
65
  lookup<-function(r,lat,lon) {
66
    xy<-project(cbind(lon,lat),proj_str);
67
    cidx<-cellFromXY(r,xy);
68
    return(r[cidx])
69
  }
201

  
70 202
  #sta_tmax_from_lst=lookup(themolst,sta_lola$lat,sta_lola$lon) #Extracted values of LST for the stations
71 203
  sta_tmax_from_lst<-modst$LST
72 204
  #########
......
74 206
  #########
75 207
  
76 208
  sta_bias=sta_tmax_from_lst-modst$TMax; #That is the difference between the monthly LST mean and monthly station mean
77
  #Added by Benoit
78 209
  modst$LSTD_bias<-sta_bias  #Adding bias to data frame modst containning the monthly average for 10 years
79 210
  
80 211
  #bias_xy=project(as.matrix(sta_lola),proj_str)
......
138 269
  # STEP 5 - interpolate bias
139 270
  ########
140 271
  
141
  # ?? include covariates like elev, distance to coast, cloud frequency, tree heig
142
  #fitbias<-Tps(bias_xy,sta_bias) #use TPS or krige
143
  
144 272
  #Adding options to use only training stations : 07/11/2012
145 273
  #bias_xy=project(as.matrix(sta_lola),proj_str)
146 274
  #bias_xy2=project(as.matrix(c(dmoday$lon,dmoday$lat),proj_str)
......
298 426
  list_formulas[[8]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(LC6)", env=.GlobalEnv)
299 427
  list_formulas[[9]] <- as.formula("y_var ~ s(lat,lon) + s(elev_1) + s(N_w,E_w) + s(LST) + s(DISTOC)", env=.GlobalEnv)
300 428
  
301
  #list_formulas[[1]] <- as.formula("y_var ~ s(ELEV_SRTM)", env=.GlobalEnv)
302
  #list_formulas[[2]] <- as.formula("y_var ~ s(lat,lon)", env=.GlobalEnv)
303
  #list_formulas[[3]] <- as.formula("y_var~ s(lat,lon,ELEV_SRTM)", env=.GlobalEnv)
304
  #list_formulas[[4]] <- as.formula("y_var~ s(lat) + s (lon) + s (ELEV_SRTM) + s(DISTOC)", env=.GlobalEnv)
305
  #list_formulas[[5]] <- as.formula("y_var~ s(lat,lon,ELEV_SRTM) + s(Northness) + s (Eastness) + s(DISTOC)", env=.GlobalEnv)
306
  #list_formulas[[6]] <- as.formula("y_var~ s(lat,lon) +s(ELEV_SRTM) + s(Northness,Eastness) + s(DISTOC)", env=.GlobalEnv)
307
  
308
  if (bias_prediction==1){
309
    #mod1<- try(gam(formula1, data=data_month))
310
    #mod2<- try(gam(formula2, data=data_month)) #modified nesting....from 3 to 2
311
    #mod3<- try(gam(formula3, data=data_month))
312
    #mod4<- try(gam(formula4, data=data_month))
313
    #mod5<- try(gam(formula5, data=data_month))
314
    #mod6<- try(gam(formula6, data=data_month))
315
    #mod7<- try(gam(formula7, data=data_month))
316
    #mod8<- try(gam(formula8, data=data_month)) 
317
    
318
    for (j in 1:nmodels){
319
      formula<-list_formulas[[j]]
320
      mod<- try(gam(formula, data=data_month))
321
      model_name<-paste("mod",j,sep="")
322
      assign(model_name,mod) 
323
    }
324
    
325
  } else if (bias_prediction==0){      #Use daily data for direct prediction using GAM
326
    
327
    #mod1<- try(gam(formula1, data=data_s))
328
    #mod2<- try(gam(formula2, data=data_s)) #modified nesting....from 3 to 2
329
    #mod3<- try(gam(formula3, data=data_s))
330
    #mod4<- try(gam(formula4, data=data_s))
331
    #mod5<- try(gam(formula5, data=data_s))
332
    #mod6<- try(gam(formula6, data=data_s))
333
    #mod7<- try(gam(formula7, data=data_s))
334
    #mod8<- try(gam(formula8, data=data_s))
335
    
336
    for (j in 1:nmodels){
337
      formula<-list_formulas[[j]]
338
      mod<- try(gam(formula, data=data_s))
339
      model_name<-paste("mod",j,sep="")
340
      assign(model_name,mod) 
341
    }
342
    
343
  }
429
  mod_list<-fit_models(list_formulas,data_month) #only gam at this stage
344 430

  
345 431
  #Added
346 432
  #tmax_predicted=themolst+daily_delta_rast-bias_rast #Final surface?? but daily_rst
......
399 485
  mod_obj<-vector("list",nmodels+2)  #This will contain the model objects fitting: 10/30
400 486
  mod_obj[[nmodels+1]]<-mod_kr_month  #Storing climatology object
401 487
  mod_obj[[nmodels+2]]<-mod_kr_day  #Storing delta object
488
  pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame
402 489
  
403 490
  for (j in 1:nmodels){
404 491
    
405 492
    ##Model assessment: specific diagnostic/metrics for GAM
406 493
    
407 494
    name<-paste("mod",j,sep="")  #modj is the name of The "j" model (mod1 if j=1) 
408
    mod<-get(name)               #accessing GAM model ojbect "j"
495
    #mod<-get(name)               #accessing GAM model ojbect "j"
496
    mod<-mod_list[[j]]
409 497
    mod_obj[[j]]<-mod  #storing current model object
410
    
411 498
    #If mod "j" is not a model object
412 499
    if (inherits(mod,"try-error")) {
413 500
      results_AIC[1]<- sampling_dat$date[i]  #storing the interpolation dates in the first column
......
524 611
          writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
525 612
          
526 613
        }
527
        
528
        if (bias_prediction==0){
529
          data_name<-paste("predicted_mod",j,"_",sampling_dat$date[i],"_",sampling_dat$prop[i],
530
                           "_",sampling_dat$run_samp[i],sep="")
531
          raster_name<-paste("GAM_",data_name,out_prefix,".rst", sep="")
532
          writeRaster(raster_pred, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
533
          #writeRaster(r2, filename=raster_name,overwrite=TRUE)  #Writing the data in a raster file format...(IDRISI)
534
          
535
        }
536
        
537
        
538
        pred_sgdf<-as(raster_pred,"SpatialGridDataFrame") #Conversion to spatial grid data frame
614
               
539 615
        #rpred_val_s <- overlay(raster_pred,data_s)             #This overlays the kriged surface tmax and the location of weather stations
540 616
        
541 617
        rpred_val_s <- overlay(pred_sgdf,data_s)             #This overlays the interpolated surface tmax and the location of weather stations

Also available in: Unified diff