Project

General

Profile

« Previous | Next » 

Revision e4da0a50

Added by Benoit Parmentier almost 9 years ago

initial commit of python script from Alberto modified for stage6

View differences:

climate/research/oregon/interpolation/inputCreate_stage_6.py
1
import sys, os
2
from optparse import OptionParser
3
import glob
4
import shutil
5
import math
6

  
7
from osgeo import ogr
8

  
9
#Creates a shell script that can be used to run stage 2 and 3 of the climate layer scripts. It sets up all the 
10
#input arguments for master_script based on provided arguments.
11
#Inputs:
12
#    0) inLatLonList: A text file that includes lat_lon of the tiles you want to process,one per line in format 10.0_-120.0 
13
#    1) outputFolder:  The path to the dir where you want to place the output files (e.g. /nobackupp4/climateLayers/output20Deg/reg4/
14
#    2) outDirForShFiles: The directory where you want to place the shell script serial files.
15
#    3) filePerDir: How many sh files to put in each subdirectory, this is useful if you want to run a subset.
16
#    4) lowStatCount: What's the minimum station count you want to use
17
#    5) higStatCount: The max station count you want to use
18
#    6) scriptFn: The filename of the master script you want to use to run stage4. 
19
#              For example 'Rscript /climateCode/environmental-layers/climate/research/world/interpolation/master_script_09012014_normal.R'
20
#    7) prefix: e.g. "r4" prefix for the job 
21
#    8) year: year predicted, this can be changed into a list
22
#
23
#Output:
24
#    Places number of files into directory.
25

  
26
def main():
27
  usage = "usage: %prog [options] <inLatLonList> <outputFolder> <outDirForShFiles> <filesPerDir> <lowFeatureCount> <highFeatureCount> <scriptFn> <prefix> <year>\n"+\
28
            "Prepare input for stage 4 "
29
  parser = OptionParser(usage=usage)
30
  opts,args=parser.parse_args()
31
  if len(args)<2:
32
     sys.exit(parser.print_help())
33

  
34
  inFn = args[0]
35
  outputFolder = args[1]
36
  outDir = args[2]
37
  fPer = int(args[3]) #subdivide the job for NEX
38
  lowTresh = int(args[4])
39
  hiTresh = int(args[5])
40
  scriptFn = args[6]
41
  prefix = args[7]
42
  year = args[8] #give a range of date here
43

  
44
  inPtr = open(inFn,"r") 
45
  if inPtr is None:
46
    print "Error: %s doesnt' exist" % inFn
47
  lines = inPtr.readlines()
48
  inPtr.close()
49

  
50
  found = 0
51
  notFound = 0
52
  
53
  if os.path.exists(outDir) is False:
54
    print "Warning: %s doesn't exists, creating" % outDir
55
    os.mkdir(outDir)
56
    
57
  wrap = "%s/wrapper.sh" % outDir
58
  if os.path.exists(wrap) is False:
59
    print "Warning: %s doesn't exists, creating" % wrap
60
    wrapFn = os.path.abspath(os.path.dirname(sys.argv[0]))+"/wrapper.sh"
61
    shutil.copyfile(wrapFn,wrap)
62
    os.chmod(wrap,0750)
63

  
64
  qs = "%s/serial_TEMPLATE.qsub" % outDir
65
  if os.path.exists(qs) is False:
66
    print "Warning: %s doesn't exists, creating" % qs
67
    qsFn = os.path.abspath(os.path.dirname(sys.argv[0]))+"/serial_TEMPLATE.qsub"
68
    shutil.copyfile(qsFn,qs)
69
    os.chmod(wrap,0750)
70

  
71
  #rankCount = 0
72
  #subDirNum = (len(lines)/fPer) + 2
73

  
74

  
75
  #for i in range(0,subDirNum):
76
  #  subDirName = "%s/dirSub_%d" % (outDir,i)
77
  #  if os.path.exists(subDirName) is False:
78
  #    print "Creating %s" % subDirName
79
  #    os.mkdir(subDirName)
80
  subDirName = "%s/dirSub_*" % outDir
81
  dirs = glob.glob(subDirName)
82
  for d in dirs:
83
    print "Removing %s" % d
84
    shutil.rmtree(d)
85

  
86
  
87
  dirCounter = 0
88
  
89
  shpFiles = []
90
  
91
  #Open the shapefiles to look at actual number of stations
92
  for l in lines:
93
    l = l.rstrip().split(",")
94
    print l[0]
95
    #41.3_-84.9/daily_covariates_ghcn_data_TMAX_2010_201141.3_-84.9.shp'
96
    searchStr = "%s/%s/%s/daily_covariates_ghcn_data_*%s.shp" % (outputFolder,l[0],year,l[0])
97

  
98
    shpFn = glob.glob(searchStr)
99
    if len(shpFn) > 0:
100
      fCount = featureCount(shpFn[0])
101
      
102
      if fCount is not None:
103
        shpFiles.append([shpFn[0],fCount,l[0]])
104
    else:
105
       #notFound += 1
106
       print "Error finding %s" % searchStr
107
  
108
  sortedShpFiles = sorted(shpFiles,key=lambda x: x[1])
109
  #print sortedShpFiles
110

  
111
  stCFn = "%s/stationCounts.txt" % outDir
112
  fPtrSt = open(stCFn,"w+")
113
  if fPtrSt is None:
114
    print "Error opening %s" % stCFn
115
    sys.exit(0)
116

  
117
  for s in sortedShpFiles:
118
   outSt = "%s,%d\n" % (s[2],s[1])
119
   fPtrSt.write(outSt)
120
  fPtrSt.close()
121

  
122
  stDim = len(sortedShpFiles)-1
123
   
124
  stMin = int(sortedShpFiles[0][1])
125
  stMax = int(sortedShpFiles[stDim][1])
126
  
127
  print "Min %d, Max %d " % (stMin,stMax)
128
 
129
  bins = []
130

  
131
  #Every 15k about 2 mins for daily
132
  minNeed = 3
133
  minAdd = 2
134
  tStep = 30000
135
  tStart = 15000
136
  tEnd = 300000
137
  for s in range(tStart,tEnd,tStep):
138
    print "%d-%d range needs %d minutes" % (s-tStep,s,minNeed)
139
    #continue
140

  
141
    bin1 =  [i for i in sortedShpFiles if ((i[1] < s) and (i[1] > (s-tStep)))]
142
    #print bin1
143
    rows = len(bin1)
144
    if rows > 0:
145
      #cols = len(bin1[0])
146
      #print rows,cols
147
      for b in bin1:
148
         b.append(minNeed)
149
      bins.append(bin1)
150
    else:
151
      print "No values for %d-%d range" % (s-tStep,s) 
152
    
153
    minNeed = minNeed + minAdd  
154
  
155
  dirCount = -1
156
  count = 0
157
  for binSingle in bins:
158
    rankCount = 0
159

  
160
    subDirName = "%s/dirSub_%d" % (outDir,dirCounter)
161
    if os.path.exists(subDirName) is True:
162
      if os.listdir(subDirName):
163
         dirCounter += 1
164
 
165

  
166
    subDirName = "%s/dirSub_%d" % (outDir,dirCounter)
167
    if os.path.exists(subDirName) is False:
168
      print "Creating %s" % subDirName
169
      os.mkdir(subDirName)
170

  
171
    print count,subDirName
172
    if len(binSingle) > 0:
173
      b0 = float(binSingle[0][3])
174
      #(minutesPerTile*(365/cores)+(climatologyStep) + assesment)/minutes
175
      hours = math.ceil(((b0 * (365.0/10.0))+60.0+30.0)/60.0)
176
      fPtrMinu = open(subDirName+"/numMinutes.txt","w+")
177
      if fPtrMinu is None:
178
        print "Error opening "+ subDirName+"/numMinutes.txt"
179
      else:
180
        print "Creating "+ subDirName+"/numMinutes.txt"
181
        fPtrMinu.write(str(hours)+'\n')
182
        fPtrMinu.close() 
183

  
184
    count += 1
185
    
186
    ##Input files that contain Rscript command
187
    #binSingle: this contains a list of tiles to run ranked by time 
188
    #run by year 
189
    for b in binSingle:
190
      ll = b[2]#"%s_%s" % (baseSpl[1],baseSpl[2]) 
191
      #check for existence of files from stage2
192
      metTest = "%s/%s/%s/met_stations_outfiles_obj_gam_CAI_%s.RData" % (outputFolder,ll,year,ll)
193
 
194
      #if os.path.exists(metTest) is False:
195
      #   print "No met object %s" % metTest
196
      #   continue     
197
      #prediction object check
198
      #methodTest =  "%s/%s/%s/method_mod_obj_gam_CAI_dailyTmax%s.RData" % (outputFolder,ll,year,ll)
199
      #if os.path.exists(methodTest) is True:
200
      #   print "Method object exists"
201
      #   continue 
202
    
203
      yearInt = int(year)+1
204
      outStr = "Rscript %s %s wgs84Grid %s %s %s %s/subset/mean_LST_%s_jan_6_wgs84.tif FALSE %s/%s/covar_obj_%s.RData %s/%s/%s/met_stations_outfiles_obj_gam_CAI_%s.RData 10 4800  %s %s > %s/outLogs/%s_stage4_%s.log 2>  %s/outLogs/%s_stage4_err_%s.log" % (scriptFn,ll,ll,outputFolder,b[0],outputFolder,ll,outputFolder,ll,ll,outputFolder,ll,year,ll,year,yearInt,outputFolder,ll,year,outputFolder,ll,year)
205
      #print outStr
206

  
207
      outFnSt = "%s/dirSub_%d/input_%d.in" % (outDir,dirCounter,rankCount)
208
      outPtr = open(outFnSt,"w+")
209
      if outPtr is None:
210
         print "Error: Opening %s" % outFnSt
211
      else:
212
         outPtr.write("#!/bin/bash"+"\n")
213
         outPtr.write(outStr+"\n")
214
         outPtr.close()
215
      
216
         if rankCount >= fPer:
217
           dirCounter += 1
218
           rankCount = 0
219

  
220
           subDirName = "%s/dirSub_%d" % (outDir,dirCounter)
221
           if os.path.exists(subDirName) is False:
222
             print "Creating %s" % subDirName
223
             os.mkdir(subDirName)
224

  
225
           #if len(binSingle) > 0:
226
           b0 = float(binSingle[0][3])
227
           hours = math.ceil(((b0 * (365.0/10.0))+60.0+30.0)/60.0)
228
           fPtrMinu = open(subDirName+"/numMinutes.txt","w+")
229
           if fPtrMinu is None:
230
             print "Error opening "+ subDirName+"/numMinutes.txt"
231
           else:
232
             print "Creating "+ subDirName+"/numMinutes.txt"
233
             fPtrMinu.write(str(hours)+'\n')
234
             fPtrMinu.close() 
235
         else:
236
           rankCount += 1
237

  
238
      found += 1
239
      outPtr.close()
240
      #found += 1
241
  prefix3 = prefix + "_" + year[2:4]
242
  postQsubCreator(outDir,prefix3)
243

  
244
  print "Found %d" % found
245
  print "Not found %d" % notFound
246
  print "End" 
247
   
248
  sys.exit(0)
249
  
250
def postQsubCreator(outDir,prefix):
251
  subDirName = "%s/dirSub_*" % outDir
252
  dirs = glob.glob(subDirName)
253
  
254
  if len(dirs) > 0:
255
   for d in dirs:
256
     fn = "%s/numMinutes.txt" % d
257
     if os.path.exists(fn): 
258
       fPtr = open(fn)
259
       numHrs = float(fPtr.readlines()[0])
260
       outDirTmp = os.path.dirname(fn)
261
       tmpRpl = "%sdirSub_" % outDir
262
       rank = int(outDirTmp.replace(tmpRpl,''))
263
       cores = len(glob.glob(outDirTmp+"/input*"))
264
       prefix2 = prefix + "_"+str(rank)
265
       print outDir,rank,cores,numHrs,prefix2           
266

  
267
       createQsub(outDir,rank,cores,numHrs,prefix2)
268
       fPtr.close()
269
     else:
270
       print "File %s doesn't exists" % fn
271
       continue
272

  
273
def featureCount(fn):
274
  driver = ogr.GetDriverByName('ESRI Shapefile')
275
  ds = driver.Open(fn, 0) # 0 means read-only. 1 means writeable.
276

  
277
  # Check to see if shapefile is found.
278
  if ds is None:
279
    print 'Could not open %s' % (fn)
280
    return None
281
  else:
282
    print 'Opened %s' % (fn)
283
    layer = ds.GetLayer()
284
    featureCount = layer.GetFeatureCount()
285
    print "Number of features in %s: %d" % (os.path.basename(fn),featureCount)
286

  
287
  return featureCount
288

  
289
def createQsub(outDir,rank,cores,time,prefix):
290
  fn = "%s/qsub_%d.qsub" % (outDir,rank)
291
  if time <= 2:
292
    qType = "devel"
293
  elif time <= 8:
294
    qType = "normal"
295
  elif time > 8:
296
    qType = "long" 
297
  else:
298
    return None
299
   
300
  fPtr = open(fn,"w+")
301
  if fPtr is None:
302
    print "Error opening %s " % fn
303
    return None
304
  
305
  fPtr.write("#PBS -S /bin/bash\n")
306
  out1 = "#PBS -l select=%d:ncpus=14:model=ivy\n" % cores
307
  fPtr.write(out1)
308
  out1 = "#PBS -l walltime=%d:00:00\n" % time
309
  fPtr.write(out1)
310
  fPtr.write("#PBS -j n\n")
311
  fPtr.write("#PBS -m be\n")
312
  out1 = "#PBS -N %s\n" % prefix # 
313
  fPtr.write(out1)
314
  fPtr.write("#PBS -V\n")
315
  out1 = "#PBS -q %s\n" % qType
316
  fPtr.write(out1)
317
  
318
  #ELayers Project ID
319
  fPtr.write("#PBS -W group_list=s1557\n\n")
320

  
321
  out1 = "CORES=%d\n" % cores
322
  fPtr.write(out1)
323
  out1 = "SUBDIRNUM=%d\n" % rank
324
  fPtr.write(out1)
325
  out1 = "HDIR=%s\n\n" % outDir
326
  fPtr.write(out1)
327

  
328
  fPtr.write("source /u/aguzman4/sharedModules/etc/environ.sh\n\n")
329

  
330
  fPtr.write("module load mpi-mvapich2/1.4.1/intel\n\n")
331

  
332
  fPtr.write("#$PBS_O_WORKDIR\n\n")
333

  
334
  fPtr.write("LOGSTDOUT=$HDIR/climate_ivy_serial_%d.stdout\n" % rank)
335
  fPtr.write("LOGSTDERR=$HDIR/climate_ivy_serial_%d.stderr\n\n" % rank)
336

  
337
  fPtr.write("mpiexec -pernode -comm none -np $CORES $HDIR/wrapper.sh $SUBDIRNUM $HDIR 1> $LOGSTDOUT 2> $LOGSTDERR\n")
338

  
339
  fPtr.close()
340

  
341
if __name__ == '__main__':
342
   main()
343

  
344
################################ #
345

  
346
#python /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/inputCreate_stage_4.py /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv /nobackupp6/aguzman4/climateLayers/out/reg4/ /nobackupp6/aguzman4/climateLayers/out/reg4/serialRuns/2014/ 10 0 250000 /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_4.R r4 2014
347

  
348
#parameters
349
#python script:/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/inputCreate_stage_4.py 
350
#inLatLonList: /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv 
351
#outputFolder: /nobackupp6/aguzman4/climateLayers/out/reg4/ 
352
#outDirForShFiles: /nobackupp6/aguzman4/climateLayers/out/reg4/serialRuns/2014/ 
353
#filePerDir: 10 
354
#lowStatCount: 0 
355
#higStatCount: 250000 
356
#scriptFn: /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_4.R 
357
#r4 
358
#2014
359

  
360
#    inLatLonList: A text file that includes lat_lon of the tiles you want to process,one per line in format 10.0_-120.0 
361
#    outputFolder:  The path to the dir where you want to place the output files (e.g. /nobackupp4/climateLayers/output20Deg/reg4/
362
#    outDirForShFiles: The directory where you want to place the shell script serial files.
363
#    filePerDir: How many sh files to put in each subdirectory, this is useful if you want to run a subset.
364
#    lowStatCount: What's the minimum station count you want to use
365
#    higStatCount: The max station count you want to use
366
#    scriptFn: The filename of the master script you want to use to run stage4. 
367
#              For example 'Rscript /climateCode/environmental-layers/climate/research/world/interpolation/master_script_09012014_normal.R'
368

  
369

  
370
#python /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/inputCreate_stage_4.py /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv /nobackupp6/aguzman4/climateLayers/out/reg4/ /nobackupp6/aguzman4/climateLayers/out/reg4/serialStage6/2010/ 10 0 250000 /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_4.R r4 2010
371
#python /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/master_script_stage_6_01182016.R /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv /nobackupp6/aguzman4/climateLayers/out/reg4/ /nobackupp6/aguzman4/climateLayers/out/reg4/serialStage6/2010/ 10 0 250000 /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_6_01182016.R r4 2010
372

  

Also available in: Unified diff