Revision e4da0a50
Added by Benoit Parmentier almost 9 years ago
climate/research/oregon/interpolation/inputCreate_stage_6.py | ||
---|---|---|
1 |
import sys, os |
|
2 |
from optparse import OptionParser |
|
3 |
import glob |
|
4 |
import shutil |
|
5 |
import math |
|
6 |
|
|
7 |
from osgeo import ogr |
|
8 |
|
|
9 |
#Creates a shell script that can be used to run stage 2 and 3 of the climate layer scripts. It sets up all the |
|
10 |
#input arguments for master_script based on provided arguments. |
|
11 |
#Inputs: |
|
12 |
# 0) inLatLonList: A text file that includes lat_lon of the tiles you want to process,one per line in format 10.0_-120.0 |
|
13 |
# 1) outputFolder: The path to the dir where you want to place the output files (e.g. /nobackupp4/climateLayers/output20Deg/reg4/ |
|
14 |
# 2) outDirForShFiles: The directory where you want to place the shell script serial files. |
|
15 |
# 3) filePerDir: How many sh files to put in each subdirectory, this is useful if you want to run a subset. |
|
16 |
# 4) lowStatCount: What's the minimum station count you want to use |
|
17 |
# 5) higStatCount: The max station count you want to use |
|
18 |
# 6) scriptFn: The filename of the master script you want to use to run stage4. |
|
19 |
# For example 'Rscript /climateCode/environmental-layers/climate/research/world/interpolation/master_script_09012014_normal.R' |
|
20 |
# 7) prefix: e.g. "r4" prefix for the job |
|
21 |
# 8) year: year predicted, this can be changed into a list |
|
22 |
# |
|
23 |
#Output: |
|
24 |
# Places number of files into directory. |
|
25 |
|
|
26 |
def _writeMinutes(subDirName, binSingle):
    # Write the estimated wall-clock hours for one subdirectory to numMinutes.txt.
    # binSingle[0][3] is the per-tile minutes estimate attached during binning.
    b0 = float(binSingle[0][3])
    # (minutesPerTile*(365/cores) + climatologyStep + assessment) / minutesPerHour
    hours = math.ceil(((b0 * (365.0/10.0))+60.0+30.0)/60.0)
    print("Creating "+ subDirName+"/numMinutes.txt")
    fPtrMinu = open(subDirName+"/numMinutes.txt","w+")
    fPtrMinu.write(str(hours)+'\n')
    fPtrMinu.close()

def main():
    """Build per-tile serial-run inputs for stage 4/6 of the climate layers.

    Reads a list of lat_lon tile ids, counts stations in each tile's
    covariate shapefile, sorts and bins the tiles by station count
    (a proxy for runtime), then writes one input_<rank>.in shell fragment
    per tile into dirSub_<k> subdirectories of outDir, a numMinutes.txt
    runtime estimate per subdirectory, and finally one PBS qsub per
    subdirectory via postQsubCreator().
    """
    usage = "usage: %prog [options] <inLatLonList> <outputFolder> <outDirForShFiles> <filesPerDir> <lowFeatureCount> <highFeatureCount> <scriptFn> <prefix> <year>\n"+\
            "Prepare input for stage 4 "
    parser = OptionParser(usage=usage)
    opts,args = parser.parse_args()
    # BUGFIX: args[0]..args[8] are indexed below; the old check (< 2) let
    # shorter argument lists through and crashed with IndexError instead
    # of printing the usage message.
    if len(args) < 9:
        sys.exit(parser.print_help())

    inFn = args[0]           # text file of lat_lon tile ids, one per line
    outputFolder = args[1]   # root of the per-tile stage-2/3 outputs
    outDir = args[2]         # destination for the serial-run script files
    fPer = int(args[3])      # files per subdirectory (subdivide the job for NEX)
    lowTresh = int(args[4])  # minimum station count (currently unused)
    hiTresh = int(args[5])   # maximum station count (currently unused)
    scriptFn = args[6]       # master R script to invoke per tile
    prefix = args[7]         # PBS job-name prefix
    year = args[8]           # year predicted; could be extended to a list

    # open() raises IOError if inFn is missing; the old "is None" check
    # was dead code and has been removed.
    inPtr = open(inFn,"r")
    lines = inPtr.readlines()
    inPtr.close()

    found = 0
    notFound = 0

    if os.path.exists(outDir) is False:
        print("Warning: %s doesn't exists, creating" % outDir)
        os.mkdir(outDir)

    # Copy the MPI wrapper next to the generated scripts if missing.
    wrap = "%s/wrapper.sh" % outDir
    if os.path.exists(wrap) is False:
        print("Warning: %s doesn't exists, creating" % wrap)
        wrapFn = os.path.abspath(os.path.dirname(sys.argv[0]))+"/wrapper.sh"
        shutil.copyfile(wrapFn,wrap)
        os.chmod(wrap,0o750)

    # Copy the qsub template if missing.
    qs = "%s/serial_TEMPLATE.qsub" % outDir
    if os.path.exists(qs) is False:
        print("Warning: %s doesn't exists, creating" % qs)
        qsFn = os.path.abspath(os.path.dirname(sys.argv[0]))+"/serial_TEMPLATE.qsub"
        shutil.copyfile(qsFn,qs)
        # BUGFIX: chmod the template that was just copied; the original
        # chmod'ed wrapper.sh a second time and left qs non-executable.
        os.chmod(qs,0o750)

    # Start from a clean slate: remove dirSub_* left over from a previous run.
    for d in glob.glob("%s/dirSub_*" % outDir):
        print("Removing %s" % d)
        shutil.rmtree(d)

    dirCounter = 0

    shpFiles = []

    # Open the shapefiles to look at the actual number of stations per tile.
    for l in lines:
        l = l.rstrip().split(",")
        print(l[0])
        # e.g. 41.3_-84.9/daily_covariates_ghcn_data_TMAX_2010_201141.3_-84.9.shp
        searchStr = "%s/%s/%s/daily_covariates_ghcn_data_*%s.shp" % (outputFolder,l[0],year,l[0])

        shpFn = glob.glob(searchStr)
        if len(shpFn) > 0:
            fCount = featureCount(shpFn[0])
            if fCount is not None:
                # [shapefile path, station count, tile id]
                shpFiles.append([shpFn[0],fCount,l[0]])
        else:
            print("Error finding %s" % searchStr)

    # Rank tiles by station count (ascending).
    sortedShpFiles = sorted(shpFiles,key=lambda x: x[1])

    # ROBUSTNESS: the original crashed with IndexError on an empty list.
    if not sortedShpFiles:
        print("No shapefiles found; nothing to do")
        sys.exit(0)

    # Dump the sorted counts for inspection.
    stCFn = "%s/stationCounts.txt" % outDir
    fPtrSt = open(stCFn,"w+")
    for s in sortedShpFiles:
        fPtrSt.write("%s,%d\n" % (s[2],s[1]))
    fPtrSt.close()

    stDim = len(sortedShpFiles)-1
    stMin = int(sortedShpFiles[0][1])
    stMax = int(sortedShpFiles[stDim][1])
    print("Min %d, Max %d " % (stMin,stMax))

    bins = []

    # Every 15k stations is roughly 2 minutes of daily prediction time.
    minNeed = 3   # minutes estimate for the first bin
    minAdd = 2    # extra minutes per successive (larger) bin
    tStep = 30000
    tStart = 15000
    tEnd = 300000
    for s in range(tStart,tEnd,tStep):
        print("%d-%d range needs %d minutes" % (s-tStep,s,minNeed))

        # NOTE(review): strict comparisons drop tiles whose count equals a
        # bin boundary exactly — presumably rare; confirm if it matters.
        bin1 = [i for i in sortedShpFiles if ((i[1] < s) and (i[1] > (s-tStep)))]
        if len(bin1) > 0:
            for b in bin1:
                b.append(minNeed)   # tag each tile with its minutes estimate
            bins.append(bin1)
        else:
            print("No values for %d-%d range" % (s-tStep,s))

        minNeed = minNeed + minAdd

    count = 0
    for binSingle in bins:
        rankCount = 0

        # If the current subdirectory already has content, advance to a new one.
        subDirName = "%s/dirSub_%d" % (outDir,dirCounter)
        if os.path.exists(subDirName) is True:
            if os.listdir(subDirName):
                dirCounter += 1

        subDirName = "%s/dirSub_%d" % (outDir,dirCounter)
        if os.path.exists(subDirName) is False:
            print("Creating %s" % subDirName)
            os.mkdir(subDirName)

        print("%s %s" % (count,subDirName))
        if len(binSingle) > 0:
            _writeMinutes(subDirName, binSingle)

        count += 1

        # Input files that contain the Rscript command.
        # binSingle: list of tiles to run, ranked by estimated time; run by year.
        for b in binSingle:
            ll = b[2]   # lat_lon tile id
            yearInt = int(year)+1
            outStr = "Rscript %s %s wgs84Grid %s %s %s %s/subset/mean_LST_%s_jan_6_wgs84.tif FALSE %s/%s/covar_obj_%s.RData %s/%s/%s/met_stations_outfiles_obj_gam_CAI_%s.RData 10 4800 %s %s > %s/outLogs/%s_stage4_%s.log 2> %s/outLogs/%s_stage4_err_%s.log" % (scriptFn,ll,ll,outputFolder,b[0],outputFolder,ll,outputFolder,ll,ll,outputFolder,ll,year,ll,year,yearInt,outputFolder,ll,year,outputFolder,ll,year)

            outFnSt = "%s/dirSub_%d/input_%d.in" % (outDir,dirCounter,rankCount)
            outPtr = open(outFnSt,"w+")
            outPtr.write("#!/bin/bash"+"\n")
            outPtr.write(outStr+"\n")
            outPtr.close()

            if rankCount >= fPer:
                # Current subdirectory is full: roll over to a new one and
                # record its runtime estimate.
                dirCounter += 1
                rankCount = 0

                subDirName = "%s/dirSub_%d" % (outDir,dirCounter)
                if os.path.exists(subDirName) is False:
                    print("Creating %s" % subDirName)
                    os.mkdir(subDirName)

                _writeMinutes(subDirName, binSingle)
            else:
                rankCount += 1

            found += 1
    # NOTE: the original re-closed the last outPtr here; every file is
    # already closed inside the loop, so that call was removed.

    prefix3 = prefix + "_" + year[2:4]
    postQsubCreator(outDir,prefix3)

    print("Found %d" % found)
    print("Not found %d" % notFound)
    print("End")

    sys.exit(0)
|
249 |
|
|
250 |
def postQsubCreator(outDir,prefix):
    """Create one PBS qsub file per dirSub_* subdirectory of outDir.

    Each subdirectory is expected to contain numMinutes.txt (estimated
    wall-clock hours) and input_*.in files (one per tile/core); the qsub
    requests that many cores and that much walltime via createQsub().
    """
    dirs = glob.glob("%s/dirSub_*" % outDir)

    for d in dirs:
        fn = "%s/numMinutes.txt" % d
        if not os.path.exists(fn):
            print("File %s doesn't exists" % fn)
            continue

        fPtr = open(fn)
        numHrs = float(fPtr.readlines()[0])
        fPtr.close()

        outDirTmp = os.path.dirname(fn)
        # BUGFIX: the rank used to be extracted by stripping the string
        # outDir+"dirSub_" (missing the "/" separator), which raised
        # ValueError whenever outDir had no trailing slash. Parse the
        # trailing dirSub_<rank> path component directly instead.
        rank = int(os.path.basename(outDirTmp).replace("dirSub_",""))
        cores = len(glob.glob(outDirTmp+"/input*"))
        prefix2 = prefix + "_"+str(rank)
        print("%s %s %s %s %s" % (outDir,rank,cores,numHrs,prefix2))

        createQsub(outDir,rank,cores,numHrs,prefix2)
|
272 |
|
|
273 |
def featureCount(fn):
    """Return the number of features in the ESRI shapefile fn.

    Returns None (after printing a message) when the file cannot be opened.
    """
    shpDriver = ogr.GetDriverByName('ESRI Shapefile')
    dataset = shpDriver.Open(fn, 0)  # 0 = read-only, 1 = writeable

    # Guard clause: bail out early if the shapefile could not be opened.
    if dataset is None:
        print('Could not open %s' % (fn))
        return None

    print('Opened %s' % (fn))
    layer = dataset.GetLayer()
    nFeatures = layer.GetFeatureCount()
    print("Number of features in %s: %d" % (os.path.basename(fn),nFeatures))

    return nFeatures
|
288 |
|
|
289 |
def createQsub(outDir,rank,cores,time,prefix):
    """Write a PBS qsub script outDir/qsub_<rank>.qsub.

    The script requests `cores` ivy nodes, `time` hours of walltime, and
    launches wrapper.sh under mpiexec for subdirectory dirSub_<rank>.
    The queue is chosen from the walltime: <=2h devel, <=8h normal,
    otherwise long. Returns None.
    """
    fn = "%s/qsub_%d.qsub" % (outDir,rank)
    if time <= 2:
        qType = "devel"
    elif time <= 8:
        qType = "normal"
    elif time > 8:
        qType = "long"
    else:
        # Unreachable for ordinary numbers; only hit for NaN-like input.
        return None

    # BUGFIX: open() raises IOError rather than returning None, so the old
    # "if fPtr is None" check was dead; a with-block also guarantees the
    # handle is closed even if a write fails.
    with open(fn,"w+") as fPtr:
        fPtr.write("#PBS -S /bin/bash\n")
        fPtr.write("#PBS -l select=%d:ncpus=14:model=ivy\n" % cores)
        fPtr.write("#PBS -l walltime=%d:00:00\n" % time)
        fPtr.write("#PBS -j n\n")
        fPtr.write("#PBS -m be\n")
        fPtr.write("#PBS -N %s\n" % prefix)
        fPtr.write("#PBS -V\n")
        fPtr.write("#PBS -q %s\n" % qType)

        # ELayers Project ID
        fPtr.write("#PBS -W group_list=s1557\n\n")

        fPtr.write("CORES=%d\n" % cores)
        fPtr.write("SUBDIRNUM=%d\n" % rank)
        fPtr.write("HDIR=%s\n\n" % outDir)

        fPtr.write("source /u/aguzman4/sharedModules/etc/environ.sh\n\n")

        fPtr.write("module load mpi-mvapich2/1.4.1/intel\n\n")

        fPtr.write("#$PBS_O_WORKDIR\n\n")

        fPtr.write("LOGSTDOUT=$HDIR/climate_ivy_serial_%d.stdout\n" % rank)
        fPtr.write("LOGSTDERR=$HDIR/climate_ivy_serial_%d.stderr\n\n" % rank)

        fPtr.write("mpiexec -pernode -comm none -np $CORES $HDIR/wrapper.sh $SUBDIRNUM $HDIR 1> $LOGSTDOUT 2> $LOGSTDERR\n")
|
340 |
|
|
341 |
# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()
|
343 |
|
|
344 |
################################ # |
|
345 |
|
|
346 |
#python /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/inputCreate_stage_4.py /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv /nobackupp6/aguzman4/climateLayers/out/reg4/ /nobackupp6/aguzman4/climateLayers/out/reg4/serialRuns/2014/ 10 0 250000 /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_4.R r4 2014 |
|
347 |
|
|
348 |
#parameters |
|
349 |
#python script:/nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/inputCreate_stage_4.py |
|
350 |
#inLatLonList: /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv |
|
351 |
#outputFolder: /nobackupp6/aguzman4/climateLayers/out/reg4/ |
|
352 |
#outDirForShFiles: /nobackupp6/aguzman4/climateLayers/out/reg4/serialRuns/2014/ |
|
353 |
#filePerDir: 10 |
|
354 |
#lowStatCount: 0 |
|
355 |
#higStatCount: 250000 |
|
356 |
#scriptFn: /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_4.R |
|
357 |
#r4 |
|
358 |
#2014 |
|
359 |
|
|
360 |
# inLatLonList: A text file that includes lat_lon of the tiles you want to process,one per line in format 10.0_-120.0 |
|
361 |
# outputFolder: The path to the dir where you want to place the output files (e.g. /nobackupp4/climateLayers/output20Deg/reg4/ |
|
362 |
# outDirForShFiles: The directory where you want to place the shell script serial files. |
|
363 |
# filePerDir: How many sh files to put in each subdirectory, this is useful if you want to run a subset. |
|
364 |
# lowStatCount: What's the minimum station count you want to use |
|
365 |
# higStatCount: The max station count you want to use |
|
366 |
# scriptFn: The filename of the master script you want to use to run stage4. |
|
367 |
# For example 'Rscript /climateCode/environmental-layers/climate/research/world/interpolation/master_script_09012014_normal.R' |
|
368 |
|
|
369 |
|
|
370 |
#python /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/inputCreate_stage_4.py /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv /nobackupp6/aguzman4/climateLayers/out/reg4/ /nobackupp6/aguzman4/climateLayers/out/reg4/serialStage6/2010/ 10 0 250000 /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_4.R r4 2010 |
|
371 |
#python /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/runSetup/master_script_stage_6_01182016.R /nobackupp6/aguzman4/climateLayers/inputLayers/regionCSV/list_reg4.csv /nobackupp6/aguzman4/climateLayers/out/reg4/ /nobackupp6/aguzman4/climateLayers/out/reg4/serialStage6/2010/ 10 0 250000 /nobackupp6/aguzman4/climateLayers/finalCode/environmental-layers/climate/research/world/interpolation/master_script_stage_6_01182016.R r4 2010 |
|
372 |
|
Also available in: Unified diff
initial commit of python script from Alberto modified for stage6