Project

General

Profile

« Previous | Next » 

Revision 41d291a6

Added by Adam Wilson almost 11 years ago

Added multiple tries to python GEE script

View differences:

climate/procedures/ee.MOD09.py
12 12
import wget
13 13
import os
14 14
import sys
15
from subprocess import call
15
import subprocess
16
import time
16 17

  
17 18
import logging
18 19
logging.basicConfig(filename='error.log',level=logging.DEBUG)
19 20

  
20 21
def Usage():
21
    print('Usage: ee.MOD9.py -projwin  ulx uly lrx lry -year year -month month -regionname 1') 
22
    print('Usage: ee.MOD9.py -projwin  ulx uly urx ury lrx lry llx lly -year year -month month -regionname 1') 
22 23
    sys.exit( 1 )
23 24

  
24 25
ulx = float(sys.argv[2])
25 26
uly = float(sys.argv[3])
26
lrx = float(sys.argv[4])
27
lry = float(sys.argv[5])
28
year = int(sys.argv[7])
29
month = int(sys.argv[9])
30
regionname = str(sys.argv[11])
31

  
27
urx = float(sys.argv[4])
28
ury = float(sys.argv[5])
29
lrx = float(sys.argv[6])
30
lry = float(sys.argv[7])
31
llx = float(sys.argv[8])
32
lly = float(sys.argv[9])
33
year = int(sys.argv[11])
34
month = int(sys.argv[13])
35
regionname = str(sys.argv[15])
32 36
#```
33 37
#ulx=-159
34 38
#uly=20
......
41 45
output=regionname+'_'+str(year)+'_'+str(month)
42 46

  
43 47
## set working directory (where files will be downloaded)
44
os.chdir('/mnt/data2/projects/cloud/mod09')
48
cwd='/mnt/data2/projects/cloud/mod09/'
49
## Wget always starts with a temporary file called "download", so move to a subdirectory to
50
## prevent overwrting when parallel downloading 
51
if not os.path.exists(cwd+output):
52
    os.makedirs(cwd+output)
53
os.chdir(cwd+output)
45 54

  
55
      # output filename
56
unzippedfilename=output+".mod09.tif"
57
      # Check if file already exists and continue if so...
58
if(os.path.exists(unzippedfilename)):
59
    sys.exit("File exists:"+output)    
60

  
61
## initialize GEE
46 62
MY_SERVICE_ACCOUNT = '511722844190@developer.gserviceaccount.com'  # replace with your service account
47 63
MY_PRIVATE_KEY_FILE = '/home/adamw/EarthEngine-privatekey.p12'       # replace with you private key file path
48

  
49 64
ee.Initialize(ee.ServiceAccountCredentials(MY_SERVICE_ACCOUNT, MY_PRIVATE_KEY_FILE))
50 65

  
51 66
## set map center to speed up viewing
......
57 72
# added the >0.5 because some values are coming out >1.  Need to look into this further as they should be bounded 0-1...
58 73

  
59 74
#////////////////////////////////////////////////////
60

  
61
      # output filename
62
unzippedfilename=output+".mod09.tif"
63

  
64
      # Check if file already exists and continue if so...
65
if(os.path.exists(unzippedfilename)):
66
    sys.exit("File exists:"+output)    
67

  
68

  
69 75
#####################################################
70 76
# Processing Function
71 77
# MOD09 internal cloud flag for this year-month
......
80 86
######################################################
81 87

  
82 88
## define region for download
83
region=[ulx,lry], [ulx, uly], [lrx, uly], [lrx, lry]  #h11v08
89
region=[llx, lly],[lrx, lry],[urx, ury],[ulx,uly]  #h11v08
84 90
strregion=str(list(region))
85
# Next few lines for testing only
86
# print info to confirm there is data
87
print(data.getInfo())
88

  
89
## print a status update
90
print(output+' Processing....      Coords:'+strregion)
91

  
92 91

  
93 92
# add to plot to confirm it's working
94 93
#ee.mapclient.addToMap(data, {'range': '0,100'}, 'MOD09')
95
#```
96

  
97
# TODO:  
98
#  use MODIS projection
99 94

  
100 95
      # build the URL and name the object (so that when it's unzipped we know what it is!)
101 96
path =mod09a.getDownloadUrl({
97
        'crs': 'SR-ORG:6974',          # MODIS Sinusoidal
98
        'scale': '926.625433055833',   # MODIS ~1km
102 99
        'name': output,  # name the file (otherwise it will be a uninterpretable hash)
103
        'scale': 926,                              # resolution in meters
104
        'crs': 'EPSG:4326', #4326                         #  projection
105 100
        'region': strregion                        # region defined above
106 101
        });
107 102

  
108
      # Sometimes EE will serve a corrupt zipped file with no error
109
      # to check this, use a while loop that keeps going till there is an unzippable file.  
110
      # This has the potential for an infinite loop...
111

  
112
#if(not(os.path.exists(output+".tif"))):
113
    # download with wget
114
print("Downloading "+output) 
115
wget.download(path)
116
#call(["wget"+path,shell=T])
117
        # try to unzip it
118
print("Unzipping "+output)
119
zipstatus=call("unzip "+output+".zip",shell=True)
120
         # if file doesn't exists or it didn't unzip, remove it and try again      
121
if(zipstatus==9):
122
    sys.exit("File exists:"+output)    
123
#        print("ERROR: "+output+" unzip-able")
124
#        os.remove(output+".zip")
103
# print info to confirm there is data
104
#print(data.getInfo())
105
print(' Processing.... '+output+'     Coords:'+strregion)
106

  
107
#test=wget.download(path)
108
test=subprocess.call(['-c','-q','--timeout=0','--ignore-length','--no-http-keep-alive','-O','mod09.zip',path],executable='wget')
109
print('download sucess for'+output+':   '+str(test))
110

  
111
## Sometimes EE will serve a corrupt zipped file with no error
112
# try to unzip it
113
zipstatus=subprocess.call("unzip -o mod09.zip",shell=True)
114

  
115
if zipstatus==0:  #if sucessful, quit
116
    os.remove("mod09.zip")
117
    sys.exit("Finished:  "+output)
118

  
119
# if file doesn't exists or it didn't unzip, try again      
120
if zipstatus!=0:
121
    print("Zip Error for:  "+output+"...         Trying again (#2)")    
122
    time.sleep(15)
123
    test=subprocess.call(['-c','-q','--timeout=0','--ignore-length','--no-http-keep-alive','-O','mod09.zip',path],executable='wget')
124

  
125
zipstatus=subprocess.call("unzip -o mod09.zip",shell=True)
126

  
127
if zipstatus==0:  #if sucessful, quit
128
    os.remove("mod09.zip")
129
    sys.exit("Finished:  "+output)
130

  
131
# if file doesn't exists or it didn't unzip, try again      
132
if zipstatus!=0:
133
    print("Zip Error for:  "+output+"...         Trying again (#3)")    
134
    time.sleep(30)
135
    test=subprocess.call(['-c','-q','--timeout=0','--ignore-length','--no-http-keep-alive','-O','mod09.zip',path],executable='wget')
136

  
137
# try again #4
138
zipstatus=subprocess.call("unzip -o mod09.zip",shell=True)
139

  
140
if zipstatus==0:  #if sucessful, quit
141
    os.remove("mod09.zip")
142
    sys.exit("Finished:  "+output)
143

  
144
# if file doesn't exists or it didn't unzip, try again      
145
if zipstatus!=0:
146
    print("Zip Error for:  "+output+"...         Trying again (#4)")    
147
    time.sleep(30)
148
    test=subprocess.call(['-c','-q','--timeout=0','--ignore-length','--no-http-keep-alive','-O','mod09.zip',path],executable='wget')
149

  
150
zipstatus=subprocess.call("unzip -o mod09.zip",shell=True)
151

  
152
if zipstatus==0:  #if sucessful, quit
153
    os.remove("mod09.zip")
154
    sys.exit("Finished:  "+output)
125 155

  
126 156
## delete the zipped file (the unzipped version is kept)
127
os.remove(output+".zip")
128
       
129
print(output+' Finished!')
157
os.remove("mod09.zip")
158
sys.exit("Zip Error for:  "+output)    
130 159

  
131 160

  
climate/procedures/ee_compile.R
3 3
setwd("~/acrobates/adamw/projects/cloud")
4 4
library(raster)
5 5
library(doMC)
6

  
7
registerDoMC(4)
8
beginCluster(4)
6
library(multicore)
7
library(foreach)
8
#library(doMPI)
9
registerDoMC(10)
10
#beginCluster(4)
9 11

  
10 12
tempdir="tmp"
11 13
if(!file.exists(tempdir)) dir.create(tempdir)
......
14 16
## Load list of tiles
15 17
tiles=read.table("tile_lat_long_10d.txt",header=T)
16 18

  
17
jobs=expand.grid(tile=tiles$Tile,year=2000:2012,month=1:12)
18
jobs[,c("ULX","ULY","LRX","LRY")]=tiles[match(jobs$tile,tiles$Tile),c("ULX","ULY","LRX","LRY")]
19
### Build Tiles
20

  
21
## bin sizes
22
ybin=30
23
xbin=30
24

  
25
tiles=expand.grid(ulx=seq(-180,180-xbin,by=xbin),uly=seq(90,-90+ybin,by=-ybin))
26
tiles$h=factor(tiles$ulx,labels=paste("h",sprintf("%02d",1:length(unique(tiles$ulx))),sep=""))
27
tiles$v=factor(tiles$uly,labels=paste("v",sprintf("%02d",1:length(unique(tiles$uly))),sep=""))
28
tiles$tile=paste(tiles$h,tiles$v,sep="")
29
tiles$urx=tiles$ulx+xbin
30
tiles$ury=tiles$uly
31
tiles$lrx=tiles$ulx+xbin
32
tiles$lry=tiles$uly-ybin
33
tiles$llx=tiles$ulx
34
tiles$lly=tiles$uly-ybin
35
tiles$cy=(tiles$uly+tiles$lry)/2
36
tiles$cx=(tiles$ulx+tiles$urx)/2
37
tiles=tiles[,c("tile","h","v","ulx","uly","urx","ury","lrx","lry","llx","lly","cx","cy")]
38

  
39
jobs=expand.grid(tile=tiles$tile,year=2000:2012,month=1:12)
40
jobs[,c("ulx","uly","urx","ury","lrx","lry","llx","lly")]=tiles[match(jobs$tile,tiles$tile),c("ulx","uly","urx","ury","lrx","lry","llx","lly")]
41

  
42
## drop Janurary 2000 from list (pre-modis)
43
jobs=jobs[!(jobs$year==2000&jobs$month==1),]
44

  
45
#jobs=jobs[jobs$month==1,]
19 46

  
20 47

  
21
jobs=jobs[jobs$month==1,]
48
#jobs=jobs[jobs$month==7,]
22 49
## Run the python downloading script
23 50
#system("~/acrobates/adamw/projects/environmental-layers/climate/procedures/ee.MOD09.py -projwin -159 20 -154.5 18.5 -year 2001 -month 6 -region test")   
24
i=6715
25
testtiles=c("h02v07","h02v06","h02v08","h03v07","h03v06")
26
todo=which(jobs$tile%in%testtiles)
27
todo=todo[1:3]
51
i=1
52
#testtiles=c("h02v07","h02v06","h02v08","h03v07","h03v06","h03v08")
53
#todo=which(jobs$tile%in%testtiles)
54
#todo=todo[1:3]
55
#todo=1
28 56
todo=1:nrow(jobs)
29
#todo=todo[500:503]
30
mclapply(todo,function(i)
31
         system(paste("~/acrobates/adamw/projects/environmental-layers/climate/procedures/ee.MOD09.py -projwin ",jobs$ULX[i]," ",jobs$ULY[i]," ",jobs$LRX[i]," ",jobs$LRY[i],
32
       "  -year ",jobs$year[i]," -month ",jobs$month[i]," -region ",jobs$tile[i],sep="")),mc.cores=1)
57

  
58
checkcomplete=T
59
if(checkcomplete&exists("df")){  #if desired (and "df" exists from below) drop complete date-tiles
60
todo=which(!paste(jobs$tile,jobs$year,jobs$month)%in%paste(df$region,df$year,df$month))
61
}
62

  
63
writeLines(paste("Tiling options will produce",nrow(tiles),"tiles and ",nrow(jobs),"tile-months.  Current todo list is ",length(todo)))
64

  
65
foreach(i=todo) %dopar%{
66
    system(paste("python ~/acrobates/adamw/projects/environmental-layers/climate/procedures/ee.MOD09.py -projwin ",
67
                      jobs$ulx[i]," ",jobs$uly[i]," ",jobs$urx[i]," ",jobs$ury[i]," ",jobs$lrx[i]," ",jobs$lry[i]," ",jobs$llx[i]," ",jobs$lly[i]," ",
68
                      "  -year ",jobs$year[i]," -month ",jobs$month[i]," -region ",jobs$tile[i],sep=""))
69
     }
33 70

  
34 71

  
35 72
##  Get list of available files
36
df=data.frame(path=list.files("/mnt/data2/projects/cloud/mod09",pattern="*.tif$",full=T),stringsAsFactors=F)
73
df=data.frame(path=list.files("/mnt/data2/projects/cloud/mod09",pattern="*.tif$",full=T,recur=T),stringsAsFactors=F)
37 74
df[,c("region","year","month")]=do.call(rbind,strsplit(basename(df$path),"_|[.]"))[,c(1,2,3)]
38 75
df$date=as.Date(paste(df$year,"_",df$month,"_15",sep=""),"%Y_%m_%d")
39 76

  
77
## add stats to test for missing data
78
addstats=F
79
if(addstats){
80
    df[,c("max","min","mean","sd")]=do.call(rbind.data.frame,mclapply(1:nrow(df),function(i) as.numeric(sub("^.*[=]","",grep("STATISTICS",system(paste("gdalinfo -stats",df$path[i]),inter=T),value=T)))))
81
    table(df$sd==0)
82
}
83

  
40 84
## subset to testtiles?
41 85
#df=df[df$region%in%testtiles,]
42
df=df[df$month==1,]
86
#df=df[df$month==1,]
43 87
table(df$year,df$month)
44 88

  
45 89
## drop some if not complete
46
#df=df[df$year<=2009,]
90
df=df[df$month==1&df$year%in%c(2001:2002,2005),]
47 91
rerun=T  # set to true to recalculate all dates even if file already exists
48 92

  
49 93
## Loop over existing months to build composite netcdf files
......
51 95
## get date
52 96
  print(date)
53 97
  ## Define output and check if it already exists
98
  tffile=paste(tempdir,"/mod09_",date,".tif",sep="")
54 99
  ncfile=paste(tempdir,"/mod09_",date,".nc",sep="")
55 100
  if(!rerun&file.exists(ncfile)) next
56 101
  ## merge regions to a new netcdf file
57
  system(paste("gdal_merge.py -o ",ncfile," -n -32768 -of netCDF -ot Int16 ",paste(df$path[df$date==date],collapse=" ")))
102
#  system(paste("gdal_merge.py -tap -init 5 -o ",ncfile," -n -32768.000 -of netCDF -ot Int16 ",paste(df$path[df$date==date],collapse=" ")))
103
  system(paste("gdal_merge.py -o ",tffile," -init -32768  -n -32768.000 -ot Int16 ",paste(df$path[df$date==date],collapse=" ")))
104
  system(paste("gdal_translate -of netCDF ",tffile," ",ncfile," -ot Int16 "))
105
  file.remove(tffile)
58 106
  system(paste("ncecat -O -u time ",ncfile," ",ncfile,sep=""))
59 107
## create temporary nc file with time information to append to MOD06 data
60 108
  cat(paste("
......
102 150

  
103 151
### generate the monthly mean and sd
104 152
#system(paste("cdo -P 10 -O merge -ymonmean data/mod09.nc -chname,CF,CF_sd -ymonstd data/mod09.nc data/mod09_clim.nc"))
105
system(paste("cdo  -O -ymonmean data/mod09.nc data/mod09_clim_mean.nc"))
153
xosystem(paste("cdo  -O -ymonmean data/mod09.nc data/mod09_clim_mean.nc"))
106 154
system(paste("cdo  -O -chname,CF,CF_sd -ymonstd data/mod09.nc data/mod09_clim_sd.nc"))
107 155

  
108 156
#  Overall mean

Also available in: Unified diff