Project

General

Profile

« Previous | Next » 

Revision 1690

digir_client: Repeatedly retrieve data in chunks. Provide match count. Add section comments.

View differences:

digir_client
6 6
import sys
7 7
import urllib
8 8
import urllib2
9
import xml.dom.minidom as minidom
9 10

  
10 11
sys.path.append(os.path.dirname(__file__)+"/../lib")
11 12

  
......
13 14
import opts
14 15
import streams
15 16
import util
17
import xml_dom
18
import xpath
16 19

  
17 20
# Config
18 21
timeout = 20 # sec
22
chunk_size = 10000 # records
19 23
default_schema = 'http://digir.net/schema/conceptual/darwin/full/2003/1.0/darwin2full.xsd'
20 24

  
21 25
request_xml_template = '''\
......
55 59
diags_start = '<diagnostics>'
56 60
diags_end = '</diagnostics>'
57 61

  
58
class InputException(Exception): pass
62
class InputError(Exception): pass
59 63

  
60 64
def main():
65
    # Usage
61 66
    env_names = []
62 67
    def usage_err():
63 68
        raise SystemExit('Usage: '+opts.env_usage(env_names, True)+' '
......
68 73
    resource = opts.get_env_var('resource', None, env_names)
69 74
    schema = opts.get_env_var('schema', default_schema, env_names)
70 75
    start = util.cast(int, opts.get_env_var('start', 0, env_names))
71
    count = util.cast(int, opts.get_env_var('n', 1, env_names))
76
    count = util.cast(int, opts.get_env_var('n', None, env_names))
72 77
    debug = opts.env_flag('debug', False, env_names)
73 78
    if url == None or resource == None: usage_err()
74 79
    
80
    # Logging
75 81
    def clear_line(): sys.stderr.write('\n')
76 82
    log_indent = 0
77 83
    def log(msg, line_ending='\n'):
......
81 87
            if label != None: sys.stderr.write(label+':\n')
82 88
            sys.stderr.write(str_+'\n')
83 89
    
90
    # Request XML
84 91
    self_dir = os.path.dirname(__file__)
85 92
    source = os.popen(self_dir+"/local_ip").read().strip()
86 93
    this_request_xml_template = (request_xml_template
......
88 95
        .replace('[url]', url)
89 96
        .replace('[resource]', resource)
90 97
        .replace('[schema]', schema)
91
        .replace('[count]', str(count))
98
        .replace('[count]', str(chunk_size))
92 99
        )
93 100
    
94
    time = dates.strftime('%Y-%m-%d %H:%M:%S %Z', dates.now())
95
    request_xml = (this_request_xml_template
96
        .replace('[start]', str(start))
97
        .replace('[time]', time)
98
        )
99
    debug_log(request_xml, 'request')
100
    this_url = url+'?'+urllib.urlencode({'request': request_xml})
101
    stream = streams.CaptureStream(streams.TimeoutInputStream(
102
        urllib2.urlopen(this_url), timeout), diags_start, diags_end)
101
    # Stats
102
    total = 0
103
    def print_status(line_ending='\n'):
104
        log('Processed '+str(total)+' record(s)', line_ending)
105
    match_ct = None
103 106
    
104
    streams.copy(stream, sys.stdout)
105
    stream.close()
107
    # Retrieve data
108
    while count == None or total < count:
109
        # Request XML
110
        time = dates.strftime('%Y-%m-%d %H:%M:%S %Z', dates.now())
111
        request_xml = (this_request_xml_template
112
            .replace('[start]', str(start))
113
            .replace('[time]', time)
114
            )
115
        debug_log(request_xml, 'request')
116
        
117
        # Send request
118
        this_url = url+'?'+urllib.urlencode({'request': request_xml})
119
        stream = streams.CaptureStream(streams.TimeoutInputStream(
120
            urllib2.urlopen(this_url), timeout), diags_start, diags_end)
121
        
122
        # Retrieve response
123
        streams.copy(stream, sys.stdout)
124
        stream.close()
125
        
126
        # Parse diagnostics
127
        diags_str = stream.match
128
        debug_log(diags_str, 'diagnostics')
129
        diags = xml_dom.parse_str(diags_str)
130
        def get_diag(name):
131
            return xpath.get_value(diags, 'diagnostic[@code='+name+']')
132
        
133
        # Process match count
134
        this_match_ct = util.cast(int, get_diag('MATCH_COUNT'))
135
        if this_match_ct != match_ct: # first or updated match count
136
            match_ct = this_match_ct
137
            log('Found '+str(match_ct)+' record(s)')
138
        
139
        # Process record count
140
        this_ct = util.cast(int, get_diag('RECORD_COUNT'))
141
        if this_ct == None: raise InputError('Missing RECORD_COUNT diagnostic')
142
        total += this_ct
143
        start += this_ct # advance start to fetch next set
144
        print_status('\r') # CR at end so next print overwrites msg
145
        if this_ct == 0 or get_diag('END_OF_RECORDS') == 'true': break
106 146
    
107
    debug_log(stream.match, 'diagnostics')
147
    print_status()
108 148

  
109 149
main()

Also available in: Unified diff