Project

General

Profile

1 1674 aaronmk
#!/usr/bin/env python
2
# A DiGIR client
3
4 1678 aaronmk
import os
5 1674 aaronmk
import os.path
6
import sys
7
import urllib
8
import urllib2
9 1690 aaronmk
import xml.dom.minidom as minidom
10 1674 aaronmk
11
# Make ../lib (relative to this script) importable. Anchor on the script's
# absolute directory: a bare os.path.dirname(__file__) is '' when the script is
# run by name from its own directory, which would turn the path into /../lib.
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..",
    "lib"))
12
13 1678 aaronmk
import dates
14 1697 aaronmk
import http
15 1674 aaronmk
import opts
16 1699 aaronmk
import profiling
17 1674 aaronmk
import streams
18
import util
19 1690 aaronmk
import xml_dom
20
import xpath
21 1674 aaronmk
22
# Config

# Passed to streams.TimeoutInputStream for each response
timeout = 20 # sec
# Records requested per chunk when the chunk_size env var is not set
default_chunk_size = 10000 # records

# Substituted for [schema] in the request's <structure schemaLocation="...">
schema = 'http://digir.net/schema/conceptual/darwin/full/2003/1.0/darwin2full.xsd'
27 1674 aaronmk
# Template for a DiGIR search request. The bracketed placeholders are filled in
# with str.replace() before sending: [time], [source], [resource], [url],
# [count], [start], [schema]. The leading backslash keeps the XML declaration
# on the very first line of the string.
request_xml_template = '''\
<?xml version="1.0" encoding="UTF-8"?>
<request
    xmlns="http://digir.net/schema/protocol/2003/1.0"
    xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:digir="http://digir.net/schema/protocol/2003/1.0"
    xmlns:darwin="http://digir.net/schema/conceptual/darwin/2003/1.0"
    xmlns:dwc="http://digir.net/schema/conceptual/darwin/2003/1.0"
    xsi:schemaLocation="http://digir.net/schema/protocol/2003/1.0
      http://digir.sourceforge.net/schema/protocol/2003/1.0/digir.xsd
      http://digir.net/schema/conceptual/darwin/2003/1.0
      http://digir.sourceforge.net/schema/conceptual/darwin/2003/1.0/darwin2.xsd">
    <header>
        <version>1.0</version>
        <sendTime>[time]</sendTime>
        <source>[source]</source>
        <destination resource="[resource]">[url]</destination>
        <type>search</type>
    </header>
    <search>
        <filter>
            <equals>
                <darwin:Kingdom>plantae</darwin:Kingdom>
            </equals>
        </filter>
        <records limit="[count]" start="[start]">
            <structure schemaLocation="[schema]"/>
        </records>
        <count>true</count>
    </search>
</request>
'''
60
61 1683 aaronmk
# Opening and closing tags of the response's diagnostics element; main() hands
# both to streams.CaptureStream and reads the captured text from its `match`
diags_start = "<diagnostics>"
diags_end = "</diagnostics>"
63
64 1690 aaronmk
class InputError(Exception):
    '''Raised by main() when a response has no RECORD_COUNT diagnostic'''
65 1685 aaronmk
66 1674 aaronmk
def main():
    '''Fetch records from a DiGIR provider in chunks and copy them to stdout.

    Config is read from env vars via opts:
    url, resource: required; SystemExit with a usage message is raised if
        either is not set
    start: value of the first request's `start` attribute (default 0)
    n: max total # records to retrieve (default: no limit)
    chunk_size: # records to request at a time (default default_chunk_size)
    debug: flag; if on, each request and its diagnostics are written to stderr

    Each response is copied to stdout, followed by a newline. Status and
    profiling messages are written to stderr.

    @throws SystemExit if url or resource is not set
    @throws InputError if a response has no RECORD_COUNT diagnostic
    '''
    # Usage
    # Passed to every opts call below and then to opts.env_usage()
    # (presumably opts records each var name in it -- confirm in opts)
    env_names = []
    def usage_err():
        raise SystemExit('Usage: '+opts.env_usage(env_names, True)+' '
            +sys.argv[0]+' 2>>log')

    # Get config from env vars
    url = opts.get_env_var('url', None, env_names)
    resource = opts.get_env_var('resource', None, env_names)
    start = util.cast(int, opts.get_env_var('start', 0, env_names))
    count = util.cast(int, opts.get_env_var('n', None, env_names))
    chunk_size = util.cast(int, opts.get_env_var('chunk_size',
        default_chunk_size, env_names))
    debug = opts.env_flag('debug', False, env_names)
    if url is None or resource is None: usage_err()

    # Logging
    def log(msg, line_ending='\n'): sys.stderr.write(msg+line_ending)
    def debug_log(str_, label=None):
        if debug:
            if label is not None: sys.stderr.write(label+':\n')
            sys.stderr.write(str_+'\n')

    # Request XML
    # Use the absolute script dir: a bare dirname(__file__) is '' when the
    # script is run by name from its own dir, which would make this /local_ip
    self_dir = os.path.dirname(os.path.abspath(__file__))
    local_ip_pipe = os.popen(os.path.join(self_dir, 'local_ip'))
    try:
        source = local_ip_pipe.read().strip()
    finally:
        local_ip_pipe.close() # don't leak the pipe
    # NOTE(review): values are substituted verbatim; XML special chars in url
    # or resource are not escaped
    this_request_xml_template = (request_xml_template
        .replace('[source]', source)
        .replace('[url]', url)
        .replace('[resource]', resource)
        .replace('[schema]', schema)
        )

    # Stats
    total = 0
    def print_status(line_ending='\n'):
        log('Processed '+str(total)+' record(s)', line_ending)
    match_ct = None

    profiler = profiling.ItersProfiler(start_now=True, iter_text='record')

    # Retrieve data
    while count is None or total < count:
        # Adjust chunk size if last chunk
        this_count = chunk_size
        if count is not None: this_count = min(this_count, count - total)

        # Request XML
        send_time = dates.strftime('%Y-%m-%d %H:%M:%S %Z', dates.now())
        request_xml = (this_request_xml_template
            .replace('[count]', str(this_count))
            .replace('[start]', str(start))
            .replace('[time]', send_time)
            )
        debug_log(request_xml, 'request')

        # Send request
        this_url = url+'?'+urllib.urlencode({'request': request_xml})
        stream = streams.CaptureStream(streams.TimeoutInputStream(
            urllib2.urlopen(this_url), timeout), diags_start, diags_end)

        # Retrieve response
        try:
            streams.copy(stream, sys.stdout)
            # Make sure output ends in a newline so that consecutive XML
            # documents are on different lines
            sys.stdout.write('\n')
        finally:
            stream.close() # also close the connection if copying fails

        # Parse diagnostics
        diags_str = stream.match
        debug_log(diags_str, 'diagnostics')
        diags = xml_dom.parse_str(diags_str)
        def get_diag(name):
            return xpath.get_value(diags, 'diagnostic[@code='+name+']')

        # Process match count
        this_match_ct = util.cast(int, get_diag('MATCH_COUNT'))
        if this_match_ct != match_ct: # first or updated match count
            match_ct = this_match_ct
            log('Found '+str(match_ct)+' record(s)')

        # Process record count
        this_ct = util.cast(int, get_diag('RECORD_COUNT'))
        if this_ct is None:
            raise InputError('Missing RECORD_COUNT diagnostic')
        total += this_ct
        start += this_ct # advance start to fetch next set
        print_status('\r') # CR at end so next print overwrites msg

        # Decide if done
        if this_ct == 0 or get_diag('END_OF_RECORDS') == 'true': break

    print_status()
    profiler.stop(total)
    log(profiler.msg())
163 1674 aaronmk
164
# Run only when executed as a script, so importing this file has no side effects
if __name__ == '__main__': main()