Project

General

Profile

1 1674 aaronmk
#!/usr/bin/env python
2
# A DiGIR client
3
4 1678 aaronmk
import os
5 1674 aaronmk
import os.path
6
import sys
7
import urllib
8
import urllib2
9 1690 aaronmk
import xml.dom.minidom as minidom
10 1674 aaronmk
11
# Make the project's shared modules in ../lib (relative to this script)
# importable. abspath() is required because __file__ may be a bare filename, in
# which case dirname() returns '' and plain concatenation would yield "/../lib".
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)),
    os.pardir, 'lib'))
12
13 1678 aaronmk
import dates
14 1674 aaronmk
import opts
15
import streams
16
import util
17 1690 aaronmk
import xml_dom
18
import xpath
19 1674 aaronmk
20
# Config

# How long to wait on the provider's response stream, in seconds
timeout = 20 # sec

# Number of records requested per chunk when the chunk_size env var is not set
default_chunk_size = 10000 # records

# Schema location substituted for [schema] in the request's <structure> element
schema = 'http://digir.net/schema/conceptual/darwin/full/2003/1.0/darwin2full.xsd'
25 1674 aaronmk
# DiGIR search request. The bracketed placeholders are filled in by main() using
# str.replace(): [source], [url], [resource] and [schema] once per run; [count],
# [start] and [time] once per chunk. The XML declaration must be the very first
# thing in the string, hence the line continuation right after the opening quotes.
request_xml_template = '''\
<?xml version="1.0" encoding="UTF-8"?>
<request
    xmlns="http://digir.net/schema/protocol/2003/1.0"
    xmlns:xsd="http://www.w3.org/2001/XMLSchema"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:digir="http://digir.net/schema/protocol/2003/1.0"
    xmlns:darwin="http://digir.net/schema/conceptual/darwin/2003/1.0"
    xmlns:dwc="http://digir.net/schema/conceptual/darwin/2003/1.0"
    xsi:schemaLocation="http://digir.net/schema/protocol/2003/1.0
      http://digir.sourceforge.net/schema/protocol/2003/1.0/digir.xsd
      http://digir.net/schema/conceptual/darwin/2003/1.0
      http://digir.sourceforge.net/schema/conceptual/darwin/2003/1.0/darwin2.xsd">
    <header>
        <version>1.0</version>
        <sendTime>[time]</sendTime>
        <source>[source]</source>
        <destination resource="[resource]">[url]</destination>
        <type>search</type>
    </header>
    <search>
        <filter>
            <equals>
                <darwin:Kingdom>plantae</darwin:Kingdom>
            </equals>
        </filter>
        <records limit="[count]" start="[start]">
            <structure schemaLocation="[schema]"/>
        </records>
        <count>true</count>
    </search>
</request>
'''
58
59 1683 aaronmk
# Start and end markers of the diagnostics section of a response. main() hands
# them to streams.CaptureStream and afterwards reads the captured text from the
# stream's `match` attribute.
diags_start = '<diagnostics>'
diags_end = '</diagnostics>'
61
62 1690 aaronmk
class InputError(Exception):
    '''Raised when the provider's response lacks expected information, such as
    the RECORD_COUNT diagnostic'''
    pass
63 1685 aaronmk
64 1674 aaronmk
def main():
    '''Retrieves records from a DiGIR provider in chunks, copying each raw
    response document to stdout and writing progress messages to stderr.

    Configuration comes from environment variables (read via opts):
        url:        provider endpoint (required)
        resource:   resource to query at that endpoint (required)
        start:      index of the first record to request (default 0)
        n:          maximum number of records to retrieve (default: no limit)
        chunk_size: records requested per round trip (default
                    default_chunk_size)
        debug:      if set, also writes each request and each diagnostics
                    section to stderr

    Raises:
        SystemExit: with a usage message if url or resource is not set
        InputError: if a response contains no diagnostics or no RECORD_COUNT
            diagnostic
    '''
    # Usage
    env_names = [] # filled in by the opts calls below; used in the usage msg
    def usage_err():
        raise SystemExit('Usage: '+opts.env_usage(env_names, True)+' '
            +sys.argv[0]+' 2>>log')

    # Get config from env vars
    url = opts.get_env_var('url', None, env_names)
    resource = opts.get_env_var('resource', None, env_names)
    start = util.cast(int, opts.get_env_var('start', 0, env_names))
    count = util.cast(int, opts.get_env_var('n', None, env_names))
    chunk_size = util.cast(int, opts.get_env_var('chunk_size',
        default_chunk_size, env_names))
    debug = opts.env_flag('debug', False, env_names)
    if url is None or resource is None: usage_err()

    # Logging
    def log(msg, line_ending='\n'): sys.stderr.write(msg+line_ending)
    def debug_log(str_, label=None):
        if debug:
            if label is not None: sys.stderr.write(label+':\n')
            sys.stderr.write(str_+'\n')

    # Request XML
    # abspath() because __file__ may be a bare filename; dirname() would then be
    # '' and the helper's path would wrongly become "/local_ip"
    self_dir = os.path.dirname(os.path.abspath(__file__))
    # The output of the local_ip helper next to this script fills in [source]
    source = os.popen(self_dir+"/local_ip").read().strip()
    # Substitute the values that stay the same for every chunk only once
    this_request_xml_template = (request_xml_template
        .replace('[source]', source)
        .replace('[url]', url)
        .replace('[resource]', resource)
        .replace('[schema]', schema)
        )

    # Stats
    total = 0 # records retrieved so far
    def print_status(line_ending='\n'):
        log('Processed '+str(total)+' record(s)', line_ending)
    match_ct = None # most recently reported MATCH_COUNT, if any

    def get_diag(diags, name):
        return xpath.get_value(diags, 'diagnostic[@code='+name+']')

    # Retrieve data
    while count is None or total < count:
        # Adjust chunk size if last chunk
        this_count = chunk_size
        if count is not None: this_count = min(this_count, count - total)

        # Request XML
        time = dates.strftime('%Y-%m-%d %H:%M:%S %Z', dates.now())
        request_xml = (this_request_xml_template
            .replace('[count]', str(this_count))
            .replace('[start]', str(start))
            .replace('[time]', time)
            )
        debug_log(request_xml, 'request')

        # Send request
        this_url = url+'?'+urllib.urlencode({'request': request_xml})
        stream = streams.CaptureStream(streams.TimeoutInputStream(
            urllib2.urlopen(this_url), timeout), diags_start, diags_end)

        # Retrieve response
        try:
            streams.copy(stream, sys.stdout)
            # Make sure output ends in a newline so that consecutive XML
            # documents are on different lines
            sys.stdout.write('\n')
        finally: stream.close() # don't leak the connection if copying fails

        # Parse diagnostics
        diags_str = stream.match
        # NOTE(review): assumes `match` is None or empty when the response had
        # no diagnostics section -- confirm against streams.CaptureStream
        if not diags_str: raise InputError('Missing diagnostics')
        debug_log(diags_str, 'diagnostics')
        diags = xml_dom.parse_str(diags_str)

        # Process match count
        this_match_ct = util.cast(int, get_diag(diags, 'MATCH_COUNT'))
        if this_match_ct != match_ct: # first or updated match count
            match_ct = this_match_ct
            log('Found '+str(match_ct)+' record(s)')

        # Process record count
        this_ct = util.cast(int, get_diag(diags, 'RECORD_COUNT'))
        if this_ct is None:
            raise InputError('Missing RECORD_COUNT diagnostic')
        total += this_ct
        start += this_ct # advance start to fetch next set
        print_status('\r') # CR at end so next print overwrites msg

        # Decide if done
        if this_ct == 0 or get_diag(diags, 'END_OF_RECORDS') == 'true': break

    print_status()
157 1674 aaronmk
158
# Run the client. This call is unguarded, so it executes whenever this file is
# loaded, including on import.
main()