Project

General

Profile

« Previous | Next » 

Revision 1714

bin/map: Fixed a bug in iterating over consecutive XML documents, where only the first element of the first document was processed. Using iters.flatten() and itertools.imap() fixes this by treating the consecutive XML documents as one continuous stream of rows.

View differences:

bin/map
5 5
# http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml
6 6

  
7 7
import csv
8
import itertools
8 9
import os.path
9 10
import sys
10 11
import xml.dom.minidom as minidom
......
13 14

  
14 15
import csvs
15 16
import exc
17
import iters
16 18
import maps
17 19
import opts
18 20
import Parser
......
222 224
            
223 225
            in_db.close()
224 226
        elif in_is_xml:
225
            for in_xml_root in xml_parse.docs_iter(sys.stdin):
226
                if map_path == None:
227
            if map_path == None:
228
                for in_xml_root in xml_parse.docs_iter(sys.stdin):
227 229
                    iter_ = xml_dom.NodeElemIter(in_xml_root)
228 230
                    util.skip(iter_, xml_dom.is_text) # skip metadata
229 231
                    row_ct = process_rows(lambda row, i: root.appendChild(row),
230 232
                        iter_)
231
                else:
233
            else:
234
                def doc_rows(in_xml_root):
232 235
                    rows = xpath.get(in_xml_root, in_root, limit=end)
233
                    if rows == []: raise SystemExit('Map error: Root "'+in_root
234
                        +'" not found in input')
235
                    
236
                    def get_value(in_, row):
237
                        in_ = './{'+(','.join(strings.with_prefixes(
238
                            ['']+prefixes, in_)))+'}' # also with no prefix
239
                        nodes = xpath.get(row, in_, allow_rooted=False)
240
                        if nodes != []: return xml_dom.value(nodes[0])
241
                        else: return None
242
                    
243
                    row_ct = map_rows(get_value, rows)
236
                    if rows == []: raise SystemExit('Map error: Root "'
237
                        +in_root+'" not found in input')
238
                    return rows
239
                
240
                def get_value(in_, row):
241
                    in_ = './{'+(','.join(strings.with_prefixes(
242
                        ['']+prefixes, in_)))+'}' # also with no prefix
243
                    nodes = xpath.get(row, in_, allow_rooted=False)
244
                    if nodes != []: return xml_dom.value(nodes[0])
245
                    else: return None
246
                
247
                row_ct = map_rows(get_value, iters.flatten(itertools.imap(
248
                    doc_rows, xml_parse.docs_iter(sys.stdin))))
244 249
        else: # input is CSV
245 250
            map_ = dict(mappings)
246 251
            reader, col_names = csvs.reader_and_header(sys.stdin)

Also available in: Unified diff