/ - Diff - BIEN 3 - NCEAS Projects

« Previous | Next »

Revision 1714

Added by Aaron Marcuse-Kubitza almost 13 years ago

bin/map: Fixed bug in iteration over consecutive XML documents where only the first element of the first document was processed. Use of iters.flatten() and itertools.imap() fixes this problem so that the consecutive XML documents are regarded as a continuous stream of rows.

     # http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml
     import csv
     import itertools
     import os.path
     import sys
     import xml.dom.minidom as minidom
-...
     import csvs
     import exc
     import iters
     import maps
     import opts
     import Parser
-...
                 in_db.close()
             elif in_is_xml:
                 for in_xml_root in xml_parse.docs_iter(sys.stdin):
                     if map_path == None:
                 if map_path == None:
                     for in_xml_root in xml_parse.docs_iter(sys.stdin):
                         iter_ = xml_dom.NodeElemIter(in_xml_root)
                         util.skip(iter_, xml_dom.is_text) # skip metadata
                         row_ct = process_rows(lambda row, i: root.appendChild(row),
                             iter_)
                     else:
                 else:
                     def doc_rows(in_xml_root):
                         rows = xpath.get(in_xml_root, in_root, limit=end)
                         if rows == []: raise SystemExit('Map error: Root "'+in_root
                             +'" not found in input')
                         def get_value(in_, row):
                             in_ = './{'+(','.join(strings.with_prefixes(
                                 ['']+prefixes, in_)))+'}' # also with no prefix
                             nodes = xpath.get(row, in_, allow_rooted=False)
                             if nodes != []: return xml_dom.value(nodes[0])
                             else: return None
                         row_ct = map_rows(get_value, rows)
                         if rows == []: raise SystemExit('Map error: Root "'
                             +in_root+'" not found in input')
                         return rows
                     def get_value(in_, row):
                         in_ = './{'+(','.join(strings.with_prefixes(
                             ['']+prefixes, in_)))+'}' # also with no prefix
                         nodes = xpath.get(row, in_, allow_rooted=False)
                         if nodes != []: return xml_dom.value(nodes[0])
                         else: return None
                     row_ct = map_rows(get_value, iters.flatten(itertools.imap(
                         doc_rows, xml_parse.docs_iter(sys.stdin))))
             else: # input is CSV
                 map_ = dict(mappings)
                 reader, col_names = csvs.reader_and_header(sys.stdin)

Also available in: Unified diff

Project

General

Profile

Revision 1714

Added by Aaron Marcuse-Kubitza almost 13 years ago