Project

General

Profile

« Previous | Next » 

Revision 1701

bin/map: Support concatenated XML documents for XML inputs

View differences:

map
8 8
import os.path
9 9
import sys
10 10
import xml.dom.minidom as minidom
11
import xml.parsers.expat as expat
11 12

  
12 13
sys.path.append(os.path.dirname(__file__)+"/../lib")
13 14

  
......
217 218
            
218 219
            in_db.close()
219 220
        elif in_is_xml:
220
            doc0_root = minidom.parse(sys.stdin).documentElement
221
            if map_path == None:
222
                iter_ = xml_dom.NodeElemIter(doc0_root)
223
                util.skip(iter_, xml_dom.is_text) # skip metadata
224
                row_ct = process_rows(lambda row, i: root.appendChild(row),
225
                    iter_)
226
            else:
227
                rows = xpath.get(doc0_root, in_root, limit=end)
228
                if rows == []: raise SystemExit('Map error: Root "'+in_root
229
                    +'" not found in input')
221
            while True:
222
                try: in_xml_root = minidom.parse(sys.stdin).documentElement
223
                except expat.ExpatError, e:
224
                    if str(e).startswith('no element found:'): break
225
                        # no more concatenated XML documents
226
                    else: raise e
230 227
                
231
                def get_value(in_, row):
232
                    in_ = './{'+(','.join(strings.with_prefixes(['']+prefixes,
233
                        in_)))+'}' # also with no prefix
234
                    nodes = xpath.get(row, in_, allow_rooted=False)
235
                    if nodes != []: return xml_dom.value(nodes[0])
236
                    else: return None
237
                
238
                row_ct = map_rows(get_value, rows)
228
                if map_path == None:
229
                    iter_ = xml_dom.NodeElemIter(in_xml_root)
230
                    util.skip(iter_, xml_dom.is_text) # skip metadata
231
                    row_ct = process_rows(lambda row, i: root.appendChild(row),
232
                        iter_)
233
                else:
234
                    rows = xpath.get(in_xml_root, in_root, limit=end)
235
                    if rows == []: raise SystemExit('Map error: Root "'+in_root
236
                        +'" not found in input')
237
                    
238
                    def get_value(in_, row):
239
                        in_ = './{'+(','.join(strings.with_prefixes(
240
                            ['']+prefixes, in_)))+'}' # also with no prefix
241
                        nodes = xpath.get(row, in_, allow_rooted=False)
242
                        if nodes != []: return xml_dom.value(nodes[0])
243
                        else: return None
244
                    
245
                    row_ct = map_rows(get_value, rows)
239 246
        else: # input is CSV
240 247
            map_ = dict(mappings)
241 248
            reader, col_names = csvs.reader_and_header(sys.stdin)

Also available in: Unified diff