Revision 56
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/map | ||
---|---|---|
3 | 3 |
# For outputting an XML file to a PostgreSQL database, use the general format of |
4 | 4 |
# http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml |
5 | 5 |
|
6 |
import copy |
|
7 |
import csv |
|
8 | 6 |
import os |
9 | 7 |
import os.path |
10 |
import re |
|
11 | 8 |
import sys |
12 | 9 |
import xml.dom.minidom |
13 | 10 |
|
... | ... | |
44 | 41 |
|
45 | 42 |
csv_config = dict(delimiter=',', quotechar='"') |
46 | 43 |
|
47 |
# Input datasource to XML tree |
|
48 |
if uses_map: # input is CSV |
|
49 |
import xml_xpath |
|
44 |
# Load map |
|
45 |
is_xml = True |
|
46 |
if uses_map: |
|
47 |
import copy |
|
48 |
import csv |
|
49 |
|
|
50 | 50 |
import xpath |
51 | 51 |
|
52 |
# Load map |
|
53 | 52 |
map_ = {} |
54 | 53 |
has_types = False # whether outer elements are type containers |
55 | 54 |
stream = open(map_path, 'rb') |
56 | 55 |
reader = csv.reader(stream, **csv_config) |
57 | 56 |
src, dest = reader.next()[:2] |
57 |
src, sep, src_base = src.partition('/') |
|
58 |
is_xml = sep != '' |
|
58 | 59 |
for row in reader: |
59 | 60 |
name, path = row[:2] |
60 | 61 |
if name != '' and path != '': |
... | ... | |
62 | 63 |
path = path.replace('<name>', name) |
63 | 64 |
map_[name] = xpath.XpathParser(path).parse() |
64 | 65 |
stream.close() |
66 |
|
|
67 |
# Input datasource to XML tree |
|
68 |
if is_xml: doc = xml.dom.minidom.parse(sys.stdin) |
|
69 |
if uses_map: |
|
70 |
import xml_xpath |
|
65 | 71 |
|
66 |
# Load and map CSV |
|
67 |
doc = xml.dom.minidom.getDOMImplementation().createDocument(None, dest, |
|
68 |
None) |
|
69 |
reader = csv.reader(sys.stdin, **csv_config) |
|
70 |
fieldnames = reader.next() |
|
71 |
row_idx = 0 |
|
72 |
for row in reader: |
|
73 |
row_id = str(row_idx) |
|
74 |
for idx, name in enumerate(fieldnames): |
|
75 |
value = row[idx] |
|
76 |
if value != '' and name in map_: |
|
77 |
path = copy.deepcopy(map_[name]) # don't modify main value! |
|
78 |
xpath.set_id(path, row_id, has_types) |
|
79 |
xpath.set_value(path, value) |
|
80 |
xml_xpath.get(doc, path, True) |
|
81 |
row_idx += 1 |
|
82 |
else: doc = xml.dom.minidom.parse(sys.stdin) # input is XML |
|
72 |
out_doc = xml.dom.minidom.getDOMImplementation().createDocument(None, |
|
73 |
dest, None) |
|
74 |
if is_xml: raise Exception('XML-XML mapping not supported yet') |
|
75 |
else: # input is CSV |
|
76 |
reader = csv.reader(sys.stdin, **csv_config) |
|
77 |
fieldnames = reader.next() |
|
78 |
row_idx = 0 |
|
79 |
for row in reader: |
|
80 |
row_id = str(row_idx) |
|
81 |
for idx, name in enumerate(fieldnames): |
|
82 |
value = row[idx] |
|
83 |
if value != '' and name in map_: |
|
84 |
path = copy.deepcopy(map_[name]) # don't modify value! |
|
85 |
xpath.set_id(path, row_id, has_types) |
|
86 |
xpath.set_value(path, value) |
|
87 |
xml_xpath.get(out_doc, path, True) |
|
88 |
row_idx += 1 |
|
89 |
doc = out_doc |
|
83 | 90 |
|
84 | 91 |
# Output XML tree |
85 | 92 |
if to_db_config != None: # output is database |
Also available in: Unified diff
Started adding XML-XML mapping support to map