Revision 44
Added by Aaron Marcuse-Kubitza about 13 years ago
scripts/xml2db_ | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# Imports an XML file into a PostgreSQL database |
|
3 |
# Format: see http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml |
|
4 |
|
|
5 |
import os |
|
6 |
import os.path |
|
7 |
import psycopg2 |
|
8 |
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE |
|
9 |
import sys |
|
10 |
import xml.dom.minidom |
|
11 |
|
|
12 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
|
13 |
import xml_db |
|
14 |
|
|
15 |
def env_flag(name):
    '''Returns whether the environment variable `name` is set to a non-empty
    value. An unset variable and an empty one both count as False.'''
    return os.environ.get(name, '') != ''
|
16 |
|
|
17 |
def main():
    '''Imports the XML document read from stdin into a PostgreSQL database.
    
    The connection settings are read from the host, user, password, and
    database environment variables. All four must be set; a variable set to
    the empty string is not passed to psycopg2.connect().
    
    The inserted rows are committed only if the commit environment variable is
    set to a non-empty value. Otherwise the transaction is rolled back.
    
    Raises Exception with a usage message if a connection variable is unset.
    '''
    prog_name = sys.argv.pop(0)
    try:
        db_config = {}
        for name in ['host', 'user', 'password', 'database']:
            if os.environ[name] != '': db_config[name] = os.environ[name]
    except KeyError: raise Exception('Usage: env host=... user=... password=...'
        ' database=... [commit=1] '+prog_name+' <dataset')
    commit = env_flag('commit')
    
    # Process dataset
    db = psycopg2.connect(**db_config)
    try: # outer try ensures the connection is closed even if a step below fails
        db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
        try:
            doc = xml.dom.minidom.parse(sys.stdin)
            row_ct_ref = [0] # one-element list so xml2db() can update the count
            xml_db.xml2db(db, doc.documentElement, row_ct_ref)
            print('Inserted '+str(row_ct_ref[0])+' rows')
            if commit: db.commit()
        finally:
            # Always runs: discards the changes on error or when not committing
            db.rollback()
    finally:
        db.close()
|
39 |
|
|
40 |
main() |
|
41 | 0 |
scripts/data2xml_ | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# Converts a CSV dataset to XML using a mappings spreadsheet |
|
3 |
|
|
4 |
import csv |
|
5 |
import os.path |
|
6 |
import re |
|
7 |
import sys |
|
8 |
from copy import deepcopy |
|
9 |
from xml.dom.minidom import getDOMImplementation |
|
10 |
|
|
11 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
|
12 |
import xpath |
|
13 |
|
|
14 |
def main():
    '''Converts the CSV dataset read from stdin to XML written to stdout.
    
    Usage: prog dest_mappings_column mappings_path <dataset >output
    
    The mappings file is a CSV spreadsheet with a header row. The first column
    of each row is an input column name, and the column whose header equals
    dest_mappings_column holds the XPath that input column maps to. Any
    "<name>" in the XPath is replaced by the input column name. Rows whose name
    or XPath is empty, and blank or truncated rows, are ignored.
    
    Each dataset row is given an ID equal to its 0-based index after the header.
    
    Raises Exception with a usage message if an argument is missing.
    Raises ValueError if dest_mappings_column is not a mappings header.
    '''
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError: raise Exception('Usage: '+prog_name
        +' dest_mappings_column mappings_path <dataset >output')
    
    # Get mappings
    mappings = {}
    has_types = False # whether outer elements are type containers
    stream = open(mappings_path, 'rb')
    try: # ensure the mappings file is closed even if parsing fails
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # csv.reader yields [] for blank lines and short lists for
            # truncated ones; neither has a usable mapping
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()
    finally:
        stream.close()
    
    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter sequence, so a row with fewer fields than
        # the header is treated as having no values for the missing columns
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent=' ', newl='\n')
|
56 |
|
|
57 |
main() |
|
58 | 0 |
scripts/xml2db | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# Imports an XML file into a PostgreSQL database |
|
3 |
# Format: see http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml |
|
4 |
|
|
5 |
import os |
|
6 |
import os.path |
|
7 |
import psycopg2 |
|
8 |
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE |
|
9 |
import sys |
|
10 |
import xml.dom.minidom |
|
11 |
|
|
12 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
|
13 |
import xml_db |
|
14 |
|
|
15 |
def env_flag(name):
    '''Returns whether the environment variable `name` is set to a non-empty
    value. An unset variable and an empty one both count as False.'''
    return os.environ.get(name, '') != ''
|
16 |
|
|
17 |
def main():
    '''Imports the XML document read from stdin into a PostgreSQL database.
    
    The connection settings are read from the host, user, password, and
    database environment variables. All four must be set; a variable set to
    the empty string is not passed to psycopg2.connect().
    
    The inserted rows are committed only if the commit environment variable is
    set to a non-empty value. Otherwise the transaction is rolled back.
    
    Raises Exception with a usage message if a connection variable is unset.
    '''
    prog_name = sys.argv.pop(0)
    try:
        db_config = {}
        for name in ['host', 'user', 'password', 'database']:
            if os.environ[name] != '': db_config[name] = os.environ[name]
    except KeyError: raise Exception('Usage: env host=... user=... password=...'
        ' database=... [commit=1] '+prog_name+' <dataset')
    commit = env_flag('commit')
    
    # Process dataset
    db = psycopg2.connect(**db_config)
    try: # outer try ensures the connection is closed even if a step below fails
        db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
        try:
            doc = xml.dom.minidom.parse(sys.stdin)
            row_ct_ref = [0] # one-element list so xml2db() can update the count
            xml_db.xml2db(db, doc.documentElement, row_ct_ref)
            print('Inserted '+str(row_ct_ref[0])+' rows')
            if commit: db.commit()
        finally:
            # Always runs: discards the changes on error or when not committing
            db.rollback()
    finally:
        db.close()
|
39 |
|
|
40 |
main() |
|
0 | 41 |
scripts/data2xml | ||
---|---|---|
1 |
#!/usr/bin/env python |
|
2 |
# Converts a CSV dataset to XML using a mappings spreadsheet |
|
3 |
|
|
4 |
import csv |
|
5 |
import os.path |
|
6 |
import re |
|
7 |
import sys |
|
8 |
from copy import deepcopy |
|
9 |
from xml.dom.minidom import getDOMImplementation |
|
10 |
|
|
11 |
sys.path.append(os.path.dirname(__file__)+"/lib") |
|
12 |
import xpath |
|
13 |
|
|
14 |
def main():
    '''Converts the CSV dataset read from stdin to XML written to stdout.
    
    Usage: prog dest_mappings_column mappings_path <dataset >output
    
    The mappings file is a CSV spreadsheet with a header row. The first column
    of each row is an input column name, and the column whose header equals
    dest_mappings_column holds the XPath that input column maps to. Any
    "<name>" in the XPath is replaced by the input column name. Rows whose name
    or XPath is empty, and blank or truncated rows, are ignored.
    
    Each dataset row is given an ID equal to its 0-based index after the header.
    
    Raises Exception with a usage message if an argument is missing.
    Raises ValueError if dest_mappings_column is not a mappings header.
    '''
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError: raise Exception('Usage: '+prog_name
        +' dest_mappings_column mappings_path <dataset >output')
    
    # Get mappings
    mappings = {}
    has_types = False # whether outer elements are type containers
    stream = open(mappings_path, 'rb')
    try: # ensure the mappings file is closed even if parsing fails
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = next(reader)
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # csv.reader yields [] for blank lines and short lists for
            # truncated ones; neither has a usable mapping
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()
    finally:
        stream.close()
    
    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = next(reader)
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter sequence, so a row with fewer fields than
        # the header is treated as having no values for the missing columns
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent=' ', newl='\n')
|
56 |
|
|
57 |
main() |
|
0 | 58 |
Also available in: Unified diff
Renamed xml2db_ and data2xml_ to xml2db and data2xml (removed the trailing _)