Project

General

Profile

#!/usr/bin/env python
# Maps one datasource to another, using a map spreadsheet if needed
# For outputting an XML file to a PostgreSQL database, use the general format of
# http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml

    
6
import copy
7
import csv
8
import os
9
import os.path
10
import re
11
import sys
12
import xml.dom.minidom
13

    
14
# Make the lib/ dir next to this script importable (presumably where the
# project-local modules imported inside main() live).
# The path is built from the script's absolute location: when the script is run
# by bare name, dirname(__file__) is '' and plain concatenation with "/lib"
# would point at the filesystem root instead of the script's own dir.
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "lib"))
15

    
16
def env_flag(name):
    '''Returns whether the env var `name` is set to a non-empty string.
    An unset var and a var set to '' both count as off; any other value
    (including '0') counts as on.'''
    return os.environ.get(name, '') != ''
17

    
18
def main():
    '''Converts the datasource on stdin to an XML tree and outputs that tree.

    Configuration comes from env vars:
    - from_host, from_user, from_password, from_database
    - to_host, to_user, to_password, to_database
    - commit: if set to a non-empty value, the database transaction is committed
    A db config counts as present only if all four of its vars are set.

    Input: stdin is parsed as XML only when the to_* config is present and the
    from_* config is not. In every other case stdin is CSV, and exactly one
    command-line argument (the path of the map spreadsheet) is required;
    otherwise SystemExit is raised with a usage message.

    Output: if the to_* config is present, the tree is loaded into that
    database with db_xml.xml2db(); otherwise it is written to stdout as XML.
    '''
    # Get db config from env vars
    db_config_names = ['host', 'user', 'password', 'database']
    env_names = [] # every env var name looked up, for the usage message
    def get_db_config(prefix):
        '''Returns the db config dict built from the <prefix>_* env vars, or
        None if any of those vars is unset'''
        has_all = True
        db_config = {}
        for name in db_config_names:
            env_name = prefix+'_'+name
            env_names.append(env_name)
            if env_name in os.environ: db_config[name] = os.environ[env_name]
            else: has_all = False
        if has_all: return db_config
        else: return None
    from_db_config = get_db_config('from')
    to_db_config = get_db_config('to')
    uses_map = not (from_db_config is None and to_db_config is not None)
    
    # Parse args
    prog_name = sys.argv[0]
    try: prog_name, map_path = sys.argv
    except ValueError:
        # Without a map, map_path is never read, so a wrong arg count is OK
        if uses_map: raise SystemExit('Usage: env'+''.join(
            [' ['+name+'=...]' for name in env_names])+' [commit=1] '+prog_name
            +' [map_path] [<input] [>output]')
    commit = env_flag('commit')
    
    csv_config = dict(delimiter=',', quotechar='"')
    
    # Input datasource to XML tree
    if uses_map: # input is CSV
        # Project-local modules; imported lazily since only this branch uses them
        import xml_xpath
        import xpath
        
        # Load map
        map_ = {} # input column name -> parsed XPath template
        has_types = False # whether outer elements are type containers
        stream = open(map_path, 'rb')
        try: # ensure the map file is closed even if parsing fails
            reader = csv.reader(stream, **csv_config)
            # Header row names the two datasources; only dest is used (as the
            # root element name). Unpacking also checks both cells exist.
            src, dest = reader.next()[:2]
            for row in reader:
                name, path = row[:2]
                if name != '' and path != '':
                    # *s for type elem
                    if path.startswith('/*s/'): has_types = True
                    path = path.replace('<name>', name)
                    map_[name] = xpath.XpathParser(path).parse()
        finally: stream.close()
        
        # Load and map CSV
        doc = xml.dom.minidom.getDOMImplementation().createDocument(None, dest,
            None)
        reader = csv.reader(sys.stdin, **csv_config)
        fieldnames = reader.next()
        for row_idx, row in enumerate(reader):
            row_id = str(row_idx)
            # zip() stops at the shorter of the two, so cells missing from the
            # end of a short row are skipped just like empty cells
            for name, value in zip(fieldnames, row):
                if value != '' and name in map_:
                    path = copy.deepcopy(map_[name]) # don't modify main value!
                    xpath.set_id(path, row_id, has_types)
                    xpath.set_value(path, value)
                    # presumably creates the node at path (third arg True) --
                    # TODO confirm against xml_xpath
                    xml_xpath.get(doc, path, True)
    else: doc = xml.dom.minidom.parse(sys.stdin) # input is XML
    
    # Output XML tree
    if to_db_config is not None: # output is database
        import psycopg2
        from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE
        
        import db_xml
        
        db = psycopg2.connect(**to_db_config)
        try:
            db.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
            row_ct_ref = [0] # one-element list so xml2db() can update the count
            db_xml.xml2db(db, doc.documentElement, row_ct_ref)
            sys.stdout.write('Inserted '+str(row_ct_ref[0])+' rows\n')
            if commit: db.commit()
        finally:
            # Rollback runs unconditionally: it discards the work when commit
            # was not requested or an error occurred
            try: db.rollback()
            finally: db.close() # close even if the rollback itself fails
    else: doc.writexml(sys.stdout, addindent='    ', newl='\n') # output is XML
102

    
103
# Script entry point. There is no __name__ guard, so this also runs on import.
main()
(2-2/3)