Project

General

Profile

« Previous | Next » 

Revision 44

Renamed xml2db_ and data2xml_ to remove _

View differences:

scripts/xml2db_
1
#!/usr/bin/env python
2
# Imports an XML file into a PostgreSQL database
3
# Format: see http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml
4

  
5
import os
6
import os.path
7
import psycopg2
8
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE
9
import sys
10
import xml.dom.minidom
11

  
12
sys.path.append(os.path.dirname(__file__)+"/lib")
13
import xml_db
14

  
15
def env_flag(name):
    '''Returns whether env var `name` is set to a non-empty value'''
    return os.environ.get(name, '') != ''
16

  
17
def main():
    '''Imports the XML document on stdin into a PostgreSQL database.

    Connection settings are read from the host, user, password and database
    env vars. All four must be set, but empty ones are not passed to
    psycopg2.connect(). The inserted rows are committed only if the commit
    env var is non-empty; otherwise everything is rolled back.

    Raises an Exception containing a usage message if a setting is unset.
    '''
    script = sys.argv.pop(0)
    usage = ('Usage: env host=... user=... password=... database=...'
        ' [commit=1] '+script+' <dataset')
    
    # Collect connection settings
    conn_args = {}
    for key in ('host', 'user', 'password', 'database'):
        try: setting = os.environ[key]
        except KeyError: raise Exception(usage)
        if setting != '': conn_args[key] = setting
    should_commit = env_flag('commit')
    
    # Process dataset
    conn = psycopg2.connect(**conn_args)
    conn.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
    try:
        root = xml.dom.minidom.parse(sys.stdin).documentElement
        row_ct_ref = [0] # presumably updated in place by xml2db()
        xml_db.xml2db(conn, root, row_ct_ref)
        sys.stdout.write('Inserted '+str(row_ct_ref[0])+' rows'+'\n')
        if should_commit: conn.commit()
    finally:
        conn.rollback() # runs even after a commit, as before
        conn.close()
39

  
40
main()
41 0

  
scripts/data2xml_
1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3

  
4
import csv
5
import os.path
6
import re
7
import sys
8
from copy import deepcopy
9
from xml.dom.minidom import getDOMImplementation
10

  
11
sys.path.append(os.path.dirname(__file__)+"/lib")
12
import xpath
13

  
14
def main():
    '''Converts the CSV dataset on stdin to XML on stdout.

    Command-line arguments:
    dest_mappings_column: header of the mappings spreadsheet column that holds
        the output paths; also used as the name of the XML root element
    mappings_path: path of the mappings CSV, whose first column holds the
        dataset column names

    Blank or truncated rows in either CSV are tolerated: missing cells are
    treated like empty ones and skipped.

    Raises an Exception containing a usage message if an argument is missing.
    '''
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError: raise Exception('Usage: '+prog_name
        +' dest_mappings_column mappings_path <dataset >output')
    
    # Get mappings
    mappings = {}
    has_types = False # whether outer elements are type containers
    stream = open(mappings_path, 'rb')
    try: # make sure the spreadsheet is closed even if parsing it fails
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = reader.next()
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # csv.reader yields [] for a blank line; a short row has no path
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()
    finally: stream.close()
    
    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = reader.next()
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter list, so cells missing from a short row
        # are skipped instead of raising IndexError
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
56

  
57
main()
58 0

  
scripts/xml2db
1
#!/usr/bin/env python
2
# Imports an XML file into a PostgreSQL database
3
# Format: see http://vegbank.org/vegdocs/xml/vegbank_example_ver1.0.2.xml
4

  
5
import os
6
import os.path
7
import psycopg2
8
from psycopg2.extensions import ISOLATION_LEVEL_SERIALIZABLE
9
import sys
10
import xml.dom.minidom
11

  
12
sys.path.append(os.path.dirname(__file__)+"/lib")
13
import xml_db
14

  
15
def env_flag(name):
    '''Returns whether env var `name` is set to a non-empty value'''
    return os.environ.get(name, '') != ''
16

  
17
def main():
    '''Imports the XML document on stdin into a PostgreSQL database.

    Connection settings are read from the host, user, password and database
    env vars. All four must be set, but empty ones are not passed to
    psycopg2.connect(). The inserted rows are committed only if the commit
    env var is non-empty; otherwise everything is rolled back.

    Raises an Exception containing a usage message if a setting is unset.
    '''
    script = sys.argv.pop(0)
    usage = ('Usage: env host=... user=... password=... database=...'
        ' [commit=1] '+script+' <dataset')
    
    # Collect connection settings
    conn_args = {}
    for key in ('host', 'user', 'password', 'database'):
        try: setting = os.environ[key]
        except KeyError: raise Exception(usage)
        if setting != '': conn_args[key] = setting
    should_commit = env_flag('commit')
    
    # Process dataset
    conn = psycopg2.connect(**conn_args)
    conn.set_isolation_level(ISOLATION_LEVEL_SERIALIZABLE)
    try:
        root = xml.dom.minidom.parse(sys.stdin).documentElement
        row_ct_ref = [0] # presumably updated in place by xml2db()
        xml_db.xml2db(conn, root, row_ct_ref)
        sys.stdout.write('Inserted '+str(row_ct_ref[0])+' rows'+'\n')
        if should_commit: conn.commit()
    finally:
        conn.rollback() # runs even after a commit, as before
        conn.close()
39

  
40
main()
0 41

  
scripts/data2xml
1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
3

  
4
import csv
5
import os.path
6
import re
7
import sys
8
from copy import deepcopy
9
from xml.dom.minidom import getDOMImplementation
10

  
11
sys.path.append(os.path.dirname(__file__)+"/lib")
12
import xpath
13

  
14
def main():
    '''Converts the CSV dataset on stdin to XML on stdout.

    Command-line arguments:
    dest_mappings_column: header of the mappings spreadsheet column that holds
        the output paths; also used as the name of the XML root element
    mappings_path: path of the mappings CSV, whose first column holds the
        dataset column names

    Blank or truncated rows in either CSV are tolerated: missing cells are
    treated like empty ones and skipped.

    Raises an Exception containing a usage message if an argument is missing.
    '''
    prog_name = sys.argv.pop(0)
    try:
        dest = sys.argv.pop(0)
        mappings_path = sys.argv.pop(0)
    except IndexError: raise Exception('Usage: '+prog_name
        +' dest_mappings_column mappings_path <dataset >output')
    
    # Get mappings
    mappings = {}
    has_types = False # whether outer elements are type containers
    stream = open(mappings_path, 'rb')
    try: # make sure the spreadsheet is closed even if parsing it fails
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        fieldnames = reader.next()
        dest_idx = fieldnames.index(dest)
        for row in reader:
            # csv.reader yields [] for a blank line; a short row has no path
            if len(row) <= dest_idx: continue
            name = row[0]
            path = row[dest_idx]
            if name != '' and path != '':
                if path.startswith('/*s/'): has_types = True # *s used for type elem
                path = path.replace('<name>', name)
                mappings[name] = xpath.XpathParser(path).parse()
    finally: stream.close()
    
    # Process dataset
    doc = getDOMImplementation().createDocument(None, dest, None)
    reader = csv.reader(sys.stdin, delimiter=',', quotechar='"')
    fieldnames = reader.next()
    for row_idx, row in enumerate(reader):
        row_id = str(row_idx)
        # zip() stops at the shorter list, so cells missing from a short row
        # are skipped instead of raising IndexError
        for name, value in zip(fieldnames, row):
            if value != '' and name in mappings:
                path = deepcopy(mappings[name]) # don't modify main value!
                xpath.set_id(path, row_id, has_types)
                xpath.set_value(path, value)
                xpath.get(doc, path, True)
    doc.writexml(sys.stdout, addindent='    ', newl='\n')
56

  
57
main()
0 58

  

Also available in: Unified diff