Project

General

Profile

« Previous | Next » 

Revision 51

Created join_maps to join two 2-column map spreadsheets

View differences:

scripts/data2xml
1 1
#!/usr/bin/env python
2
# Converts a CSV dataset to XML using a mappings spreadsheet
2
# Converts a CSV dataset to XML using a map spreadsheet
3 3

  
4
import copy
4 5
import csv
5 6
import os.path
6 7
import re
7 8
import sys
8
from copy import deepcopy
9 9
from xml.dom.minidom import getDOMImplementation
10 10

  
11 11
sys.path.append(os.path.dirname(__file__)+"/lib")
......
13 13

  
14 14
def main():
15 15
    prog_name = sys.argv[0]
16
    try: mappings_path = sys.argv[1]
16
    try: prog_name, map_path = sys.argv
17 17
    except ValueError:
18
        raise Exception('Usage: '+prog_name+' mappings_path <dataset >output')
18
        raise Exception('Usage: '+prog_name+' map_path <dataset >output')
19 19
    
20
    # Get mappings
21
    mappings = {}
20
    # Get map
21
    map = {}
22 22
    has_types = False # whether outer elements are type containers
23
    stream = open(mappings_path, 'rb')
23
    stream = open(map_path, 'rb')
24 24
    reader = csv.reader(stream, delimiter=',', quotechar='"')
25 25
    src, dest = reader.next()[:2]
26 26
    for row in reader:
......
28 28
        if name != '' and path != '':
29 29
            if path.startswith('/*s/'): has_types = True # *s used for type elem
30 30
            path = path.replace('<name>', name)
31
            mappings[name] = xpath.XpathParser(path).parse()
31
            map[name] = xpath.XpathParser(path).parse()
32 32
    stream.close()
33 33
    
34 34
    # Process dataset
......
41 41
        row_id = str(row_idx)
42 42
        for idx, name in enumerate(fieldnames):
43 43
            value = row[idx]
44
            if value != '' and name in mappings:
45
                path = deepcopy(mappings[name]) # don't modify main value!
44
            if value != '' and name in map:
45
                path = copy.deepcopy(map[name]) # don't modify main value!
46 46
                xpath.set_id(path, row_id, has_types)
47 47
                xpath.set_value(path, value)
48 48
                xpath.get(doc, path, True)
scripts/util/join_maps
1
#!/usr/bin/env python
2
# Inner-joins two map spreadsheets A->B and B->C to A->C
3

  
4
import csv
5
import sys
6

  
7
def _open_csv(path):
    """Open path for reading in the mode the csv module expects."""
    # Python 2's csv module wants binary mode; Python 3's wants text mode with
    # newline translation turned off
    if sys.version_info[0] >= 3: return open(path, 'r', newline='')
    return open(path, 'rb')

def _read_map(path, csv_config):
    """Read a 2-column map spreadsheet from path.

    Returns (in_name, out_name, mapping): the first two header cells and a dict
    from column 0 to column 1 for every row whose column 1 is non-empty.
    """
    mapping = {}
    with _open_csv(path) as stream: # closed even if reading raises
        reader = csv.reader(stream, **csv_config)
        in_name, out_name = next(reader)[:2]
        for row in reader:
            # csv yields [] for blank lines, so check the length before indexing
            if len(row) >= 2 and row[1] != '': mapping[row[0]] = row[1]
    return in_name, out_name, mapping

def main():
    """Inner-join map 0 (read from stdin) to map 1 (path in argv[1]).

    Writes the joined A->C map to stdout as CSV: a header row made of map 0's
    input column name and map 1's output column name, then one row for each
    map 0 row whose output value is a key of map 1.

    Raises Exception with a usage message if the argument count is wrong, and
    AssertionError if map 0's output column name differs from map 1's input
    column name.
    """
    prog_name = sys.argv[0]
    try: prog_name, map_1_path = sys.argv
    except ValueError:
        raise Exception('Usage: '+prog_name+' map_1 <map_0 >joined_map')
    
    csv_config = dict(delimiter=',', quotechar='"')
    
    # Get map 1
    map_1_in, map_1_out, map_1 = _read_map(map_1_path, csv_config)
    
    # Join map 1 to map 0
    reader = csv.reader(sys.stdin, **csv_config)
    map_0_in, map_0_out = next(reader)[:2]
    if map_0_out != map_1_in:
        # Raised explicitly rather than with assert so that the check is not
        # stripped when running under python -O
        raise AssertionError('Map 0 output column '+repr(map_0_out)
            +' does not match map 1 input column '+repr(map_1_in))
    writer = csv.writer(sys.stdout, **csv_config)
    writer.writerow([map_0_in, map_1_out])
    for row in reader:
        # Skip blank/short rows instead of failing with IndexError
        if len(row) >= 2 and row[1] in map_1:
            writer.writerow([row[0], map_1[row[1]]])
32

  
33
main()
0 34

  

Also available in: Unified diff