Project

General

Profile

« Previous | Next » 

Revision 164

map: Handle metadata in order with regular mappings

View differences:

scripts/test/accepted_output/NYBG.organisms.VegBank.xml
1 1
<?xml version="1.0" ?>
2 2
<VegBank>
3 3
    <taxonObservation id="0">
4
        <taxonImportance>
5
            <stemCount>
6
                <stemCount>1</stemCount>
7
                <stemLocation>
8
                    <definedValue fkey="tableRecord_ID">
9
                        <userDefined_ID>
10
                            <userDefined>
11
                                <tableName>stemLocation</tableName>
12
                                <userDefinedName>habitat</userDefinedName>
13
                                <userDefinedType>varchar</userDefinedType>
14
                            </userDefined>
15
                        </userDefined_ID>
16
                        <definedValue>Floodplain forest</definedValue>
17
                    </definedValue>
18
                    <definedValue fkey="tableRecord_ID">
19
                        <userDefined_ID>
20
                            <userDefined>
21
                                <tableName>stemLocation</tableName>
22
                                <userDefinedName>plantFungusDescription</userDefinedName>
23
                                <userDefinedType>varchar</userDefinedType>
24
                            </userDefined>
25
                        </userDefined_ID>
26
                        <definedValue>with rough brown bark, almost no buttressing, 1 m diam; fallen fruit and leaves</definedValue>
27
                    </definedValue>
28
                </stemLocation>
29
            </stemCount>
30
        </taxonImportance>
31 4
        <observation_ID>
32 5
            <observation>
33 6
                <plot_ID>
......
68 41
                <obsStartDate>1984-08-20</obsStartDate>
69 42
            </observation>
70 43
        </observation_ID>
44
        <taxonImportance>
45
            <stemCount>
46
                <stemCount>1</stemCount>
47
                <stemLocation>
48
                    <definedValue fkey="tableRecord_ID">
49
                        <userDefined_ID>
50
                            <userDefined>
51
                                <tableName>stemLocation</tableName>
52
                                <userDefinedName>habitat</userDefinedName>
53
                                <userDefinedType>varchar</userDefinedType>
54
                            </userDefined>
55
                        </userDefined_ID>
56
                        <definedValue>Floodplain forest</definedValue>
57
                    </definedValue>
58
                    <definedValue fkey="tableRecord_ID">
59
                        <userDefined_ID>
60
                            <userDefined>
61
                                <tableName>stemLocation</tableName>
62
                                <userDefinedName>plantFungusDescription</userDefinedName>
63
                                <userDefinedType>varchar</userDefinedType>
64
                            </userDefined>
65
                        </userDefined_ID>
66
                        <definedValue>with rough brown bark, almost no buttressing, 1 m diam; fallen fruit and leaves</definedValue>
67
                    </definedValue>
68
                </stemLocation>
69
            </stemCount>
70
        </taxonImportance>
71 71
        <taxonInterpretation>
72 72
            <ROLE_ID>
73 73
                <aux_Role>
......
315 315
        </taxonInterpretation>
316 316
    </taxonObservation>
317 317
    <taxonObservation id="1">
318
        <taxonImportance>
319
            <stemCount>
320
                <stemCount>1</stemCount>
321
            </stemCount>
322
        </taxonImportance>
323 318
        <observation_ID>
324 319
            <observation>
325 320
                <plot_ID>
......
352 347
                <obsStartDate>1994-01-17</obsStartDate>
353 348
            </observation>
354 349
        </observation_ID>
350
        <taxonImportance>
351
            <stemCount>
352
                <stemCount>1</stemCount>
353
            </stemCount>
354
        </taxonImportance>
355 355
        <taxonInterpretation>
356 356
            <ROLE_ID>
357 357
                <aux_Role>
scripts/map
16 16
import xml_func
17 17

  
18 18
def metadata_value(name):
19
    if name.startswith(':'): return name[1:]
19
    if type(name) == str and name.startswith(':'): return name[1:]
20 20
    else: return None
21 21

  
22 22
def main():
......
66 66
        for row in reader:
67 67
            in_, out = row[:2]
68 68
            if out != '':
69
                value = metadata_value(in_)
70
                if out_is_xpaths: out = xpath.parse(out_root+out)
71
                if value != None: metadata.append((value, out))
72
                else: mappings.append((in_, out))
69
                if out_is_xpaths: out = out_root+out
70
                mappings.append((in_, out))
73 71
        stream.close()
74 72
    in_is_xml = in_is_xpaths and not in_is_db
75 73
    
......
85 83
            import db_xml
86 84
            
87 85
            in_root_xml = xpath.path2xml(in_root)
88
            mappings = [(xpath.path2xml(in_root+in_), out) for in_, out in mappings]
86
            for i, mapping in enumerate(mappings):
87
                in_, out = mapping
88
                if metadata_value(in_) == None:
89
                    mappings[i] = (xpath.path2xml(in_root+in_), out)
89 90
            
90 91
            in_db = sql.connect(in_db_config)
91 92
            in_pkeys = {}
......
93 94
                limit)):
94 95
                row_id, = row
95 96
                row_id = str(row_id)
96
                
97
                def put_col(path, value):
98
                    xpath.put_obj(root, path, row_id, has_types, value)
99
                for value, out in metadata: put_col(out, value)
100 97
                for in_, out in mappings:
101
                    in_ = in_.cloneNode(True) # don't modify orig value!
102
                    xml_dom.set_id(xpath.get(in_, in_root), row_id)
103
                    value = sql.value_or_none(db_xml.get(in_db, in_, in_pkeys))
104
                    if value != None: put_col(out, str(value))
98
                    value = metadata_value(in_)
99
                    if value == None:
100
                        in_ = in_.cloneNode(True) # don't modify orig value!
101
                        xml_dom.set_id(xpath.get(in_, in_root), row_id)
102
                        value = sql.value_or_none(db_xml.get(in_db, in_,
103
                            in_pkeys))
104
                    if value != None:
105
                        xpath.put_obj(root, out, row_id, has_types, str(value))
105 106
            in_db.close()
106 107
        elif in_is_xml:
107 108
            row = xpath.get(doc0.documentElement, in_root)
108 109
            for row_idx, row in enumerate(xml_dom.NodeElemIter(row.parentNode)):
109 110
                if not (limit == None or row_idx < limit): break
110 111
                row_id = str(row_idx)
111
                
112
                def put_col(path, value):
113
                    xpath.put_obj(root, path, row_id, has_types, value)
114
                for value, out in metadata: put_col(out, value)
115 112
                for in_, out in mappings:
116
                    node = xpath.get(row, in_)
117
                    if node != None: put_col(out, xml_dom.value(node))
113
                    value = metadata_value(in_)
114
                    if value == None:
115
                        node = xpath.get(row, in_)
116
                        if node != None: value = xml_dom.value(node)
117
                    if value != None:
118
                        xpath.put_obj(root, out, row_id, has_types, value)
118 119
        else: # input is CSV
119 120
            map_ = dict(mappings)
120 121
            reader = csv.reader(sys.stdin)
121 122
            cols = reader.next()
122 123
            col_idxs = dict([(value, idx) for idx, value in enumerate(cols)])
123
            mappings = [(col_idxs.get(in_, None), out) for in_, out in mappings]
124
            mappings = filter(lambda item: item[0] != None, mappings)
124
            for i, mapping in enumerate(mappings):
125
                in_, out = mapping
126
                if metadata_value(in_) == None:
127
                    try: mappings[i] = (col_idxs[in_], out)
128
                    except KeyError: pass
125 129
            
126 130
            for row_idx, row in enumerate(reader):
127 131
                if not (limit == None or row_idx < limit): break
128 132
                row_id = str(row_idx)
129
                
130
                def put_col(path, value):
131
                    xpath.put_obj(root, path, row_id, has_types, value)
132
                for value, out in metadata: put_col(out, value)
133 133
                for in_, out in mappings:
134
                    if row[in_] != '': put_col(out, row[in_])
134
                    value = metadata_value(in_)
135
                    if value == None:
136
                        value = row[in_]
137
                        if value == '': value = None
138
                    if value != None:
139
                        xpath.put_obj(root, out, row_id, has_types, value)
135 140
        xml_func.process(root)
136 141
    else: doc1 = doc0
137 142
    

Also available in: Unified diff