Revision 3661
Added by Aaron Marcuse-Kubitza over 12 years ago
inputs/Madidi/test/import.plots.xml.ref | ||
---|---|---|
1 | 1 |
Put template: |
2 |
<VegBIEN><_ignore><inLabel>Madidi</inLabel></_ignore></VegBIEN>Inserted 0 new rows into database |
|
2 |
<VegBIEN><_ignore><inLabel>Madidi</inLabel></_ignore></VegBIEN>Inserted 1 new rows into database |
inputs/Madidi/test/import.organisms.xml.ref | ||
---|---|---|
1 | 1 |
Put template: |
2 |
<VegBIEN><_ignore><inLabel>Madidi</inLabel></_ignore></VegBIEN>Inserted 0 new rows into database |
|
2 |
<VegBIEN><_ignore><inLabel>Madidi</inLabel></_ignore></VegBIEN>Inserted 1 new rows into database |
lib/db_xml.py | ||
---|---|---|
182 | 182 |
db.autoanalyze = True # but don't do this in row-based import |
183 | 183 |
db.autoexplain = True # but don't do this in row-based import |
184 | 184 |
|
185 |
# Import col_defaults |
|
186 |
for col, node_ in col_defaults.items(): |
|
187 |
col_defaults[col] = put(db, node_, row_ins_ct_ref, on_error) |
|
188 |
|
|
189 | 185 |
# Subset and partition in_table |
190 | 186 |
# OK to do even if table already the right size because it takes <1 sec. |
191 | 187 |
full_in_table = in_table |
bin/map | ||
---|---|---|
154 | 154 |
pool = parallelproc.MultiProducerPool(cpus) |
155 | 155 |
log('Using '+str(pool.process_ct)+' parallel CPUs') |
156 | 156 |
|
157 |
# Set up DB access |
|
158 |
row_ins_ct_ref = [0] |
|
159 |
if out_is_db: |
|
160 |
out_db = connect_db(out_db_config) |
|
161 |
rel_funcs = set(sql.tables(out_db, schema_like='%', |
|
162 |
table_like=r'\__%')) |
|
163 |
|
|
157 | 164 |
doc = xml_dom.create_doc() |
158 | 165 |
root = doc.documentElement |
159 | 166 |
out_is_xml_ref = [False] |
... | ... | |
163 | 170 |
def update_in_label(): |
164 | 171 |
if in_label_ref[0] != None: |
165 | 172 |
xpath.get(root, '/_ignore/inLabel="'+in_label_ref[0]+'"', True) |
166 |
# TODO: Move this to the mappings as some kind of metadata |
|
167 |
col_defaults['datasource_id'] = xpath.path2xml( |
|
168 |
'party/organizationname="'+in_label_ref[0]+'"') |
|
173 |
if out_is_db: |
|
174 |
# TODO: Move this to the mappings as some kind of metadata |
|
175 |
col_defaults['datasource_id'] = db_xml.put(out_db, |
|
176 |
xpath.path2xml('party/organizationname="'+in_label_ref[0] |
|
177 |
+'"'), row_ins_ct_ref) |
|
169 | 178 |
|
170 | 179 |
def prep_root(): |
171 | 180 |
root.clear() |
172 | 181 |
update_in_label() |
173 | 182 |
prep_root() |
174 | 183 |
|
175 |
# Define before the out_is_db section because it's used by by_col |
|
176 |
row_ins_ct_ref = [0] |
|
177 |
|
|
178 |
if out_is_db: |
|
179 |
out_db = connect_db(out_db_config) |
|
180 |
rel_funcs = set(sql.tables(out_db, schema_like='%', |
|
181 |
table_like=r'\__%')) |
|
182 |
|
|
183 | 184 |
def process_input(root, row_ready, map_path): |
184 | 185 |
'''Inputs datasource to XML tree, mapping if needed''' |
185 | 186 |
# Load map header |
Also available in: Unified diff
Commit message: Moved the importing of col_defaults from db_xml.put_table() to bin/map, so that it also happens in row-based mode. Note that, as a result, a DB entry for the datasource is now always created, even if the datasource has no mappings or no rows.