Revision 3621
Added by Aaron Marcuse-Kubitza over 12 years ago
lib/db_xml.py | ||
---|---|---|
136 | 136 |
|
137 | 137 |
def put_table(db, node, in_table, in_row_ct_ref=None, |
138 | 138 |
row_ins_ct_ref=None, limit=None, start=0, on_error=exc.raise_, |
139 |
parent_ids_loc=None, next=None, col_defaults={}, top_call=True):
|
|
139 |
parent_ids_loc=None, next=None, col_defaults={}): |
|
140 | 140 |
''' |
141 | 141 |
@param node The XML tree that transforms the input to the output. Similar to |
142 | 142 |
put()'s node param, but with the input column name prefixed by |
... | ... | |
148 | 148 |
in_table.set_srcs([in_table], overwrite=False) |
149 | 149 |
db.src = str(in_table) |
150 | 150 |
|
151 |
def put_table_(node, in_row_ct_ref=None): |
|
152 |
return put_table(db, node, in_table, in_row_ct_ref, row_ins_ct_ref, |
|
153 |
None, 0, on_error, parent_ids_loc, next, col_defaults, False) |
|
151 |
db.autoanalyze = True # but don't do this in row-based import |
|
152 |
db.autoexplain = True # but don't do this in row-based import |
|
154 | 153 |
|
154 |
full_in_table = in_table |
|
155 |
|
|
155 | 156 |
# Subset and partition in_table |
156 | 157 |
# OK to do even if table already the right size because it takes <1 sec. |
157 |
if top_call: |
|
158 |
db.autoanalyze = True # but don't do this in row-based import |
|
159 |
db.autoexplain = True # but don't do this in row-based import |
|
158 |
total = 0 |
|
159 |
while limit == None or total < limit: |
|
160 |
# Adjust partition size if last partition |
|
161 |
this_limit = partition_size |
|
162 |
if limit != None: this_limit = min(this_limit, limit - total) |
|
160 | 163 |
|
161 |
full_in_table = in_table |
|
164 |
# Row # is internally 0-based, but 1-based to the user |
|
165 |
db.log_debug('********** Partition: rows '+str(start+1)+'-' |
|
166 |
+str(start+this_limit)+' **********', level=1.2) |
|
162 | 167 |
|
163 |
total = 0 |
|
164 |
while limit == None or total < limit: |
|
165 |
# Adjust partition size if last partition |
|
166 |
this_limit = partition_size |
|
167 |
if limit != None: this_limit = min(this_limit, limit - total) |
|
168 |
|
|
169 |
# Row # is internally 0-based, but 1-based to the user |
|
170 |
db.log_debug('********** Partition: rows '+str(start+1)+'-' |
|
171 |
+str(start+this_limit)+' **********', level=1.2) |
|
172 |
|
|
173 |
# Subset in_table |
|
174 |
in_table = copy.copy(full_in_table) # don't modify input! |
|
175 |
in_table.name = str(in_table) # prepend schema |
|
176 |
cur = sql.run_query_into(db, sql.mk_select(db, full_in_table, |
|
177 |
limit=this_limit, start=start), into=in_table, add_pkey_=True) |
|
178 |
# full_in_table will be shadowed (hidden) by created temp table |
|
179 |
|
|
180 |
this_ct = cur.rowcount |
|
181 |
total += this_ct |
|
182 |
start += this_ct # advance start to fetch next set |
|
183 |
if this_ct == 0: break # in_table size is multiple of partition_size |
|
184 |
|
|
185 |
# Recurse |
|
186 |
pkeys_loc = put_table_(node, in_row_ct_ref) |
|
187 |
if in_row_ct_ref != None: in_row_ct_ref[0] += this_ct |
|
188 |
|
|
189 |
sql.empty_temp(db, in_table) |
|
190 |
|
|
191 |
if this_ct < partition_size: break # partial partition = last |
|
192 |
|
|
193 |
# Work around PostgreSQL's temp table disk space leak |
|
194 |
db.reconnect() |
|
168 |
# Subset in_table |
|
169 |
in_table = copy.copy(full_in_table) # don't modify input! |
|
170 |
in_table.name = str(in_table) # prepend schema |
|
171 |
cur = sql.run_query_into(db, sql.mk_select(db, full_in_table, |
|
172 |
limit=this_limit, start=start), into=in_table, add_pkey_=True) |
|
173 |
# full_in_table will be shadowed (hidden) by created temp table |
|
195 | 174 |
|
196 |
return pkeys_loc |
|
175 |
this_ct = cur.rowcount |
|
176 |
total += this_ct |
|
177 |
start += this_ct # advance start to fetch next set |
|
178 |
if this_ct == 0: break # in_table size is multiple of partition_size |
|
179 |
|
|
180 |
# Recurse |
|
181 |
pkeys_loc = _put_table_part(db, node, in_table, row_ins_ct_ref, |
|
182 |
on_error, parent_ids_loc, next, col_defaults) |
|
183 |
if in_row_ct_ref != None: in_row_ct_ref[0] += this_ct |
|
184 |
|
|
185 |
sql.empty_temp(db, in_table) |
|
186 |
|
|
187 |
if this_ct < partition_size: break # partial partition = last |
|
188 |
|
|
189 |
# Work around PostgreSQL's temp table disk space leak |
|
190 |
db.reconnect() |
|
197 | 191 |
|
192 |
return pkeys_loc |
|
193 |
|
|
194 |
def _put_table_part(db, node, in_table, row_ins_ct_ref, on_error, |
|
195 |
parent_ids_loc, next, col_defaults): |
|
196 |
'''Helper function for put_table() only; should not be called directly''' |
|
197 |
def put_table_(node, in_row_ct_ref=None): |
|
198 |
return _put_table_part(db, node, in_table, row_ins_ct_ref, on_error, |
|
199 |
parent_ids_loc, next, col_defaults) |
|
200 |
|
|
198 | 201 |
is_func = xml_func.is_func(node) |
199 | 202 |
out_table = name_of(node) |
200 | 203 |
|
Also available in: Unified diff
db_xml.py: put_table(): Split into an outer function that sets up the database environment and subsets in_table, and a (recursive) inner function that imports the data