Revision 3194
Added by Aaron Marcuse-Kubitza over 12 years ago
db_xml.py | ||
---|---|---|
138 | 138 |
|
139 | 139 |
def put_table(db, node, in_table, in_row_ct_ref=None, |
140 | 140 |
row_ins_ct_ref=None, limit=None, start=0, on_error=exc.raise_, |
141 |
parent_ids_loc=None, next=None): |
|
141 |
parent_ids_loc=None, next=None, top_call=True):
|
|
142 | 142 |
''' |
143 | 143 |
@param node The XML tree that transforms the input to the output. Similar to |
144 | 144 |
put()'s node param, but with the input column name prefixed by |
145 | 145 |
input_col_prefix in place of the column value. |
146 |
@return (table, col) Where the pkeys (from INSERT RETURNING) are made
|
|
146 |
@return sql_gen.Col Where the pkeys (from INSERT RETURNING) are made
|
|
147 | 147 |
available |
148 | 148 |
''' |
149 | 149 |
in_table = sql_gen.as_Table(in_table) |
... | ... | |
152 | 152 |
|
153 | 153 |
def put_table_(node, in_row_ct_ref=None): |
154 | 154 |
return put_table(db, node, in_table, in_row_ct_ref, row_ins_ct_ref, |
155 |
on_error=on_error, parent_ids_loc=parent_ids_loc, next=next)
|
|
155 |
None, 0, on_error, parent_ids_loc, next, False)
|
|
156 | 156 |
|
157 | 157 |
# Subset and/or partition in_table if needed |
158 |
in_row_ct = sql.table_row_count(db, in_table) |
|
159 |
if limit != None or start != 0 or in_row_ct > partition_size: |
|
160 |
end = in_row_ct |
|
161 |
if limit != None: end = min(start + limit, end) |
|
162 |
|
|
158 |
if top_call: |
|
163 | 159 |
full_in_table = in_table |
164 | 160 |
|
165 |
for start_ in xrange(start, end, partition_size): |
|
166 |
limit_ = min(end - start_, partition_size) |
|
161 |
total = 0 |
|
162 |
while limit == None or total < limit: |
|
163 |
# Adjust partition size if last partition |
|
164 |
this_limit = partition_size |
|
165 |
if limit != None: this_limit = min(this_limit, limit - total) |
|
167 | 166 |
|
168 | 167 |
# Row # is interally 0-based, but 1-based to the user |
169 |
db.log_debug('********** Partition: rows '+str(start_+1)+'-'
|
|
170 |
+str(start_+limit_)+' **********', level=1.2)
|
|
168 |
db.log_debug('********** Partition: rows '+str(start+1)+'-' |
|
169 |
+str(start+this_limit)+' **********', level=1.2)
|
|
171 | 170 |
|
172 | 171 |
# Subset in_table |
173 | 172 |
in_table = copy.copy(full_in_table) # don't modify input! |
174 | 173 |
in_table.name = str(in_table) # prepend schema |
175 |
sql.run_query_into(db, sql.mk_select(db, full_in_table, |
|
176 |
limit=limit_, start=start_), into=in_table)
|
|
174 |
cur = sql.run_query_into(db, sql.mk_select(db, full_in_table,
|
|
175 |
limit=this_limit, start=start), into=in_table)
|
|
177 | 176 |
# full_in_table will be shadowed (hidden) by created temp table |
177 |
|
|
178 |
this_ct = cur.rowcount |
|
179 |
total += this_ct |
|
180 |
start += this_ct # advance start to fetch next set |
|
181 |
if this_ct == 0: break # if in_table size is exact multiple of limit |
|
182 |
|
|
178 | 183 |
sql.add_pkey(db, in_table) |
179 | 184 |
|
180 | 185 |
# Recurse |
... | ... | |
182 | 187 |
|
183 | 188 |
sql.empty_temp(db, in_table) |
184 | 189 |
|
190 |
if this_ct < partition_size: break # partial partition = last |
|
191 |
|
|
185 | 192 |
# Work around PostgreSQL's temp table disk space leak |
186 | 193 |
db.reconnect() |
187 | 194 |
|
Also available in: Unified diff
db_xml.py: put_table(): Subsetting in_table: Don't count # rows because this takes awhile for large datasets. Instead, use the chunking algorithm in digir_client, which ends the loop when a partial or empty partition is encountered.