Revision 3102
Added by Aaron Marcuse-Kubitza over 12 years ago
lib/sql_io.py | ||
---|---|---|
298 | 298 |
conds = set() |
299 | 299 |
distinct_on = sql_gen.ColDict(db, out_table) |
300 | 300 |
def mk_main_select(joins, cols): |
301 |
distinct_on_cols = [c.to_Col() for c in distinct_on.values()] |
|
302 |
return sql.mk_select(db, joins, cols, conds, distinct_on_cols, |
|
303 |
limit=limit_ref[0], start=0) |
|
301 |
return sql.mk_select(db, joins, cols, conds, limit=limit_ref[0], |
|
302 |
start=0) |
|
304 | 303 |
|
305 | 304 |
exc_strs = set() |
306 | 305 |
def log_exc(e): |
... | ... | |
337 | 336 |
insert_in_pkeys = insert_pkeys_table('in') |
338 | 337 |
|
339 | 338 |
# Do inserts and selects |
339 |
insert_in_table = in_table |
|
340 | 340 |
join_cols = sql_gen.ColDict(db, out_table) |
341 | 341 |
while True: |
342 | 342 |
if limit_ref[0] == 0: # special case |
... | ... | |
350 | 350 |
log_debug('Trying to insert new rows') |
351 | 351 |
|
352 | 352 |
# Prepare to insert new rows |
353 |
insert_joins = input_joins[:] # don't modify original! |
|
354 | 353 |
insert_args = dict(recover=True, cacheable=False) |
355 | 354 |
if has_joins: |
356 | 355 |
insert_args.update(dict(ignore=True)) |
357 | 356 |
else: |
358 | 357 |
insert_args.update(dict(returning=out_pkey, into=insert_out_pkeys)) |
359 |
main_select = mk_main_select(insert_joins, mapping.values()) |
|
358 |
main_select = mk_main_select([insert_in_table], |
|
359 |
[sql_gen.with_table(c, insert_in_table) for c in mapping.values()]) |
|
360 | 360 |
|
361 | 361 |
def main_insert(): |
362 | 362 |
if is_function: |
... | ... | |
392 | 392 |
log_debug('Ignoring existing rows, comparing on these columns:\n' |
393 | 393 |
+strings.as_inline_table(join_cols, ustr=col_ustr)) |
394 | 394 |
assert join_cols != old_join_cols # avoid infinite loops |
395 |
|
|
396 |
# Uniquify input table to avoid internal duplicate keys |
|
397 |
insert_in_table = sql.distinct_table(db, insert_in_table, |
|
398 |
filter(sql_gen.is_table_col, distinct_on.values())) |
|
395 | 399 |
except sql.NullValueException, e: |
396 | 400 |
log_exc(e) |
397 | 401 |
|
... | ... | |
455 | 459 |
|
456 | 460 |
assert sql.table_row_count(db, into) == sql.table_row_count(db, in_table) |
457 | 461 |
|
458 |
sql.empty_temp(db, in_table)
|
|
462 |
sql.empty_temp(db, set([in_table, insert_in_table]))
|
|
459 | 463 |
|
460 | 464 |
srcs = [] |
461 | 465 |
if is_func: srcs = sql_gen.cols_srcs(in_cols) |
Also available in: Unified diff
sql_io.py: put_table(): Use sql.distinct_table() to uniquify input table, instead of DISTINCT ON. This avoids letting PostgreSQL create a sort temp table to store the output of the DISTINCT ON, which is not automatically removed until the end of the connection, causing database bloat that can use up the available disk space.