Project

General

Profile

« Previous | Next » 

Revision 3102

sql_io.py: put_table(): Use sql.distinct_table() to uniquify input table, instead of DISTINCT ON. This avoids letting PostgreSQL create a sort temp table to store the output of the DISTINCT ON, which is not automatically removed until the end of the connection, causing database bloat that can use up the available disk space.

View differences:

lib/sql_io.py
298 298
    conds = set()
299 299
    distinct_on = sql_gen.ColDict(db, out_table)
300 300
    def mk_main_select(joins, cols):
301
        distinct_on_cols = [c.to_Col() for c in distinct_on.values()]
302
        return sql.mk_select(db, joins, cols, conds, distinct_on_cols,
303
            limit=limit_ref[0], start=0)
301
        return sql.mk_select(db, joins, cols, conds, limit=limit_ref[0],
302
            start=0)
304 303
    
305 304
    exc_strs = set()
306 305
    def log_exc(e):
......
337 336
    insert_in_pkeys = insert_pkeys_table('in')
338 337
    
339 338
    # Do inserts and selects
339
    insert_in_table = in_table
340 340
    join_cols = sql_gen.ColDict(db, out_table)
341 341
    while True:
342 342
        if limit_ref[0] == 0: # special case
......
350 350
        log_debug('Trying to insert new rows')
351 351
        
352 352
        # Prepare to insert new rows
353
        insert_joins = input_joins[:] # don't modify original!
354 353
        insert_args = dict(recover=True, cacheable=False)
355 354
        if has_joins:
356 355
            insert_args.update(dict(ignore=True))
357 356
        else:
358 357
            insert_args.update(dict(returning=out_pkey, into=insert_out_pkeys))
359
        main_select = mk_main_select(insert_joins, mapping.values())
358
        main_select = mk_main_select([insert_in_table],
359
            [sql_gen.with_table(c, insert_in_table) for c in mapping.values()])
360 360
        
361 361
        def main_insert():
362 362
            if is_function:
......
392 392
            log_debug('Ignoring existing rows, comparing on these columns:\n'
393 393
                +strings.as_inline_table(join_cols, ustr=col_ustr))
394 394
            assert join_cols != old_join_cols # avoid infinite loops
395
            
396
            # Uniquify input table to avoid internal duplicate keys
397
            insert_in_table = sql.distinct_table(db, insert_in_table,
398
                filter(sql_gen.is_table_col, distinct_on.values()))
395 399
        except sql.NullValueException, e:
396 400
            log_exc(e)
397 401
            
......
455 459
    
456 460
    assert sql.table_row_count(db, into) == sql.table_row_count(db, in_table)
457 461
    
458
    sql.empty_temp(db, in_table)
462
    sql.empty_temp(db, set([in_table, insert_in_table]))
459 463
    
460 464
    srcs = []
461 465
    if is_func: srcs = sql_gen.cols_srcs(in_cols)

Also available in: Unified diff