Project

General

Profile

« Previous | Next » 

Revision 7179

sql.py: distinct_table(): Added support for custom joins used in creating the new table. This can then be used by sql_io.put_table() to filter out duplicate rows in the out_table, so that they don't create duplicate key errors and the resulting index holes.

View differences:

lib/sql.py
1520 1520
    '''For kw_args, see tables()'''
1521 1521
    for table in tables(db, schema, **kw_args): truncate(db, table, schema)
1522 1522

  
1523
def distinct_table(db, table, distinct_on):
1523
def distinct_table(db, table, distinct_on, joins=None):
1524 1524
    '''Creates a copy of a temp table which is distinct on the given columns.
1525 1525
    Adds an index on table's distinct_on columns, to facilitate merge joins.
1526 1526
    @param distinct_on If empty, creates a table with one row. This is useful if
1527 1527
        your distinct_on columns are all literal values.
1528
    @param joins The joins to use when creating the new table
1528 1529
    @return The new table.
1529 1530
    '''
1531
    if joins == None: joins = [table]
1532
    
1530 1533
    new_table = sql_gen.suffixed_table(table, '_distinct')
1531 1534
    distinct_on = filter(sql_gen.is_table_col, distinct_on)
1532 1535
    
......
1536 1539
    if distinct_on == []: limit = 1 # one sample row
1537 1540
    else: add_index(db, distinct_on, table) # for join optimization
1538 1541
    
1539
    insert_select(db, new_table, None, mk_select(db, table,
1540
        distinct_on=distinct_on, order_by=None, limit=limit))
1542
    insert_select(db, new_table, None, mk_select(db, joins,
1543
        [sql_gen.Col(sql_gen.all_cols, table)], distinct_on=distinct_on,
1544
        order_by=None, limit=limit))
1541 1545
    analyze(db, new_table)
1542 1546
    
1543 1547
    return new_table

Also available in: Unified diff