Revision 7179
Added by Aaron Marcuse-Kubitza almost 12 years ago
lib/sql.py | ||
---|---|---|
1520 | 1520 |
'''For kw_args, see tables()''' |
1521 | 1521 |
for table in tables(db, schema, **kw_args): truncate(db, table, schema) |
1522 | 1522 |
|
1523 |
def distinct_table(db, table, distinct_on): |
|
1523 |
def distinct_table(db, table, distinct_on, joins=None):
|
|
1524 | 1524 |
'''Creates a copy of a temp table which is distinct on the given columns. |
1525 | 1525 |
Adds an index on table's distinct_on columns, to facilitate merge joins. |
1526 | 1526 |
@param distinct_on If empty, creates a table with one row. This is useful if |
1527 | 1527 |
your distinct_on columns are all literal values. |
1528 |
@param joins The joins to use when creating the new table |
|
1528 | 1529 |
@return The new table. |
1529 | 1530 |
''' |
1531 |
if joins == None: joins = [table] |
|
1532 |
|
|
1530 | 1533 |
new_table = sql_gen.suffixed_table(table, '_distinct') |
1531 | 1534 |
distinct_on = filter(sql_gen.is_table_col, distinct_on) |
1532 | 1535 |
|
... | ... | |
1536 | 1539 |
if distinct_on == []: limit = 1 # one sample row |
1537 | 1540 |
else: add_index(db, distinct_on, table) # for join optimization |
1538 | 1541 |
|
1539 |
insert_select(db, new_table, None, mk_select(db, table, |
|
1540 |
distinct_on=distinct_on, order_by=None, limit=limit)) |
|
1542 |
insert_select(db, new_table, None, mk_select(db, joins, |
|
1543 |
[sql_gen.Col(sql_gen.all_cols, table)], distinct_on=distinct_on, |
|
1544 |
order_by=None, limit=limit)) |
|
1541 | 1545 |
analyze(db, new_table) |
1542 | 1546 |
|
1543 | 1547 |
return new_table |
Also available in: Unified diff
sql.py: distinct_table(): Added support for custom joins used in creating the new table. This can then be used by sql_io.put_table() to filter out duplicate rows in the out_table, so that they don't create duplicate key errors and the resulting index holes.