Revision 3099
Added by Aaron Marcuse-Kubitza over 12 years ago
lib/sql.py | ||
---|---|---|
1226 | 1226 |
|
1227 | 1227 |
def distinct_table(db, table, distinct_on): |
1228 | 1228 |
'''Creates a copy of a temp table which is distinct on the given columns. |
1229 |
The table will get an index on these columns, facilitating merge joins.
|
|
1230 |
@param table Will be renamed to the distinct on table.
|
|
1229 |
The old and new tables will both get an index on these columns, to
|
|
1230 |
facilitate merge joins.
|
|
1231 | 1231 |
@param distinct_on If empty, creates a table with one row. This is useful if |
1232 | 1232 |
your distinct_on columns are all literal values. |
1233 |
@return The old table.
|
|
1233 |
@return The new table.
|
|
1234 | 1234 |
''' |
1235 |
old_table = copy.copy(table) |
|
1236 |
table.name = sql_gen.concat(table.name, '_distinct') |
|
1235 |
new_table = sql_gen.suffixed_table(table, '_distinct') |
|
1237 | 1236 |
|
1238 |
copy_table_struct(db, old_table, table)
|
|
1237 |
copy_table_struct(db, table, new_table)
|
|
1239 | 1238 |
|
1240 | 1239 |
limit = None |
1241 | 1240 |
if distinct_on == []: limit = 1 # one sample row |
1242 |
else: add_index(db, distinct_on, table, unique=True) |
|
1241 |
else: |
|
1242 |
add_index(db, distinct_on, new_table, unique=True) |
|
1243 |
add_index(db, distinct_on, table) # for join optimization |
|
1243 | 1244 |
|
1244 |
insert_select(db, table, None, mk_select(db, old_table, start=0,
|
|
1245 |
insert_select(db, new_table, None, mk_select(db, table, start=0,
|
|
1245 | 1246 |
limit=limit), ignore=True) |
1246 |
analyze(db, table) |
|
1247 |
analyze(db, new_table)
|
|
1247 | 1248 |
|
1248 |
return old_table |
|
1249 |
return new_table |
Also available in: Unified diff
sql.py: distinct_table(): Return new table instead of renaming input table so that columns that use input table will continue to work correctly