Revision 3122
Added by Aaron Marcuse-Kubitza over 12 years ago
lib/db_xml.py | ||
---|---|---|
130 | 130 |
def __str__(self): return self.name |
131 | 131 |
|
132 | 132 |
# Controls when and how put_table() will partition the input table |
133 |
partition_size = 2e5 # rows
|
|
133 |
partition_size = 200000 # rows
|
|
134 | 134 |
|
135 | 135 |
input_col_prefix = '$' |
136 | 136 |
|
... | ... | |
153 | 153 |
return put_table(db, node, in_table, in_row_ct_ref, row_ins_ct_ref, |
154 | 154 |
limit, start, on_error, parent_ids_loc, next) |
155 | 155 |
|
156 |
# Subset in_table if needed |
|
157 |
if limit != None or start != 0: |
|
158 |
in_table = copy.copy(in_table) # don't modify input! |
|
159 |
sql.run_query_into(db, sql.mk_select(db, in_table, limit=limit, |
|
160 |
start=start), into=in_table) |
|
161 |
# in_table will be shadowed (hidden) by the created temp table |
|
162 |
sql.add_pkey(db, in_table) |
|
163 |
return put_table_(node, in_row_ct_ref) |
|
164 |
|
|
165 |
# Partition in_table if needed |
|
156 |
# Subset and/or partition in_table if needed |
|
166 | 157 |
in_row_ct = sql.table_row_count(db, in_table) |
167 |
if in_row_ct > partition_size: |
|
158 |
if limit != None or start != 0 or in_row_ct > partition_size:
|
|
168 | 159 |
end = in_row_ct |
169 |
if limit != None: end = start + limit
|
|
160 |
if limit != None: end = min(start + limit, end)
|
|
170 | 161 |
|
162 |
full_in_table = in_table |
|
163 |
|
|
171 | 164 |
for start_ in xrange(start, end, partition_size): |
172 | 165 |
limit_ = min(end - start_, partition_size) |
173 |
db.log_debug('********** Partition: rows '+str(start_)+'-' |
|
174 |
+str(start_ + limit_)+' **********', level=1.2) |
|
175 |
pkeys_loc = put_table_(node, in_row_ct_ref, limit_, start_) |
|
166 |
|
|
167 |
# Row # is internally 0-based, but 1-based to the user |
|
168 |
db.log_debug('********** Partition: rows '+str(start_+1)+'-' |
|
169 |
+str(start_+limit_)+' **********', level=1.2) |
|
170 |
|
|
171 |
# Subset in_table |
|
172 |
in_table = copy.copy(full_in_table) # don't modify input! |
|
173 |
sql.run_query_into(db, sql.mk_select(db, full_in_table, |
|
174 |
limit=limit_, start=start_), into=in_table) |
|
175 |
# full_in_table will be shadowed (hidden) by created temp table |
|
176 |
sql.add_pkey(db, in_table) |
|
177 |
|
|
178 |
pkeys_loc = put_table_(node, in_row_ct_ref) |
|
176 | 179 |
|
177 | 180 |
return pkeys_loc |
178 | 181 |
|
Also available in: Unified diff
db_xml.py: put_table(): Merged partitioning and subsetting into the same section for simplicity, to avoid creating extra temp tables, and to later allow the connection to be closed and reopened between partitions. partition_size: Expressed value without exponent notation to ensure that it's an integer (2e5 is a float literal in Python, and partition_size is used as the step argument to xrange()).