Project

General

Profile

« Previous | Next » 

Revision 3122

db_xml.py: put_table(): Merged partitioning and subsetting into the same section for simplicity, to avoid creating extra temp tables, and to later allow the connection to be closed and reopened between partitions. partition_size: Expressed the value without exponent notation (200000 instead of 2e5, which is a float literal) to ensure that it's an integer.

View differences:

db_xml.py
130 130
    def __str__(self): return self.name
131 131

  
132 132
# Controls when and how put_table() will partition the input table
133
partition_size = 2e5 # rows
133
partition_size = 200000 # rows
134 134

  
135 135
input_col_prefix = '$'
136 136

  
......
153 153
        return put_table(db, node, in_table, in_row_ct_ref, row_ins_ct_ref,
154 154
            limit, start, on_error, parent_ids_loc, next)
155 155
    
156
    # Subset in_table if needed
157
    if limit != None or start != 0:
158
        in_table = copy.copy(in_table) # don't modify input!
159
        sql.run_query_into(db, sql.mk_select(db, in_table, limit=limit,
160
            start=start), into=in_table)
161
            # in_table will be shadowed (hidden) by the created temp table
162
        sql.add_pkey(db, in_table)
163
        return put_table_(node, in_row_ct_ref)
164
    
165
    # Partition in_table if needed
156
    # Subset and/or partition in_table if needed
166 157
    in_row_ct = sql.table_row_count(db, in_table)
167
    if in_row_ct > partition_size:
158
    if limit != None or start != 0 or in_row_ct > partition_size:
168 159
        end = in_row_ct
169
        if limit != None: end = start + limit
160
        if limit != None: end = min(start + limit, end)
170 161
        
162
        full_in_table = in_table
163
        
171 164
        for start_ in xrange(start, end, partition_size):
172 165
            limit_ = min(end - start_, partition_size)
173
            db.log_debug('********** Partition: rows '+str(start_)+'-'
174
                +str(start_ + limit_)+' **********', level=1.2)
175
            pkeys_loc = put_table_(node, in_row_ct_ref, limit_, start_)
166
            
167
            # Row # is internally 0-based, but 1-based to the user
168
            db.log_debug('********** Partition: rows '+str(start_+1)+'-'
169
                +str(start_+limit_)+' **********', level=1.2)
170
            
171
            # Subset in_table
172
            in_table = copy.copy(full_in_table) # don't modify input!
173
            sql.run_query_into(db, sql.mk_select(db, full_in_table,
174
                limit=limit_, start=start_), into=in_table)
175
                # full_in_table will be shadowed (hidden) by created temp table
176
            sql.add_pkey(db, in_table)
177
            
178
            pkeys_loc = put_table_(node, in_row_ct_ref)
176 179
        
177 180
        return pkeys_loc
178 181
    

Also available in: Unified diff