Project

General

Profile

« Previous | Next » 

Revision 3621

db_xml.py: put_table(): Split into an outer function that sets up the database environment and subsets in_table, and a (recursive) inner function that imports the data

View differences:

lib/db_xml.py
136 136

  
137 137
def put_table(db, node, in_table, in_row_ct_ref=None,
138 138
    row_ins_ct_ref=None, limit=None, start=0, on_error=exc.raise_,
139
    parent_ids_loc=None, next=None, col_defaults={}, top_call=True):
139
    parent_ids_loc=None, next=None, col_defaults={}):
140 140
    '''
141 141
    @param node The XML tree that transforms the input to the output. Similar to
142 142
        put()'s node param, but with the input column name prefixed by
......
148 148
    in_table.set_srcs([in_table], overwrite=False)
149 149
    db.src = str(in_table)
150 150
    
151
    def put_table_(node, in_row_ct_ref=None):
152
        return put_table(db, node, in_table, in_row_ct_ref, row_ins_ct_ref,
153
            None, 0, on_error, parent_ids_loc, next, col_defaults, False)
151
    db.autoanalyze = True # but don't do this in row-based import
152
    db.autoexplain = True # but don't do this in row-based import
154 153
    
154
    full_in_table = in_table
155
    
155 156
    # Subset and partition in_table
156 157
    # OK to do even if table already the right size because it takes <1 sec.
157
    if top_call:
158
        db.autoanalyze = True # but don't do this in row-based import
159
        db.autoexplain = True # but don't do this in row-based import
158
    total = 0
159
    while limit == None or total < limit:
160
        # Adjust partition size if last partition
161
        this_limit = partition_size
162
        if limit != None: this_limit = min(this_limit, limit - total)
160 163
        
161
        full_in_table = in_table
164
        # Row # is internally 0-based, but 1-based to the user
165
        db.log_debug('********** Partition: rows '+str(start+1)+'-'
166
            +str(start+this_limit)+' **********', level=1.2)
162 167
        
163
        total = 0
164
        while limit == None or total < limit:
165
            # Adjust partition size if last partition
166
            this_limit = partition_size
167
            if limit != None: this_limit = min(this_limit, limit - total)
168
            
169
            # Row # is internally 0-based, but 1-based to the user
170
            db.log_debug('********** Partition: rows '+str(start+1)+'-'
171
                +str(start+this_limit)+' **********', level=1.2)
172
            
173
            # Subset in_table
174
            in_table = copy.copy(full_in_table) # don't modify input!
175
            in_table.name = str(in_table) # prepend schema
176
            cur = sql.run_query_into(db, sql.mk_select(db, full_in_table,
177
                limit=this_limit, start=start), into=in_table, add_pkey_=True)
178
                # full_in_table will be shadowed (hidden) by created temp table
179
            
180
            this_ct = cur.rowcount
181
            total += this_ct
182
            start += this_ct # advance start to fetch next set
183
            if this_ct == 0: break # in_table size is multiple of partition_size
184
            
185
            # Recurse
186
            pkeys_loc = put_table_(node, in_row_ct_ref)
187
            if in_row_ct_ref != None: in_row_ct_ref[0] += this_ct
188
            
189
            sql.empty_temp(db, in_table)
190
            
191
            if this_ct < partition_size: break # partial partition = last
192
            
193
            # Work around PostgreSQL's temp table disk space leak
194
            db.reconnect()
168
        # Subset in_table
169
        in_table = copy.copy(full_in_table) # don't modify input!
170
        in_table.name = str(in_table) # prepend schema
171
        cur = sql.run_query_into(db, sql.mk_select(db, full_in_table,
172
            limit=this_limit, start=start), into=in_table, add_pkey_=True)
173
            # full_in_table will be shadowed (hidden) by created temp table
195 174
        
196
        return pkeys_loc
175
        this_ct = cur.rowcount
176
        total += this_ct
177
        start += this_ct # advance start to fetch next set
178
        if this_ct == 0: break # in_table size is multiple of partition_size
179
        
180
        # Recurse
181
        pkeys_loc = _put_table_part(db, node, in_table, row_ins_ct_ref,
182
            on_error, parent_ids_loc, next, col_defaults)
183
        if in_row_ct_ref != None: in_row_ct_ref[0] += this_ct
184
        
185
        sql.empty_temp(db, in_table)
186
        
187
        if this_ct < partition_size: break # partial partition = last
188
        
189
        # Work around PostgreSQL's temp table disk space leak
190
        db.reconnect()
197 191
    
192
    return pkeys_loc
193

  
194
def _put_table_part(db, node, in_table, row_ins_ct_ref, on_error,
195
    parent_ids_loc, next, col_defaults):
196
    '''Helper function for put_table() only; should not be called directly'''
197
    def put_table_(node, in_row_ct_ref=None):
198
        return _put_table_part(db, node, in_table, row_ins_ct_ref, on_error,
199
            parent_ids_loc, next, col_defaults)
200
    
198 201
    is_func = xml_func.is_func(node)
199 202
    out_table = name_of(node)
200 203
    

Also available in: Unified diff