Revision 3651
Added by Aaron Marcuse-Kubitza over 12 years ago
lib/db_xml.py | ||
---|---|---|
41 | 41 |
elif child_name == name: return child |
42 | 42 |
return None |
43 | 43 |
|
44 |
def get(db, node, limit=None, start=None): |
|
45 |
def pkey(table): return sql.pkey(db, table) |
|
46 |
|
|
47 |
node = node.firstChild |
|
48 |
table = name_of(node) |
|
49 |
pkey_ = pkey(table) |
|
50 |
|
|
51 |
fields = [] |
|
52 |
conds = {} |
|
53 |
for child in xml_dom.NodeElemIter(node): |
|
54 |
child_name = name_of(child) |
|
55 |
if xml_dom.is_empty(child): fields.append(child_name) |
|
56 |
elif xml_dom.is_text(child): conds[child_name] = xml_dom.value(child) |
|
57 |
else: raise Exception('Joins not supported yet') |
|
58 |
id_ = xml_dom.get_id(node) |
|
59 |
if id_ != None: conds[pkey(table)] = id_ # replace any existing pkey value |
|
60 |
if fields == []: fields.append(pkey_) |
|
61 |
|
|
62 |
return sql.select(db, table, fields, conds, limit, start) |
|
63 |
|
|
64 | 44 |
class ColRef: |
65 | 45 |
'''A reference to a table column''' |
66 | 46 |
def __init__(self, name, idx): |
... | ... | |
69 | 49 |
|
70 | 50 |
def __str__(self): return self.name |
71 | 51 |
|
72 |
# Controls when and how put_table() will partition the input table |
|
73 |
partition_size = 500000 # rows |
|
74 |
|
|
75 |
input_col_prefix = '$' |
|
76 |
|
|
77 | 52 |
put_table_special_funcs = set(['_simplifyPath']) |
78 | 53 |
|
79 |
def put_table(db, node, in_table, in_row_ct_ref=None, row_ins_ct_ref=None, |
|
80 |
limit=None, start=0, on_error=exc.raise_, col_defaults={}): |
|
81 |
''' |
|
82 |
@param node The XML tree that transforms the input to the output. Similar to |
|
83 |
put()'s node param, but with the input column name prefixed by |
|
84 |
input_col_prefix in place of the column value. |
|
85 |
@return sql_gen.Col Where the pkeys (from INSERT RETURNING) are made |
|
86 |
available |
|
87 |
''' |
|
88 |
in_table = sql_gen.as_Table(in_table) |
|
89 |
in_table.set_srcs([in_table], overwrite=False) |
|
90 |
db.src = str(in_table) |
|
91 |
|
|
92 |
db.autoanalyze = True # but don't do this in row-based import |
|
93 |
db.autoexplain = True # but don't do this in row-based import |
|
94 |
|
|
95 |
# Import col_defaults |
|
96 |
for col, node_ in col_defaults.items(): |
|
97 |
col_defaults[col] = put(db, node_, row_ins_ct_ref, on_error) |
|
98 |
|
|
99 |
# Subset and partition in_table |
|
100 |
# OK to do even if table already the right size because it takes <1 sec. |
|
101 |
full_in_table = in_table |
|
102 |
total = 0 |
|
103 |
while limit == None or total < limit: |
|
104 |
# Adjust partition size if last partition |
|
105 |
this_limit = partition_size |
|
106 |
if limit != None: this_limit = min(this_limit, limit - total) |
|
107 |
|
|
108 |
# Row # is interally 0-based, but 1-based to the user |
|
109 |
db.log_debug('********** Partition: rows '+str(start+1)+'-' |
|
110 |
+str(start+this_limit)+' **********', level=1.2) |
|
111 |
|
|
112 |
# Subset in_table |
|
113 |
in_table = copy.copy(full_in_table) # don't modify input! |
|
114 |
in_table.name = str(in_table) # prepend schema |
|
115 |
cur = sql.run_query_into(db, sql.mk_select(db, full_in_table, |
|
116 |
limit=this_limit, start=start), into=in_table, add_pkey_=True) |
|
117 |
# full_in_table will be shadowed (hidden) by created temp table |
|
118 |
|
|
119 |
this_ct = cur.rowcount |
|
120 |
total += this_ct |
|
121 |
start += this_ct # advance start to fetch next set |
|
122 |
if this_ct == 0: break # in_table size is multiple of partition_size |
|
123 |
|
|
124 |
# Recurse |
|
125 |
pkeys_loc = put(db, node, row_ins_ct_ref, on_error, |
|
126 |
col_defaults, in_table) |
|
127 |
if in_row_ct_ref != None: in_row_ct_ref[0] += this_ct |
|
128 |
|
|
129 |
sql.empty_temp(db, in_table) |
|
130 |
|
|
131 |
if this_ct < partition_size: break # partial partition = last |
|
132 |
|
|
133 |
# Work around PostgreSQL's temp table disk space leak |
|
134 |
db.reconnect() |
|
135 |
|
|
136 |
return pkeys_loc |
|
137 |
|
|
138 | 54 |
def put(db, node, row_ins_ct_ref=None, on_error=exc.raise_, |
139 | 55 |
col_defaults={}, in_table=None, parent_ids_loc=None, next=None): |
140 | 56 |
def put_(node, in_row_ct_ref=None): |
... | ... | |
221 | 137 |
for child in children: put_(child) |
222 | 138 |
|
223 | 139 |
return pkeys_loc |
140 |
|
|
141 |
def get(db, node, limit=None, start=None): |
|
142 |
def pkey(table): return sql.pkey(db, table) |
|
143 |
|
|
144 |
node = node.firstChild |
|
145 |
table = name_of(node) |
|
146 |
pkey_ = pkey(table) |
|
147 |
|
|
148 |
fields = [] |
|
149 |
conds = {} |
|
150 |
for child in xml_dom.NodeElemIter(node): |
|
151 |
child_name = name_of(child) |
|
152 |
if xml_dom.is_empty(child): fields.append(child_name) |
|
153 |
elif xml_dom.is_text(child): conds[child_name] = xml_dom.value(child) |
|
154 |
else: raise Exception('Joins not supported yet') |
|
155 |
id_ = xml_dom.get_id(node) |
|
156 |
if id_ != None: conds[pkey(table)] = id_ # replace any existing pkey value |
|
157 |
if fields == []: fields.append(pkey_) |
|
158 |
|
|
159 |
return sql.select(db, table, fields, conds, limit, start) |
|
160 |
|
|
161 |
# Controls when and how put_table() will partition the input table |
|
162 |
partition_size = 500000 # rows |
|
163 |
|
|
164 |
input_col_prefix = '$' |
|
165 |
|
|
166 |
def put_table(db, node, in_table, in_row_ct_ref=None, row_ins_ct_ref=None, |
|
167 |
limit=None, start=0, on_error=exc.raise_, col_defaults={}): |
|
168 |
''' |
|
169 |
@param node The XML tree that transforms the input to the output. Similar to |
|
170 |
put()'s node param, but with the input column name prefixed by |
|
171 |
input_col_prefix in place of the column value. |
|
172 |
@return sql_gen.Col Where the pkeys (from INSERT RETURNING) are made |
|
173 |
available |
|
174 |
''' |
|
175 |
in_table = sql_gen.as_Table(in_table) |
|
176 |
in_table.set_srcs([in_table], overwrite=False) |
|
177 |
db.src = str(in_table) |
|
178 |
|
|
179 |
db.autoanalyze = True # but don't do this in row-based import |
|
180 |
db.autoexplain = True # but don't do this in row-based import |
|
181 |
|
|
182 |
# Import col_defaults |
|
183 |
for col, node_ in col_defaults.items(): |
|
184 |
col_defaults[col] = put(db, node_, row_ins_ct_ref, on_error) |
|
185 |
|
|
186 |
# Subset and partition in_table |
|
187 |
# OK to do even if table already the right size because it takes <1 sec. |
|
188 |
full_in_table = in_table |
|
189 |
total = 0 |
|
190 |
while limit == None or total < limit: |
|
191 |
# Adjust partition size if last partition |
|
192 |
this_limit = partition_size |
|
193 |
if limit != None: this_limit = min(this_limit, limit - total) |
|
194 |
|
|
195 |
# Row # is interally 0-based, but 1-based to the user |
|
196 |
db.log_debug('********** Partition: rows '+str(start+1)+'-' |
|
197 |
+str(start+this_limit)+' **********', level=1.2) |
|
198 |
|
|
199 |
# Subset in_table |
|
200 |
in_table = copy.copy(full_in_table) # don't modify input! |
|
201 |
in_table.name = str(in_table) # prepend schema |
|
202 |
cur = sql.run_query_into(db, sql.mk_select(db, full_in_table, |
|
203 |
limit=this_limit, start=start), into=in_table, add_pkey_=True) |
|
204 |
# full_in_table will be shadowed (hidden) by created temp table |
|
205 |
|
|
206 |
this_ct = cur.rowcount |
|
207 |
total += this_ct |
|
208 |
start += this_ct # advance start to fetch next set |
|
209 |
if this_ct == 0: break # in_table size is multiple of partition_size |
|
210 |
|
|
211 |
# Recurse |
|
212 |
pkeys_loc = put(db, node, row_ins_ct_ref, on_error, |
|
213 |
col_defaults, in_table) |
|
214 |
if in_row_ct_ref != None: in_row_ct_ref[0] += this_ct |
|
215 |
|
|
216 |
sql.empty_temp(db, in_table) |
|
217 |
|
|
218 |
if this_ct < partition_size: break # partial partition = last |
|
219 |
|
|
220 |
# Work around PostgreSQL's temp table disk space leak |
|
221 |
db.reconnect() |
|
222 |
|
|
223 |
return pkeys_loc |
Also available in: Unified diff
db_xml.py: Moved put() before the functions that use it