1
|
# SQL code generation
|
2
|
|
3
|
import copy
|
4
|
import operator
|
5
|
import re
|
6
|
import UserDict
|
7
|
|
8
|
import dicts
|
9
|
import iters
|
10
|
import lists
|
11
|
import objects
|
12
|
import strings
|
13
|
import util
|
14
|
|
15
|
##### Names
|
16
|
|
17
|
# Length limit that add_suffix() passes to strings.add_suffix()
identifier_max_len = 63 # works for both PostgreSQL and MySQL
|
18
|
|
19
|
def add_suffix(str_, suffix):
    '''Appends a suffix to a name via strings.add_suffix(), which is given
    identifier_max_len as its limit.

    Any version (the text from the last "#" onwards) is moved into the suffix
    first, so that it won't be truncated off the string, which would lead to
    collisions.

    @param str_ The name to extend
    @param suffix The text to append after the name (and after any version)
    @return Whatever strings.add_suffix() returns
    '''
    head, marker, version = str_.rpartition('#')
    if marker == '': # no version in the name
        return strings.add_suffix(str_, suffix, identifier_max_len)
    # Carry the version over as part of the suffix
    return strings.add_suffix(head, marker+version+suffix, identifier_max_len)
|
29
|
|
30
|
def truncate(str_):
    '''Runs a name through add_suffix() with an empty suffix, so that only
    add_suffix()'s length and version handling is applied.'''
    return add_suffix(str_, '')
|
31
|
|
32
|
def is_safe_name(name):
    '''A name is safe *and unambiguous* if it:
    * contains only *lowercase* word (\\w) characters
    * doesn't start with a digit
    * contains "_", so that it's not a keyword

    @return A match object (truthy) if the name is safe, otherwise None
    '''
    # \Z instead of $: "$" also matches just before a trailing newline, which
    # would let a name such as 'a_b\n' through
    return re.match(r'^(?=.*_)(?!\d)[^\WA-Z]+\Z', name)
|
39
|
|
40
|
def esc_name(name, quote='"'):
    '''Wraps an identifier in quote characters.

    @param name The identifier to quote
    @param quote The quote character to put around the name
    @return The quoted name, with every embedded quote character doubled
    '''
    # doubling an embedded quote escapes it in both PostgreSQL and MySQL
    escaped = name.replace(quote, quote*2)
    return ''.join((quote, escaped, quote))
|
43
|
|
44
|
def clean_name(name):
    '''Removes every double quote and backtick from a name.'''
    for quote_char in ('"', '`'):
        name = name.replace(quote_char, '')
    return name
|
45
|
|
46
|
##### General SQL code objects
|
47
|
|
48
|
class MockDb:
    '''Stand-in for a database object, providing the two escaping methods that
    the to_str() methods in this module call on `db`. Used by the __repr__()
    methods below, which have no real database available.'''

    def esc_value(self, value):
        '''Renders a value using strings.repr_no_u().'''
        return strings.repr_no_u(value)

    def esc_name(self, name):
        '''Quotes a name using the module-level esc_name() defaults.'''
        # Inside a method body this resolves to the module-level function
        return esc_name(name)

# Shared instance passed as `db` by the __repr__() methods
mockDb = MockDb()
|
53
|
|
54
|
class BasicObject(objects.BasicObject):
    '''Base class for the SQL code objects in this module.'''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        '''Returns the object's repr (via strings.repr_no_u()) with all double
        quotes and backticks removed by clean_name().'''
        text = strings.repr_no_u(self)
        return clean_name(text)
|
58
|
|
59
|
##### Unparameterized code objects
|
60
|
|
61
|
class Code(BasicObject):
    '''Base class for objects that can be rendered as SQL with to_str().'''

    def to_str(self, db):
        '''Renders this object as a string. Must be overridden.

        @param db Object providing the escaping methods (e.g. esc_name())
        @throws NotImplementedError if the subclass does not override this
        '''
        raise NotImplementedError()

    def __repr__(self):
        # No real database is available here, so render against the mock one
        return self.to_str(mockDb)
|
65
|
|
66
|
class CustomCode(Code):
    '''A Code object wrapping a string that is output verbatim.'''

    def __init__(self, str_):
        self.str_ = str_

    def to_str(self, db):
        '''Returns the wrapped string unchanged; `db` is not used.'''
        return self.str_
|
70
|
|
71
|
def as_Code(value, db=None):
    '''Converts a value to a Code object.

    @param value A string (per util.is_str()) becomes CustomCode; anything
        else becomes a Literal.
    @param db If set, runs db.std_code() on the value. Only applied to strings.
    @return CustomCode|Literal
    '''
    if not util.is_str(value): return Literal(value)

    # Identity test: None is a singleton, and "!=" would invoke any custom
    # comparison method defined by the db object
    if db is not None: value = db.std_code(value)
    return CustomCode(value)
|
79
|
|
80
|
class Expr(Code):
    '''Wraps another Code object so that it is rendered inside parentheses.'''

    def __init__(self, expr):
        self.expr = expr

    def to_str(self, db):
        inner = self.expr.to_str(db)
        return '('+inner+')'
|
84
|
|
85
|
##### Literal values
|
86
|
|
87
|
class Literal(Code):
    '''A literal value, rendered using the database's esc_value().'''

    def __init__(self, value):
        self.value = value

    def to_str(self, db):
        return db.esc_value(self.value)
|
91
|
|
92
|
def as_Value(value):
    '''Returns Code objects unchanged and wraps anything else in a Literal.'''
    return value if isinstance(value, Code) else Literal(value)
|
95
|
|
96
|
def is_null(value):
    '''Returns whether a value is a Literal wrapping None.'''
    # Identity test, so that a wrapped object with a custom __eq__() can't
    # claim to be equal to None
    return isinstance(value, Literal) and value.value is None
|
97
|
|
98
|
##### Derived elements
|
99
|
|
100
|
# Sentinel for the `srcs` param of Derived.set_srcs(), which replaces it with (self,)
src_self = object() # tells Col that it is its own source column
|
101
|
|
102
|
class Derived(Code):
    def __init__(self, srcs):
        '''An element which was derived from some other element(s).
        @param srcs See self.set_srcs()
        '''
        self.set_srcs(srcs)

    def set_srcs(self, srcs, overwrite=True):
        '''
        @param srcs (self_type...)|src_self The element(s) this is derived from
        @param overwrite If False, leaves self.srcs alone when it is already
            non-empty
        '''
        if not overwrite and self.srcs != (): return # already set

        # src_self is a sentinel object, so compare by identity, like the other
        # sentinels in this module
        if srcs is src_self: srcs = (self,)
        self.srcs = tuple(srcs) # make Col hashable

    def _compare_on(self):
        '''Returns a copy of the instance's attributes without `srcs`.'''
        compare_on = self.__dict__.copy()
        del compare_on['srcs'] # ignore
        return compare_on
|
123
|
|
124
|
def cols_srcs(cols):
    '''Collects the `srcs` of all the given columns, flattened with
    iters.flatten() and passed through lists.uniqify().'''
    all_srcs = iters.flatten((col.srcs for col in cols))
    return lists.uniqify(all_srcs)
|
125
|
|
126
|
##### Tables
|
127
|
|
128
|
class Table(Derived):
    def __init__(self, name, schema=None, srcs=()):
        '''
        @param name The table name. It is passed through truncate().
        @param schema str|None (for no schema)
        @param srcs (Table...)|src_self See Derived.set_srcs()
        '''
        Derived.__init__(self, srcs)

        self.name = truncate(name)
        self.schema = schema

    def to_str(self, db):
        '''Returns the escaped name, prefixed by the escaped schema and "." if
        a schema is set.'''
        str_ = ''
        # Identity test: None is a singleton
        if self.schema is not None: str_ += db.esc_name(self.schema)+'.'
        str_ += db.esc_name(self.name)
        return str_

    def to_Table(self): return self
|
148
|
|
149
|
def is_underlying_table(table):
    '''Returns whether `table` is a Table whose to_Table() is itself (which is
    not the case for e.g. NamedTable).'''
    if not isinstance(table, Table): return False
    return table.to_Table() is table
|
151
|
|
152
|
def as_Table(table, schema=None):
    '''Converts a table name to a Table.

    @param table None and Code objects are returned unchanged; anything else
        is used as the name of a new Table.
    @param schema Only used when a new Table is created
    '''
    # Identity test, so that a Code object's own equality method is not
    # invoked with None
    if table is None or isinstance(table, Code): return table
    return Table(table, schema)
|
155
|
|
156
|
def suffixed_table(table, suffix):
    '''Returns a new Table in the same schema whose name is the given table's
    name followed by `suffix`.'''
    new_name = table.name+suffix
    return Table(new_name, table.schema)
|
157
|
|
158
|
class NamedTable(Table):
    def __init__(self, name, code, cols=None):
        '''A table expression that is given a name using AS.

        @param name The name to assign
        @param code The thing being named. A non-Code value is used as a table
            name. Code that is not a Table, FunctionCall or Expr is wrapped in
            an Expr (i.e. parenthesized).
        @param cols None|[col...] Column names to list after the table name
        '''
        Table.__init__(self, name)

        if not isinstance(code, Code): code = Table(code)
        if not isinstance(code, (Table, FunctionCall, Expr)): code = Expr(code)
        # Build a real list, so that to_str() can iterate it more than once
        if cols is not None: cols = [to_name_only_col(c) for c in cols]

        self.code = code
        self.cols = cols

    def to_str(self, db):
        str_ = self.code.to_str(db)+'\nAS '+Table.to_str(self, db)
        if self.cols is not None:
            str_ += ' ('+(', '.join((c.to_str(db) for c in self.cols)))+')'
        return str_

    def to_Table(self):
        '''Returns a plain Table with just this table's name.'''
        return Table(self.name)
|
176
|
|
177
|
def remove_table_rename(table):
    '''Returns the code that a NamedTable names; other values are returned
    unchanged.'''
    return table.code if isinstance(table, NamedTable) else table
|
180
|
|
181
|
##### Columns
|
182
|
|
183
|
class Col(Derived):
    def __init__(self, name, table=None, srcs=()):
        '''
        @param name The column name. It is passed through truncate().
        @param table Table|str|None (for no table). A string is converted to a
            Table.
        @param srcs (Col...)|src_self See Derived.set_srcs()
        '''
        Derived.__init__(self, srcs)

        name = truncate(name)
        if util.is_str(table): table = Table(table)
        # Identity tests below: None is a singleton, and Table objects may
        # define their own equality
        assert table is None or isinstance(table, Table)

        self.name = name
        self.table = table

    def to_str(self, db):
        '''Returns the escaped name, prefixed by the table's to_Table() form
        and "." if a table is set.'''
        str_ = ''
        if self.table is not None:
            str_ += self.table.to_Table().to_str(db)+'.'
        str_ += db.esc_name(self.name)
        return str_

    def to_Col(self): return self
|
205
|
|
206
|
def is_table_col(col):
    '''Returns whether `col` is a Col that has a table set.'''
    # Identity test, so that the table's own equality method is not involved
    return isinstance(col, Col) and col.table is not None
|
207
|
|
208
|
def as_Col(col, table=None, name=None):
    '''Converts a value to a Col (or leaves it as some other Code object).

    @param col Code objects are returned unchanged (unless renamed; see
        `name`). Anything else is used as the name of a new Col.
    @param table The table for a newly created Col
    @param name If not None, any non-Col input will be renamed using NamedCol.
    '''
    if name is not None: # identity test: None is a singleton
        col = as_Value(col)
        if not isinstance(col, Col): col = NamedCol(name, col)

    if isinstance(col, Code): return col
    return Col(col, table)
|
218
|
|
219
|
def with_default_table(col, table, overwrite=False):
    '''Returns the column with its table set to `table` if it has none.

    @param col Converted with as_Col(). The input object is never modified.
    @param table The table to assign
    @param overwrite Whether to also replace a table that is already set
    @return The column, or a modified copy of it. NamedCols and Code objects
        that are not Cols (e.g. Literal) are returned unchanged.
    '''
    col = as_Col(col)
    # as_Col() passes any Code object through, and only Cols have a `table`
    # attribute, so skip everything else (as to_name_only_col() does)
    if not isinstance(col, Col) or isinstance(col, NamedCol): return col

    if overwrite or col.table is None:
        col = copy.copy(col) # don't modify input!
        col.table = table
    return col
|
225
|
|
226
|
def set_cols_table(table, cols):
    '''Converts every entry of `cols` with as_Col() and sets its table.

    @param table Converted with as_Table()
    @param cols A mutable sequence. It is modified in place, and so are any
        column objects that as_Col() returns unchanged.
    '''
    table = as_Table(table)

    for idx, entry in enumerate(cols):
        converted = as_Col(entry)
        cols[idx] = converted
        converted.table = table
|
232
|
|
233
|
def to_name_only_col(col, check_table=None):
    '''Returns a new Col with just the column's name (no table).

    @param col Converted with as_Col(). If the result is not a Col, it is
        returned unchanged.
    @param check_table If not None, asserts that the column either has no
        table or has this one.
    '''
    col = as_Col(col)
    if not isinstance(col, Col): return col

    if check_table is not None:
        table = col.table
        # Identity test for None; the table comparison itself stays "=="
        assert table is None or table == check_table
    return Col(col.name)
|
241
|
|
242
|
class NamedCol(Col):
    '''A value that is given a column name using AS.'''

    def __init__(self, name, code):
        '''
        @param name The column name to assign
        @param code Code|literal-value The value being named. A non-Code value
            is wrapped in a Literal.
        '''
        Col.__init__(self, name)

        self.code = code if isinstance(code, Code) else Literal(code)

    def to_str(self, db):
        value_str = self.code.to_str(db)
        name_str = Col.to_str(self, db)
        return value_str+' AS '+name_str

    def to_Col(self):
        '''Returns a plain Col with just this column's name.'''
        return Col(self.name)
|
254
|
|
255
|
def remove_col_rename(col):
    '''Returns the code that a NamedCol names; other values are returned
    unchanged.'''
    return col.code if isinstance(col, NamedCol) else col
|
258
|
|
259
|
class NoUnderlyingTableException(Exception):
    '''Raised by underlying_col() when a value is not a column of an
    underlying table.'''
|
260
|
|
261
|
def underlying_col(col):
    '''Strips any column and table renaming to get at the column of the
    underlying table.

    @return A new Col with the same name and srcs, on the underlying table
    @throws NoUnderlyingTableException if the value (after removing any column
        rename) is not a Col, or if its table (after removing any table rename)
        does not pass is_underlying_table()
    '''
    unnamed = remove_col_rename(col)
    if not isinstance(unnamed, Col): raise NoUnderlyingTableException

    base_table = remove_table_rename(unnamed.table)
    if not is_underlying_table(base_table): raise NoUnderlyingTableException

    return Col(unnamed.name, base_table, unnamed.srcs)
|
269
|
|
270
|
def wrap(wrap_func, value):
    '''Wraps a value, propagating any column renaming to the returned value.

    @param wrap_func Function applied to the value (or, for a NamedCol, to the
        code that it names)
    '''
    if not isinstance(value, NamedCol): return wrap_func(value)

    # Wrap what's inside the rename, then put the rename back on
    wrapped = wrap_func(value.code)
    return NamedCol(value.name, wrapped)
|
275
|
|
276
|
class ColDict(dicts.DictProxy):
    '''A dict that automatically makes inserted entries Col objects'''

    def __init__(self, db, keys_table, dict_=None):
        '''
        @param db Must provide col_info(), which is used to look up the default
            of a column whose value is None
        @param keys_table The table that keys without a table are assumed to
            belong to. Converted with as_Table().
        @param dict_ Initial entries (optional)
        '''
        dicts.DictProxy.__init__(self, {})

        # None instead of a shared mutable {} default
        if dict_ is None: dict_ = {}
        keys_table = as_Table(keys_table)

        self.db = db
        self.table = keys_table
        self.update(dict_) # after setting vars because __setitem__() needs them

    def copy(self): return ColDict(self.db, self.table, self.inner.copy())

    def __getitem__(self, key):
        return dicts.DictProxy.__getitem__(self, self._key(key))

    def __setitem__(self, key, value):
        key = self._key(key)
        # Identity test, so that a Code value's own equality method is not
        # invoked with None
        if value is None: value = self.db.col_info(key).default
        dicts.DictProxy.__setitem__(self, key, as_Col(value, name=key.name))

    def _key(self, key):
        '''Converts a key to a Col, using self.table for plain names.'''
        return as_Col(key, self.table)
|
299
|
|
300
|
##### Functions
|
301
|
|
302
|
class Function(Table):
    '''A function name. Behaves exactly like a Table (name plus optional
    schema).'''
|
303
|
|
304
|
def TempFunction(name, autocommit):
    '''Creates a Function that is placed in the pg_temp schema unless
    `autocommit` is set, in which case it has no schema.'''
    if autocommit: return Function(name, None)
    return Function(name, 'pg_temp')
|
308
|
|
309
|
class InternalFunction(CustomCode):
    '''A function name that is output verbatim, like any other CustomCode.'''
|
310
|
|
311
|
class FunctionCall(Code):
    def __init__(self, function, *args):
        '''
        @param function Code|name A non-Code value is used as the name of a
            Function.
        @param args [Code|literal-value...] The function's arguments
        '''
        if not isinstance(function, Code): function = Function(function)

        self.function = function
        # Arguments are converted to values and stripped of column renaming
        self.args = [remove_col_rename(as_Value(arg)) for arg in args]

    def to_str(self, db):
        rendered_args = [arg.to_str(db) for arg in self.args]
        return self.function.to_str(db)+'('+', '.join(rendered_args)+')'
|
325
|
|
326
|
def wrap_in_func(function, value):
    '''Wraps a value inside a function call.
    Propagates any column renaming to the returned value.
    '''
    def call_on(inner): return FunctionCall(function, inner)

    return wrap(call_on, value)
|
331
|
|
332
|
def unwrap_func_call(func_call, check_name=None):
    '''Unwraps any function call to its first argument.
    Also removes any column renaming.

    @param func_call A value that is not a FunctionCall (after removing any
        column renaming) is returned as is.
    @param check_name If not None, asserts that the function's name is either
        None or this name.
    '''
    func_call = remove_col_rename(func_call)
    if not isinstance(func_call, FunctionCall): return func_call

    if check_name is not None: # identity tests: None is a singleton
        name = func_call.function.name
        assert name is None or name == check_name
    return func_call.args[0]
|
343
|
|
344
|
##### Conditions
|
345
|
|
346
|
class ColValueCond(Code):
    '''A condition on a column, rendered by the ValueCond it is compared with.'''

    def __init__(self, col, value):
        '''
        @param col The column that the condition applies to
        @param value ValueCond|other Converted with as_ValueCond()
        '''
        self.col = col
        self.value = as_ValueCond(value)

    def to_str(self, db):
        # The ValueCond does the rendering, with the column as its left side
        return self.value.to_str(db, self.col)
|
354
|
|
355
|
def combine_conds(conds, keyword=None):
    '''Joins condition strings with AND, one condition per line.

    @param conds Iterable of condition strings (may be empty)
    @param keyword The keyword to add before the conditions, if any
    @return The combined string. The keyword is followed by nothing if there
        are no conditions, a space if there is one, and a newline otherwise.
    '''
    # Make a list, so that any iterable is accepted and an empty one of any
    # type (not just []) is recognized as empty
    conds = list(conds)

    str_ = ''
    if keyword is not None:
        if not conds: whitespace = ''
        elif len(conds) == 1: whitespace = ' '
        else: whitespace = '\n'
        str_ += keyword+whitespace

    str_ += '\nAND '.join(conds)
    return str_
|
368
|
|
369
|
##### Condition column comparisons
|
370
|
|
371
|
class ValueCond(BasicObject):
    '''Base class for conditions that compare something to a value.'''

    def __init__(self, value):
        '''
        @param value Code|literal-value The value to compare to. A non-Code
            value is wrapped in a Literal, and any column renaming is removed.
        '''
        if not isinstance(value, Code): value = Literal(value)
        value = remove_col_rename(value)

        self.value = value

    def to_str(self, db, left_value):
        '''Must be overridden.
        @param left_value The Code object that the condition is being applied on
        @throws NotImplementedError if the subclass does not override this
        '''
        # NotImplemented is a constant, not an exception, and calling it fails
        # with a TypeError; NotImplementedError is what Code.to_str() raises too
        raise NotImplementedError()

    def __repr__(self): return self.to_str(mockDb, '<left_value>')
|
385
|
|
386
|
class CompareCond(ValueCond):
    '''A comparison of something to a value using an operator.'''

    def __init__(self, value, operator='='):
        '''
        @param value See ValueCond.__init__()
        @param operator By default, compares NULL values literally. Use '~=' or
            '~!=' to pass NULLs through.
        '''
        ValueCond.__init__(self, value)
        self.operator = operator

    def to_str(self, db, left_value):
        '''
        @param left_value Code|name The left side of the comparison. A non-Code
            value is used as a column name. Any column renaming is removed.
        '''
        if not isinstance(left_value, Code): left_value = Col(left_value)
        left_value = remove_col_rename(left_value)

        right_value = self.value

        # Parse operator: strip the optional "~" prefix and then the optional
        # "!" prefix. Each *_ref list is read back below as a flag; presumably
        # strings.remove_prefix() sets it when the prefix was present.
        passthru_null_ref = [False]
        neg_ref = [False]
        operator = strings.remove_prefix('~', self.operator, passthru_null_ref)
        operator = strings.remove_prefix('!', operator, neg_ref)
        equals = operator.endswith('=') # also includes <=, >=

        # Handle nullable columns
        check_null = False
        if not passthru_null_ref[0]: # NULLs compare equal
            try: left_non_null = ensure_not_null(db, left_value)
            except ensure_not_null_excs: # fall back to alternate method
                check_null = equals and isinstance(right_value, Col)
            else:
                if left_non_null is not left_value: # wrapped, so wrap both
                    left_value = left_non_null
                    right_value = EnsureNotNull(right_value, left_value.type)

        # Checked after the wrapping above, so it only applies to a right
        # value that is still a NULL literal
        if equals and is_null(right_value): operator = 'IS'

        left = left_value.to_str(db)
        right = right_value.to_str(db)

        # Create str
        comparison = ' '.join((left, operator, right))
        if check_null:
            comparison = ('('+comparison+' OR ('+left+' IS NULL AND '+right
                +' IS NULL))')
        if neg_ref[0]: comparison = 'NOT '+comparison
        return comparison
|
431
|
|
432
|
# Tells as_ValueCond() to assume a non-ValueCond is a literal value
|
433
|
assume_literal = object() # sentinel: the default of as_ValueCond()'s default_table, tested with "is"
|
434
|
|
435
|
def as_ValueCond(value, default_table=assume_literal):
    '''Converts a value to a ValueCond.

    @param value ValueConds are returned unchanged. Anything else is wrapped
        in a CompareCond.
    @param default_table If given, a non-ValueCond is first passed through
        with_default_table() with this table. If left as assume_literal, the
        value is wrapped as it is.
    '''
    if isinstance(value, ValueCond): return value

    if default_table is not assume_literal:
        value = with_default_table(value, default_table)
    return CompareCond(value)
|
441
|
|
442
|
##### Joins
|
443
|
|
444
|
join_same = object() # sentinel mapping value: tells Join the left and right columns have the same name
|
445
|
|
446
|
# Tells Join the left and right columns have the same name and are never NULL
|
447
|
join_same_not_null = object() # sentinel mapping value; Join uses USING when every mapping value is this
|
448
|
|
449
|
filter_out = object() # sentinel join type: tells Join to filter out rows that match the join
|
450
|
|
451
|
class Join(BasicObject):
    def __init__(self, table, mapping=None, type_=None):
        '''
        @param table Table|str The table being joined (the right table). A
            string is converted to a Table.
        @param mapping dict(right_table_col=left_table_col, ...)
            Defaults to an empty dict (no join condition).
            * if left_table_col is join_same: left_table_col = right_table_col
              * Note that right_table_col must be a string
            * if left_table_col is join_same_not_null:
              left_table_col = right_table_col and both have NOT NULL constraint
              * Note that right_table_col must be a string
        @param type_ None (for plain join)|str (e.g. 'LEFT')|filter_out
            * filter_out: equivalent to 'LEFT' with the query filtered by
              `table_pkey IS NULL` (indicating no match)
        '''
        if util.is_str(table): table = Table(table)
        # A fresh dict per instance: a {} default would be shared by every
        # Join created without a mapping
        if mapping is None: mapping = {}
        assert type_ is None or util.is_str(type_) or type_ is filter_out

        self.table = table
        self.mapping = mapping
        self.type_ = type_

    def to_str(self, db, left_table_):
        '''
        @param left_table_ The table that self.table is being joined to
        '''
        def join(entry):
            '''Parses non-USING joins'''
            right_table_col, left_table_col = entry

            # Switch order (right_table_col is on the left in the comparison)
            left = right_table_col
            right = left_table_col
            left_table = self.table
            right_table = left_table_

            # Parse left side
            left = with_default_table(left, left_table)

            # Parse special values
            left_on_right = Col(left.name, right_table)
            if right is join_same: right = left_on_right
            elif right is join_same_not_null:
                right = CompareCond(left_on_right, '~=')

            # Parse right side
            right = as_ValueCond(right, right_table)

            return right.to_str(db, left)

        # Create join condition
        type_ = self.type_
        joins = self.mapping
        if joins == {}: join_cond = None
        elif type_ is not filter_out and all(
            (v is join_same_not_null for v in joins.values())):
            # all cols w/ USING, so can use simpler USING syntax
            cols = [to_name_only_col(key) for key in joins]
            join_cond = 'USING ('+(', '.join((c.to_str(db) for c in cols)))+')'
        else:
            join_cond = combine_conds([join(e) for e in joins.items()], 'ON')

        if isinstance(self.table, NamedTable): whitespace = '\n'
        else: whitespace = ' '

        # Create join
        if type_ is filter_out: type_ = 'LEFT'
        str_ = ''
        if type_ is not None: str_ += type_+' '
        str_ += 'JOIN'+whitespace+self.table.to_str(db)
        if join_cond is not None: str_ += whitespace+join_cond
        return str_

    def __repr__(self): return self.to_str(mockDb, '<left_table>')
|
519
|
|
520
|
##### Value exprs
|
521
|
|
522
|
default = CustomCode('DEFAULT') # the DEFAULT keyword, output verbatim
|
523
|
|
524
|
row_count = CustomCode('count(*)') # the count(*) expression, output verbatim
|
525
|
|
526
|
# See <http://www.postgresql.org/docs/8.3/static/datatype-numeric.html>
|
527
|
null_sentinels = {'text': r'\N', 'integer': 2147483647} # type name -> value that EnsureNotNull substitutes for NULL via coalesce(); 2147483647 == 2**31-1
|
528
|
|
529
|
class EnsureNotNull(FunctionCall):
    '''A coalesce() call that replaces NULL with the sentinel value for the
    given type.'''

    def __init__(self, value, type_):
        '''
        @param value Converted with as_Col()
        @param type_ The type name, which must be a key of null_sentinels
        @throws KeyError if there is no sentinel for the type
        '''
        col = as_Col(value)
        sentinel = null_sentinels[type_]
        FunctionCall.__init__(self, InternalFunction('coalesce'), col, sentinel)

        self.type = type_
|
535
|
|
536
|
##### Table exprs
|
537
|
|
538
|
class Values(Code):
    def __init__(self, values):
        '''
        @param values [...]|[[...], ...] Can be one or multiple rows.
            The sequence passed in is not modified.
        '''
        rows = values
        if len(values) >= 1 and not lists.is_seq(values[0]): # only one row
            rows = [values]

        # Build new row lists rather than assigning into `rows`, which would
        # overwrite the caller's rows and would fail for a tuple of rows
        self.rows = [[remove_col_rename(as_Value(v)) for v in row]
            for row in rows]

    def to_str(self, db):
        def row_str(row):
            return '('+(', '.join((v.to_str(db) for v in row)))+')'
        return 'VALUES '+(', '.join((row_str(row) for row in self.rows)))
|
555
|
|
556
|
def NamedValues(name, cols, values):
    '''Creates a NamedTable that names a Values expression.

    @param name The table name to assign
    @param cols The column names. Must be a mutable sequence.
    @param values See Values.__init__()
    @post `cols` will be changed to Col objects with the table set to `name`.
    '''
    rows = Values(values)
    named = NamedTable(name, rows, cols)
    set_cols_table(named, cols)
    return named
|
563
|
|
564
|
##### Database structure
|
565
|
|
566
|
class TypedCol(Col):
    '''A column together with its type, default and nullability.'''

    def __init__(self, name, type_, default=None, nullable=True):
        '''
        @param type_ str The type, output verbatim after the column name
        @param default Code|None (for no default)
        @param nullable If False, NOT NULL is added
        '''
        # Identity test, so that a Code object's own equality method is not
        # invoked with None
        assert default is None or isinstance(default, Code)

        Col.__init__(self, name)

        self.type = type_
        self.default = default
        self.nullable = nullable

    def to_str(self, db):
        str_ = Col.to_str(self, db)+' '+self.type
        if not self.nullable: str_ += ' NOT NULL'
        if self.default is not None:
            str_ += ' DEFAULT '+self.default.to_str(db)
        return str_

    def to_Col(self):
        '''Returns a plain Col with just this column's name.'''
        return Col(self.name)
|
583
|
|
584
|
ensure_not_null_excs = (NoUnderlyingTableException, KeyError) # what CompareCond.to_str() catches around ensure_not_null()
|
585
|
|
586
|
def ensure_not_null(db, col):
    '''Wraps a column in EnsureNotNull if the database reports its underlying
    column as nullable.

    @param db Must provide col_info()
    @param col Must have an underlying column.
    @return EnsureNotNull|Col The input itself if the column is not nullable
    @throws ensure_not_null_excs
    '''
    info = db.col_info(underlying_col(col))
    if not info.nullable: return col
    return EnsureNotNull(col, info.type)
|