[e988c2]: / tests / unit / utils / test_sqlalchemy_query_utils.py

Download this file

405 lines (337 with data), 14.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
import pytest
import sqlalchemy
from sqlalchemy.dialects.sqlite.pysqlite import SQLiteDialect_pysqlite
from sqlalchemy.engine.default import DefaultDialect
from sqlalchemy.sql.visitors import iterate
from ehrql.utils.sqlalchemy_query_utils import (
CreateTableAs,
GeneratedTable,
InsertMany,
add_setup_and_cleanup_queries,
clause_as_str,
is_predicate,
)
from ehrql.utils.string_utils import strip_indent
table = sqlalchemy.Table(
"some_table",
sqlalchemy.MetaData(),
sqlalchemy.Column("i", type_=sqlalchemy.Integer()),
sqlalchemy.Column("b", type_=sqlalchemy.Boolean()),
)
integer = table.c.i
boolean = table.c.b
@pytest.mark.parametrize(
"expected,clause",
[
# All comparisons are predicates
(True, integer == integer),
(True, integer >= integer),
(True, integer > integer),
(True, integer < integer),
(True, integer <= integer),
(True, integer != integer),
# As are boolean operations
(True, boolean | boolean),
(True, boolean & boolean),
(True, ~boolean),
# And combined boolean operations
(True, ~(boolean & boolean)),
(True, ~(boolean | boolean)),
(True, ~boolean | ~boolean),
(True, ~boolean & ~boolean),
# And null checks
(True, integer.is_(None)),
(True, integer.is_not(None)),
#
# But not direct references to boolean columns
(False, boolean),
# Or other, non-boolean, binary operations
(False, integer + integer),
# Or arbitrary function calls
(False, sqlalchemy.func.log10(integer)),
],
)
def test_is_predicate(expected, clause):
assert is_predicate(clause) == expected, f"Expected {expected}: {clause}"
def test_add_setup_and_cleanup_queries_basic():
# Make a temporary table
temp_table = _make_temp_table("temp_table", "foo")
temp_table.setup_queries.append(
temp_table.insert().values(foo="bar"),
)
# Select something from it
query = sqlalchemy.select(temp_table.c.foo)
# Check that we get the right queries in the right order
assert _queries_as_strs([query]) == [
"CREATE TABLE temp_table (\n\tfoo NULL\n)",
"INSERT INTO temp_table (foo) VALUES (:foo)",
"SELECT temp_table.foo \nFROM temp_table",
"DROP TABLE temp_table",
]
def test_add_setup_and_cleanup_queries_nested():
# Make a temporary table
temp_table1 = _make_temp_table("temp_table1", "foo")
temp_table1.setup_queries.append(
temp_table1.insert().values(foo="bar"),
)
# Make a second temporary table ...
temp_table2 = _make_temp_table("temp_table2", "baz")
temp_table2.setup_queries.append(
# ... populated by a SELECT query against the first table
temp_table2.insert().from_select(
[temp_table2.c.baz], sqlalchemy.select(temp_table1.c.foo)
),
)
# Select something from the second table
query = sqlalchemy.select(temp_table2.c.baz)
# Check that we create and drop the temporary tables in the right order
assert _queries_as_strs([query]) == [
"CREATE TABLE temp_table1 (\n\tfoo NULL\n)",
"INSERT INTO temp_table1 (foo) VALUES (:foo)",
"CREATE TABLE temp_table2 (\n\tbaz NULL\n)",
"INSERT INTO temp_table2 (baz) SELECT temp_table1.foo \nFROM temp_table1",
"DROP TABLE temp_table1",
"SELECT temp_table2.baz \nFROM temp_table2",
"DROP TABLE temp_table2",
]
def test_add_setup_and_cleanup_queries_multiple():
# Make a temporary table
temp_table1 = _make_temp_table("temp_table1", "foo")
temp_table1.setup_queries.append(
temp_table1.insert().values(foo="bar"),
)
# Make a second temporary table ...
temp_table2 = _make_temp_table("temp_table2", "baz")
temp_table2.setup_queries.append(
# ... populated by a SELECT query against the first table
temp_table2.insert().from_select(
[temp_table2.c.baz], sqlalchemy.select(temp_table1.c.foo)
),
)
# Select something from the second table
query_1 = sqlalchemy.select(temp_table2.c.baz)
# Select something from the first table
query_2 = sqlalchemy.select(temp_table1.c.foo)
# Check that we create and drop the temporary tables in the right order
assert _queries_as_strs([query_1, query_2]) == [
"CREATE TABLE temp_table1 (\n\tfoo NULL\n)",
"INSERT INTO temp_table1 (foo) VALUES (:foo)",
"CREATE TABLE temp_table2 (\n\tbaz NULL\n)",
"INSERT INTO temp_table2 (baz) SELECT temp_table1.foo \nFROM temp_table1",
"SELECT temp_table2.baz \nFROM temp_table2",
"DROP TABLE temp_table2",
"SELECT temp_table1.foo \nFROM temp_table1",
"DROP TABLE temp_table1",
]
def test_add_setup_and_cleanup_queries_multiply_nested():
# Make a temporary table
temp_table1 = _make_temp_table("temp_table1", "foo")
temp_table1.setup_queries.append(
temp_table1.insert().values(foo="bar"),
)
# Make a second temporary table that depends on the first
temp_table2 = _make_temp_table("temp_table2", "bar")
temp_table2.setup_queries.append(
temp_table2.insert().from_select(
[temp_table2.c.bar], sqlalchemy.select(temp_table1.c.foo)
),
)
# Make a third temporary table that also depends on the first
temp_table3 = _make_temp_table("temp_table3", "baz")
temp_table3.setup_queries.append(
temp_table3.insert().from_select(
[temp_table3.c.baz], sqlalchemy.select(temp_table1.c.foo)
),
)
# Select something from the third table
query_1 = sqlalchemy.select(temp_table3.c.baz)
# Select something from the second table
query_2 = sqlalchemy.select(temp_table2.c.bar)
# Check that we create and drop the temporary tables in the right order
assert _queries_as_strs([query_1, query_2]) == [
"CREATE TABLE temp_table1 (\n\tfoo NULL\n)",
"INSERT INTO temp_table1 (foo) VALUES (:foo)",
"CREATE TABLE temp_table3 (\n\tbaz NULL\n)",
"INSERT INTO temp_table3 (baz) SELECT temp_table1.foo \nFROM temp_table1",
"SELECT temp_table3.baz \nFROM temp_table3",
"DROP TABLE temp_table3",
"CREATE TABLE temp_table2 (\n\tbar NULL\n)",
"INSERT INTO temp_table2 (bar) SELECT temp_table1.foo \nFROM temp_table1",
"DROP TABLE temp_table1",
"SELECT temp_table2.bar \nFROM temp_table2",
"DROP TABLE temp_table2",
]
def _make_temp_table(name, *columns):
table = GeneratedTable(
name, sqlalchemy.MetaData(), *[sqlalchemy.Column(c) for c in columns]
)
table.setup_queries = [
sqlalchemy.schema.CreateTable(table),
]
table.cleanup_queries = [sqlalchemy.schema.DropTable(table)]
return table
def _queries_as_strs(queries):
return [str(q).strip() for q in add_setup_and_cleanup_queries(queries)]
def test_clause_as_str():
table = sqlalchemy.table("foo", sqlalchemy.Column("bar"))
query = sqlalchemy.select(table.c.bar).where(table.c.bar > 100)
query_str = clause_as_str(query, DefaultDialect())
assert query_str == "SELECT foo.bar \nFROM foo \nWHERE foo.bar > 100"
def test_clause_as_str_with_insert_many():
table = sqlalchemy.Table(
"t",
sqlalchemy.MetaData(),
sqlalchemy.Column("i", sqlalchemy.Integer()),
sqlalchemy.Column("s", sqlalchemy.String()),
)
statement = InsertMany(
table,
[
(1, "a"),
(2, "b"),
(3, "c"),
],
)
query_str = clause_as_str(statement, DefaultDialect())
assert query_str == strip_indent(
"""
INSERT INTO t (i, s) VALUES (1, 'a');
INSERT INTO t (i, s) VALUES (2, 'b');
INSERT INTO t (i, s) VALUES (3, 'c')
"""
)
def test_insert_many_compile():
table = sqlalchemy.Table(
"t",
sqlalchemy.MetaData(),
sqlalchemy.Column("i", sqlalchemy.Integer()),
sqlalchemy.Column("s", sqlalchemy.String()),
)
statement = InsertMany(
table,
[
(1, "a"),
(2, "b"),
(3, "c"),
],
)
query_str = statement.compile(dialect=DefaultDialect())
assert str(query_str).strip() == "INSERT INTO t (i, s) VALUES (:i, :s)"
def test_add_setup_and_cleanup_queries_with_insert_many():
# Confirm that the InsertMany class acts enough like a SQLAlchemy ClauseElement for
# our setup/cleanup code to work with it
table = sqlalchemy.Table(
"t",
sqlalchemy.MetaData(),
sqlalchemy.Column("i", sqlalchemy.Integer()),
)
statement = InsertMany(table, rows=[])
setup_cleanup = add_setup_and_cleanup_queries([statement])
assert setup_cleanup == [statement]
def test_generated_table_from_query():
query = sqlalchemy.select(
sqlalchemy.literal(1).label("number"),
sqlalchemy.literal("a").label("string"),
)
table = GeneratedTable.from_query("some_table", query, schema="some_schema")
assert str(sqlalchemy.schema.CreateTable(table)).strip() == (
"CREATE TABLE some_schema.some_table (\n\tnumber INTEGER, \n\tstring VARCHAR\n)"
)
def test_generated_table_from_query_with_metadata():
metadata = sqlalchemy.MetaData()
query = sqlalchemy.select(sqlalchemy.literal(1).label("number"))
table = GeneratedTable.from_query("some_table", query, metadata=metadata)
assert table.metadata is metadata
def test_create_table_as():
query = sqlalchemy.select(
sqlalchemy.literal(1).label("number"),
sqlalchemy.literal("a").label("string"),
)
table = sqlalchemy.table("test")
create_table = CreateTableAs(table, query)
assert str(create_table) == (
"CREATE TABLE test AS SELECT :param_1 AS number, :param_2 AS string"
)
def test_create_table_as_can_be_iterated():
# If we don't define the `get_children()` method on `CreateTableAs` we won't get an
# error when attempting to iterate the resulting element structure: it will just act
# as a leaf node. But as we rely heavily on query introspection we need to ensure we
# can iterate over query structures.
query = sqlalchemy.select(
sqlalchemy.literal(1).label("number"),
sqlalchemy.literal("a").label("string"),
)
table = sqlalchemy.table("test")
create_table = CreateTableAs(table, query)
# Check that the original elements show up when iterated
assert any([e is table for e in iterate(create_table)])
assert any([e is query for e in iterate(create_table)])
# The below tests exercise obscure corners of SQLAlchemy which used to have bugs that we
# had to workaroud. These have been fixed in SQLAlchemy 2 but we retain the tests for
# their warm fuzzy value.
def test_clause_as_str_with_create_index_on_sqlite():
# Setting `literal_binds=True` (as we do in `clause_as_str()`) while compiling
# CreateIndex used to blow up with a TypeError in the SQLite dialect. We confirm
# that this is no longer the case.
table = sqlalchemy.Table("foo", sqlalchemy.MetaData(), sqlalchemy.Column("bar"))
index = sqlalchemy.Index(None, table.c.bar)
create_index = sqlalchemy.schema.CreateIndex(index)
dialect = SQLiteDialect_pysqlite(paramstyle="named")
query_str = clause_as_str(create_index, dialect)
assert query_str == "CREATE INDEX ix_foo_bar ON foo (bar)"
def test_clause_as_str_with_expanding_bindparameter_and_bind_expression():
# This exercises an obscure corner of SQLAlchemy which used to be buggy: using
# "literal_binds" to compile a clause which combines expanding BindParameters with a
# bind expression.
# Create a custom type with a "bind_expression", see:
# https://docs.sqlalchemy.org/en/14/core/type_api.html#sqlalchemy.types.TypeEngine.bind_expression
class CustomType(sqlalchemy.types.TypeDecorator):
impl = sqlalchemy.types.String
cache_ok = True
# This means that every time we reference a value of this type it gets wrapped
# in a function call
def bind_expression(self, bindvalue):
return sqlalchemy.func.upper(bindvalue)
table = sqlalchemy.Table(
"tbl", sqlalchemy.MetaData(), sqlalchemy.Column("col", CustomType())
)
# With a single value comparison like `==` we can compile this to either a
# parameterised string, or a string containing literals and it works as expected
equality_expr = table.c.col == "abc"
assert (
str(equality_expr.compile(compile_kwargs={"render_postcompile": True}))
== "tbl.col = upper(:col_1)"
)
assert (
str(equality_expr.compile(compile_kwargs={"literal_binds": True}))
== "tbl.col = upper('abc')"
)
# With a multi-valued comparison like `IN` we get an "expanding" BindParameter, see:
# https://docs.sqlalchemy.org/en/14/core/sqlelement.html#sqlalchemy.sql.expression.bindparam.params.expanding
contains_expr = table.c.col.in_(["abc", "def"])
# We can compile this to a parameterised string and get the expected output
assert (
str(contains_expr.compile(compile_kwargs={"render_postcompile": True}))
== "tbl.col IN (upper(:col_1_1), upper(:col_1_2))"
)
# Attempting to compile it with parameters replaced by string literals used to blow
# up with:
#
# AttributeError("'NoneType' object has no attribute 'group'")
#
# We confirm it no longer does.
compiled = clause_as_str(contains_expr, DefaultDialect())
assert compiled == "tbl.col IN (upper('abc'), upper('def'))"
def test_clause_as_string_with_repeated_expanding_bindparameter():
# Previously we would blow up with a KeyError when the same "expanding" (i.e.
# multi-valued) BindParameter was used more than once within a query
table = sqlalchemy.Table(
"tbl",
sqlalchemy.MetaData(),
sqlalchemy.Column("col_1", sqlalchemy.Integer()),
sqlalchemy.Column("col_2", sqlalchemy.Integer()),
)
multi_valued = sqlalchemy.literal([1, 2])
clause = table.c.col_1.in_(multi_valued) | table.c.col_2.in_(multi_valued)
compiled = clause_as_str(clause, DefaultDialect())
assert compiled == "tbl.col_1 IN (1, 2) OR tbl.col_2 IN (1, 2)"