Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ The supported method of passing ClickHouse server settings is to prefix such arg
- Dropped Python 3.9 support. The minimum supported Python version is now 3.10. 0.15.x is the last series supporting Python 3.9.

### Bug Fixes
- Fix Dynamic/JSON column reads when a path's inferred type sorts alphabetically after `"SharedVariant"`. ClickHouse's `DataTypeVariant` constructor sorts its members alphabetically by name, and discriminator bytes on the wire index into that sorted order. The client appended `SharedVariant` to the variant list without sorting, so affected paths were read as the wrong variant. Closes [#712](https://github.com/ClickHouse/clickhouse-connect/issues/712)
- Fix async streaming race condition that caused unhandled `InvalidStateError` exceptions on early stream termination. When breaking out of an async stream early, `shutdown()` scheduled a `set_result` callback for pending futures via `call_soon_threadsafe`, but `Task.cancel()` could cancel the future before the callback ran. The done-check is now deferred into the callback itself so it sees the actual future state at execution time.
- SQLAlchemy: Wrap raw SQL strings in `text()` in `ChClickHouseDialect.get_schema_names()` and `get_table_names()`, so `Inspector.get_schema_names()` and `get_table_names()` work on SQLAlchemy 2.x instead of raising `ObjectNotExecutableError`.

Expand Down
2 changes: 2 additions & 0 deletions clickhouse_connect/datatypes/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,8 @@ def read_dynamic_prefix(_, source: ByteSource, ctx: QueryContext) -> DynamicStat
num_variants = source.read_leb128()
variant_types = [get_from_name(source.read_leb128_str()) for _ in range(num_variants)]
variant_types.append(SHARED_VARIANT_TYPE) # noqa: F821 (undefined-name)
# replicate the sort after appending SharedVariant
variant_types.sort(key=lambda t: t.name)
if source.read_uint64() != 0: # discriminator format, currently only 0 is recognized
raise DataError("Unexpected discriminator format in Variant column prefix")
variant_states = [e_type.read_column_prefix(source, ctx) for e_type in variant_types]
Expand Down
23 changes: 23 additions & 0 deletions tests/integration_tests/test_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,26 @@ def test_json_dynamic_variant_multiple_rows(param_client: Client, call, table_co
r4 = result[3][1]
assert r4["value"] == pytest.approx(0.0)
assert r4["status"] == -10


def test_json_path_sorts_after_shared_variant(param_client: Client, call, table_context: Callable):
    """JSON path whose inferred type name sorts after "SharedVariant".

    A value of at least 2**63 is inferred as UInt64 under default settings, and
    "UInt64" comes after "SharedVariant" alphabetically, which is the ordering
    that triggered the discriminator misalignment.
    """
    type_available(param_client, "json")
    big_value = 18000000000000000000
    with table_context("json_sort_uint64", ["id Int32", "j JSON"]):
        call(param_client.insert, "json_sort_uint64", [[1, {"k": big_value}]])
        rows = call(param_client.query, "SELECT * FROM json_sort_uint64 ORDER BY id").result_set
        assert rows[0][1] == {"k": big_value}


def test_dynamic_uint64_single_variant(param_client: Client, call, table_context: Callable):
    """Dynamic column whose only active typed variant is UInt64.

    Without sorting SharedVariant into the variant list, each row would be read
    as SharedVariant and crash or return garbage.
    """
    type_available(param_client, "dynamic")
    with table_context("dynamic_uint64", ["id Int32", "d Dynamic"]):
        for stmt in (
            "INSERT INTO dynamic_uint64 SELECT 1, toUInt64(17)",
            "INSERT INTO dynamic_uint64 SELECT 2, toUInt64(9223372036854775900)",
        ):
            call(param_client.command, stmt)
        rows = call(param_client.query, "SELECT * FROM dynamic_uint64 ORDER BY id").result_set
        assert rows[0][1] == 17
        assert rows[1][1] == 9223372036854775900
28 changes: 27 additions & 1 deletion tests/unit_tests/test_dynamic.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import pytest

from clickhouse_connect.datatypes.dynamic import typed_variant
from clickhouse_connect.datatypes.dynamic import read_dynamic_prefix, read_variant_column, typed_variant
from clickhouse_connect.datatypes.registry import get_from_name
from clickhouse_connect.driver.bytesource import ByteArraySource
from clickhouse_connect.driver.exceptions import DataError
from clickhouse_connect.driver.query import QueryContext


def test_variant_data_size():
Expand All @@ -19,3 +21,27 @@ def test_variant_invalid_data_size():

with pytest.raises(DataError):
v_type.data_size(["not an int"])


def test_dynamic_prefix_sorts_shared_variant():
    """read_dynamic_prefix must place SharedVariant into the variant list in sorted order.

    The wire prefix encodes struct_version=1, max_dynamic_types=1, a single
    declared variant ("UInt64"), and discriminator format 0. Neither UInt64
    nor SharedVariant carries a per-column prefix of its own.
    """
    prefix = b"".join((
        b"\x01\x00\x00\x00\x00\x00\x00\x00",  # struct_version = 1 (UInt64 LE)
        b"\x01",                              # max_dynamic_types leb128 = 1
        b"\x01",                              # num_variants leb128 = 1
        b"\x06UInt64",                        # leb128 length 6 + type name
        b"\x00\x00\x00\x00\x00\x00\x00\x00",  # discriminator_format = 0 (UInt64 LE)
    ))
    # Single row: discriminator 1 selects UInt64 in the sorted list
    # [SharedVariant, UInt64], followed by the little-endian UInt64 value 100.
    body = b"\x01" + (100).to_bytes(8, "little")

    ctx = QueryContext()
    source = ByteArraySource(prefix + body)

    state = read_dynamic_prefix(None, source, ctx)
    assert [t.name for t in state.variant_types] == ["SharedVariant", "UInt64"]

    column = read_variant_column(source, 1, ctx, state.variant_types, state.variant_states)
    assert column == [100]
Loading