From 5971ebae6fd59c9062d74bb142ee52cda8da7691 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Sat, 8 Feb 2025 21:44:53 +0300 Subject: [PATCH 1/7] new json --- lib/ch/row_binary.ex | 18 ++++++++ lib/ch/types.ex | 1 + test/ch/connection_test.exs | 90 ++++++++++++++++++++++++++++++++++--- test/ch/json_test.exs | 86 +++++++++++++++++++++++++++++++++++ 4 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 test/ch/json_test.exs diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index aa95a038..4458ae84 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -516,6 +516,7 @@ defmodule Ch.RowBinary do :uuid, :date, :date32, + :json, :ipv4, :ipv6, :point, @@ -697,6 +698,20 @@ defmodule Ch.RowBinary do defp utf8_size(codepoint) when codepoint <= 0xFFFF, do: 3 defp utf8_size(codepoint) when codepoint <= 0x10FFFF, do: 4 + @compile inline: [decode_json_decode_rows: 5] + + for {pattern, size} <- varints do + defp decode_json_decode_rows( + <>, + types_rest, + row, + rows, + types + ) do + decode_rows(types_rest, bin, [Jason.decode!(s) | row], rows, types) + end + end + @compile inline: [decode_binary_decode_rows: 5] for {pattern, size} <- varints do @@ -865,6 +880,9 @@ defmodule Ch.RowBinary do <> = bin decode_rows(types_rest, bin, [Date.add(@epoch_date, d) | row], rows, types) + :json -> + decode_json_decode_rows(bin, types_rest, row, rows, types) + {:datetime, timezone} -> <> = bin diff --git a/lib/ch/types.ex b/lib/ch/types.ex index c9e77316..be862653 100644 --- a/lib/ch/types.ex +++ b/lib/ch/types.ex @@ -26,6 +26,7 @@ defmodule Ch.Types do # {"DateTime", :datetime, []}, {"Date32", :date32, []}, {"Date", :date, []}, + {"JSON", :json, []}, {"LowCardinality", :low_cardinality, [:type]}, for size <- [32, 64, 128, 256] do {"Decimal#{size}", :"decimal#{size}", [:int]} diff --git a/test/ch/connection_test.exs b/test/ch/connection_test.exs index ff008982..0d355e63 100644 --- a/test/ch/connection_test.exs +++ b/test/ch/connection_test.exs @@ -568,20 +568,98 @@ defmodule Ch.ConnectionTest do }} = Ch.query(conn, "SELECT * FROM t_uuid ORDER BY y") end + test "read json as string", %{conn: conn} do + assert Ch.query!(conn, ~s|select '{"a":42}'::JSON|, [], + settings: [ + enable_json_type: 1, + output_format_binary_write_json_as_string: 1 + ] + ).rows == [[%{"a" => "42"}]] + end + + @tag :skip + test "read json with invalid utf8 string", %{conn: conn} do + assert Ch.query!( + conn, + ~s|select map('a', 42)::JSON|, + %{"bin" => "\x61\xF0\x80\x80\x80b"}, + settings: [ + enable_json_type: 1, + output_format_binary_write_json_as_string: 1 + ] + ).rows == [[%{"a" => "a����b"}]] + end + + test "write->read json as string", %{conn: conn} do + Ch.query!(conn, "CREATE TABLE test_write_json(json JSON) ENGINE = Memory", [], + settings: [ + enable_json_type: 1 + ] + ) + + rowbinary = + Ch.RowBinary.encode_rows( + [ + [Jason.encode_to_iodata!(%{"a" => 42})], + [Jason.encode_to_iodata!(%{"b" => 10})] + ], + _types = [:string] + ) + + Ch.query!(conn, ["insert into test_write_json(json) format RowBinary\n" | rowbinary], [], + settings: [ + enable_json_type: 1, + input_format_binary_read_json_as_string: 1 + ] + ) + + assert Ch.query!(conn, "select json from test_write_json", [], + settings: [ + enable_json_type: 1, + output_format_binary_write_json_as_string: 1 + ] + ).rows == + [[%{"a" => "42"}], [%{"b" => "10"}]] + end + + # https://clickhouse.com/docs/en/sql-reference/data-types/newjson + # https://clickhouse.com/docs/en/integrations/data-formats/json/overview + # https://clickhouse.com/blog/a-new-powerful-json-data-type-for-clickhouse + # https://clickhouse.com/blog/json-bench-clickhouse-vs-mongodb-elasticsearch-duckdb-postgresql + # https://github.com/ClickHouse/ClickHouse/pull/70288 + # https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/TypeId.h @tag :skip test "json", %{conn: conn} do - settings = [allow_experimental_object_type: 1] + settings = [enable_json_type: 1] - Ch.query!(conn, "CREATE TABLE json(o JSON) ENGINE = Memory", [], settings: settings) + assert Ch.query!( + conn, + ~s|select '{"a":42,"b":10}'::JSON|, + [], + settings: settings, + decode: false, + format: "RowBinary" + ).rows == [ + <<2, 1, 97, 10, 42, 0, 0, 0, 0, 0, 0, 0, 1, 98, 10, 10, 0, 0, 0, 0, 0, 0, 0>> + ] + + # Ch.query!(conn, "CREATE TABLE test_json(json JSON) ENGINE = Memory", [], settings: settings) - Ch.query!(conn, ~s|INSERT INTO json VALUES ('{"a": 1, "b": { "c": 2, "d": [1, 2, 3] }}')|) + # Ch.query!( + # conn, + # ~s|INSERT INTO test_json VALUES ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), ('{"f" : "Hello, World!"}'), ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}')| + # ) - assert Ch.query!(conn, "SELECT o.a, o.b.c, o.b.d[3] FROM json").rows == [[1, 2, 3]] + # assert Ch.query!(conn, "SELECT json FROM test_json") == :asdf - # named tuples are not supported yet - assert_raise ArgumentError, fn -> Ch.query!(conn, "SELECT o FROM json") end + # assert Ch.query!(conn, "SELECT json.a, json.b.c, json.b.d[3] FROM test_json").rows == [ + # [1, 2, 3] + # ] end + # TODO variant (is there?) + # TODO dynamic + # TODO enum16 test "enum8", %{conn: conn} do diff --git a/test/ch/json_test.exs b/test/ch/json_test.exs new file mode 100644 index 00000000..70c7a9c8 --- /dev/null +++ b/test/ch/json_test.exs @@ -0,0 +1,86 @@ +defmodule Ch.JSONTest do + use ExUnit.Case + + setup do + conn = + start_supervised!( + {Ch, + database: Ch.Test.database(), + settings: [ + enable_json_type: 1, + input_format_binary_read_json_as_string: 1, + output_format_binary_write_json_as_string: 1 + ]} + ) + + {:ok, conn: conn} + end + + # https://clickhouse.com/docs/en/sql-reference/data-types/newjson#creating-json + test "Creating JSON", %{conn: conn} do + Ch.query!(conn, "CREATE TABLE test (json JSON) ENGINE = Memory") + on_exit(fn -> Ch.Test.sql_exec("DROP TABLE test") end) + + Ch.query!(conn, """ + INSERT INTO test VALUES + ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), + ('{"f" : "Hello, World!"}'), + ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}') + """) + + assert Ch.query!(conn, "SELECT json FROM test").rows == [ + [%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"]}], + [%{"f" => "Hello, World!"}], + [%{"a" => %{"b" => "43", "e" => "10"}, "c" => ["4", "5", "6"]}] + ] + end + + @tag :skip + test "Creating JSON (explicit types and SKIP)", %{conn: conn} do + Ch.query!(conn, "CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory") + on_exit(fn -> Ch.Test.sql_exec("DROP TABLE test") end) + + Ch.query!(conn, """ + INSERT INTO test VALUES + ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), + ('{"f" : "Hello, World!"}'), + ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}') + """) + + assert Ch.query!(conn, "SELECT json FROM test").rows == [] + end + + test "Creating JSON using CAST from String", %{conn: conn} do + assert Ch.query!(conn, """ + SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json + """).rows == [ + [%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"], "d" => "Hello, World!"}] + ] + end + + test "Creating JSON using CAST from Tuple", %{conn: conn} do + assert Ch.query!( + conn, + """ + SELECT (tuple(42 AS b) AS a, [1, 2, 3] AS c, 'Hello, World!' AS d)::JSON AS json + """, + [], + settings: [enable_named_columns_in_function_tuple: 1] + ).rows == [[%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"], "d" => "Hello, World!"}]] + end + + test "Creating JSON using CAST from Map", %{conn: conn} do + assert Ch.query!( + conn, + """ + SELECT map('a', map('b', 42), 'c', [1,2,3], 'd', 'Hello, World!')::JSON AS json; + """, + [], + settings: [enable_variant_type: 1, use_variant_as_common_type: 1] + ).rows == [[%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"], "d" => "Hello, World!"}]] + end + + # https://clickhouse.com/docs/en/sql-reference/data-types/newjson#reading-json-paths-as-subcolumns + test "Reading JSON paths as subcolumns", %{conn: _conn} do + end +end From 811879a6d32bd936a570c9b492b336f026d8c03c Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 10 Feb 2025 13:12:07 +0300 Subject: [PATCH 2/7] eh? --- lib/ch/types.ex | 1 + test/ch/json_test.exs | 29 ++++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/ch/types.ex b/lib/ch/types.ex index be862653..d96c792c 100644 --- a/lib/ch/types.ex +++ b/lib/ch/types.ex @@ -323,6 +323,7 @@ defmodule Ch.Types do end def decode("DateTime"), do: :datetime + def decode("JSON" <> _), do: :json def decode(type) do try do diff --git a/test/ch/json_test.exs b/test/ch/json_test.exs index 70c7a9c8..e285d972 100644 --- a/test/ch/json_test.exs +++ b/test/ch/json_test.exs @@ -13,13 +13,16 @@ defmodule Ch.JSONTest do ]} ) + on_exit(fn -> + Ch.Test.sql_exec("DROP TABLE test", [], database: Ch.Test.database()) + end) + {:ok, conn: conn} end # https://clickhouse.com/docs/en/sql-reference/data-types/newjson#creating-json test "Creating JSON", %{conn: conn} do Ch.query!(conn, "CREATE TABLE test (json JSON) ENGINE = Memory") - on_exit(fn -> Ch.Test.sql_exec("DROP TABLE test") end) Ch.query!(conn, """ INSERT INTO test VALUES @@ -35,10 +38,8 @@ defmodule Ch.JSONTest do ] end - @tag :skip test "Creating JSON (explicit types and SKIP)", %{conn: conn} do Ch.query!(conn, "CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory") - on_exit(fn -> Ch.Test.sql_exec("DROP TABLE test") end) Ch.query!(conn, """ INSERT INTO test VALUES @@ -47,7 +48,11 @@ defmodule Ch.JSONTest do ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}') """) - assert Ch.query!(conn, "SELECT json FROM test").rows == [] + assert Ch.query!(conn, "SELECT json FROM test").rows == [ + [%{"a" => %{"b" => 42}, "c" => ["1", "2", "3"]}], + [%{"a" => %{"b" => 0}, "f" => "Hello, World!"}], + [%{"a" => %{"b" => 43}, "c" => ["4", "5", "6"]}] + ] end test "Creating JSON using CAST from String", %{conn: conn} do @@ -81,6 +86,20 @@ defmodule Ch.JSONTest do end # https://clickhouse.com/docs/en/sql-reference/data-types/newjson#reading-json-paths-as-subcolumns - test "Reading JSON paths as subcolumns", %{conn: _conn} do + test "Reading JSON paths as subcolumns", %{conn: conn} do + Ch.query!(conn, "CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory") + + Ch.query!(conn, """ + INSERT INTO test VALUES + ('{"a" : {"b" : 42, "g" : 42.42}, "c" : [1, 2, 3], "d" : "2020-01-01"}'), + ('{"f" : "Hello, World!", "d" : "2020-01-02"}'), + ('{"a" : {"b" : 43, "e" : 10, "g" : 43.43}, "c" : [4, 5, 6]}') + """) + + assert Ch.query!(conn, "SELECT json FROM test").rows == [ + [%{"a" => %{"b" => 42, "g" => 42.42}, "c" => ["1", "2", "3"], "d" => "2020-01-01"}], + [%{"a" => %{"b" => 0}, "d" => "2020-01-02", "f" => "Hello, World!"}], + [%{"a" => %{"b" => 43, "g" => 43.43}, "c" => ["4", "5", "6"]}] + ] end end From 0fad98ea3ed82be1e5b6d20bf4dd9631efa6571e Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 24 Mar 2025 00:16:31 +0300 Subject: [PATCH 3/7] failed automatic string/binary json encoding --- lib/ch/query.ex | 8 +- lib/ch/row_binary.ex | 258 +++++++++++++++++++++++------------- test/ch/row_binary_test.exs | 23 +++- 3 files changed, 185 insertions(+), 104 deletions(-) diff --git a/lib/ch/query.ex b/lib/ch/query.ex index f5a98053..ea2a0e38 100644 --- a/lib/ch/query.ex +++ b/lib/ch/query.ex @@ -114,12 +114,12 @@ defimpl DBConnection.Query, for: Ch.Query do names = Keyword.get(opts, :names) -> types = Keyword.fetch!(opts, :types) header = RowBinary.encode_names_and_types(names, types) - data = RowBinary.encode_rows(params, types) + data = RowBinary.encode_rows(params, types, opts) {_query_params = [], headers(opts), [statement, ?\n, header | data]} format_row_binary?(statement) -> types = Keyword.fetch!(opts, :types) - data = RowBinary.encode_rows(params, types) + data = RowBinary.encode_rows(params, types, opts) {_query_params = [], headers(opts), [statement, ?\n | data]} true -> @@ -181,11 +181,11 @@ defimpl DBConnection.Query, for: Ch.Query do case get_header(headers, "x-clickhouse-format") do "RowBinary" -> types = Keyword.fetch!(opts, :types) - rows = data |> IO.iodata_to_binary() |> RowBinary.decode_rows(types) + rows = data |> IO.iodata_to_binary() |> RowBinary.decode_rows(types, opts) %Result{num_rows: length(rows), rows: rows, command: command, headers: headers} "RowBinaryWithNamesAndTypes" -> - [names | rows] = data |> IO.iodata_to_binary() |> RowBinary.decode_names_and_rows() + [names | rows] = data |> IO.iodata_to_binary() |> RowBinary.decode_names_and_rows(opts) %Result{ num_rows: length(rows), diff --git a/lib/ch/row_binary.ex b/lib/ch/row_binary.ex index 0053c96d..e5f83712 100644 --- a/lib/ch/row_binary.ex +++ b/lib/ch/row_binary.ex @@ -42,8 +42,11 @@ defmodule Ch.RowBinary do [3, [5 | "hello"]] """ - def encode_row(row, types) do - _encode_row(row, encoding_types(types)) + @spec encode_row([term], [type], [Ch.query_option()]) :: iodata + when type: String.t() | atom | tuple + def encode_row(row, types, opts \\ []) do + # opts here are the same opts that are passed to Ch.query/4 in the last argument + _encode_row(row, encoding_types(types, opts)) end defp _encode_row([el | els], [type | types]), do: [encode(type, el) | _encode_row(els, types)] @@ -64,8 +67,11 @@ defmodule Ch.RowBinary do [3, [5 | "hello"], 4, [2 | "hi"]] """ - def encode_rows(rows, types) do - _encode_rows(rows, encoding_types(types)) + @spec encode_rows([row], [type], [Ch.query_option()]) :: iodata + when row: [term], type: String.t() | atom | tuple + def encode_rows(rows, types, opts \\ []) do + # `opts` here is what's passed to `Ch.query/4` in the last argument + _encode_rows(rows, encoding_types(types, opts)) end @doc false @@ -79,17 +85,17 @@ defmodule Ch.RowBinary do defp _encode_rows([], [], rows, types), do: _encode_rows(rows, types) @doc false - def encoding_types([type | types]) do - [encoding_type(type) | encoding_types(types)] + def encoding_types([type | types], opts) do + [encoding_type(type, opts) | encoding_types(types, opts)] end - def encoding_types([] = done), do: done + def encoding_types([] = done, _opts), do: done - defp encoding_type(type) when is_binary(type) do - encoding_type(Ch.Types.decode(type)) + defp encoding_type(type, opts) when is_binary(type) do + encoding_type(Ch.Types.decode(type), opts) end - defp encoding_type(t) + defp encoding_type(t, _opts) when t in [ :string, :binary, @@ -105,37 +111,37 @@ defmodule Ch.RowBinary do ], do: t - defp encoding_type({:datetime = d, "UTC"}), do: d + defp encoding_type({:datetime = d, "UTC"}, _opts), do: d - defp encoding_type({:datetime, tz}) do + defp encoding_type({:datetime, tz}, _opts) do raise ArgumentError, "can't encode DateTime with non-UTC timezone: #{inspect(tz)}" end - defp encoding_type({:fixed_string, _len} = t), do: t + defp encoding_type({:fixed_string, _len} = t, _opts), do: t for size <- [8, 16, 32, 64, 128, 256] do - defp encoding_type(unquote(:"u#{size}") = u), do: u - defp encoding_type(unquote(:"i#{size}") = i), do: i + defp encoding_type(unquote(:"u#{size}") = u, _opts), do: u + defp encoding_type(unquote(:"i#{size}") = i, _opts), do: i end for size <- [32, 64] do - defp encoding_type(unquote(:"f#{size}") = f), do: f + defp encoding_type(unquote(:"f#{size}") = f, _opts), do: f end - defp encoding_type({:array = a, t}), do: {a, encoding_type(t)} + defp encoding_type({:array = a, t}, opts), do: {a, encoding_type(t, opts)} - defp encoding_type({:tuple = t, ts}) do - {t, Enum.map(ts, &encoding_type/1)} + defp encoding_type({:tuple = t, ts}, opts) do + {t, Enum.map(ts, fn t -> encoding_type(t, opts) end)} end - defp encoding_type({:map = m, kt, vt}) do - {m, encoding_type(kt), encoding_type(vt)} + defp encoding_type({:map = m, kt, vt}, opts) do + {m, encoding_type(kt, opts), encoding_type(vt, opts)} end - defp encoding_type({:nullable = n, t}), do: {n, encoding_type(t)} - defp encoding_type({:low_cardinality, t}), do: encoding_type(t) + defp encoding_type({:nullable = n, t}, opts), do: {n, encoding_type(t, opts)} + defp encoding_type({:low_cardinality, t}, opts), do: encoding_type(t, opts) - defp encoding_type({:decimal, p, s}) do + defp encoding_type({:decimal, p, s}, _opts) do case decimal_size(p) do 32 -> {:decimal32, s} 64 -> {:decimal64, s} @@ -144,29 +150,55 @@ defmodule Ch.RowBinary do end end - defp encoding_type({d, _scale} = t) + defp encoding_type({d, _scale} = t, _opts) when d in [:decimal32, :decimal64, :decimal128, :decimal256], do: t - defp encoding_type({:datetime64 = t, p}), do: {t, time_unit(p)} + defp encoding_type({:datetime64 = t, p}, _opts), do: {t, time_unit(p)} - defp encoding_type({:datetime64 = t, p, "UTC"}), do: {t, time_unit(p)} + defp encoding_type({:datetime64 = t, p, "UTC"}, _opts), do: {t, time_unit(p)} - defp encoding_type({:datetime64, _, tz}) do + defp encoding_type({:datetime64, _, tz}, _opts) do raise ArgumentError, "can't encode DateTime64 with non-UTC timezone: #{inspect(tz)}" end - defp encoding_type({e, mappings}) when e in [:enum8, :enum16] do + defp encoding_type({e, mappings}, _opts) when e in [:enum8, :enum16] do {e, Map.new(mappings)} end - defp encoding_type({:simple_aggregate_function, _f, t}), do: encoding_type(t) + defp encoding_type({:simple_aggregate_function, _f, t}, opts), do: encoding_type(t, opts) - defp encoding_type(:ring), do: {:array, :point} - defp encoding_type(:polygon), do: {:array, {:array, :point}} - defp encoding_type(:multipolygon), do: {:array, {:array, {:array, :point}}} + defp encoding_type(:ring, _opts), do: {:array, :point} + defp encoding_type(:polygon, _opts), do: {:array, {:array, :point}} + defp encoding_type(:multipolygon, _opts), do: {:array, {:array, {:array, :point}}} - defp encoding_type(type) do + defp encoding_type(:json, opts) do + json_as_string = get_in(opts, [:settings, :input_format_binary_read_json_as_string]) + json_as_string = with 0 <- json_as_string, do: false + + if json_as_string do + :string_json + else + raise ArgumentError, """ + Native JSON encoding is not yet supported, please use :input_format_binary_read_json_as_string setting to encode JSON as string. + + Example: + + Ch.query( + conn, + "insert into test(json) format RowBinary", + [%{"a" => 42}], + settings: [ + enable_json_type: 1, + input_format_binary_read_json_as_string: 1 + ] + ) + + """ + end + end + + defp encoding_type(type, _opts) do raise ArgumentError, "unsupported type for encoding: #{inspect(type)}" end @@ -184,6 +216,10 @@ defmodule Ch.RowBinary do end end + def encode(:string_json, json) do + encode(:string, Jason.encode_to_iodata!(json)) + end + def encode({:fixed_string, size}, str) when byte_size(str) == size do str end @@ -292,7 +328,9 @@ defmodule Ch.RowBinary do end def encode({:tuple, types}, values) when is_list(types) and is_list(values) do - encode_row(values, types) + # it's OK to pass empty `opts` since they are currently only used for `decoding_types` + # and `types` here have already been pre-processed + encode_row(values, types, _no_opts_needed = []) end def encode({:tuple, types}, nil) when is_list(types) do @@ -461,6 +499,7 @@ defmodule Ch.RowBinary do [[2]] """ + @deprecated "Use `decode_names_and_rows/2` instead" def decode_rows(row_binary_with_names_and_types) def decode_rows(<>), do: skip_names(rest, cols, cols) def decode_rows(<<>>), do: [] @@ -470,14 +509,16 @@ defmodule Ch.RowBinary do Example: - iex> decode_names_and_rows(<<1, 3, "1+1"::bytes, 5, "UInt8"::bytes, 2>>) + iex> decode_names_and_rows(<<1, 3, "1+1"::bytes, 5, "UInt8"::bytes, 2>>, _opts = []) [["1+1"], [2]] """ - def decode_names_and_rows(row_binary_with_names_and_types) + @spec decode_names_and_rows(binary, [Ch.query_option()]) :: [row] + when row: [term] + def decode_names_and_rows(row_binary_with_names_and_types, opts \\ []) - def decode_names_and_rows(<>) do - decode_names(rest, cols, cols, _acc = []) + def decode_names_and_rows(<>, opts) do + decode_names(rest, cols, cols, _acc = [], opts) end @doc """ @@ -489,26 +530,28 @@ defmodule Ch.RowBinary do [[1]] """ - def decode_rows(row_binary, types) - def decode_rows(<<>>, _types), do: [] + @spec decode_rows(binary, [type], [Ch.query_option()]) :: [row] + when row: [term], type: String.t() | atom | tuple + def decode_rows(row_binary, types, opts \\ []) + def decode_rows(<<>>, _types, _opts), do: [] - def decode_rows(<>, types) do - types = decoding_types(types) - decode_rows(types, data, [], [], types) + def decode_rows(<>, types, opts) do + types = decoding_types(types, opts) + decode_rows(types, data, _row = [], _rows = [], types) end @doc false - def decoding_types([type | types]) do - [decoding_type(type) | types] + def decoding_types([type | types], opts) do + [decoding_type(type, opts) | decoding_types(types, opts)] end - def decoding_types([] = done), do: done + def decoding_types([] = done, _opts), do: done - defp decoding_type(t) when is_binary(t) do - decoding_type(Ch.Types.decode(t)) + defp decoding_type(t, opts) when is_binary(t) do + decoding_type(Ch.Types.decode(t), opts) end - defp decoding_type(t) + defp decoding_type(t, _opts) when t in [ :string, :binary, @@ -516,7 +559,6 @@ defmodule Ch.RowBinary do :uuid, :date, :date32, - :json, :ipv4, :ipv6, :point, @@ -524,53 +566,79 @@ defmodule Ch.RowBinary do ], do: t - defp decoding_type({:datetime, _tz} = t), do: t - defp decoding_type({:fixed_string, _len} = t), do: t + defp decoding_type({:datetime, _tz} = t, _opts), do: t + defp decoding_type({:fixed_string, _len} = t, _opts), do: t for size <- [8, 16, 32, 64, 128, 256] do - defp decoding_type(unquote(:"u#{size}") = u), do: u - defp decoding_type(unquote(:"i#{size}") = i), do: i + defp decoding_type(unquote(:"u#{size}") = u, _opts), do: u + defp decoding_type(unquote(:"i#{size}") = i, _opts), do: i end for size <- [32, 64] do - defp decoding_type(unquote(:"f#{size}") = f), do: f + defp decoding_type(unquote(:"f#{size}") = f, _opts), do: f end - defp decoding_type(:datetime = t), do: {t, _tz = nil} + defp decoding_type(:datetime = t, _opts), do: {t, _tz = nil} - defp decoding_type({:array = a, t}), do: {a, decoding_type(t)} + defp decoding_type({:array = a, t}, opts), do: {a, decoding_type(t, opts)} - defp decoding_type({:tuple = t, ts}) do - {t, Enum.map(ts, &decoding_type/1)} + defp decoding_type({:tuple = t, ts}, opts) do + {t, Enum.map(ts, fn t -> decoding_type(t, opts) end)} end - defp decoding_type({:map = m, kt, vt}) do - {m, decoding_type(kt), decoding_type(vt)} + defp decoding_type({:map = m, kt, vt}, opts) do + {m, decoding_type(kt, opts), decoding_type(vt, opts)} end - defp decoding_type({:nullable = n, t}), do: {n, decoding_type(t)} - defp decoding_type({:low_cardinality, t}), do: decoding_type(t) + defp decoding_type({:nullable = n, t}, opts), do: {n, decoding_type(t, opts)} + defp decoding_type({:low_cardinality, t}, opts), do: decoding_type(t, opts) - defp decoding_type({:decimal = t, p, s}), do: {t, decimal_size(p), s} - defp decoding_type({:decimal32, s}), do: {:decimal, 32, s} - defp decoding_type({:decimal64, s}), do: {:decimal, 64, s} - defp decoding_type({:decimal128, s}), do: {:decimal, 128, s} - defp decoding_type({:decimal256, s}), do: {:decimal, 256, s} + defp decoding_type({:decimal = t, p, s}, _opts), do: {t, decimal_size(p), s} + defp decoding_type({:decimal32, s}, _opts), do: {:decimal, 32, s} + defp decoding_type({:decimal64, s}, _opts), do: {:decimal, 64, s} + defp decoding_type({:decimal128, s}, _opts), do: {:decimal, 128, s} + defp decoding_type({:decimal256, s}, _opts), do: {:decimal, 256, s} - defp decoding_type({:datetime64 = t, p}), do: {t, time_unit(p), _tz = nil} - defp decoding_type({:datetime64 = t, p, tz}), do: {t, time_unit(p), tz} + defp decoding_type({:datetime64 = t, p}, _opts), do: {t, time_unit(p), _tz = nil} + defp decoding_type({:datetime64 = t, p, tz}, _opts), do: {t, time_unit(p), tz} - defp decoding_type({e, mappings}) when e in [:enum8, :enum16] do + defp decoding_type({e, mappings}, _opts) when e in [:enum8, :enum16] do {e, Map.new(mappings, fn {k, v} -> {v, k} end)} end - defp decoding_type({:simple_aggregate_function, _f, t}), do: decoding_type(t) + defp decoding_type({:simple_aggregate_function, _f, t}, opts), do: decoding_type(t, opts) + + defp decoding_type(:ring, _opts), do: {:array, :point} + defp decoding_type(:polygon, _opts), do: {:array, {:array, :point}} + defp decoding_type(:multipolygon, _opts), do: {:array, {:array, {:array, :point}}} + + defp decoding_type(:json, opts) do + json_as_string = get_in(opts, [:settings, :output_format_binary_write_json_as_string]) + json_as_string = with 0 <- json_as_string, do: false + + if json_as_string do + :string_json + else + raise ArgumentError, """ + Native JSON decoding is not yet supported, please use :output_format_binary_write_json_as_string setting to decode JSON as string. - defp decoding_type(:ring), do: {:array, :point} - defp decoding_type(:polygon), do: {:array, {:array, :point}} - defp decoding_type(:multipolygon), do: {:array, {:array, {:array, :point}}} + Example: + + Ch.query( + conn, + "select json from test", + _params = [], + settings: [ + enable_json_type: 1, + output_format_binary_write_json_as_string: 1 + ] + ) + + """ + end + end - defp decoding_type(type) do + defp decoding_type(type, _opts) do raise ArgumentError, "unsupported type for decoding: #{inspect(type)}" end @@ -602,7 +670,7 @@ defmodule Ch.RowBinary do end} ] - defp skip_names(<>, 0, count), do: decode_types(rest, count, _acc = []) + defp skip_names(<>, 0, count), do: decode_types(rest, count, _acc = [], _opts = []) for {pattern, value} <- varints do defp skip_names(<>, left, count) do @@ -610,8 +678,8 @@ defmodule Ch.RowBinary do end end - defp decode_names(<>, 0, count, names) do - [:lists.reverse(names) | decode_types(rest, count, _acc = [])] + defp decode_names(<>, 0, count, names, opts) do + [:lists.reverse(names) | decode_types(rest, count, _acc = [], opts)] end for {pattern, value} <- varints do @@ -619,16 +687,17 @@ defmodule Ch.RowBinary do <>, left, count, - acc + acc, + opts ) do - decode_names(rest, left - 1, count, [name | acc]) + decode_names(rest, left - 1, count, [name | acc], opts) end end - defp decode_types(<<>>, 0, _types), do: [] + defp decode_types(<<>>, 0, _types, _opts), do: [] - defp decode_types(<>, 0, types) do - types = types |> decode_types() |> :lists.reverse() + defp decode_types(<>, 0, types, opts) do + types = types |> decode_types(opts) |> :lists.reverse() decode_rows(types, rest, _row = [], _rows = [], types) end @@ -636,18 +705,19 @@ defmodule Ch.RowBinary do defp decode_types( <>, count, - acc + acc, + opts ) do - decode_types(rest, count - 1, [type | acc]) + decode_types(rest, count - 1, [type | acc], opts) end end @doc false - def decode_types([type | types]) do - [decoding_type(Ch.Types.decode(type)) | decode_types(types)] + def decode_types([type | types], opts) do + [decoding_type(Ch.Types.decode(type), opts) | decode_types(types, opts)] end - def decode_types([] = done), do: done + def decode_types([] = done, _opts), do: done @compile inline: [decode_string_decode_rows: 5] @@ -704,10 +774,10 @@ defmodule Ch.RowBinary do defp utf8_size(codepoint) when codepoint <= 0xFFFF, do: 3 defp utf8_size(codepoint) when codepoint <= 0x10FFFF, do: 4 - @compile inline: [decode_json_decode_rows: 5] + @compile inline: [decode_string_json_decode_rows: 5] for {pattern, size} <- varints do - defp decode_json_decode_rows( + defp decode_string_json_decode_rows( <>, types_rest, row, @@ -886,8 +956,8 @@ defmodule Ch.RowBinary do <> = bin decode_rows(types_rest, bin, [Date.add(@epoch_date, d) | row], rows, types) - :json -> - decode_json_decode_rows(bin, types_rest, row, rows, types) + :string_json -> + decode_string_json_decode_rows(bin, types_rest, row, rows, types) {:datetime, timezone} -> <> = bin diff --git a/test/ch/row_binary_test.exs b/test/ch/row_binary_test.exs index 908a182e..f362ba6d 100644 --- a/test/ch/row_binary_test.exs +++ b/test/ch/row_binary_test.exs @@ -212,6 +212,7 @@ defmodule Ch.RowBinaryTest do {"Decimal(23, 11)", {:decimal, _size = 128, _scale = 11}}, {"Bool", :boolean}, {"String", :string}, + {"JSON", :json}, {"FixedString(2)", {:fixed_string, _size = 2}}, {"FixedString(22)", {:fixed_string, _size = 22}}, {"FixedString(222)", {:fixed_string, _size = 222}}, @@ -244,21 +245,17 @@ defmodule Ch.RowBinaryTest do ] Enum.each(spec, fn {encoded, decoded} -> - assert decode_types([encoded]) == [decoded] + assert decode_types([encoded], _opts = []) == [decoded] end) end test "preserves order" do - assert decode_types(["UInt8", "UInt16"]) == [:u8, :u16] + assert decode_types(["UInt8", "UInt16"], _opts = []) == [:u8, :u16] end end describe "decode_rows/1" do test "empty" do - assert decode_rows(<<>>) == [] - end - - test "empty rows" do types = [:u8, :string] num_cols = length(types) @@ -271,6 +268,20 @@ defmodule Ch.RowBinaryTest do assert decode_rows(IO.iodata_to_binary(encoded)) == [] end + test "non empty" do + types = [:u8, :string] + num_cols = length(types) + + encoded = [ + num_cols, + Enum.map(1..num_cols, fn col -> encode(:string, "col#{col}") end), + Enum.map(types, fn type -> encode(:string, Ch.Types.encode(type)) end), + <<1, 2, "ab">> + ] + + assert decode_rows(IO.iodata_to_binary(encoded)) == [[1, "ab"]] + end + test "nan floats" do payload = <<3, 164, 1, 114, 111, 117, 110, 100, 40, 100, 105, 118, 105, 100, 101, 40, 109, 117, 108, From c0b77f3c5705f5fa30beff404d7aac893853f84e Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 24 Mar 2025 00:35:35 +0300 Subject: [PATCH 4/7] manual opts --- test/ch/json_test.exs | 66 ++++++++++++++++++++++++++----------- test/ch/row_binary_test.exs | 12 ++++++- 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/test/ch/json_test.exs b/test/ch/json_test.exs index e285d972..f5598fff 100644 --- a/test/ch/json_test.exs +++ b/test/ch/json_test.exs @@ -2,16 +2,7 @@ defmodule Ch.JSONTest do use ExUnit.Case setup do - conn = - start_supervised!( - {Ch, - database: Ch.Test.database(), - settings: [ - enable_json_type: 1, - input_format_binary_read_json_as_string: 1, - output_format_binary_write_json_as_string: 1 - ]} - ) + conn = start_supervised!({Ch, database: Ch.Test.database(), settings: [enable_json_type: 1]}) on_exit(fn -> Ch.Test.sql_exec("DROP TABLE test", [], database: Ch.Test.database()) @@ -31,7 +22,14 @@ defmodule Ch.JSONTest do ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}') """) - assert Ch.query!(conn, "SELECT json FROM test").rows == [ + assert Ch.query!( + conn, + "SELECT json FROM test", + _no_params = [], + settings: [ + output_format_binary_write_json_as_string: 1 + ] + ).rows == [ [%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"]}], [%{"f" => "Hello, World!"}], [%{"a" => %{"b" => "43", "e" => "10"}, "c" => ["4", "5", "6"]}] @@ -48,7 +46,14 @@ defmodule Ch.JSONTest do ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}') """) - assert Ch.query!(conn, "SELECT json FROM test").rows == [ + assert Ch.query!( + conn, + "SELECT json FROM test", + _no_params = [], + settings: [ + output_format_binary_write_json_as_string: 1 + ] + ).rows == [ [%{"a" => %{"b" => 42}, "c" => ["1", "2", "3"]}], [%{"a" => %{"b" => 0}, "f" => "Hello, World!"}], [%{"a" => %{"b" => 43}, "c" => ["4", "5", "6"]}] @@ -56,9 +61,16 @@ defmodule Ch.JSONTest do end test "Creating JSON using CAST from String", %{conn: conn} do - assert Ch.query!(conn, """ - SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json - """).rows == [ + assert Ch.query!( + conn, + """ + SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON AS json + """, + _no_params = [], + settings: [ + output_format_binary_write_json_as_string: 1 + ] + ).rows == [ [%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"], "d" => "Hello, World!"}] ] end @@ -69,8 +81,11 @@ defmodule Ch.JSONTest do """ SELECT (tuple(42 AS b) AS a, [1, 2, 3] AS c, 'Hello, World!' AS d)::JSON AS json """, - [], - settings: [enable_named_columns_in_function_tuple: 1] + _no_params = [], + settings: [ + enable_named_columns_in_function_tuple: 1, + output_format_binary_write_json_as_string: 1 + ] ).rows == [[%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"], "d" => "Hello, World!"}]] end @@ -80,8 +95,12 @@ defmodule Ch.JSONTest do """ SELECT map('a', map('b', 42), 'c', [1,2,3], 'd', 'Hello, World!')::JSON AS json; """, - [], - settings: [enable_variant_type: 1, use_variant_as_common_type: 1] + _no_params = [], + settings: [ + enable_variant_type: 1, + use_variant_as_common_type: 1, + output_format_binary_write_json_as_string: 1 + ] ).rows == [[%{"a" => %{"b" => "42"}, "c" => ["1", "2", "3"], "d" => "Hello, World!"}]] end @@ -96,7 +115,14 @@ defmodule Ch.JSONTest do ('{"a" : {"b" : 43, "e" : 10, "g" : 43.43}, "c" : [4, 5, 6]}') """) - assert Ch.query!(conn, "SELECT json FROM test").rows == [ + assert Ch.query!( + conn, + "SELECT json FROM test", + _no_params = [], + settings: [ + output_format_binary_write_json_as_string: 1 + ] + ).rows == [ [%{"a" => %{"b" => 42, "g" => 42.42}, "c" => ["1", "2", "3"], "d" => "2020-01-01"}], [%{"a" => %{"b" => 0}, "d" => "2020-01-02", "f" => "Hello, World!"}], [%{"a" => %{"b" => 43, "g" => 43.43}, "c" => ["4", "5", "6"]}] diff --git a/test/ch/row_binary_test.exs b/test/ch/row_binary_test.exs index f362ba6d..e60c2978 100644 --- a/test/ch/row_binary_test.exs +++ b/test/ch/row_binary_test.exs @@ -212,7 +212,6 @@ defmodule Ch.RowBinaryTest do {"Decimal(23, 11)", {:decimal, _size = 128, _scale = 11}}, {"Bool", :boolean}, {"String", :string}, - {"JSON", :json}, {"FixedString(2)", {:fixed_string, _size = 2}}, {"FixedString(22)", {:fixed_string, _size = 22}}, {"FixedString(222)", {:fixed_string, _size = 222}}, @@ -252,6 +251,17 @@ defmodule Ch.RowBinaryTest do test "preserves order" do assert decode_types(["UInt8", "UInt16"], _opts = []) == [:u8, :u16] end + + test "JSON raises without options" do + assert_raise ArgumentError, + ~r/Native JSON decoding is not yet supported/, + fn -> decode_types(["JSON"], _opts = []) end + end + + test "JSON becomes string_json with options" do + opts = [settings: [output_format_binary_write_json_as_string: 1]] + assert decode_types(["JSON"], opts) == [:string_json] + end end describe "decode_rows/1" do From 1eaaf2e3dc8dfb0a031977cbea728fa387239a65 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 24 Mar 2025 00:40:39 +0300 Subject: [PATCH 5/7] skip version < 25 --- test/ch/json_test.exs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/ch/json_test.exs b/test/ch/json_test.exs index f5598fff..80a46e87 100644 --- a/test/ch/json_test.exs +++ b/test/ch/json_test.exs @@ -8,7 +8,13 @@ defmodule Ch.JSONTest do Ch.Test.sql_exec("DROP TABLE test", [], database: Ch.Test.database()) end) - {:ok, conn: conn} + [[ch_version]] = Ch.query!(conn, "select version()").rows + + if ch_version < "25" do + {:ok, skip: true} + else + {:ok, conn: conn} + end end # https://clickhouse.com/docs/en/sql-reference/data-types/newjson#creating-json From 03027c6af17e91ac79c1287f6be260d533cbd8a2 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 24 Mar 2025 00:48:21 +0300 Subject: [PATCH 6/7] skip json tests in ClickHouse < 25 --- test/ch/json_test.exs | 10 +++------- test/test_helper.exs | 5 ++++- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/test/ch/json_test.exs b/test/ch/json_test.exs index 80a46e87..49cc8fcc 100644 --- a/test/ch/json_test.exs +++ b/test/ch/json_test.exs @@ -1,6 +1,8 @@ defmodule Ch.JSONTest do use ExUnit.Case + @moduletag :json + setup do conn = start_supervised!({Ch, database: Ch.Test.database(), settings: [enable_json_type: 1]}) @@ -8,13 +10,7 @@ defmodule Ch.JSONTest do Ch.Test.sql_exec("DROP TABLE test", [], database: Ch.Test.database()) end) - [[ch_version]] = Ch.query!(conn, "select version()").rows - - if ch_version < "25" do - {:ok, skip: true} - else - {:ok, conn: conn} - end + {:ok, conn: conn} end # https://clickhouse.com/docs/en/sql-reference/data-types/newjson#creating-json diff --git a/test/test_helper.exs b/test/test_helper.exs index 56856994..17ed73ee 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -5,4 +5,7 @@ default_test_db = System.get_env("CH_DATABASE", "ch_elixir_test") {:ok, _} = Ch.Test.sql_exec("CREATE DATABASE #{default_test_db}") Application.put_env(:ch, :database, default_test_db) -ExUnit.start(exclude: [:slow]) +{:ok, %{rows: [[ch_version]]}} = Ch.Test.sql_exec("SELECT version()") + +exclude = if ch_version >= "25", do: [], else: [:json] +ExUnit.start(exclude: [:slow | exclude]) From 7a113cacda579a367b2d0292e15335cb28afe8c0 Mon Sep 17 00:00:00 2001 From: ruslandoga Date: Mon, 24 Mar 2025 00:55:51 +0300 Subject: [PATCH 7/7] update connection_test.exs --- test/ch/connection_test.exs | 72 ++++++++++--------------------------- 1 file changed, 18 insertions(+), 54 deletions(-) diff --git a/test/ch/connection_test.exs b/test/ch/connection_test.exs index 1873320e..cefda722 100644 --- a/test/ch/connection_test.exs +++ b/test/ch/connection_test.exs @@ -568,6 +568,7 @@ defmodule Ch.ConnectionTest do }} = Ch.query(conn, "SELECT * FROM t_uuid ORDER BY y") end + @tag :json test "read json as string", %{conn: conn} do assert Ch.query!(conn, ~s|select '{"a":42}'::JSON|, [], settings: [ @@ -590,6 +591,7 @@ defmodule Ch.ConnectionTest do ).rows == [[%{"a" => "a����b"}]] end + @tag :json test "write->read json as string", %{conn: conn} do Ch.query!(conn, "CREATE TABLE test_write_json(json JSON) ENGINE = Memory", [], settings: [ @@ -597,29 +599,33 @@ defmodule Ch.ConnectionTest do ] ) - rowbinary = - Ch.RowBinary.encode_rows( - [ - [Jason.encode_to_iodata!(%{"a" => 42})], - [Jason.encode_to_iodata!(%{"b" => 10})] - ], - _types = [:string] - ) - - Ch.query!(conn, ["insert into test_write_json(json) format RowBinary\n" | rowbinary], [], + Ch.query!( + conn, + "insert into test_write_json(json) format RowBinary", + [ + [%{"a" => 42}], + [%{"b" => 10}] + ], + types: ["JSON"], settings: [ enable_json_type: 1, input_format_binary_read_json_as_string: 1 ] ) - assert Ch.query!(conn, "select json from test_write_json", [], + assert Ch.query!( + conn, + "select json from test_write_json", + _no_params = [], settings: [ enable_json_type: 1, output_format_binary_write_json_as_string: 1 ] ).rows == - [[%{"a" => "42"}], [%{"b" => "10"}]] + [ + [%{"a" => "42"}], + [%{"b" => "10"}] + ] end # https://clickhouse.com/docs/en/sql-reference/data-types/newjson @@ -660,48 +666,6 @@ defmodule Ch.ConnectionTest do # TODO variant (is there?) # TODO dynamic - test "json as string", %{conn: conn} do - # after v25 ClickHouse started rendering numbers in JSON as strings - [[version]] = Ch.query!(conn, "select version()").rows - numbers_as_strings? = version >= "25" - - [expected1, expected2] = - if numbers_as_strings? do - [ - [[~s|{"answer":"42"}|]], - [[~s|{"a":"42"}|], [~s|{"b":"10"}|]] - ] - else - [ - [[~s|{"answer":42}|]], - [[~s|{"a":"42"}|], [~s|{"b":"10"}|]] - ] - end - - assert Ch.query!(conn, ~s|select '{"answer":42}'::JSON::String|, [], - settings: [enable_json_type: 1] - ).rows == expected1 - - Ch.query!(conn, "CREATE TABLE test_json_as_string(json JSON) ENGINE = Memory", [], - settings: [enable_json_type: 1] - ) - - Ch.query!( - conn, - "INSERT INTO test_json_as_string(json) FORMAT RowBinary", - _rows = [[Jason.encode_to_iodata!(%{"a" => 42})], [Jason.encode_to_iodata!(%{"b" => 10})]], - types: [:string], - settings: [ - enable_json_type: 1, - input_format_binary_read_json_as_string: 1 - ] - ) - - assert Ch.query!(conn, "select json::String from test_json_as_string", [], - settings: [enable_json_type: 1] - ).rows == expected2 - end - # TODO enum16 test "enum8", %{conn: conn} do