-
Notifications
You must be signed in to change notification settings - Fork 42
Open
Description
I did an informal set of benchmarks to check some of my hunches about the performance of item-level CRS as implemented here. The results are promising! ST_Transform is much slower (requires resolving many transforms instead of one, which is slower even though we cache them) and ST_SetSRID is slower (requires creating the CRS array), but other than that the checking of the CRSes is apparently fast.
I put together a notebook of this ( https://gist.github.com/paleolimbot/19b9b0002f2fd8f2b8df4e2aea39b408 , rendered below) but it should be converted to an actual benchmark at some point.
import sedona.db
sd = sedona.db.connect()
sd.options.interactive = True
sd.sql("SET datafusion.execution.target_partitions = 1").execute()0
# Use northern hemisphere to make the UTM zone ID math a little easier
sd.funcs.table.sd_random_geometry(
"Point", 1_000_000, bounds=[-180, 0, 180, 90], seed=9547290
).to_view("pts", overwrite=True)
sd.sql("""
SELECT
(floor((ST_X(geometry) + 180) / 6) + 1)::INTEGER AS utm_zone,
-- e.g., EPSG:32620 is UTM zone 20N
'EPSG:326' || LPAD((floor((ST_X(geometry) + 180) / 6) + 1)::INTEGER::TEXT, 2, '0') AS utm_code,
-- Also resolve an srid while we're here
('326' || LPAD((floor((ST_X(geometry) + 180) / 6) + 1)::INTEGER::TEXT, 2, '0'))::INTEGER AS utm_srid,
-- For one of the benchmarks we need an array of SRIDs
3857 AS srid_array,
-- A geometry with a type-level SRID
ST_SetSRID(geometry, 4326) AS geom,
-- A geometry with an item-level SRID
ST_SetSRID(geometry, id - id + 4326) AS item_crs
FROM pts
""").to_memtable().to_view("pts", overwrite=True)sd.view("pts")┌──────────┬────────────┬──────────┬────────────┬─────────────────────────┬────────────────────────┐
│ utm_zone ┆ utm_code ┆ utm_srid ┆ srid_array ┆ geom ┆ item_crs │
│ int32 ┆ utf8 ┆ int32 ┆ int64 ┆ geometry ┆ struct │
╞══════════╪════════════╪══════════╪════════════╪═════════════════════════╪════════════════════════╡
│ 56 ┆ EPSG:32656 ┆ 32656 ┆ 3857 ┆ POINT(154.594195187868… ┆ {item: POINT(154.5941… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 19 ┆ EPSG:32619 ┆ 32619 ┆ 3857 ┆ POINT(-66.913979903712… ┆ {item: POINT(-66.9139… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 38 ┆ EPSG:32638 ┆ 32638 ┆ 3857 ┆ POINT(45.4933820760052… ┆ {item: POINT(45.49338… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 17 ┆ EPSG:32617 ┆ 32617 ┆ 3857 ┆ POINT(-78.610730157315… ┆ {item: POINT(-78.6107… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 28 ┆ EPSG:32628 ┆ 32628 ┆ 3857 ┆ POINT(-17.562196103398… ┆ {item: POINT(-17.5621… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 35 ┆ EPSG:32635 ┆ 32635 ┆ 3857 ┆ POINT(28.1834126352999… ┆ {item: POINT(28.18341… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 37 ┆ EPSG:32637 ┆ 32637 ┆ 3857 ┆ POINT(41.0192584222319… ┆ {item: POINT(41.01925… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 44 ┆ EPSG:32644 ┆ 32644 ┆ 3857 ┆ POINT(79.5360801455188… ┆ {item: POINT(79.53608… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 29 ┆ EPSG:32629 ┆ 32629 ┆ 3857 ┆ POINT(-6.4542505711957… ┆ {item: POINT(-6.45425… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 19 ┆ EPSG:32619 ┆ 32619 ┆ 3857 ┆ POINT(-67.236633829464… ┆ {item: POINT(-67.2366… │
└──────────┴────────────┴──────────┴────────────┴─────────────────────────┴────────────────────────┘
# ST_Transform with type-level CRS to type-level CRS
%timeit sd.sql("""SELECT ST_Transform(geom, 3857) FROM pts""").execute()80.3 ms ± 200 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# The same transform but writing out the result as item crs
%timeit sd.sql("""SELECT ST_Transform(geom, srid_array) FROM pts""").execute()190 ms ± 647 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# ST_SetSRID with type-level CRS (very fast, should be zero-copy)
%timeit sd.sql("""SELECT ST_SetSRID(geom, 3857) FROM pts""").execute()1.05 ms ± 7.57 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
# ST_SetSRID with item-level CRS (slower because it has to create an array of CRSes)
%timeit sd.sql("""SELECT ST_SetSRID(geom, srid_array) FROM pts""").execute()33.9 ms ± 615 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Execute a predicate with type-level CRS
%timeit sd.sql("""SELECT ST_Intersects(geom, geom) FROM pts""").execute()28.1 ms ± 133 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Execute a predicate with item-level CRS. This should check every CRS.
# Only a tiny bit slower!
%timeit sd.sql("""SELECT ST_Intersects(item_crs, item_crs) FROM pts""").execute()31.8 ms ± 208 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Execute a unary operation that must propagate the input CRS. This should
# have very little overhead because the CRS array should be zero-copy transferred
# to the output.
%timeit sd.sql("""SELECT ST_Envelope(geom) FROM pts""").execute()
%timeit sd.sql("""SELECT ST_Envelope(item_crs) FROM pts""").execute()18 ms ± 107 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
19 ms ± 107 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Real world demo of something you might want to do with item-level CRSes:
# project some lon/lat data into local UTM zones compute a buffer, and transform back.
sd.sql("SET datafusion.execution.target_partitions = 0").execute()
sd.sql("""
SELECT
ST_Transform(ST_Buffer(ST_Transform(geom, utm_srid), 1000), 4326) AS buffered,
FROM pts
""").execute()jiayuasu
Metadata
Metadata
Assignees
Labels
No labels