Skip to content

Add python-level benchmarks for item-level CRS operations to python/benchmarks #548

@paleolimbot

Description

@paleolimbot

I did an informal set of benchmarks to check some of my hunches about the performance of item-level CRS as implemented here. The results are promising! ST_Transform is much slower (requires resolving many transforms instead of one, which is slower even though we cache them) and ST_SetSRID is slower (requires creating the CRS array), but other than that the checking of the CRSes is apparently fast.

I put together a notebook of this ( https://gist.github.com/paleolimbot/19b9b0002f2fd8f2b8df4e2aea39b408 , rendered below) but it should be converted to an actual benchmark at some point.

import sedona.db

sd = sedona.db.connect()
sd.options.interactive = True
sd.sql("SET datafusion.execution.target_partitions = 1").execute()
0
# Use northern hemisphere to make the UTM zone ID math a little easier
sd.funcs.table.sd_random_geometry(
    "Point", 1_000_000, bounds=[-180, 0, 180, 90], seed=9547290
).to_view("pts", overwrite=True)
sd.sql("""
    SELECT
        (floor((ST_X(geometry) + 180) / 6) + 1)::INTEGER AS utm_zone,
        -- e.g., EPSG:32620 is UTM zone 20N
        'EPSG:326' || LPAD((floor((ST_X(geometry) + 180) / 6) + 1)::INTEGER::TEXT, 2, '0') AS utm_code,
        -- Also resolve an srid while we're here
        ('326' || LPAD((floor((ST_X(geometry) + 180) / 6) + 1)::INTEGER::TEXT, 2, '0'))::INTEGER AS utm_srid,
        -- For one of the benchmarks we need an array of SRIDs
        3857 AS srid_array,
        -- A geometry with a type-level SRID
        ST_SetSRID(geometry, 4326) AS geom,
        -- A geometry with an item-level SRID
        ST_SetSRID(geometry, id - id + 4326) AS item_crs
    FROM pts
""").to_memtable().to_view("pts", overwrite=True)
sd.view("pts")
┌──────────┬────────────┬──────────┬────────────┬─────────────────────────┬────────────────────────┐
│ utm_zone ┆  utm_code  ┆ utm_srid ┆ srid_array ┆           geom          ┆        item_crs        │
│   int32  ┆    utf8    ┆   int32  ┆    int64   ┆         geometry        ┆         struct         │
╞══════════╪════════════╪══════════╪════════════╪═════════════════════════╪════════════════════════╡
│       56 ┆ EPSG:32656 ┆    32656 ┆       3857 ┆ POINT(154.594195187868… ┆ {item: POINT(154.5941… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       19 ┆ EPSG:32619 ┆    32619 ┆       3857 ┆ POINT(-66.913979903712… ┆ {item: POINT(-66.9139… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       38 ┆ EPSG:32638 ┆    32638 ┆       3857 ┆ POINT(45.4933820760052… ┆ {item: POINT(45.49338… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       17 ┆ EPSG:32617 ┆    32617 ┆       3857 ┆ POINT(-78.610730157315… ┆ {item: POINT(-78.6107… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       28 ┆ EPSG:32628 ┆    32628 ┆       3857 ┆ POINT(-17.562196103398… ┆ {item: POINT(-17.5621… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       35 ┆ EPSG:32635 ┆    32635 ┆       3857 ┆ POINT(28.1834126352999… ┆ {item: POINT(28.18341… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       37 ┆ EPSG:32637 ┆    32637 ┆       3857 ┆ POINT(41.0192584222319… ┆ {item: POINT(41.01925… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       44 ┆ EPSG:32644 ┆    32644 ┆       3857 ┆ POINT(79.5360801455188… ┆ {item: POINT(79.53608… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       29 ┆ EPSG:32629 ┆    32629 ┆       3857 ┆ POINT(-6.4542505711957… ┆ {item: POINT(-6.45425… │
├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│       19 ┆ EPSG:32619 ┆    32619 ┆       3857 ┆ POINT(-67.236633829464… ┆ {item: POINT(-67.2366… │
└──────────┴────────────┴──────────┴────────────┴─────────────────────────┴────────────────────────┘
# ST_Transform with type-level CRS to type-level CRS
%timeit sd.sql("""SELECT ST_Transform(geom, 3857) FROM pts""").execute()
80.3 ms ± 200 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# The same transform but writing out the result as item crs
%timeit sd.sql("""SELECT ST_Transform(geom, srid_array) FROM pts""").execute()
190 ms ± 647 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# ST_SetSRID with type-level CRS (very fast, should be zero-copy)
%timeit sd.sql("""SELECT ST_SetSRID(geom, 3857) FROM pts""").execute()
1.05 ms ± 7.57 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
# ST_SetSRID with item-level CRS (slower because it has to create an array of CRSes)
%timeit sd.sql("""SELECT ST_SetSRID(geom, srid_array) FROM pts""").execute()
33.9 ms ± 615 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Execute a predicate with type-level CRS
%timeit sd.sql("""SELECT ST_Intersects(geom, geom) FROM pts""").execute()
28.1 ms ± 133 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Execute a predicate with item-level CRS. This should check every CRS.
# Only a tiny bit slower!
%timeit sd.sql("""SELECT ST_Intersects(item_crs, item_crs) FROM pts""").execute()
31.8 ms ± 208 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Execute a unary operation that must propagate the input CRS. This should
# have very little overhead because the CRS array should be zero-copy transferred
# to the output.
%timeit sd.sql("""SELECT ST_Envelope(geom) FROM pts""").execute()
%timeit sd.sql("""SELECT ST_Envelope(item_crs) FROM pts""").execute()
18 ms ± 107 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
19 ms ± 107 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Real world demo of something you might want to do with item-level CRSes:
# project some lon/lat data into local UTM zones compute a buffer, and transform back.
sd.sql("SET datafusion.execution.target_partitions = 0").execute()
sd.sql("""
    SELECT
        ST_Transform(ST_Buffer(ST_Transform(geom, utm_srid), 1000), 4326) AS buffered,
    FROM pts
""").execute()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions