From d102363344268be252dd8e0ceb1a71d72c27a64f Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 21:53:21 -0400 Subject: [PATCH 001/117] chore: Add .aider* to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0a19790..17abd6d 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ cython_debug/ # PyPI configuration file .pypirc +.aider* From eb8aed280505806dc212b57646dd319e43bef890 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 21:56:05 -0400 Subject: [PATCH 002/117] docs: Add README with data model and API documentation --- README.md | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 34b2329..fd6aaf1 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,59 @@ -# cvec-python -CVector SDK for Python +# CVec Client Library + +# Data Model + +This SDK integrates directly with CVector's database. Each tenant has a schema and a database user, both named for the tenant. The API Key is the password of the user. The database user is restricted to only have access to the tenant's schema. Here are the available database tables: + +```sql +CREATE TABLE tag_data ( + tag_name_id INTEGER NOT NULL, + tag_value_changed_at TIMESTAMP WITH TIME ZONE, + tag_value DOUBLE PRECISION +) +``` + +```sql +CREATE TABLE tag_data_str ( + tag_name_id INTEGER NOT NULL, + tag_value_changed_at timestamptz NOT NULL, + tag_value text +); +``` + +```sql +CREATE TABLE tag_names ( + id SERIAL PRIMARY KEY, + normalized_name VARCHAR NOT NULL, + birth_at TIMESTAMPTZ NULL, + death_at TIMESTAMPTZ NULL +); +``` + +```sql +CREATE VIEW metrics AS + SELECT td.tag_value AS value, + td.tag_value_changed_at AS "time", + tn.normalized_name AS metric + FROM tag_data td + JOIN tag_names tn ON td.tag_name_id = tn.id; +``` + +# CVec Class + +The SDK provides an API client class named `CVec` with the following functions. 
+ +## `__init__(?host, ?tenant, ?api_key, ?default_time_range)` + +Set up the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The default_time_range constrains most API calls, and can be overridden by the time_range argument to each API function. + +## `get_spans(tag_name, ?time_range, ?limit)` + +Return all of the time spans where a tag has a constant value. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. + +## `get_metric_data(?tag_names, ?time_range)` + +Return all data-points within a given time-range, optionally selecting a given list of tags. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. + +## `get_tags(?time_range)` + +Return a list of tags that had at least one transition in the given time range. All tags are returned if no time_range is given. Each tag has {id, name, birth_at, death_at}. 
From 0513e537beeab41707306163ede64a80e5731926 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:07:36 -0400 Subject: [PATCH 003/117] chore: install poetry --- pyproject.toml | 19 +++++++++++++++++++ src/cvec/__init__.py | 0 tests/__init__.py | 0 3 files changed, 19 insertions(+) create mode 100644 pyproject.toml create mode 100644 src/cvec/__init__.py create mode 100644 tests/__init__.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a0004b0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "cvec" +version = "0.1.0" +description = "" +authors = [ + {name = "Joshua Napoli",email = "jnapoli@cvector.energy"} +] +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ +] + +[tool.poetry] +packages = [{include = "cvec", from = "src"}] + + +[build-system] +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/src/cvec/__init__.py b/src/cvec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 945eb9b7328986025af89e747b38b2d0529cd76b Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:17:19 -0400 Subject: [PATCH 004/117] chore: add black --- poetry.lock | 132 +++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 3 ++ 2 files changed, 135 insertions(+) create mode 100644 poetry.lock diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..f7ea719 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,132 @@ +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. + +[[package]] +name = "black" +version = "25.1.0" +description = "The uncompromising code formatter." 
+optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, + {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, + {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, + {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, + {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, + {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, + {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, + {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, + {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, + {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, + {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, + {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, + {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, + {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, + {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, + {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, + {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, + {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, + {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, + {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, + {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, + {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, +] + +[package.dependencies] +click = ">=8.0.0" +mypy-extensions = ">=0.4.3" +packaging = ">=22.0" +pathspec = ">=0.9.0" +platformdirs = ">=2" + +[package.extras] +colorama = ["colorama (>=0.4.3)"] +d = ["aiohttp (>=3.10)"] +jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] +uvloop = ["uvloop (>=0.15.2)"] + +[[package]] +name = "click" +version = "8.2.0" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "click-8.2.0-py3-none-any.whl", hash = 
"sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c"}, + {file = "click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +markers = "platform_system == \"Windows\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"}, + {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, +] + +[[package]] +name = "packaging" +version = "25.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +description = "Utility library for gitignore style pattern matching of file paths." 
+optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, + {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, +] + +[[package]] +name = "platformdirs" +version = "4.3.8" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, + {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] +type = ["mypy (>=1.14.1)"] + +[metadata] +lock-version = "2.1" +python-versions = ">=3.13" +content-hash = "40ee54797d48b9065690dd8e6731a17378a51772a7a94a507f6815312d3aba14" diff --git a/pyproject.toml b/pyproject.toml index a0004b0..bb1e29b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,9 @@ dependencies = [ packages = [{include = "cvec", from = "src"}] +[tool.poetry.group.dev.dependencies] +black = "^25.1.0" + [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] build-backend = "poetry.core.masonry.api" From bdbe1b11780b291f204982220dd0fef8e6751233 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:17:56 -0400 Subject: [PATCH 005/117] chore: add pytest --- poetry.lock | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index f7ea719..45898ff 100644 --- 
a/poetry.lock +++ b/poetry.lock @@ -67,12 +67,24 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["dev"] -markers = "platform_system == \"Windows\"" +markers = "platform_system == \"Windows\" or sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -126,7 +138,44 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.14.1)"] +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pytest" +version = "8.3.5" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions 
= ">=3.8" +groups = ["dev"] +files = [ + {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, + {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=1.5,<2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + [metadata] lock-version = "2.1" python-versions = ">=3.13" -content-hash = "40ee54797d48b9065690dd8e6731a17378a51772a7a94a507f6815312d3aba14" +content-hash = "773ac4ca93ee1220727fd04b0111b62d847ff4232c5708b02947991c54c13f9c" diff --git a/pyproject.toml b/pyproject.toml index bb1e29b..fcf0806 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ packages = [{include = "cvec", from = "src"}] [tool.poetry.group.dev.dependencies] black = "^25.1.0" +pytest = "^8.3.5" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] From 22a210a01b249baaa9740a1171d8ac8819130de4 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:20:42 -0400 Subject: [PATCH 006/117] feat: Create CVec class scaffold with basic methods --- src/cvec/cvec.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/cvec/cvec.py diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py new file mode 100644 index 0000000..f413997 --- /dev/null +++ b/src/cvec/cvec.py @@ -0,0 +1,46 @@ +import pandas as pd + +class CVec: + """ + CVec API Client + """ + + def __init__(self, host=None, tenant=None, api_key=None, default_time_range=None): + """ + Set up the SDK with the given host and API Key. + The host and API key are loaded from environment variables CVEC_HOST, + CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. 
+ The default_time_range constrains most API calls, and can be overridden + by the time_range argument to each API function. + """ + # Implementation to be added + pass + + def get_spans(self, tag_name, time_range=None, limit=None): + """ + Return all of the time spans where a tag has a constant value. + The function returns a list of time-ranges with the value for each time-range. + Returns a list of spans. Each span has the following fields: + {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. + In a future version of the SDK, spans can be annotated, edited, and deleted. + """ + # Implementation to be added + return [] + + def get_metric_data(self, tag_names=None, time_range=None): + """ + Return all data-points within a given time-range, optionally selecting a given list of tags. + The return value is a Pandas DataFrame with three columns: tag_name, time, value. + One row is returned for each tag value transition. + """ + # Implementation to be added + return pd.DataFrame(columns=["tag_name", "time", "value"]) + + def get_tags(self, time_range=None): + """ + Return a list of tags that had at least one transition in the given time range. + All tags are returned if no time_range is given. + Each tag has {id, name, birth_at, death_at}. 
+ """ + # Implementation to be added + return [] From f39ad3ca2c3d1e60a2ef80f2dfb54626334ab68e Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:21:53 -0400 Subject: [PATCH 007/117] feat: Add pandas as a project dependency --- poetry.lock | 201 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 2 files changed, 201 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 45898ff..36ff70a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -97,6 +97,71 @@ files = [ {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] +[[package]] +name = "numpy" +version = "2.2.5" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26"}, + {file = "numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a"}, + {file = "numpy-2.2.5-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:19f4718c9012e3baea91a7dba661dcab2451cda2550678dc30d53acb91a7290f"}, + {file = "numpy-2.2.5-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:eb7fd5b184e5d277afa9ec0ad5e4eb562ecff541e7f60e69ee69c8d59e9aeaba"}, + {file = "numpy-2.2.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6413d48a9be53e183eb06495d8e3b006ef8f87c324af68241bbe7a39e8ff54c3"}, + {file = "numpy-2.2.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7451f92eddf8503c9b8aa4fe6aa7e87fd51a29c2cfc5f7dbd72efde6c65acf57"}, + {file = "numpy-2.2.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0bcb1d057b7571334139129b7f941588f69ce7c4ed15a9d6162b2ea54ded700c"}, + {file = "numpy-2.2.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:36ab5b23915887543441efd0417e6a3baa08634308894316f446027611b53bf1"}, + {file = "numpy-2.2.5-cp310-cp310-win32.whl", hash = "sha256:422cc684f17bc963da5f59a31530b3936f57c95a29743056ef7a7903a5dbdf88"}, + {file = "numpy-2.2.5-cp310-cp310-win_amd64.whl", hash = "sha256:e4f0b035d9d0ed519c813ee23e0a733db81ec37d2e9503afbb6e54ccfdee0fa7"}, + {file = "numpy-2.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c42365005c7a6c42436a54d28c43fe0e01ca11eb2ac3cefe796c25a5f98e5e9b"}, + {file = "numpy-2.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:498815b96f67dc347e03b719ef49c772589fb74b8ee9ea2c37feae915ad6ebda"}, + {file = "numpy-2.2.5-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6411f744f7f20081b1b4e7112e0f4c9c5b08f94b9f086e6f0adf3645f85d3a4d"}, + {file = "numpy-2.2.5-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9de6832228f617c9ef45d948ec1cd8949c482238d68b2477e6f642c33a7b0a54"}, + {file = "numpy-2.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:369e0d4647c17c9363244f3468f2227d557a74b6781cb62ce57cf3ef5cc7c610"}, + {file = "numpy-2.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:262d23f383170f99cd9191a7c85b9a50970fe9069b2f8ab5d786eca8a675d60b"}, + {file = "numpy-2.2.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa70fdbdc3b169d69e8c59e65c07a1c9351ceb438e627f0fdcd471015cd956be"}, + {file = "numpy-2.2.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37e32e985f03c06206582a7323ef926b4e78bdaa6915095ef08070471865b906"}, + {file = "numpy-2.2.5-cp311-cp311-win32.whl", hash = "sha256:f5045039100ed58fa817a6227a356240ea1b9a1bc141018864c306c1a16d4175"}, + {file = "numpy-2.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:b13f04968b46ad705f7c8a80122a42ae8f620536ea38cf4bdd374302926424dd"}, + {file = "numpy-2.2.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ee461a4eaab4f165b68780a6a1af95fb23a29932be7569b9fab666c407969051"}, + {file = 
"numpy-2.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec31367fd6a255dc8de4772bd1658c3e926d8e860a0b6e922b615e532d320ddc"}, + {file = "numpy-2.2.5-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47834cde750d3c9f4e52c6ca28a7361859fcaf52695c7dc3cc1a720b8922683e"}, + {file = "numpy-2.2.5-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:2c1a1c6ccce4022383583a6ded7bbcda22fc635eb4eb1e0a053336425ed36dfa"}, + {file = "numpy-2.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d75f338f5f79ee23548b03d801d28a505198297534f62416391857ea0479571"}, + {file = "numpy-2.2.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a801fef99668f309b88640e28d261991bfad9617c27beda4a3aec4f217ea073"}, + {file = "numpy-2.2.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:abe38cd8381245a7f49967a6010e77dbf3680bd3627c0fe4362dd693b404c7f8"}, + {file = "numpy-2.2.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5a0ac90e46fdb5649ab6369d1ab6104bfe5854ab19b645bf5cda0127a13034ae"}, + {file = "numpy-2.2.5-cp312-cp312-win32.whl", hash = "sha256:0cd48122a6b7eab8f06404805b1bd5856200e3ed6f8a1b9a194f9d9054631beb"}, + {file = "numpy-2.2.5-cp312-cp312-win_amd64.whl", hash = "sha256:ced69262a8278547e63409b2653b372bf4baff0870c57efa76c5703fd6543282"}, + {file = "numpy-2.2.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:059b51b658f4414fff78c6d7b1b4e18283ab5fa56d270ff212d5ba0c561846f4"}, + {file = "numpy-2.2.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:47f9ed103af0bc63182609044b0490747e03bd20a67e391192dde119bf43d52f"}, + {file = "numpy-2.2.5-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:261a1ef047751bb02f29dfe337230b5882b54521ca121fc7f62668133cb119c9"}, + {file = "numpy-2.2.5-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4520caa3807c1ceb005d125a75e715567806fed67e315cea619d5ec6e75a4191"}, + {file = "numpy-2.2.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3d14b17b9be5f9c9301f43d2e2a4886a33b53f4e6fdf9ca2f4cc60aeeee76372"}, + {file = "numpy-2.2.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba321813a00e508d5421104464510cc962a6f791aa2fca1c97b1e65027da80d"}, + {file = "numpy-2.2.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4cbdef3ddf777423060c6f81b5694bad2dc9675f110c4b2a60dc0181543fac7"}, + {file = "numpy-2.2.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:54088a5a147ab71a8e7fdfd8c3601972751ded0739c6b696ad9cb0343e21ab73"}, + {file = "numpy-2.2.5-cp313-cp313-win32.whl", hash = "sha256:c8b82a55ef86a2d8e81b63da85e55f5537d2157165be1cb2ce7cfa57b6aef38b"}, + {file = "numpy-2.2.5-cp313-cp313-win_amd64.whl", hash = "sha256:d8882a829fd779f0f43998e931c466802a77ca1ee0fe25a3abe50278616b1471"}, + {file = "numpy-2.2.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e8b025c351b9f0e8b5436cf28a07fa4ac0204d67b38f01433ac7f9b870fa38c6"}, + {file = "numpy-2.2.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8dfa94b6a4374e7851bbb6f35e6ded2120b752b063e6acdd3157e4d2bb922eba"}, + {file = "numpy-2.2.5-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:97c8425d4e26437e65e1d189d22dff4a079b747ff9c2788057bfb8114ce1e133"}, + {file = "numpy-2.2.5-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:352d330048c055ea6db701130abc48a21bec690a8d38f8284e00fab256dc1376"}, + {file = "numpy-2.2.5-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b4c0773b6ada798f51f0f8e30c054d32304ccc6e9c5d93d46cb26f3d385ab19"}, + {file = "numpy-2.2.5-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55f09e00d4dccd76b179c0f18a44f041e5332fd0e022886ba1c0bbf3ea4a18d0"}, + {file = "numpy-2.2.5-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:02f226baeefa68f7d579e213d0f3493496397d8f1cff5e2b222af274c86a552a"}, + {file = "numpy-2.2.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:c26843fd58f65da9491165072da2cccc372530681de481ef670dcc8e27cfb066"}, + {file = "numpy-2.2.5-cp313-cp313t-win32.whl", hash = "sha256:1a161c2c79ab30fe4501d5a2bbfe8b162490757cf90b7f05be8b80bc02f7bb8e"}, + {file = "numpy-2.2.5-cp313-cp313t-win_amd64.whl", hash = "sha256:d403c84991b5ad291d3809bace5e85f4bbf44a04bdc9a88ed2bb1807b3360bb8"}, + {file = "numpy-2.2.5-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b4ea7e1cff6784e58fe281ce7e7f05036b3e1c89c6f922a6bfbc0a7e8768adbe"}, + {file = "numpy-2.2.5-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:d7543263084a85fbc09c704b515395398d31d6395518446237eac219eab9e55e"}, + {file = "numpy-2.2.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0255732338c4fdd00996c0421884ea8a3651eea555c3a56b84892b66f696eb70"}, + {file = "numpy-2.2.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2e3bdadaba0e040d1e7ab39db73e0afe2c74ae277f5614dad53eadbecbbb169"}, + {file = "numpy-2.2.5.tar.gz", hash = "sha256:a9c0d994680cd991b1cb772e8b297340085466a6fe964bc9d4e80f5e2f43c291"}, +] + [[package]] name = "packaging" version = "25.0" @@ -109,6 +174,89 @@ files = [ {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, ] +[[package]] +name = "pandas" +version = "2.2.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = 
"pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = 
"pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = 
"pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +] + +[package.dependencies] +numpy = {version = ">=1.26.0", markers = 
"python_version >= \"3.12\""} +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow 
(>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + [[package]] name = "pathspec" version = "0.12.1" @@ -175,7 +323,58 @@ pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2025.2" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "tzdata" +version = "2025.2" +description = 
"Provider of IANA time zone data" +optional = false +python-versions = ">=2" +groups = ["main"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + [metadata] lock-version = "2.1" python-versions = ">=3.13" -content-hash = "773ac4ca93ee1220727fd04b0111b62d847ff4232c5708b02947991c54c13f9c" +content-hash = "271f296fad705649f3ce2848b4fab4afbdb1e6dda922ce6844f9c31a0136ee5f" diff --git a/pyproject.toml b/pyproject.toml index fcf0806..fd3e27d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ authors = [ readme = "README.md" requires-python = ">=3.13" dependencies = [ + "pandas (>=2.2.3,<3.0.0)" ] [tool.poetry] From 0ea449ba9f0192c3b5eb30e65419345df4897e00 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:22:35 -0400 Subject: [PATCH 008/117] chore: setup aider --- .aider.conf.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .aider.conf.yml diff --git a/.aider.conf.yml b/.aider.conf.yml new file mode 100644 index 0000000..9be80fb --- /dev/null +++ b/.aider.conf.yml @@ -0,0 +1,7 @@ +lint-cmd: + - "python: poetry run black" +auto-lint: true +test-cmd: poetry run pytest +auto-test: true +read: + - README.md From 5b8a5b3321c4bfdf16f0f635644827ded80e02b7 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:23:56 -0400 Subject: [PATCH 009/117] feat: Implement CVec class constructor with env var fallback. 
--- src/cvec/cvec.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index f413997..832d05f 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,3 +1,4 @@ +import os import pandas as pd class CVec: @@ -13,8 +14,17 @@ def __init__(self, host=None, tenant=None, api_key=None, default_time_range=None The default_time_range constrains most API keys, and can be overridden by the time_range argument to each API function. """ - # Implementation to be added - pass + self.host = host or os.environ.get("CVEC_HOST") + self.tenant = tenant or os.environ.get("CVEC_TENANT") + self.api_key = api_key or os.environ.get("CVEC_API_KEY") + self.default_time_range = default_time_range + + if not self.host: + raise ValueError("CVEC_HOST must be set either as an argument or environment variable") + if not self.tenant: + raise ValueError("CVEC_TENANT must be set either as an argument or environment variable") + if not self.api_key: + raise ValueError("CVEC_API_KEY must be set either as an argument or environment variable") def get_spans(self, tag_name, time_range=None, limit=None): """ From f296e53d5562fb86184dc9b41cb8d4d5b61d82b2 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:24:00 -0400 Subject: [PATCH 010/117] style: Run linter on cvec.py --- src/cvec/cvec.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 832d05f..04b0550 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,6 +1,7 @@ import os import pandas as pd + class CVec: """ CVec API Client @@ -20,11 +21,17 @@ def __init__(self, host=None, tenant=None, api_key=None, default_time_range=None self.default_time_range = default_time_range if not self.host: - raise ValueError("CVEC_HOST must be set either as an argument or environment variable") + raise ValueError( + "CVEC_HOST must be set either as an argument or environment variable" + ) if not 
self.tenant: - raise ValueError("CVEC_TENANT must be set either as an argument or environment variable") + raise ValueError( + "CVEC_TENANT must be set either as an argument or environment variable" + ) if not self.api_key: - raise ValueError("CVEC_API_KEY must be set either as an argument or environment variable") + raise ValueError( + "CVEC_API_KEY must be set either as an argument or environment variable" + ) def get_spans(self, tag_name, time_range=None, limit=None): """ From e1974da14f7b3340da409032368bc7247ba4d6a3 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:25:14 -0400 Subject: [PATCH 011/117] test: Add unit tests for the CVec constructor --- tests/test_cvec.py | 102 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/test_cvec.py diff --git a/tests/test_cvec.py b/tests/test_cvec.py new file mode 100644 index 0000000..2b90544 --- /dev/null +++ b/tests/test_cvec.py @@ -0,0 +1,102 @@ +import pytest +import os +from unittest.mock import patch +from cvec.cvec import CVec + + +class TestCVecConstructor: + def test_constructor_with_arguments(self): + """Test CVec constructor with all arguments provided.""" + client = CVec( + host="test_host", + tenant="test_tenant", + api_key="test_api_key", + default_time_range="test_range", + ) + assert client.host == "test_host" + assert client.tenant == "test_tenant" + assert client.api_key == "test_api_key" + assert client.default_time_range == "test_range" + + @patch.dict( + os.environ, + { + "CVEC_HOST": "env_host", + "CVEC_TENANT": "env_tenant", + "CVEC_API_KEY": "env_api_key", + }, + clear=True, + ) + def test_constructor_with_env_vars(self): + """Test CVec constructor with environment variables.""" + client = CVec(default_time_range="env_range") + assert client.host == "env_host" + assert client.tenant == "env_tenant" + assert client.api_key == "env_api_key" + assert client.default_time_range == "env_range" + + 
@patch.dict(os.environ, {}, clear=True) + def test_constructor_missing_host_raises_value_error(self): + """Test CVec constructor raises ValueError if host is missing.""" + with pytest.raises( + ValueError, + match="CVEC_HOST must be set either as an argument or environment variable", + ): + CVec(tenant="test_tenant", api_key="test_api_key") + + @patch.dict(os.environ, {}, clear=True) + def test_constructor_missing_tenant_raises_value_error(self): + """Test CVec constructor raises ValueError if tenant is missing.""" + with pytest.raises( + ValueError, + match="CVEC_TENANT must be set either as an argument or environment variable", + ): + CVec(host="test_host", api_key="test_api_key") + + @patch.dict(os.environ, {}, clear=True) + def test_constructor_missing_api_key_raises_value_error(self): + """Test CVec constructor raises ValueError if api_key is missing.""" + with pytest.raises( + ValueError, + match="CVEC_API_KEY must be set either as an argument or environment variable", + ): + CVec(host="test_host", tenant="test_tenant") + + @patch.dict( + os.environ, + { + "CVEC_HOST": "env_host", + # CVEC_TENANT is missing + "CVEC_API_KEY": "env_api_key", + }, + clear=True, + ) + def test_constructor_missing_tenant_env_var_raises_value_error(self): + """Test CVec constructor raises ValueError if CVEC_TENANT env var is missing.""" + with pytest.raises( + ValueError, + match="CVEC_TENANT must be set either as an argument or environment variable", + ): + CVec() + + def test_constructor_args_override_env_vars(self): + """Test CVec constructor arguments override environment variables.""" + with patch.dict( + os.environ, + { + "CVEC_HOST": "env_host", + "CVEC_TENANT": "env_tenant", + "CVEC_API_KEY": "env_api_key", + }, + clear=True, + ): + client = CVec( + host="arg_host", + tenant="arg_tenant", + api_key="arg_api_key", + default_time_range="arg_range", + ) + assert client.host == "arg_host" + assert client.tenant == "arg_tenant" + assert client.api_key == "arg_api_key" + assert 
client.default_time_range == "arg_range" From 096d6a688599a3349d05398e1873929b3e9b5af4 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:35:00 -0400 Subject: [PATCH 012/117] feat: Expose CVec class in src/cvec/__init__.py --- src/cvec/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cvec/__init__.py b/src/cvec/__init__.py index e69de29..e25fa13 100644 --- a/src/cvec/__init__.py +++ b/src/cvec/__init__.py @@ -0,0 +1,3 @@ +from .cvec import CVec + +__all__ = ["CVec"] From dd35c3df4634165e580f55a89fe8f5d191acdcf2 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:35:32 -0400 Subject: [PATCH 013/117] test: Import CVec directly from package --- tests/test_cvec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 2b90544..19cb063 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -1,7 +1,7 @@ import pytest import os from unittest.mock import patch -from cvec.cvec import CVec +from cvec import CVec class TestCVecConstructor: From 3613f7d27715f3d4e6cd237ac6ea53cd6d14e6c4 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:36:18 -0400 Subject: [PATCH 014/117] ci: Add GitHub Action for black and pytest checks --- .github/workflows/ci.yml | 51 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..bef119c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,51 @@ +name: Python CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11", "3.12", "3.13"] # Using a matrix for Python versions + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: 
${{ matrix.python-version }} + + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: true + virtualenvs-in-project: true + installer-parallel: true + + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v4 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }} + + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + + - name: Install project + run: poetry install --no-interaction + + - name: Run linters + run: | + poetry run black --check . + + - name: Run tests + run: | + poetry run pytest From 4d468dcdbaeacd9d726d354b7421694cd7dde4e1 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:38:59 -0400 Subject: [PATCH 015/117] build: Update CI workflow and pyproject.toml for Python 3.9-3.13 support --- .github/workflows/ci.yml | 4 +- poetry.lock | 163 ++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 +- 3 files changed, 163 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bef119c..d2f1029 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,10 +8,10 @@ on: jobs: build: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: - python-version: ["3.11", "3.12", "3.13"] # Using a matrix for Python versions + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] # Using a matrix for Python versions steps: - uses: actions/checkout@v4 diff --git a/poetry.lock b/poetry.lock index 36ff70a..23320eb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,6 +38,8 @@ mypy-extensions = ">=0.4.3" packaging = ">=22.0" pathspec = ">=0.9.0" platformdirs = ">=2" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} [package.extras] colorama = ["colorama 
(>=0.4.3)"] @@ -45,6 +47,22 @@ d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "click" +version = "8.1.8" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + [[package]] name = "click" version = "8.2.0" @@ -52,6 +70,7 @@ description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" groups = ["dev"] +markers = "python_version >= \"3.11\"" files = [ {file = "click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c"}, {file = "click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d"}, @@ -73,6 +92,25 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "exceptiongroup" +version = "1.3.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, + {file = "exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.6.0", markers = "python_version < \"3.13\""} + +[package.extras] +test = ["pytest (>=6)"] + [[package]] name = "iniconfig" 
version = "2.1.0" @@ -97,6 +135,62 @@ files = [ {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] +[[package]] +name = "numpy" +version = "2.0.2" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.9" +groups = ["main"] +markers = "python_version < \"3.11\"" +files = [ + {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, + {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, + {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66"}, + {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b"}, + {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd"}, + {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318"}, + {file = "numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8"}, + {file = "numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326"}, + {file = "numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97"}, + {file = "numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57"}, + {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a"}, + {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669"}, + {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951"}, + {file = "numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9"}, + {file = "numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15"}, + {file = "numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4"}, + {file = "numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c"}, + {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c"}, + {file = 
"numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692"}, + {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a"}, + {file = "numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c"}, + {file = "numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded"}, + {file = "numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5"}, + {file = "numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b"}, + {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729"}, + {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1"}, + {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd"}, + {file = "numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d"}, + {file = "numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d"}, + {file = "numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa"}, + {file = "numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c"}, + {file = "numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385"}, + {file = "numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78"}, +] + [[package]] name = "numpy" version = "2.2.5" @@ -104,6 +198,7 @@ description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" groups = ["main"] +markers = "python_version >= \"3.11\"" files = [ {file = "numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26"}, {file = "numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a"}, @@ -227,7 +322,11 @@ files = [ ] [package.dependencies] -numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""} +numpy = [ + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, +] python-dateutil = ">=2.8.2" pytz = ">=2020.1" tzdata = ">=2022.7" @@ 
-316,9 +415,11 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = ">=1.5,<2" +tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -362,6 +463,62 @@ files = [ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] +[[package]] +name = "tomli" +version = "2.2.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" +files = [ + {file = "typing_extensions-4.13.2-py3-none-any.whl", 
hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, + {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, +] + [[package]] name = "tzdata" version = "2025.2" @@ -376,5 +533,5 @@ files = [ [metadata] lock-version = "2.1" -python-versions = ">=3.13" -content-hash = "271f296fad705649f3ce2848b4fab4afbdb1e6dda922ce6844f9c31a0136ee5f" +python-versions = ">=3.9" +content-hash = "137aed099538854f3fd8fa187da0be3c16f4e3ce7601a3000a7399b18099da6c" diff --git a/pyproject.toml b/pyproject.toml index fd3e27d..ea09298 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ {name = "Joshua Napoli",email = "jnapoli@cvector.energy"} ] readme = "README.md" -requires-python = ">=3.13" +requires-python = ">=3.9" dependencies = [ "pandas (>=2.2.3,<3.0.0)" ] From d0cbc90137810cf6400f6deece04e6d827baa6f4 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:42:23 -0400 Subject: [PATCH 016/117] ci: Run CI workflow on multiple Python versions --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d2f1029..aac22cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] # Using a matrix for Python versions + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From 34f4af3b25bf57abd161e2ad76dc4f4938922497 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:45:03 -0400 Subject: [PATCH 017/117] feat: Add psycopg2-binary dependency for PostgreSQL support --- poetry.lock | 80 +++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 3 +- 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 23320eb..8abdf10 100644 --- 
a/poetry.lock +++ b/poetry.lock @@ -401,6 +401,84 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "psycopg2-binary" +version = "2.9.10" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3e9c76f0ac6f92ecfc79516a8034a544926430f7b080ec5a0537bca389ee0906"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ad26b467a405c798aaa1458ba09d7e2b6e5f96b1ce0ac15d82fd9f95dc38a92"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:270934a475a0e4b6925b5f804e3809dd5f90f8613621d062848dd82f9cd62007"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48b338f08d93e7be4ab2b5f1dbe69dc5e9ef07170fe1f86514422076d9c010d0"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4152f8f76d2023aac16285576a9ecd2b11a9895373a1f10fd9db54b3ff06b4"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32581b3020c72d7a421009ee1c6bf4a131ef5f0a968fab2e2de0c9d2bb4577f1"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2ce3e21dc3437b1d960521eca599d57408a695a0d3c26797ea0f72e834c7ffe5"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e984839e75e0b60cfe75e351db53d6db750b00de45644c5d1f7ee5d1f34a1ce5"}, + {file = 
"psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c4745a90b78e51d9ba06e2088a2fe0c693ae19cc8cb051ccda44e8df8a6eb53"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-win32.whl", hash = "sha256:e5720a5d25e3b99cd0dc5c8a440570469ff82659bb09431c1439b92caf184d3b"}, + {file = "psycopg2_binary-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:3c18f74eb4386bf35e92ab2354a12c17e5eb4d9798e4c0ad3a00783eae7cd9f1"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:04392983d0bb89a8717772a193cfaac58871321e3ec69514e1c4e0d4957b5aff"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1a6784f0ce3fec4edc64e985865c17778514325074adf5ad8f80636cd029ef7c"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5f86c56eeb91dc3135b3fd8a95dc7ae14c538a2f3ad77a19645cf55bab1799c"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b3d2491d4d78b6b14f76881905c7a8a8abcf974aad4a8a0b065273a0ed7a2cb"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2286791ececda3a723d1910441c793be44625d86d1a4e79942751197f4d30341"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:512d29bb12608891e349af6a0cccedce51677725a921c07dba6342beaf576f9a"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5a507320c58903967ef7384355a4da7ff3f28132d679aeb23572753cbf2ec10b"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6d4fa1079cab9018f4d0bd2db307beaa612b0d13ba73b5c6304b9fe2fb441ff7"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:851485a42dbb0bdc1edcdabdb8557c09c9655dfa2ca0460ff210522e073e319e"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:35958ec9e46432d9076286dda67942ed6d968b9c3a6a2fd62b48939d1d78bf68"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-win32.whl", hash = "sha256:ecced182e935529727401b24d76634a357c71c9275b356efafd8a2a91ec07392"}, + {file = "psycopg2_binary-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:ee0e8c683a7ff25d23b55b11161c2663d4b099770f6085ff0a20d4505778d6b4"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64"}, + {file = "psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, + {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:056470c3dc57904bbf63d6f534988bafc4e970ffd50f6271fc4ee7daad9498a5"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aa0e31fa4bb82578f3a6c74a73c273367727de397a7a0f07bd83cbea696baa"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8de718c0e1c4b982a54b41779667242bc630b2197948405b7bd8ce16bcecac92"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5c370b1e4975df846b0277b4deba86419ca77dbc25047f535b0bb03d1a544d44"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ffe8ed017e4ed70f68b7b371d84b7d4a790368db9203dfc2d222febd3a9c8863"}, + {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:8aecc5e80c63f7459a1a2ab2c64df952051df196294d9f739933a9f6687e86b3"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:7a813c8bdbaaaab1f078014b9b0b13f5de757e2b5d9be6403639b298a04d218b"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d00924255d7fc916ef66e4bf22f354a940c67179ad3fd7067d7a0a9c84d2fbfc"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7559bce4b505762d737172556a4e6ea8a9998ecac1e39b5233465093e8cee697"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8b58f0a96e7a1e341fc894f62c1177a7c83febebb5ff9123b579418fdc8a481"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b269105e59ac96aba877c1707c600ae55711d9dcd3fc4b5012e4af68e30c648"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:79625966e176dc97ddabc142351e0409e28acf4660b88d1cf6adb876d20c490d"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8aabf1c1a04584c168984ac678a668094d831f152859d06e055288fa515e4d30"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:19721ac03892001ee8fdd11507e6a2e01f4e37014def96379411ca99d78aeb2c"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7f5d859928e635fa3ce3477704acee0f667b3a3d3e4bb109f2b18d4005f38287"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-win32.whl", hash = "sha256:3216ccf953b3f267691c90c6fe742e45d890d8272326b4a8b20850a03d05b7b8"}, + {file = "psycopg2_binary-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:30e34c4e97964805f715206c7b789d54a78b70f3ff19fbe590104b71c45600e5"}, +] + [[package]] name = "pytest" version = "8.3.5" @@ -534,4 +612,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.9" -content-hash = "137aed099538854f3fd8fa187da0be3c16f4e3ce7601a3000a7399b18099da6c" +content-hash = "1686f14aa63a712725dabaf7cea4d959a328bd3208c5475c082ca7e2386b00c0" diff --git a/pyproject.toml b/pyproject.toml index ea09298..8ad6422 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,8 @@ authors = [ readme = "README.md" requires-python = ">=3.9" 
dependencies = [ - "pandas (>=2.2.3,<3.0.0)" + "pandas (>=2.2.3,<3.0.0)", + "psycopg2-binary (>=2.9.10,<3.0.0)" ] [tool.poetry] From 5efaecf811d1443295e5dfd7825a04e0af9c5f94 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 22:54:18 -0400 Subject: [PATCH 018/117] docs: Add database schema documentation to README --- README.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/README.md b/README.md index fd6aaf1..426b75e 100644 --- a/README.md +++ b/README.md @@ -4,22 +4,46 @@ This SDK integrates directly with CVector's database. Each tenant has a schema and a database user, both named for the tenant. The API Key is the password of the user. The database user is restricted to only have access to the tenant's schema. Here are the available database tables: +## tag_data + +The tag_data table is a Timescale hypertable. Boolean tags are represented within this table using value 0 and 1. + ```sql CREATE TABLE tag_data ( tag_name_id INTEGER NOT NULL, tag_value_changed_at TIMESTAMP WITH TIME ZONE, tag_value DOUBLE PRECISION ) + +SELECT create_hypertable( + '${schema_name}.tag_data', + 'tag_value_changed_at', + chunk_time_interval => INTERVAL '1 hour', + if_not_exists => TRUE +); ``` +## tag_data_str + +The tag_data_str table is a Timescale hypertable. 
+ ```sql CREATE TABLE tag_data_str ( tag_name_id INTEGER NOT NULL, tag_value_changed_at timestamptz NOT NULL, tag_value text ); + +SELECT create_hypertable( + '${schema_name}.tag_data_str', + 'tag_value_changed_at', + chunk_time_interval => INTERVAL '1 hour', + if_not_exists => TRUE +); ``` +## tag_names + ```sql CREATE TABLE tag_names ( id SERIAL PRIMARY KEY, @@ -29,6 +53,8 @@ CREATE TABLE tag_names ( ); ``` +## metrics + ```sql CREATE VIEW metrics AS SELECT td.tag_value AS value, From 05344e7f8933dc99639bed49ba975e46fbf7bf52 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:56:49 -0400 Subject: [PATCH 019/117] refactor: Replace time_range with start_at and end_at parameters --- README.md | 16 ++++++++-------- src/cvec/cvec.py | 25 ++++++++++++++----------- tests/test_cvec.py | 17 +++++++++++------ 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 426b75e..c8a3bdc 100644 --- a/README.md +++ b/README.md @@ -68,18 +68,18 @@ CREATE VIEW metrics AS The SDK provides an API client class named `CVec` with the following functions. -## `__init__(?host, ?tenant, ?api_key, ?default_time_range)` +## `__init__(?host, ?tenant, ?api_key, ?default_start_at, ?default_end_at)` -Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The default_time_range constrains most API keys, and can be overridden by the time_range argument to each API function. +Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The `default_start_at` and `default_end_at` constrain most API calls, and can be overridden by the `start_at` and `end_at` arguments to each API function. 
-## `get_spans(tag_name, ?time_range, ?limit)` +## `get_spans(tag_name, ?start_at, ?end_at, ?limit)` -Return all of the time spans where a tag has a constant value. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. +Return all of the time spans where a tag has a constant value within the specified [`start_at`, `end_at`) interval. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. -## `get_metric_data(?tag_names, ?time_range)` +## `get_metric_data(?tag_names, ?start_at, ?end_at)` -Return all data-points within a given time-range, optionally selecting a given list of tags. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. +Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of tags. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. -## `get_tags(?time_range)` +## `get_tags(?start_at, ?end_at)` -Return a list of tags that had at least one transition in the given time range. All tags are returned if no time_range is given. Each tag has {id, name, birth_at, death_at}. +Return a list of tags that had at least one transition in the given [`start_at`, `end_at`) interval. All tags are returned if no `start_at` and `end_at` are given. Each tag has {id, name, birth_at, death_at}. 
diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 04b0550..d656a08 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -7,18 +7,19 @@ class CVec: CVec API Client """ - def __init__(self, host=None, tenant=None, api_key=None, default_time_range=None): + def __init__(self, host=None, tenant=None, api_key=None, default_start_at=None, default_end_at=None): """ Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. - The default_time_range constrains most API keys, and can be overridden - by the time_range argument to each API function. + The default_start_at and default_end_at constrain most API keys, and can be overridden + by the start_at and end_at arguments to each API function. """ self.host = host or os.environ.get("CVEC_HOST") self.tenant = tenant or os.environ.get("CVEC_TENANT") self.api_key = api_key or os.environ.get("CVEC_API_KEY") - self.default_time_range = default_time_range + self.default_start_at = default_start_at + self.default_end_at = default_end_at if not self.host: raise ValueError( @@ -33,9 +34,10 @@ def __init__(self, host=None, tenant=None, api_key=None, default_time_range=None "CVEC_API_KEY must be set either as an argument or environment variable" ) - def get_spans(self, tag_name, time_range=None, limit=None): + def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ - Return all of the time spans where a tag has a constant value. + Return all of the time spans where a tag has a constant value + within the specified [start_at, end_at) interval. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. 
@@ -44,19 +46,20 @@ def get_spans(self, tag_name, time_range=None, limit=None): # Implementation to be added return [] - def get_metric_data(self, tag_names=None, time_range=None): + def get_metric_data(self, tag_names=None, start_at=None, end_at=None): """ - Return all data-points within a given time-range, optionally selecting a given list of tags. + Return all data-points within a given [start_at, end_at) interval, + optionally selecting a given list of tags. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. """ # Implementation to be added return pd.DataFrame(columns=["tag_name", "time", "value"]) - def get_tags(self, time_range=None): + def get_tags(self, start_at=None, end_at=None): """ - Return a list of tags that had at least one transition in the given time range. - All tags are returned if no time_range is given. + Return a list of tags that had at least one transition in the given [start_at, end_at) interval. + All tags are returned if no start_at and end_at are given. Each tag has {id, name, birth_at, death_at}. 
""" # Implementation to be added diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 19cb063..d669a7f 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -11,12 +11,14 @@ def test_constructor_with_arguments(self): host="test_host", tenant="test_tenant", api_key="test_api_key", - default_time_range="test_range", + default_start_at="test_start", + default_end_at="test_end", ) assert client.host == "test_host" assert client.tenant == "test_tenant" assert client.api_key == "test_api_key" - assert client.default_time_range == "test_range" + assert client.default_start_at == "test_start" + assert client.default_end_at == "test_end" @patch.dict( os.environ, @@ -29,11 +31,12 @@ def test_constructor_with_arguments(self): ) def test_constructor_with_env_vars(self): """Test CVec constructor with environment variables.""" - client = CVec(default_time_range="env_range") + client = CVec(default_start_at="env_start", default_end_at="env_end") assert client.host == "env_host" assert client.tenant == "env_tenant" assert client.api_key == "env_api_key" - assert client.default_time_range == "env_range" + assert client.default_start_at == "env_start" + assert client.default_end_at == "env_end" @patch.dict(os.environ, {}, clear=True) def test_constructor_missing_host_raises_value_error(self): @@ -94,9 +97,11 @@ def test_constructor_args_override_env_vars(self): host="arg_host", tenant="arg_tenant", api_key="arg_api_key", - default_time_range="arg_range", + default_start_at="arg_start", + default_end_at="arg_end", ) assert client.host == "arg_host" assert client.tenant == "arg_tenant" assert client.api_key == "arg_api_key" - assert client.default_time_range == "arg_range" + assert client.default_start_at == "arg_start" + assert client.default_end_at == "arg_end" From 1c6aeb80b052c76fdc8a7ec28a809ee4b5060339 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Sun, 11 May 2025 22:56:51 -0400 Subject: [PATCH 020/117] style: Run linter on cvec.py --- src/cvec/cvec.py 
| 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index d656a08..85b238f 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -7,7 +7,14 @@ class CVec: CVec API Client """ - def __init__(self, host=None, tenant=None, api_key=None, default_start_at=None, default_end_at=None): + def __init__( + self, + host=None, + tenant=None, + api_key=None, + default_start_at=None, + default_end_at=None, + ): """ Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, From 05f0ebd20108471d5a1dc894d2fe59ed09b7e06a Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Sun, 11 May 2025 23:00:00 -0400 Subject: [PATCH 021/117] fix: Corrected begin_at to start_at in get_spans documentation --- README.md | 2 +- src/cvec/cvec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c8a3bdc..3a9e276 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ Setup the SDK with the given host and API Key. The host and API key are loaded f ## `get_spans(tag_name, ?start_at, ?end_at, ?limit)` -Return all of the time spans where a tag has a constant value within the specified [`start_at`, `end_at`) interval. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. +Return all of the time spans where a tag has a constant value within the specified [`start_at`, `end_at`) interval. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. 
## `get_metric_data(?tag_names, ?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 85b238f..a4e5eec 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -47,7 +47,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): within the specified [start_at, end_at) interval. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: - {id, tag_name, value, begin_at, end_at, raw_begin_at, raw_end_at, metadata}. + {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. """ # Implementation to be added From d58638c66891a049706a519f72f170ea0a856ed7 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:25:46 -0400 Subject: [PATCH 022/117] feat: Implement get_spans from tag_data and tag_data_str. --- src/cvec/cvec.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 2 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index a4e5eec..d729ab1 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,5 +1,7 @@ import os import pandas as pd +import psycopg2 +from psycopg2.extras import RealDictCursor class CVec: @@ -41,6 +43,23 @@ def __init__( "CVEC_API_KEY must be set either as an argument or environment variable" ) + def _get_db_connection(self): + """Helper method to establish a database connection.""" + try: + # psycopg2 defaults to using the username as dbname if not specified. + # Here, self.tenant is used for both user and (implicitly) dbname. 
+ conn = psycopg2.connect( + user=self.tenant, + password=self.api_key, + host=self.host, + # dbname=self.tenant # Implicitly self.tenant if not provided + ) + return conn + except psycopg2.Error as e: + # Consider logging this error or raising a custom exception + print(f"Database connection error: {e}") + raise + def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ Return all of the time spans where a tag has a constant value @@ -50,8 +69,113 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. """ - # Implementation to be added - return [] + _start_at = start_at or self.default_start_at + _end_at = end_at or self.default_end_at + + if not _start_at or not _end_at: + raise ValueError( + "Effective start_at and end_at must be provided either as arguments or class defaults." + ) + + conn = None + try: + conn = self._get_db_connection() + with conn.cursor(cursor_factory=RealDictCursor) as cur: + # 1. Get tag_name_id + query_tag_id = ( + f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %s" + ) + cur.execute(query_tag_id, (tag_name,)) + tag_row = cur.fetchone() + if not tag_row: + return [] # Tag not found + tag_name_id = tag_row["id"] + + # 2. 
Fetch data points from tag_data (numeric) and tag_data_str (text) + all_points = [] + + # Query for numeric data + query_numeric = f""" + (SELECT tag_value_changed_at, tag_value + FROM {self.tenant}.tag_data + WHERE tag_name_id = %s AND tag_value_changed_at < %s + ORDER BY tag_value_changed_at DESC + LIMIT 1) + UNION ALL + (SELECT tag_value_changed_at, tag_value + FROM {self.tenant}.tag_data + WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s) + """ + cur.execute( + query_numeric, (tag_name_id, _start_at, tag_name_id, _start_at, _end_at) + ) + for row in cur.fetchall(): + all_points.append( + {"time": row["tag_value_changed_at"], "value": float(row["tag_value"])} + ) + + # Query for string data + query_string = f""" + (SELECT tag_value_changed_at, tag_value + FROM {self.tenant}.tag_data_str + WHERE tag_name_id = %s AND tag_value_changed_at < %s + ORDER BY tag_value_changed_at DESC + LIMIT 1) + UNION ALL + (SELECT tag_value_changed_at, tag_value + FROM {self.tenant}.tag_data_str + WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s) + """ + cur.execute( + query_string, (tag_name_id, _start_at, tag_name_id, _start_at, _end_at) + ) + for row in cur.fetchall(): + all_points.append( + {"time": row["tag_value_changed_at"], "value": str(row["tag_value"])} + ) + + # Sort all collected points by time + all_points.sort(key=lambda p: p["time"]) + + if not all_points: + return [] + + spans = [] + # 3. 
Construct spans + for i, point in enumerate(all_points): + current_raw_start_at = point["time"] + current_value = point["value"] + + span_actual_start = max(current_raw_start_at, _start_at) + + next_raw_event_at = None + if i + 1 < len(all_points): + next_raw_event_at = all_points[i + 1]["time"] + span_actual_end = min(next_raw_event_at, _end_at) + else: + span_actual_end = _end_at + + if span_actual_start < span_actual_end: # Ensure span has positive duration + spans.append( + { + "id": None, + "tag_name": tag_name, + "value": current_value, + "start_at": span_actual_start, + "end_at": span_actual_end, + "raw_start_at": current_raw_start_at, + "raw_end_at": next_raw_event_at, + "metadata": None, + } + ) + + if limit is not None and limit >= 0: # allow limit=0 to return empty list + spans = spans[:limit] + + return spans + finally: + if conn: + conn.close() def get_metric_data(self, tag_names=None, start_at=None, end_at=None): """ From e3e582fc25d70d9c7883973d30a27c59056df807 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:25:47 -0400 Subject: [PATCH 023/117] style: Run linter on cvec.py --- src/cvec/cvec.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index d729ab1..c47b406 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -107,11 +107,15 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s) """ cur.execute( - query_numeric, (tag_name_id, _start_at, tag_name_id, _start_at, _end_at) + query_numeric, + (tag_name_id, _start_at, tag_name_id, _start_at, _end_at), ) for row in cur.fetchall(): all_points.append( - {"time": row["tag_value_changed_at"], "value": float(row["tag_value"])} + { + "time": row["tag_value_changed_at"], + "value": float(row["tag_value"]), + } ) # Query for string data @@ -127,11 +131,15 @@ def get_spans(self, tag_name, 
start_at=None, end_at=None, limit=None): WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s) """ cur.execute( - query_string, (tag_name_id, _start_at, tag_name_id, _start_at, _end_at) + query_string, + (tag_name_id, _start_at, tag_name_id, _start_at, _end_at), ) for row in cur.fetchall(): all_points.append( - {"time": row["tag_value_changed_at"], "value": str(row["tag_value"])} + { + "time": row["tag_value_changed_at"], + "value": str(row["tag_value"]), + } ) # Sort all collected points by time @@ -154,8 +162,10 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): span_actual_end = min(next_raw_event_at, _end_at) else: span_actual_end = _end_at - - if span_actual_start < span_actual_end: # Ensure span has positive duration + + if ( + span_actual_start < span_actual_end + ): # Ensure span has positive duration spans.append( { "id": None, @@ -168,10 +178,12 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): "metadata": None, } ) - - if limit is not None and limit >= 0: # allow limit=0 to return empty list + + if ( + limit is not None and limit >= 0 + ): # allow limit=0 to return empty list spans = spans[:limit] - + return spans finally: if conn: From a541bafc6a2bd99bfacc1fae0583dad4690c43f7 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 08:33:54 -0400 Subject: [PATCH 024/117] docs: Clarify tag_data and tag_data_str table descriptions in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3a9e276..2d79c20 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This SDK integrates directly with CVector's database. Each tenant has a schema a ## tag_data -The tag_data table is a Timescale hypertable. Boolean tags are represented within this table using value 0 and 1. +The tag_data table is a Timescale hypertable. Boolean tags are represented within this table using value 0 and 1. 
The table uses a "report by exception" approach; a row is inserted only when the value of a metric changes. ```sql CREATE TABLE tag_data ( @@ -25,7 +25,7 @@ SELECT create_hypertable( ## tag_data_str -The tag_data_str table is a Timescale hypertable. +The tag_data_str table is a Timescale hypertable, similar to tag_data for string-valued tags. ```sql CREATE TABLE tag_data_str ( From 380d757026dabc7d03c7db6328b10d98562727a2 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:33:55 -0400 Subject: [PATCH 025/117] feat: Modify get_spans to only report spans within time period. --- README.md | 10 +++++++- src/cvec/cvec.py | 59 ++++++++++++++++++++++++++++-------------------- 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 2d79c20..77cd246 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,15 @@ Setup the SDK with the given host and API Key. The host and API key are loaded f ## `get_spans(tag_name, ?start_at, ?end_at, ?limit)` -Return all of the time spans where a tag has a constant value within the specified [`start_at`, `end_at`) interval. The function returns a list of time-ranges with the value for each time-range. Returns a list of spans. Each span has the following fields: {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. +Return time spans for a tag, where each span's value is initiated by a value change occurring *within* the specified [`start_at`, `end_at`) interval. + +This function identifies all `tag_value_changed_at` timestamps for the given `tag_name` that are greater than or equal to `start_at` and less than `end_at`. For each such timestamp (`event_time`): +- The span's `value` is the value set at `event_time`. +- The span's `start_at` (and `raw_start_at`) is `event_time`. 
+- The span's `end_at` is the earlier of the next `tag_value_changed_at` timestamp for this tag, or the query's `end_at`. +- The span's `raw_end_at` is the timestamp of the next `tag_value_changed_at` (if one occurs before the query's `end_at`), or `None`. + +Returns a list of spans. Each span has the following fields: {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. ## `get_metric_data(?tag_names, ?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index c47b406..e7e2540 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -62,12 +62,31 @@ def _get_db_connection(self): def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ - Return all of the time spans where a tag has a constant value - within the specified [start_at, end_at) interval. - The function returns a list of time-ranges with the value for each time-range. - Returns a list of spans. Each span has the following fields: - {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. - In a future version of the SDK, spans can be annotated, edited, and deleted. + Return time spans for a tag, where each span's value is initiated by a + value change occurring within the specified [start_at, end_at) interval. + + This function identifies all `tag_value_changed_at` timestamps for the + given `tag_name` that are greater than or equal to `start_at` and less + than `end_at`. For each such timestamp (let's call it `event_time`), + a span is generated: + - `value`: The tag's value that was set at `event_time`. + - `tag_name`: The name of the tag. + - `start_at`: Equal to `event_time`. + - `raw_start_at`: Equal to `event_time`. + - `end_at`: The timestamp of the next value change for this tag, or the + query's `end_at` parameter, whichever is earlier. If there is no + subsequent value change up to the query's `end_at`, this will be the + query's `end_at`. 
+ - `raw_end_at`: The timestamp of the next value change for this tag, if + another change is found by the query (i.e., before `end_at`). + Otherwise, `None`. + - `id`: Currently `None`. + - `metadata`: Currently `None`. + + Returns a list of dictionaries, where each dictionary represents a span. + If no value changes occur for the tag within the specified interval, an + empty list is returned. + The `limit` parameter restricts the number of spans returned. """ _start_at = start_at or self.default_start_at _end_at = end_at or self.default_end_at @@ -96,19 +115,14 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Query for numeric data query_numeric = f""" - (SELECT tag_value_changed_at, tag_value + SELECT tag_value_changed_at, tag_value FROM {self.tenant}.tag_data - WHERE tag_name_id = %s AND tag_value_changed_at < %s - ORDER BY tag_value_changed_at DESC - LIMIT 1) - UNION ALL - (SELECT tag_value_changed_at, tag_value - FROM {self.tenant}.tag_data - WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s) + WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s + ORDER BY tag_value_changed_at ASC """ cur.execute( query_numeric, - (tag_name_id, _start_at, tag_name_id, _start_at, _end_at), + (tag_name_id, _start_at, _end_at), ) for row in cur.fetchall(): all_points.append( @@ -120,19 +134,14 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Query for string data query_string = f""" - (SELECT tag_value_changed_at, tag_value - FROM {self.tenant}.tag_data_str - WHERE tag_name_id = %s AND tag_value_changed_at < %s - ORDER BY tag_value_changed_at DESC - LIMIT 1) - UNION ALL - (SELECT tag_value_changed_at, tag_value + SELECT tag_value_changed_at, tag_value FROM {self.tenant}.tag_data_str - WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s) + WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s + ORDER 
BY tag_value_changed_at ASC """ cur.execute( query_string, - (tag_name_id, _start_at, tag_name_id, _start_at, _end_at), + (tag_name_id, _start_at, _end_at), ) for row in cur.fetchall(): all_points.append( @@ -154,7 +163,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): current_raw_start_at = point["time"] current_value = point["value"] - span_actual_start = max(current_raw_start_at, _start_at) + span_actual_start = current_raw_start_at # Query now ensures current_raw_start_at >= _start_at next_raw_event_at = None if i + 1 < len(all_points): From b426e94efe50a40473a06ad5c7e8b1a12c3f9881 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:33:56 -0400 Subject: [PATCH 026/117] style: Apply linting to cvec.py --- src/cvec/cvec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index e7e2540..5990c11 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -163,7 +163,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): current_raw_start_at = point["time"] current_value = point["value"] - span_actual_start = current_raw_start_at # Query now ensures current_raw_start_at >= _start_at + span_actual_start = current_raw_start_at # Query now ensures current_raw_start_at >= _start_at next_raw_event_at = None if i + 1 < len(all_points): From 90de5de711216b723956d01b5df4c026662d315e Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 08:40:49 -0400 Subject: [PATCH 027/117] docs: Improve docstring for get_time_spans_where_value_changed --- src/cvec/cvec.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 5990c11..c5dd87c 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -65,11 +65,9 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): Return time spans for a tag, where each span's value is initiated by a value change occurring within the 
specified [start_at, end_at) interval. - This function identifies all `tag_value_changed_at` timestamps for the - given `tag_name` that are greater than or equal to `start_at` and less - than `end_at`. For each such timestamp (let's call it `event_time`), - a span is generated: - - `value`: The tag's value that was set at `event_time`. + This function finds all spans where the metric value is constant that begin within the + specified interval: + - `value`: The tag's value during the span time period. - `tag_name`: The name of the tag. - `start_at`: Equal to `event_time`. - `raw_start_at`: Equal to `event_time`. From ce499f76b73afa198c1967ff52d493ce4c801b9c Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:40:50 -0400 Subject: [PATCH 028/117] feat: Allow unbounded start_at/end_at in get_spans and spans' end_at --- README.md | 16 ++++++---- src/cvec/cvec.py | 78 ++++++++++++++++++++++++++---------------------- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 77cd246..7bf3254 100644 --- a/README.md +++ b/README.md @@ -74,15 +74,19 @@ Setup the SDK with the given host and API Key. The host and API key are loaded f ## `get_spans(tag_name, ?start_at, ?end_at, ?limit)` -Return time spans for a tag, where each span's value is initiated by a value change occurring *within* the specified [`start_at`, `end_at`) interval. +Return time spans for a tag. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). +If `start_at` is `None` (e.g., not provided as an argument and no class default `default_start_at` is set), the query for value changes is unbounded at the start. Similarly, if `end_at` is `None`, the query is unbounded at the end. -This function identifies all `tag_value_changed_at` timestamps for the given `tag_name` that are greater than or equal to `start_at` and less than `end_at`. 
For each such timestamp (`event_time`): -- The span's `value` is the value set at `event_time`. -- The span's `start_at` (and `raw_start_at`) is `event_time`. -- The span's `end_at` is the earlier of the next `tag_value_changed_at` timestamp for this tag, or the query's `end_at`. -- The span's `raw_end_at` is the timestamp of the next `tag_value_changed_at` (if one occurs before the query's `end_at`), or `None`. +Each span in the returned list represents a period where the tag's value is constant: +- `value`: The tag's value during the span. +- `tag_name`: The name of the tag. +- `start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. +- `raw_start_at`: Same as `start_at`. +- `end_at`: The timestamp of the next value change for this tag, or the query's `end_at` parameter, whichever is earlier. If the query's `end_at` is not specified (i.e., `None`) and there is no subsequent value change found by the query, this field will be `None`, indicating the span continues indefinitely. +- `raw_end_at`: The timestamp of the next value change for this tag found by the query. This will be `None` if no subsequent change is found within the query window (e.g., before the query's `end_at` or indefinitely if `end_at` is `None`). Returns a list of spans. Each span has the following fields: {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. +If no relevant value changes are found, an empty list is returned. 
## `get_metric_data(?tag_names, ?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index c5dd87c..e433bf7 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -62,38 +62,33 @@ def _get_db_connection(self): def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ - Return time spans for a tag, where each span's value is initiated by a - value change occurring within the specified [start_at, end_at) interval. + Return time spans for a tag. Spans are generated from value changes + that occur after `start_at` (if specified) and before `end_at` (if specified). + If `start_at` is `None` (e.g., not provided via argument or class default), + the query is unbounded at the start. If `end_at` is `None`, it's unbounded at the end. - This function finds all spans where the metric value is constant that begin within the - specified interval: - - `value`: The tag's value during the span time period. + Each span represents a period where the tag's value is constant. + - `value`: The tag's value during the span. - `tag_name`: The name of the tag. - - `start_at`: Equal to `event_time`. - - `raw_start_at`: Equal to `event_time`. + - `start_at`: The timestamp of the value change that initiated this span's value. + This will be >= `_start_at` if `_start_at` was specified. + - `raw_start_at`: Same as `start_at`. - `end_at`: The timestamp of the next value change for this tag, or the - query's `end_at` parameter, whichever is earlier. If there is no - subsequent value change up to the query's `end_at`, this will be the - query's `end_at`. - - `raw_end_at`: The timestamp of the next value change for this tag, if - another change is found by the query (i.e., before `end_at`). - Otherwise, `None`. + query's `_end_at` parameter, whichever is earlier. If the query's `_end_at` + is `None` and there is no subsequent value change, this field will be `None`, + indicating the span continues indefinitely. 
+ - `raw_end_at`: The timestamp of the next value change for this tag found by + the query. `None` if no subsequent change is found within the query window. - `id`: Currently `None`. - `metadata`: Currently `None`. Returns a list of dictionaries, where each dictionary represents a span. - If no value changes occur for the tag within the specified interval, an - empty list is returned. + If no relevant value changes are found, an empty list is returned. The `limit` parameter restricts the number of spans returned. """ _start_at = start_at or self.default_start_at _end_at = end_at or self.default_end_at - if not _start_at or not _end_at: - raise ValueError( - "Effective start_at and end_at must be provided either as arguments or class defaults." - ) - conn = None try: conn = self._get_db_connection() @@ -111,17 +106,28 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # 2. Fetch data points from tag_data (numeric) and tag_data_str (text) all_points = [] + # Build WHERE clause and params for queries + where_conditions = ["tag_name_id = %s"] + query_params = [tag_name_id] + + if _start_at is not None: + where_conditions.append("tag_value_changed_at >= %s") + query_params.append(_start_at) + + if _end_at is not None: + where_conditions.append("tag_value_changed_at < %s") + query_params.append(_end_at) + + where_sql = " AND ".join(where_conditions) + # Query for numeric data query_numeric = f""" SELECT tag_value_changed_at, tag_value FROM {self.tenant}.tag_data - WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s + WHERE {where_sql} ORDER BY tag_value_changed_at ASC """ - cur.execute( - query_numeric, - (tag_name_id, _start_at, _end_at), - ) + cur.execute(query_numeric, tuple(query_params)) for row in cur.fetchall(): all_points.append( { @@ -134,13 +140,10 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): query_string = f""" SELECT tag_value_changed_at, tag_value FROM 
{self.tenant}.tag_data_str - WHERE tag_name_id = %s AND tag_value_changed_at >= %s AND tag_value_changed_at < %s + WHERE {where_sql} ORDER BY tag_value_changed_at ASC """ - cur.execute( - query_string, - (tag_name_id, _start_at, _end_at), - ) + cur.execute(query_string, tuple(query_params)) for row in cur.fetchall(): all_points.append( { @@ -161,18 +164,21 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): current_raw_start_at = point["time"] current_value = point["value"] - span_actual_start = current_raw_start_at # Query now ensures current_raw_start_at >= _start_at + span_actual_start = current_raw_start_at next_raw_event_at = None if i + 1 < len(all_points): next_raw_event_at = all_points[i + 1]["time"] - span_actual_end = min(next_raw_event_at, _end_at) + + if next_raw_event_at is not None: + # If _end_at is specified, cap the span by it. Otherwise, span ends at next event. + span_actual_end = min(next_raw_event_at, _end_at) if _end_at is not None else next_raw_event_at else: + # No next event, so span extends to _end_at (which can be None if query is unbounded) span_actual_end = _end_at - - if ( - span_actual_start < span_actual_end - ): # Ensure span has positive duration + + # Add span if it has a positive duration or extends indefinitely (end_at is None) + if span_actual_end is None or span_actual_start < span_actual_end: spans.append( { "id": None, From 2b48944dc3bceeb554359c083000fc8b317e579c Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:40:51 -0400 Subject: [PATCH 029/117] style: Apply linter formatting to cvec.py --- src/cvec/cvec.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index e433bf7..e3719f8 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -113,11 +113,11 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): if _start_at is not None: where_conditions.append("tag_value_changed_at >= %s") 
query_params.append(_start_at) - + if _end_at is not None: where_conditions.append("tag_value_changed_at < %s") query_params.append(_end_at) - + where_sql = " AND ".join(where_conditions) # Query for numeric data @@ -172,11 +172,15 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): if next_raw_event_at is not None: # If _end_at is specified, cap the span by it. Otherwise, span ends at next event. - span_actual_end = min(next_raw_event_at, _end_at) if _end_at is not None else next_raw_event_at + span_actual_end = ( + min(next_raw_event_at, _end_at) + if _end_at is not None + else next_raw_event_at + ) else: # No next event, so span extends to _end_at (which can be None if query is unbounded) span_actual_end = _end_at - + # Add span if it has a positive duration or extends indefinitely (end_at is None) if span_actual_end is None or span_actual_start < span_actual_end: spans.append( From a0f77ebe8e317999eaf800ce7d004d9c3c27bea5 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:44:48 -0400 Subject: [PATCH 030/117] refactor: Simplify get_spans query construction with static SQL --- src/cvec/cvec.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index e3719f8..45131cd 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -106,19 +106,10 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # 2. 
Fetch data points from tag_data (numeric) and tag_data_str (text) all_points = [] - # Build WHERE clause and params for queries - where_conditions = ["tag_name_id = %s"] - query_params = [tag_name_id] - - if _start_at is not None: - where_conditions.append("tag_value_changed_at >= %s") - query_params.append(_start_at) - - if _end_at is not None: - where_conditions.append("tag_value_changed_at < %s") - query_params.append(_end_at) - - where_sql = " AND ".join(where_conditions) + # Define a static WHERE clause that handles NULL _start_at/_end_at for unbounded intervals + where_sql = "tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL)" + # Parameters for the database query, matching the placeholders in where_sql + db_query_params = (tag_name_id, _start_at, _start_at, _end_at, _end_at) # Query for numeric data query_numeric = f""" @@ -127,7 +118,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): WHERE {where_sql} ORDER BY tag_value_changed_at ASC """ - cur.execute(query_numeric, tuple(query_params)) + cur.execute(query_numeric, db_query_params) for row in cur.fetchall(): all_points.append( { @@ -143,7 +134,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): WHERE {where_sql} ORDER BY tag_value_changed_at ASC """ - cur.execute(query_string, tuple(query_params)) + cur.execute(query_string, db_query_params) for row in cur.fetchall(): all_points.append( { From 937af38b47cf8af8095a3e54e6ef7f2179f244f3 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:45:32 -0400 Subject: [PATCH 031/117] refactor: Inline where_sql in get_spans for readability --- src/cvec/cvec.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 45131cd..c5163ac 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -106,16 +106,14 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # 
2. Fetch data points from tag_data (numeric) and tag_data_str (text) all_points = [] - # Define a static WHERE clause that handles NULL _start_at/_end_at for unbounded intervals - where_sql = "tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL)" - # Parameters for the database query, matching the placeholders in where_sql + # Parameters for the database query, matching the placeholders in the WHERE clause below db_query_params = (tag_name_id, _start_at, _start_at, _end_at, _end_at) # Query for numeric data query_numeric = f""" SELECT tag_value_changed_at, tag_value FROM {self.tenant}.tag_data - WHERE {where_sql} + WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) ORDER BY tag_value_changed_at ASC """ cur.execute(query_numeric, db_query_params) @@ -131,7 +129,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): query_string = f""" SELECT tag_value_changed_at, tag_value FROM {self.tenant}.tag_data_str - WHERE {where_sql} + WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) ORDER BY tag_value_changed_at ASC """ cur.execute(query_string, db_query_params) From a24e67bf54702739b154ece8223dc34a226df8a8 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:50:13 -0400 Subject: [PATCH 032/117] refactor: Combine tag_data and tag_data_str queries into one SQL query --- src/cvec/cvec.py | 56 +++++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index c5163ac..2f604c3 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -106,38 +106,40 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # 2. 
Fetch data points from tag_data (numeric) and tag_data_str (text) all_points = [] - # Parameters for the database query, matching the placeholders in the WHERE clause below - db_query_params = (tag_name_id, _start_at, _start_at, _end_at, _end_at) - - # Query for numeric data - query_numeric = f""" - SELECT tag_value_changed_at, tag_value - FROM {self.tenant}.tag_data - WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) - ORDER BY tag_value_changed_at ASC + # Parameters for each part of the UNION ALL query. + # The tuple (tag_name_id, _start_at, _start_at, _end_at, _end_at) + # is repeated for the numeric and string parts of the query. + union_db_query_params = (tag_name_id, _start_at, _start_at, _end_at, _end_at) * 2 + + # Combined query for numeric and string data + combined_query = f""" + SELECT tag_value_changed_at, CAST(tag_value AS TEXT) AS tag_value, 'numeric' AS value_type + FROM {self.tenant}.tag_data + WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) + UNION ALL + SELECT tag_value_changed_at, tag_value, 'string' AS value_type + FROM {self.tenant}.tag_data_str + WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) + ORDER BY tag_value_changed_at ASC """ - cur.execute(query_numeric, db_query_params) + cur.execute(combined_query, union_db_query_params) for row in cur.fetchall(): + value = row["tag_value"] # This is TEXT due to CAST or original type + if row["value_type"] == 'numeric': + if value is not None: # Avoid float(None) which raises TypeError + try: + value = float(value) + except ValueError: + # This might occur if CAST to TEXT results in a string not convertible to float, + # though float() handles 'Infinity', '-Infinity', 'NaN' from strings. + # Log a warning and keep the value as a string in such edge cases. 
+ print(f"Warning: Could not convert supposed numeric value '{value}' to float.") + # If value_type is 'string', value is already a string (or None if DB NULL). + # If value was NULL in the database, it remains Python None for both types. all_points.append( { "time": row["tag_value_changed_at"], - "value": float(row["tag_value"]), - } - ) - - # Query for string data - query_string = f""" - SELECT tag_value_changed_at, tag_value - FROM {self.tenant}.tag_data_str - WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) - ORDER BY tag_value_changed_at ASC - """ - cur.execute(query_string, db_query_params) - for row in cur.fetchall(): - all_points.append( - { - "time": row["tag_value_changed_at"], - "value": str(row["tag_value"]), + "value": value, } ) From cc0f2ba1b4ce0b6839cf98cc8e99309d344b94b2 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:50:14 -0400 Subject: [PATCH 033/117] style: Apply linter to cvec.py --- src/cvec/cvec.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 2f604c3..7241af3 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -109,7 +109,13 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Parameters for each part of the UNION ALL query. # The tuple (tag_name_id, _start_at, _start_at, _end_at, _end_at) # is repeated for the numeric and string parts of the query. 
- union_db_query_params = (tag_name_id, _start_at, _start_at, _end_at, _end_at) * 2 + union_db_query_params = ( + tag_name_id, + _start_at, + _start_at, + _end_at, + _end_at, + ) * 2 # Combined query for numeric and string data combined_query = f""" @@ -124,16 +130,22 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ cur.execute(combined_query, union_db_query_params) for row in cur.fetchall(): - value = row["tag_value"] # This is TEXT due to CAST or original type - if row["value_type"] == 'numeric': - if value is not None: # Avoid float(None) which raises TypeError + value = row[ + "tag_value" + ] # This is TEXT due to CAST or original type + if row["value_type"] == "numeric": + if ( + value is not None + ): # Avoid float(None) which raises TypeError try: value = float(value) except ValueError: # This might occur if CAST to TEXT results in a string not convertible to float, # though float() handles 'Infinity', '-Infinity', 'NaN' from strings. # Log a warning and keep the value as a string in such edge cases. - print(f"Warning: Could not convert supposed numeric value '{value}' to float.") + print( + f"Warning: Could not convert supposed numeric value '{value}' to float." + ) # If value_type is 'string', value is already a string (or None if DB NULL). # If value was NULL in the database, it remains Python None for both types. 
all_points.append( From af3732ea90829c3c4d970cc0626a0f3e790d8a92 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:53:16 -0400 Subject: [PATCH 034/117] refactor: Return numeric and string tag values as separate columns --- src/cvec/cvec.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 7241af3..9d18b4e 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -119,35 +119,30 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Combined query for numeric and string data combined_query = f""" - SELECT tag_value_changed_at, CAST(tag_value AS TEXT) AS tag_value, 'numeric' AS value_type + SELECT + tag_value_changed_at, + tag_value AS value_double, + NULL::text AS value_string FROM {self.tenant}.tag_data WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) UNION ALL - SELECT tag_value_changed_at, tag_value, 'string' AS value_type + SELECT + tag_value_changed_at, + NULL::double precision AS value_double, + tag_value AS value_string FROM {self.tenant}.tag_data_str WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) ORDER BY tag_value_changed_at ASC """ cur.execute(combined_query, union_db_query_params) for row in cur.fetchall(): - value = row[ - "tag_value" - ] # This is TEXT due to CAST or original type - if row["value_type"] == "numeric": - if ( - value is not None - ): # Avoid float(None) which raises TypeError - try: - value = float(value) - except ValueError: - # This might occur if CAST to TEXT results in a string not convertible to float, - # though float() handles 'Infinity', '-Infinity', 'NaN' from strings. - # Log a warning and keep the value as a string in such edge cases. - print( - f"Warning: Could not convert supposed numeric value '{value}' to float." 
- ) - # If value_type is 'string', value is already a string (or None if DB NULL). - # If value was NULL in the database, it remains Python None for both types. + value = None + if row["value_double"] is not None: + value = row["value_double"] # psycopg2 converts to float + elif row["value_string"] is not None: + value = row["value_string"] # psycopg2 converts to string + # If both are None (e.g. original DB value was NULL), value remains None. + all_points.append( { "time": row["tag_value_changed_at"], From 643b85af8dee189b08e39190ceff7fcad44cdbe0 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:53:18 -0400 Subject: [PATCH 035/117] style: Apply linter to cvec.py --- src/cvec/cvec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 9d18b4e..a643775 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -140,7 +140,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): if row["value_double"] is not None: value = row["value_double"] # psycopg2 converts to float elif row["value_string"] is not None: - value = row["value_string"] # psycopg2 converts to string + value = row["value_string"] # psycopg2 converts to string # If both are None (e.g. original DB value was NULL), value remains None. all_points.append( From 162bd014b691ddf29c10e6df7174a195b3ee0739 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:55:26 -0400 Subject: [PATCH 036/117] feat: Add LIMIT clause to SQL query in get_spans method --- src/cvec/cvec.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index a643775..ec97774 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -106,16 +106,21 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # 2. 
Fetch data points from tag_data (numeric) and tag_data_str (text) all_points = [] - # Parameters for each part of the UNION ALL query. - # The tuple (tag_name_id, _start_at, _start_at, _end_at, _end_at) - # is repeated for the numeric and string parts of the query. + # Calculate the limit for the SQL query. + # We fetch limit + 1 points to correctly determine the end_at for the limit-th span. + # If limit is None or negative, sql_limit will be None (LIMIT NULL in SQL, meaning no limit). + sql_limit_value = None + if limit is not None and limit >= 0: + sql_limit_value = limit + 1 + + # Parameters for the database query. + # Each part of the UNION ALL gets the same WHERE clause parameters. + # The final sql_limit_value is for the LIMIT clause. union_db_query_params = ( - tag_name_id, - _start_at, - _start_at, - _end_at, - _end_at, - ) * 2 + tag_name_id, _start_at, _start_at, _end_at, _end_at, # For tag_data + tag_name_id, _start_at, _start_at, _end_at, _end_at, # For tag_data_str + sql_limit_value # For the final LIMIT clause + ) # Combined query for numeric and string data combined_query = f""" @@ -133,6 +138,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): FROM {self.tenant}.tag_data_str WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) ORDER BY tag_value_changed_at ASC + LIMIT %s """ cur.execute(combined_query, union_db_query_params) for row in cur.fetchall(): From d9b8b3ce76372b1cee4ffc87f7a1cc7154532d5f Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 08:55:27 -0400 Subject: [PATCH 037/117] style: Run linter on cvec.py --- src/cvec/cvec.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index ec97774..9de0ecc 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -117,9 +117,17 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Each part of the 
UNION ALL gets the same WHERE clause parameters. # The final sql_limit_value is for the LIMIT clause. union_db_query_params = ( - tag_name_id, _start_at, _start_at, _end_at, _end_at, # For tag_data - tag_name_id, _start_at, _start_at, _end_at, _end_at, # For tag_data_str - sql_limit_value # For the final LIMIT clause + tag_name_id, + _start_at, + _start_at, + _end_at, + _end_at, # For tag_data + tag_name_id, + _start_at, + _start_at, + _end_at, + _end_at, # For tag_data_str + sql_limit_value, # For the final LIMIT clause ) # Combined query for numeric and string data From d2bdccb34e63d0bbf5ceb41eb80ce9014cbe8bad Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 09:08:40 -0400 Subject: [PATCH 038/117] feat: Introduce Span class and update get_spans to use it. --- README.md | 6 +++-- src/cvec/__init__.py | 4 +-- src/cvec/cvec.py | 60 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 7bf3254..b576929 100644 --- a/README.md +++ b/README.md @@ -77,15 +77,17 @@ Setup the SDK with the given host and API Key. The host and API key are loaded f Return time spans for a tag. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). If `start_at` is `None` (e.g., not provided as an argument and no class default `default_start_at` is set), the query for value changes is unbounded at the start. Similarly, if `end_at` is `None`, the query is unbounded at the end. -Each span in the returned list represents a period where the tag's value is constant: +Each `Span` object in the returned list represents a period where the tag's value is constant and has the following attributes: - `value`: The tag's value during the span. - `tag_name`: The name of the tag. - `start_at`: The timestamp of the value change that initiated this span's value. 
This will be greater than or equal to the query's `start_at` if one was specified. - `raw_start_at`: Same as `start_at`. - `end_at`: The timestamp of the next value change for this tag, or the query's `end_at` parameter, whichever is earlier. If the query's `end_at` is not specified (i.e., `None`) and there is no subsequent value change found by the query, this field will be `None`, indicating the span continues indefinitely. - `raw_end_at`: The timestamp of the next value change for this tag found by the query. This will be `None` if no subsequent change is found within the query window (e.g., before the query's `end_at` or indefinitely if `end_at` is `None`). +- `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. +- `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. -Returns a list of spans. Each span has the following fields: {id, tag_name, value, start_at, end_at, raw_start_at, raw_end_at, metadata}. In a future version of the SDK, spans can be annotated, edited, and deleted. +Returns a list of `Span` objects. If no relevant value changes are found, an empty list is returned. ## `get_metric_data(?tag_names, ?start_at, ?end_at)` diff --git a/src/cvec/__init__.py b/src/cvec/__init__.py index e25fa13..9b74b04 100644 --- a/src/cvec/__init__.py +++ b/src/cvec/__init__.py @@ -1,3 +1,3 @@ -from .cvec import CVec +from .cvec import CVec, Span -__all__ = ["CVec"] +__all__ = ["CVec", "Span"] diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 9de0ecc..6aa5eac 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,9 +1,46 @@ import os +from datetime import datetime +from typing import Any, Optional, Union + import pandas as pd import psycopg2 from psycopg2.extras import RealDictCursor +class Span: + """ + Represents a time span where a tag has a constant value. 
+ """ + + def __init__( + self, + id: Optional[Any], + tag_name: str, + value: Optional[Union[float, str]], + start_at: datetime, + end_at: Optional[datetime], + raw_start_at: datetime, + raw_end_at: Optional[datetime], + metadata: Optional[Any], + ): + self.id = id + self.tag_name = tag_name + self.value = value + self.start_at = start_at + self.end_at = end_at + self.raw_start_at = raw_start_at + self.raw_end_at = raw_end_at + self.metadata = metadata + + def __repr__(self) -> str: + return ( + f"Span(id={self.id!r}, tag_name={self.tag_name!r}, value={self.value!r}, " + f"start_at={self.start_at!r}, end_at={self.end_at!r}, " + f"raw_start_at={self.raw_start_at!r}, raw_end_at={self.raw_end_at!r}, " + f"metadata={self.metadata!r})" + ) + + class CVec: """ CVec API Client @@ -82,7 +119,8 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): - `id`: Currently `None`. - `metadata`: Currently `None`. - Returns a list of dictionaries, where each dictionary represents a span. + Returns a list of Span objects. Each Span object has attributes corresponding + to the fields listed above. If no relevant value changes are found, an empty list is returned. The `limit` parameter restricts the number of spans returned. 
""" @@ -196,16 +234,16 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Add span if it has a positive duration or extends indefinitely (end_at is None) if span_actual_end is None or span_actual_start < span_actual_end: spans.append( - { - "id": None, - "tag_name": tag_name, - "value": current_value, - "start_at": span_actual_start, - "end_at": span_actual_end, - "raw_start_at": current_raw_start_at, - "raw_end_at": next_raw_event_at, - "metadata": None, - } + Span( + id=None, + tag_name=tag_name, + value=current_value, + start_at=span_actual_start, + end_at=span_actual_end, + raw_start_at=current_raw_start_at, + raw_end_at=next_raw_event_at, + metadata=None, + ) ) if ( From 573b5b7db032ade5f6c33a61ab3f911f7f471317 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 09:09:11 -0400 Subject: [PATCH 039/117] refactor: Move Span class to its own module --- src/cvec/__init__.py | 3 ++- src/cvec/cvec.py | 36 +----------------------------------- src/cvec/span.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 36 deletions(-) create mode 100644 src/cvec/span.py diff --git a/src/cvec/__init__.py b/src/cvec/__init__.py index 9b74b04..a8d6f75 100644 --- a/src/cvec/__init__.py +++ b/src/cvec/__init__.py @@ -1,3 +1,4 @@ -from .cvec import CVec, Span +from .cvec import CVec +from .span import Span __all__ = ["CVec", "Span"] diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 6aa5eac..bc5d68e 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,44 +1,10 @@ import os -from datetime import datetime -from typing import Any, Optional, Union import pandas as pd import psycopg2 from psycopg2.extras import RealDictCursor - -class Span: - """ - Represents a time span where a tag has a constant value. 
- """ - - def __init__( - self, - id: Optional[Any], - tag_name: str, - value: Optional[Union[float, str]], - start_at: datetime, - end_at: Optional[datetime], - raw_start_at: datetime, - raw_end_at: Optional[datetime], - metadata: Optional[Any], - ): - self.id = id - self.tag_name = tag_name - self.value = value - self.start_at = start_at - self.end_at = end_at - self.raw_start_at = raw_start_at - self.raw_end_at = raw_end_at - self.metadata = metadata - - def __repr__(self) -> str: - return ( - f"Span(id={self.id!r}, tag_name={self.tag_name!r}, value={self.value!r}, " - f"start_at={self.start_at!r}, end_at={self.end_at!r}, " - f"raw_start_at={self.raw_start_at!r}, raw_end_at={self.raw_end_at!r}, " - f"metadata={self.metadata!r})" - ) +from .span import Span class CVec: diff --git a/src/cvec/span.py b/src/cvec/span.py new file mode 100644 index 0000000..878c988 --- /dev/null +++ b/src/cvec/span.py @@ -0,0 +1,36 @@ +from datetime import datetime +from typing import Any, Optional, Union + + +class Span: + """ + Represents a time span where a tag has a constant value. 
+ """ + + def __init__( + self, + id: Optional[Any], + tag_name: str, + value: Optional[Union[float, str]], + start_at: datetime, + end_at: Optional[datetime], + raw_start_at: datetime, + raw_end_at: Optional[datetime], + metadata: Optional[Any], + ): + self.id = id + self.tag_name = tag_name + self.value = value + self.start_at = start_at + self.end_at = end_at + self.raw_start_at = raw_start_at + self.raw_end_at = raw_end_at + self.metadata = metadata + + def __repr__(self) -> str: + return ( + f"Span(id={self.id!r}, tag_name={self.tag_name!r}, value={self.value!r}, " + f"start_at={self.start_at!r}, end_at={self.end_at!r}, " + f"raw_start_at={self.raw_start_at!r}, raw_end_at={self.raw_end_at!r}, " + f"metadata={self.metadata!r})" + ) From f9b95dee3e88520f2d77da7518182bffe678451b Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 09:19:07 -0400 Subject: [PATCH 040/117] refactor: Simplify value assignment in fetch_timeseries_data --- src/cvec/cvec.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index bc5d68e..18f492c 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -154,13 +154,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ cur.execute(combined_query, union_db_query_params) for row in cur.fetchall(): - value = None - if row["value_double"] is not None: - value = row["value_double"] # psycopg2 converts to float - elif row["value_string"] is not None: - value = row["value_string"] # psycopg2 converts to string - # If both are None (e.g. original DB value was NULL), value remains None. 
- + value = row["value_double"] if row["value_double"] is not None else row["value_string"] all_points.append( { "time": row["tag_value_changed_at"], @@ -168,9 +162,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): } ) - # Sort all collected points by time - all_points.sort(key=lambda p: p["time"]) - if not all_points: return [] From 8bc5f83be5bc5a1ef0aa724475b03ef27d5f5890 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 09:20:18 -0400 Subject: [PATCH 041/117] refactor: Use list comprehension for building all_points --- src/cvec/cvec.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 18f492c..2bb464c 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -153,14 +153,13 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): LIMIT %s """ cur.execute(combined_query, union_db_query_params) - for row in cur.fetchall(): - value = row["value_double"] if row["value_double"] is not None else row["value_string"] - all_points.append( - { - "time": row["tag_value_changed_at"], - "value": value, - } - ) + all_points = [ + { + "time": row["tag_value_changed_at"], + "value": row["value_double"] if row["value_double"] is not None else row["value_string"], + } + for row in cur.fetchall() + ] if not all_points: return [] From 9aec4262d36c35aec0e81d1346a4808db3dddc1c Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 09:20:20 -0400 Subject: [PATCH 042/117] style: Apply linter to fix code formatting issues --- src/cvec/cvec.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 2bb464c..0bb9863 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -156,7 +156,11 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): all_points = [ { "time": row["tag_value_changed_at"], - "value": row["value_double"] if row["value_double"] is 
not None else row["value_string"], + "value": ( + row["value_double"] + if row["value_double"] is not None + else row["value_string"] + ), } for row in cur.fetchall() ] From 0253771bc3cde91039cbd10f34a05026e5c216cc Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 14:01:37 -0400 Subject: [PATCH 043/117] feat: Improve span retrieval with limit and tenant handling --- src/cvec/cvec.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 0bb9863..d6b6117 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -107,12 +107,8 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): return [] # Tag not found tag_name_id = tag_row["id"] - # 2. Fetch data points from tag_data (numeric) and tag_data_str (text) - all_points = [] - - # Calculate the limit for the SQL query. - # We fetch limit + 1 points to correctly determine the end_at for the limit-th span. - # If limit is None or negative, sql_limit will be None (LIMIT NULL in SQL, meaning no limit). + # Fetch limit + 1 points to correctly determine the end_at for the limit-th span. + # If limit is None or negative, sql_limit will be None (LIMIT NULL in PostgreSQL, meaning no limit). 
sql_limit_value = None if limit is not None and limit >= 0: sql_limit_value = limit + 1 @@ -140,14 +136,14 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): tag_value_changed_at, tag_value AS value_double, NULL::text AS value_string - FROM {self.tenant}.tag_data + FROM tag_data WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) UNION ALL SELECT tag_value_changed_at, NULL::double precision AS value_double, tag_value AS value_string - FROM {self.tenant}.tag_data_str + FROM tag_data_str WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) ORDER BY tag_value_changed_at ASC LIMIT %s @@ -165,9 +161,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): for row in cur.fetchall() ] - if not all_points: - return [] - spans = [] # 3. Construct spans for i, point in enumerate(all_points): @@ -205,12 +198,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): metadata=None, ) ) - - if ( - limit is not None and limit >= 0 - ): # allow limit=0 to return empty list - spans = spans[:limit] - return spans finally: if conn: From 727ab2b77a2a5b74073e5ccddd45f0bad691eeea Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:01:38 -0400 Subject: [PATCH 044/117] feat: Upgrade to psycopg3 and update connection logic. 
--- pyproject.toml | 2 +- src/cvec/cvec.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8ad6422..f9f67c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" requires-python = ">=3.9" dependencies = [ "pandas (>=2.2.3,<3.0.0)", - "psycopg2-binary (>=2.9.10,<3.0.0)" + "psycopg (>=3.1.0,<4.0.0)" # Assuming a recent version of psycopg3 ] [tool.poetry] diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index d6b6117..e9d1ca0 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,8 +1,8 @@ import os import pandas as pd -import psycopg2 -from psycopg2.extras import RealDictCursor +import psycopg +from psycopg.rows import dict_row from .span import Span @@ -49,16 +49,16 @@ def __init__( def _get_db_connection(self): """Helper method to establish a database connection.""" try: - # psycopg2 defaults to using the username as dbname if not specified. - # Here, self.tenant is used for both user and (implicitly) dbname. - conn = psycopg2.connect( + # psycopg3 connection string uses 'user', 'password', 'host', 'dbname' + conn = psycopg.connect( user=self.tenant, password=self.api_key, host=self.host, - # dbname=self.tenant # Implicitly self.tenant if not provided + dbname=self.tenant, # Explicitly set dbname for clarity with psycopg3 + row_factory=dict_row # Set row_factory at connection level ) return conn - except psycopg2.Error as e: + except psycopg.Error as e: # Consider logging this error or raising a custom exception print(f"Database connection error: {e}") raise @@ -96,7 +96,9 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): conn = None try: conn = self._get_db_connection() - with conn.cursor(cursor_factory=RealDictCursor) as cur: + # In psycopg3, if row_factory is set at connection, cursors inherit it. + # Otherwise, cur = conn.cursor(row_factory=dict_row) + with conn.cursor() as cur: # 1. 
Get tag_name_id query_tag_id = ( f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %s" From 0be9980551042be99397f794a24beebde1af57e7 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:01:39 -0400 Subject: [PATCH 045/117] style: Apply linting to cvec.py --- src/cvec/cvec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index e9d1ca0..6e7e1bd 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -55,7 +55,7 @@ def _get_db_connection(self): password=self.api_key, host=self.host, dbname=self.tenant, # Explicitly set dbname for clarity with psycopg3 - row_factory=dict_row # Set row_factory at connection level + row_factory=dict_row, # Set row_factory at connection level ) return conn except psycopg.Error as e: From 6f18d33f17c8bdd1cab91b18692d84fbcabe870c Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:11:59 -0400 Subject: [PATCH 046/117] refactor: Use named parameters in SQL queries for get_spans method --- src/cvec/cvec.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 6e7e1bd..9e3028a 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -101,9 +101,9 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): with conn.cursor() as cur: # 1. Get tag_name_id query_tag_id = ( - f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %s" + f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %(tag_name)s" ) - cur.execute(query_tag_id, (tag_name,)) + cur.execute(query_tag_id, {"tag_name": tag_name}) tag_row = cur.fetchone() if not tag_row: return [] # Tag not found @@ -118,19 +118,12 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Parameters for the database query. # Each part of the UNION ALL gets the same WHERE clause parameters. 
# The final sql_limit_value is for the LIMIT clause. - union_db_query_params = ( - tag_name_id, - _start_at, - _start_at, - _end_at, - _end_at, # For tag_data - tag_name_id, - _start_at, - _start_at, - _end_at, - _end_at, # For tag_data_str - sql_limit_value, # For the final LIMIT clause - ) + union_db_query_params = { + "tag_name_id": tag_name_id, + "start_at": _start_at, + "end_at": _end_at, + "limit": sql_limit_value, + } # Combined query for numeric and string data combined_query = f""" @@ -139,16 +132,16 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): tag_value AS value_double, NULL::text AS value_string FROM tag_data - WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) + WHERE tag_name_id = %(tag_name_id)s AND (tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) UNION ALL SELECT tag_value_changed_at, NULL::double precision AS value_double, tag_value AS value_string FROM tag_data_str - WHERE tag_name_id = %s AND (tag_value_changed_at >= %s OR %s IS NULL) AND (tag_value_changed_at < %s OR %s IS NULL) + WHERE tag_name_id = %(tag_name_id)s AND (tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) ORDER BY tag_value_changed_at ASC - LIMIT %s + LIMIT %(limit)s """ cur.execute(combined_query, union_db_query_params) all_points = [ From 6449b786288ae7e82d115120ce69da69a8a9e8f3 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:12:01 -0400 Subject: [PATCH 047/117] style: Apply linting to cvec.py --- src/cvec/cvec.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 9e3028a..2ffee1c 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -100,9 +100,7 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Otherwise, cur = 
conn.cursor(row_factory=dict_row) with conn.cursor() as cur: # 1. Get tag_name_id - query_tag_id = ( - f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %(tag_name)s" - ) + query_tag_id = f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %(tag_name)s" cur.execute(query_tag_id, {"tag_name": tag_name}) tag_row = cur.fetchone() if not tag_row: From 7cc9808400673123209578e1b55a81cd9901f455 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 14:19:13 -0400 Subject: [PATCH 048/117] refactor: Simplify database connection in CVec class --- src/cvec/cvec.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 2ffee1c..a6d87c1 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -48,20 +48,13 @@ def __init__( def _get_db_connection(self): """Helper method to establish a database connection.""" - try: - # psycopg3 connection string uses 'user', 'password', 'host', 'dbname' - conn = psycopg.connect( + return psycopg.connect( user=self.tenant, password=self.api_key, host=self.host, - dbname=self.tenant, # Explicitly set dbname for clarity with psycopg3 - row_factory=dict_row, # Set row_factory at connection level + dbname=self.tenant, + row_factory=dict_row, ) - return conn - except psycopg.Error as e: - # Consider logging this error or raising a custom exception - print(f"Database connection error: {e}") - raise def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ @@ -96,8 +89,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): conn = None try: conn = self._get_db_connection() - # In psycopg3, if row_factory is set at connection, cursors inherit it. - # Otherwise, cur = conn.cursor(row_factory=dict_row) with conn.cursor() as cur: # 1. 
Get tag_name_id query_tag_id = f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %(tag_name)s" From 434197dc5da71901304992ae504cb70bc7e0b940 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:19:14 -0400 Subject: [PATCH 049/117] perf: Improve get_spans query by joining tag_names table. --- src/cvec/cvec.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index a6d87c1..9aeaf1a 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -90,14 +90,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): try: conn = self._get_db_connection() with conn.cursor() as cur: - # 1. Get tag_name_id - query_tag_id = f"SELECT id FROM {self.tenant}.tag_names WHERE normalized_name = %(tag_name)s" - cur.execute(query_tag_id, {"tag_name": tag_name}) - tag_row = cur.fetchone() - if not tag_row: - return [] # Tag not found - tag_name_id = tag_row["id"] - # Fetch limit + 1 points to correctly determine the end_at for the limit-th span. # If limit is None or negative, sql_limit will be None (LIMIT NULL in PostgreSQL, meaning no limit). sql_limit_value = None @@ -107,8 +99,8 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Parameters for the database query. # Each part of the UNION ALL gets the same WHERE clause parameters. # The final sql_limit_value is for the LIMIT clause. 
- union_db_query_params = { - "tag_name_id": tag_name_id, + query_params = { + "tag_name": tag_name, "start_at": _start_at, "end_at": _end_at, "limit": sql_limit_value, @@ -117,22 +109,24 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): # Combined query for numeric and string data combined_query = f""" SELECT - tag_value_changed_at, - tag_value AS value_double, + td.tag_value_changed_at, + td.tag_value AS value_double, NULL::text AS value_string - FROM tag_data - WHERE tag_name_id = %(tag_name_id)s AND (tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) + FROM {self.tenant}.tag_data td + JOIN {self.tenant}.tag_names tn ON td.tag_name_id = tn.id + WHERE tn.normalized_name = %(tag_name)s AND (td.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (td.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) UNION ALL SELECT - tag_value_changed_at, + tds.tag_value_changed_at, NULL::double precision AS value_double, - tag_value AS value_string - FROM tag_data_str - WHERE tag_name_id = %(tag_name_id)s AND (tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) + tds.tag_value AS value_string + FROM {self.tenant}.tag_data_str tds + JOIN {self.tenant}.tag_names tn ON tds.tag_name_id = tn.id + WHERE tn.normalized_name = %(tag_name)s AND (tds.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tds.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) ORDER BY tag_value_changed_at ASC LIMIT %(limit)s """ - cur.execute(combined_query, union_db_query_params) + cur.execute(combined_query, query_params) all_points = [ { "time": row["tag_value_changed_at"], From 1dc9306b681a5725c10ebe53ff9043699823c48c Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:19:15 -0400 Subject: [PATCH 050/117] style: Apply code formatting to cvec.py --- src/cvec/cvec.py | 12 
++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 9aeaf1a..6129a8d 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -49,12 +49,12 @@ def __init__( def _get_db_connection(self): """Helper method to establish a database connection.""" return psycopg.connect( - user=self.tenant, - password=self.api_key, - host=self.host, - dbname=self.tenant, - row_factory=dict_row, - ) + user=self.tenant, + password=self.api_key, + host=self.host, + dbname=self.tenant, + row_factory=dict_row, + ) def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ From 91561a2e9cb730cf8346f13f919b92187371e73b Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 14:30:07 -0400 Subject: [PATCH 051/117] refactor: Simplify span creation and query parameters in CVec --- src/cvec/cvec.py | 69 +++++++++++++++--------------------------------- 1 file changed, 22 insertions(+), 47 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 6129a8d..f7fe9b7 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -90,38 +90,32 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): try: conn = self._get_db_connection() with conn.cursor() as cur: - # Fetch limit + 1 points to correctly determine the end_at for the limit-th span. - # If limit is None or negative, sql_limit will be None (LIMIT NULL in PostgreSQL, meaning no limit). - sql_limit_value = None - if limit is not None and limit >= 0: - sql_limit_value = limit + 1 - - # Parameters for the database query. - # Each part of the UNION ALL gets the same WHERE clause parameters. - # The final sql_limit_value is for the LIMIT clause. query_params = { "tag_name": tag_name, "start_at": _start_at, "end_at": _end_at, - "limit": sql_limit_value, + + # Fetch limit + 1 points to correctly determine the end_at for the limit-th + # span. 
If limit is None or negative, sql_limit will be None (LIMIT NULL in + # PostgreSQL, meaning no limit). + "limit": limit + 1 if limit is not None and limit >= 0 else None, } - # Combined query for numeric and string data combined_query = f""" SELECT td.tag_value_changed_at, td.tag_value AS value_double, NULL::text AS value_string - FROM {self.tenant}.tag_data td - JOIN {self.tenant}.tag_names tn ON td.tag_name_id = tn.id + FROM tag_data td + JOIN tag_names tn ON td.tag_name_id = tn.id WHERE tn.normalized_name = %(tag_name)s AND (td.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (td.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) UNION ALL SELECT tds.tag_value_changed_at, NULL::double precision AS value_double, tds.tag_value AS value_string - FROM {self.tenant}.tag_data_str tds - JOIN {self.tenant}.tag_names tn ON tds.tag_name_id = tn.id + FROM tag_data_str tds + JOIN tag_names tn ON tds.tag_name_id = tn.id WHERE tn.normalized_name = %(tag_name)s AND (tds.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tds.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) ORDER BY tag_value_changed_at ASC LIMIT %(limit)s @@ -140,42 +134,23 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): ] spans = [] - # 3. Construct spans for i, point in enumerate(all_points): current_raw_start_at = point["time"] current_value = point["value"] - - span_actual_start = current_raw_start_at - - next_raw_event_at = None - if i + 1 < len(all_points): - next_raw_event_at = all_points[i + 1]["time"] - - if next_raw_event_at is not None: - # If _end_at is specified, cap the span by it. Otherwise, span ends at next event. 
- span_actual_end = ( - min(next_raw_event_at, _end_at) - if _end_at is not None - else next_raw_event_at - ) - else: - # No next event, so span extends to _end_at (which can be None if query is unbounded) - span_actual_end = _end_at - - # Add span if it has a positive duration or extends indefinitely (end_at is None) - if span_actual_end is None or span_actual_start < span_actual_end: - spans.append( - Span( - id=None, - tag_name=tag_name, - value=current_value, - start_at=span_actual_start, - end_at=span_actual_end, - raw_start_at=current_raw_start_at, - raw_end_at=next_raw_event_at, - metadata=None, - ) + next_raw_event_at = all_points[i + 1]["time"] if i + 1 < len(all_points) else None + + spans.append( + Span( + id=None, + tag_name=tag_name, + value=current_value, + start_at=current_raw_start_at, # TODO: lookup span override start_at + end_at=next_raw_event_at, # TODO: lookup span override end_at + raw_start_at=current_raw_start_at, + raw_end_at=next_raw_event_at, + metadata=None, ) + ) return spans finally: if conn: From 3d13b230fb1bb4063ab42d5a927b97f879b41b11 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:30:08 -0400 Subject: [PATCH 052/117] refactor: Use list comprehension to build spans list in get_spans --- src/cvec/cvec.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index f7fe9b7..f9bfe64 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -133,24 +133,19 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): for row in cur.fetchall() ] - spans = [] - for i, point in enumerate(all_points): - current_raw_start_at = point["time"] - current_value = point["value"] - next_raw_event_at = all_points[i + 1]["time"] if i + 1 < len(all_points) else None - - spans.append( - Span( - id=None, - tag_name=tag_name, - value=current_value, - start_at=current_raw_start_at, # TODO: lookup span override start_at - 
end_at=next_raw_event_at, # TODO: lookup span override end_at - raw_start_at=current_raw_start_at, - raw_end_at=next_raw_event_at, - metadata=None, - ) + spans = [ + Span( + id=None, + tag_name=tag_name, + value=point["value"], + start_at=point["time"], # TODO: lookup span override start_at + end_at=all_points[i + 1]["time"] if i + 1 < len(all_points) else None, # TODO: lookup span override end_at + raw_start_at=point["time"], + raw_end_at=all_points[i + 1]["time"] if i + 1 < len(all_points) else None, + metadata=None, ) + for i, point in enumerate(all_points) + ] return spans finally: if conn: From 2e7b133f951e1c68ed9dc4ebccef71f2344383c7 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:30:10 -0400 Subject: [PATCH 053/117] style: Apply code formatting with black --- src/cvec/cvec.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index f9bfe64..1d9f42a 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -94,7 +94,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): "tag_name": tag_name, "start_at": _start_at, "end_at": _end_at, - # Fetch limit + 1 points to correctly determine the end_at for the limit-th # span. If limit is None or negative, sql_limit will be None (LIMIT NULL in # PostgreSQL, meaning no limit). 
@@ -139,9 +138,17 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): tag_name=tag_name, value=point["value"], start_at=point["time"], # TODO: lookup span override start_at - end_at=all_points[i + 1]["time"] if i + 1 < len(all_points) else None, # TODO: lookup span override end_at + end_at=( + all_points[i + 1]["time"] + if i + 1 < len(all_points) + else None + ), # TODO: lookup span override end_at raw_start_at=point["time"], - raw_end_at=all_points[i + 1]["time"] if i + 1 < len(all_points) else None, + raw_end_at=( + all_points[i + 1]["time"] + if i + 1 < len(all_points) + else None + ), metadata=None, ) for i, point in enumerate(all_points) From 8df38db24c7bc220fe44eda95c1164a42f4f558f Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:34:08 -0400 Subject: [PATCH 054/117] refactor: Remove start_at and end_at fields from Span class --- README.md | 6 ++---- src/cvec/cvec.py | 15 +++------------ src/cvec/span.py | 5 ----- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index b576929..cdf0fa6 100644 --- a/README.md +++ b/README.md @@ -80,10 +80,8 @@ If `start_at` is `None` (e.g., not provided as an argument and no class default Each `Span` object in the returned list represents a period where the tag's value is constant and has the following attributes: - `value`: The tag's value during the span. - `tag_name`: The name of the tag. -- `start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. -- `raw_start_at`: Same as `start_at`. -- `end_at`: The timestamp of the next value change for this tag, or the query's `end_at` parameter, whichever is earlier. If the query's `end_at` is not specified (i.e., `None`) and there is no subsequent value change found by the query, this field will be `None`, indicating the span continues indefinitely. 
-- `raw_end_at`: The timestamp of the next value change for this tag found by the query. This will be `None` if no subsequent change is found within the query window (e.g., before the query's `end_at` or indefinitely if `end_at` is `None`). +- `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. +- `raw_end_at`: The timestamp of the next value change for this tag found by the query. This will be `None` if no subsequent change is found within the query window (e.g., before the query's `end_at` or indefinitely if `end_at` is `None`). If the query's `end_at` is not specified (i.e., `None`) and there is no subsequent value change found by the query, this field will be `None`, indicating the span continues indefinitely. - `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. - `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 1d9f42a..7beda92 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -66,15 +66,12 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): Each span represents a period where the tag's value is constant. - `value`: The tag's value during the span. - `tag_name`: The name of the tag. - - `start_at`: The timestamp of the value change that initiated this span's value. + - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be >= `_start_at` if `_start_at` was specified. - - `raw_start_at`: Same as `start_at`. - - `end_at`: The timestamp of the next value change for this tag, or the - query's `_end_at` parameter, whichever is earlier. If the query's `_end_at` - is `None` and there is no subsequent value change, this field will be `None`, - indicating the span continues indefinitely. 
- `raw_end_at`: The timestamp of the next value change for this tag found by the query. `None` if no subsequent change is found within the query window. + If the query's `_end_at` is `None` and there is no subsequent value change, + this field will be `None`, indicating the span continues indefinitely. - `id`: Currently `None`. - `metadata`: Currently `None`. @@ -137,12 +134,6 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): id=None, tag_name=tag_name, value=point["value"], - start_at=point["time"], # TODO: lookup span override start_at - end_at=( - all_points[i + 1]["time"] - if i + 1 < len(all_points) - else None - ), # TODO: lookup span override end_at raw_start_at=point["time"], raw_end_at=( all_points[i + 1]["time"] diff --git a/src/cvec/span.py b/src/cvec/span.py index 878c988..eebcd58 100644 --- a/src/cvec/span.py +++ b/src/cvec/span.py @@ -12,8 +12,6 @@ def __init__( id: Optional[Any], tag_name: str, value: Optional[Union[float, str]], - start_at: datetime, - end_at: Optional[datetime], raw_start_at: datetime, raw_end_at: Optional[datetime], metadata: Optional[Any], @@ -21,8 +19,6 @@ def __init__( self.id = id self.tag_name = tag_name self.value = value - self.start_at = start_at - self.end_at = end_at self.raw_start_at = raw_start_at self.raw_end_at = raw_end_at self.metadata = metadata @@ -30,7 +26,6 @@ def __init__( def __repr__(self) -> str: return ( f"Span(id={self.id!r}, tag_name={self.tag_name!r}, value={self.value!r}, " - f"start_at={self.start_at!r}, end_at={self.end_at!r}, " f"raw_start_at={self.raw_start_at!r}, raw_end_at={self.raw_end_at!r}, " f"metadata={self.metadata!r})" ) From 56e5b5926559a42ff45adc58a5f7acb3093cbd83 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 14:36:34 -0400 Subject: [PATCH 055/117] refactor: Eliminate all_points list in get_spans method --- src/cvec/cvec.py | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git 
a/src/cvec/cvec.py b/src/cvec/cvec.py index 7beda92..0158b96 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -117,32 +117,26 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): LIMIT %(limit)s """ cur.execute(combined_query, query_params) - all_points = [ - { - "time": row["tag_value_changed_at"], - "value": ( - row["value_double"] - if row["value_double"] is not None - else row["value_string"] - ), - } - for row in cur.fetchall() - ] + db_rows = cur.fetchall() spans = [ Span( id=None, tag_name=tag_name, - value=point["value"], - raw_start_at=point["time"], + value=( + row["value_double"] + if row["value_double"] is not None + else row["value_string"] + ), + raw_start_at=row["tag_value_changed_at"], raw_end_at=( - all_points[i + 1]["time"] - if i + 1 < len(all_points) + db_rows[i + 1]["tag_value_changed_at"] + if i + 1 < len(db_rows) else None ), metadata=None, ) - for i, point in enumerate(all_points) + for i, row in enumerate(db_rows) ] return spans finally: From daca04a680399985b460170df8ee3ed5929dbefc Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 15:56:22 -0400 Subject: [PATCH 056/117] feat: Return spans in descending order, newest first --- README.md | 4 +-- src/cvec/cvec.py | 74 ++++++++++++++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index cdf0fa6..01e4fa9 100644 --- a/README.md +++ b/README.md @@ -81,11 +81,11 @@ Each `Span` object in the returned list represents a period where the tag's valu - `value`: The tag's value during the span. - `tag_name`: The name of the tag. - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. -- `raw_end_at`: The timestamp of the next value change for this tag found by the query. 
This will be `None` if no subsequent change is found within the query window (e.g., before the query's `end_at` or indefinitely if `end_at` is `None`). If the query's `end_at` is not specified (i.e., `None`) and there is no subsequent value change found by the query, this field will be `None`, indicating the span continues indefinitely. +- `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span (first in the returned list), this is the query's `end_at` parameter (if specified, otherwise `None`). For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. If the query's `end_at` is not specified and it's the newest span based on available data, this field will be `None`, indicating the span continues indefinitely. - `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. - `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. -Returns a list of `Span` objects. +Returns a list of `Span` objects, sorted in descending chronological order (newest span first). If no relevant value changes are found, an empty list is returned. ## `get_metric_data(?tag_names, ?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 0158b96..cd44416 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -68,15 +68,16 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): - `tag_name`: The name of the tag. - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be >= `_start_at` if `_start_at` was specified. - - `raw_end_at`: The timestamp of the next value change for this tag found by - the query. `None` if no subsequent change is found within the query window. - If the query's `_end_at` is `None` and there is no subsequent value change, + - `raw_end_at`: The timestamp marking the end of this span's constant value. 
+ For the newest span, this is the query's `_end_at` (if specified, else `None`). + For other spans, it's the `raw_start_at` of the next newer span. + If `_end_at` is `None` and it's the newest span based on available data, this field will be `None`, indicating the span continues indefinitely. - `id`: Currently `None`. - `metadata`: Currently `None`. - Returns a list of Span objects. Each Span object has attributes corresponding - to the fields listed above. + Returns a list of Span objects, sorted in descending chronological order (newest span first). + Each Span object has attributes corresponding to the fields listed above. If no relevant value changes are found, an empty list is returned. The `limit` parameter restricts the number of spans returned. """ @@ -91,10 +92,9 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): "tag_name": tag_name, "start_at": _start_at, "end_at": _end_at, - # Fetch limit + 1 points to correctly determine the end_at for the limit-th - # span. If limit is None or negative, sql_limit will be None (LIMIT NULL in - # PostgreSQL, meaning no limit). - "limit": limit + 1 if limit is not None and limit >= 0 else None, + # Fetch up to 'limit' points. If limit is None or negative, + # sql_limit will be None (LIMIT NULL in PostgreSQL, meaning no limit). 
+ "limit": limit if limit is not None and limit >= 0 else None, } combined_query = f""" @@ -113,31 +113,49 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): FROM tag_data_str tds JOIN tag_names tn ON tds.tag_name_id = tn.id WHERE tn.normalized_name = %(tag_name)s AND (tds.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tds.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) - ORDER BY tag_value_changed_at ASC + ORDER BY tag_value_changed_at DESC LIMIT %(limit)s """ cur.execute(combined_query, query_params) db_rows = cur.fetchall() - spans = [ - Span( - id=None, - tag_name=tag_name, - value=( - row["value_double"] - if row["value_double"] is not None - else row["value_string"] - ), - raw_start_at=row["tag_value_changed_at"], - raw_end_at=( - db_rows[i + 1]["tag_value_changed_at"] - if i + 1 < len(db_rows) - else None - ), - metadata=None, + spans = [] + num_points_fetched = len(db_rows) + + # Determine how many spans to create based on user's limit and fetched points + # If limit (from get_spans arg) is None, create spans for all fetched points. + # Otherwise, create at most 'limit' spans. + count_spans_to_create = num_points_fetched + if limit is not None and limit >= 0: + count_spans_to_create = min(limit, num_points_fetched) + + for i in range(count_spans_to_create): + current_row = db_rows[i] + raw_start_at = current_row["tag_value_changed_at"] + value = ( + current_row["value_double"] + if current_row["value_double"] is not None + else current_row["value_string"] + ) + + if i == 0: + # This is the newest point fetched. Its span ends at _end_at (if specified) + # or continues indefinitely if _end_at is None. + raw_end_at = _end_at + else: + # Span ends when the chronologically next point (which is previous in DESC list) started. 
+ raw_end_at = db_rows[i - 1]["tag_value_changed_at"] + + spans.append( + Span( + id=None, + tag_name=tag_name, + value=value, + raw_start_at=raw_start_at, + raw_end_at=raw_end_at, + metadata=None, + ) ) - for i, row in enumerate(db_rows) - ] return spans finally: if conn: From d701c5b083aab012d9eef68a97a76f3586698ff1 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 16:20:17 -0400 Subject: [PATCH 057/117] fix: Correctly handle limit and end_at in get_spans query. --- poetry.lock | 94 +++++++++++------------------------------------- src/cvec/cvec.py | 40 ++++++--------------- 2 files changed, 31 insertions(+), 103 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8abdf10..9146fba 100644 --- a/poetry.lock +++ b/poetry.lock @@ -402,83 +402,29 @@ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] [[package]] -name = "psycopg2-binary" -version = "2.9.10" -description = "psycopg2 - Python-PostgreSQL Database Adapter" +name = "psycopg" +version = "3.2.9" +description = "PostgreSQL database adapter for Python" optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3e9c76f0ac6f92ecfc79516a8034a544926430f7b080ec5a0537bca389ee0906"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ad26b467a405c798aaa1458ba09d7e2b6e5f96b1ce0ac15d82fd9f95dc38a92"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:270934a475a0e4b6925b5f804e3809dd5f90f8613621d062848dd82f9cd62007"}, - {file = 
"psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48b338f08d93e7be4ab2b5f1dbe69dc5e9ef07170fe1f86514422076d9c010d0"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4152f8f76d2023aac16285576a9ecd2b11a9895373a1f10fd9db54b3ff06b4"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32581b3020c72d7a421009ee1c6bf4a131ef5f0a968fab2e2de0c9d2bb4577f1"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2ce3e21dc3437b1d960521eca599d57408a695a0d3c26797ea0f72e834c7ffe5"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e984839e75e0b60cfe75e351db53d6db750b00de45644c5d1f7ee5d1f34a1ce5"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c4745a90b78e51d9ba06e2088a2fe0c693ae19cc8cb051ccda44e8df8a6eb53"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-win32.whl", hash = "sha256:e5720a5d25e3b99cd0dc5c8a440570469ff82659bb09431c1439b92caf184d3b"}, - {file = "psycopg2_binary-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:3c18f74eb4386bf35e92ab2354a12c17e5eb4d9798e4c0ad3a00783eae7cd9f1"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:04392983d0bb89a8717772a193cfaac58871321e3ec69514e1c4e0d4957b5aff"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1a6784f0ce3fec4edc64e985865c17778514325074adf5ad8f80636cd029ef7c"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5f86c56eeb91dc3135b3fd8a95dc7ae14c538a2f3ad77a19645cf55bab1799c"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b3d2491d4d78b6b14f76881905c7a8a8abcf974aad4a8a0b065273a0ed7a2cb"}, - {file = 
"psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2286791ececda3a723d1910441c793be44625d86d1a4e79942751197f4d30341"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:512d29bb12608891e349af6a0cccedce51677725a921c07dba6342beaf576f9a"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5a507320c58903967ef7384355a4da7ff3f28132d679aeb23572753cbf2ec10b"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6d4fa1079cab9018f4d0bd2db307beaa612b0d13ba73b5c6304b9fe2fb441ff7"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:851485a42dbb0bdc1edcdabdb8557c09c9655dfa2ca0460ff210522e073e319e"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:35958ec9e46432d9076286dda67942ed6d968b9c3a6a2fd62b48939d1d78bf68"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-win32.whl", hash = "sha256:ecced182e935529727401b24d76634a357c71c9275b356efafd8a2a91ec07392"}, - {file = "psycopg2_binary-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:ee0e8c683a7ff25d23b55b11161c2663d4b099770f6085ff0a20d4505778d6b4"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526"}, - {file = 
"psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64"}, - {file = "psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d"}, - {file = 
"psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567"}, - {file = "psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:eb09aa7f9cecb45027683bb55aebaaf45a0df8bf6de68801a6afdc7947bb09d4"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b73d6d7f0ccdad7bc43e6d34273f70d587ef62f824d7261c4ae9b8b1b6af90e8"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce5ab4bf46a211a8e924d307c1b1fcda82368586a19d0a24f8ae166f5c784864"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:056470c3dc57904bbf63d6f534988bafc4e970ffd50f6271fc4ee7daad9498a5"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aa0e31fa4bb82578f3a6c74a73c273367727de397a7a0f07bd83cbea696baa"}, - {file = 
"psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8de718c0e1c4b982a54b41779667242bc630b2197948405b7bd8ce16bcecac92"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5c370b1e4975df846b0277b4deba86419ca77dbc25047f535b0bb03d1a544d44"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ffe8ed017e4ed70f68b7b371d84b7d4a790368db9203dfc2d222febd3a9c8863"}, - {file = "psycopg2_binary-2.9.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:8aecc5e80c63f7459a1a2ab2c64df952051df196294d9f739933a9f6687e86b3"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:7a813c8bdbaaaab1f078014b9b0b13f5de757e2b5d9be6403639b298a04d218b"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d00924255d7fc916ef66e4bf22f354a940c67179ad3fd7067d7a0a9c84d2fbfc"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7559bce4b505762d737172556a4e6ea8a9998ecac1e39b5233465093e8cee697"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8b58f0a96e7a1e341fc894f62c1177a7c83febebb5ff9123b579418fdc8a481"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b269105e59ac96aba877c1707c600ae55711d9dcd3fc4b5012e4af68e30c648"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:79625966e176dc97ddabc142351e0409e28acf4660b88d1cf6adb876d20c490d"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8aabf1c1a04584c168984ac678a668094d831f152859d06e055288fa515e4d30"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:19721ac03892001ee8fdd11507e6a2e01f4e37014def96379411ca99d78aeb2c"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash 
= "sha256:7f5d859928e635fa3ce3477704acee0f667b3a3d3e4bb109f2b18d4005f38287"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-win32.whl", hash = "sha256:3216ccf953b3f267691c90c6fe742e45d890d8272326b4a8b20850a03d05b7b8"}, - {file = "psycopg2_binary-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:30e34c4e97964805f715206c7b789d54a78b70f3ff19fbe590104b71c45600e5"}, + {file = "psycopg-3.2.9-py3-none-any.whl", hash = "sha256:01a8dadccdaac2123c916208c96e06631641c0566b22005493f09663c7a8d3b6"}, + {file = "psycopg-3.2.9.tar.gz", hash = "sha256:2fbb46fcd17bc81f993f28c47f1ebea38d66ae97cc2dbc3cad73b37cefbff700"}, ] +[package.dependencies] +typing-extensions = {version = ">=4.6", markers = "python_version < \"3.13\""} +tzdata = {version = "*", markers = "sys_platform == \"win32\""} + +[package.extras] +binary = ["psycopg-binary (==3.2.9) ; implementation_name != \"pypy\""] +c = ["psycopg-c (==3.2.9) ; implementation_name != \"pypy\""] +dev = ["ast-comments (>=1.1.2)", "black (>=24.1.0)", "codespell (>=2.2)", "dnspython (>=2.1)", "flake8 (>=4.0)", "isort-psycopg", "isort[colors] (>=6.0)", "mypy (>=1.14)", "pre-commit (>=4.0.1)", "types-setuptools (>=57.4)", "types-shapely (>=2.0)", "wheel (>=0.37)"] +docs = ["Sphinx (>=5.0)", "furo (==2022.6.21)", "sphinx-autobuild (>=2021.3.14)", "sphinx-autodoc-typehints (>=1.12)"] +pool = ["psycopg-pool"] +test = ["anyio (>=4.0)", "mypy (>=1.14)", "pproxy (>=2.7)", "pytest (>=6.2.5)", "pytest-cov (>=3.0)", "pytest-randomly (>=3.5)"] + [[package]] name = "pytest" version = "8.3.5" @@ -590,12 +536,12 @@ version = "4.13.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["dev"] -markers = "python_version < \"3.11\"" +groups = ["main", "dev"] files = [ {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, {file = "typing_extensions-4.13.2.tar.gz", hash = 
"sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, ] +markers = {main = "python_version < \"3.13\"", dev = "python_version < \"3.11\""} [[package]] name = "tzdata" @@ -612,4 +558,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.9" -content-hash = "1686f14aa63a712725dabaf7cea4d959a328bd3208c5475c082ca7e2386b00c0" +content-hash = "cf56356ec0efef18d3fe40d3434d59baf4795b37f1a2c2880a67aa5c55595c31" diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index cd44416..2745c14 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -92,9 +92,9 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): "tag_name": tag_name, "start_at": _start_at, "end_at": _end_at, - # Fetch up to 'limit' points. If limit is None or negative, - # sql_limit will be None (LIMIT NULL in PostgreSQL, meaning no limit). - "limit": limit if limit is not None and limit >= 0 else None, + # Fetch up to 'limit' points. If limit is None, then the `LIMIT NULL` clause + # has no effect (in PostgreSQL). + "limit": limit, } combined_query = f""" @@ -118,34 +118,14 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): """ cur.execute(combined_query, query_params) db_rows = cur.fetchall() - spans = [] - num_points_fetched = len(db_rows) - - # Determine how many spans to create based on user's limit and fetched points - # If limit (from get_spans arg) is None, create spans for all fetched points. - # Otherwise, create at most 'limit' spans. - count_spans_to_create = num_points_fetched - if limit is not None and limit >= 0: - count_spans_to_create = min(limit, num_points_fetched) - - for i in range(count_spans_to_create): - current_row = db_rows[i] - raw_start_at = current_row["tag_value_changed_at"] - value = ( - current_row["value_double"] - if current_row["value_double"] is not None - else current_row["value_string"] - ) - - if i == 0: - # This is the newest point fetched. 
Its span ends at _end_at (if specified) - # or continues indefinitely if _end_at is None. - raw_end_at = _end_at - else: - # Span ends when the chronologically next point (which is previous in DESC list) started. - raw_end_at = db_rows[i - 1]["tag_value_changed_at"] + # None indicates that the end time is not known; the span extends beyond + # the query period. + raw_end_at = None + for row in db_rows: + raw_start_at = row["tag_value_changed_at"] + value = row["value_double"] if row["value_double"] is not None else row["value_string"] spans.append( Span( id=None, @@ -156,6 +136,8 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): metadata=None, ) ) + raw_end_at = raw_start_at + return spans finally: if conn: From da4ad18009a8803c96e1b52ad8cf66a6d13fdf12 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:33:06 -0400 Subject: [PATCH 058/117] feat: Add type hints to Span class member fields --- src/cvec/span.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/cvec/span.py b/src/cvec/span.py index eebcd58..3a280c3 100644 --- a/src/cvec/span.py +++ b/src/cvec/span.py @@ -6,6 +6,12 @@ class Span: """ Represents a time span where a tag has a constant value. """ + id: Optional[Any] + tag_name: str + value: Optional[Union[float, str]] + raw_start_at: datetime + raw_end_at: Optional[datetime] + metadata: Optional[Any] def __init__( self, From 19163e9aae75473a6d1c948fc5ca45aadcfea583 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:33:08 -0400 Subject: [PATCH 059/117] style: Run linter on span.py --- src/cvec/span.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cvec/span.py b/src/cvec/span.py index 3a280c3..aaaf33e 100644 --- a/src/cvec/span.py +++ b/src/cvec/span.py @@ -6,6 +6,7 @@ class Span: """ Represents a time span where a tag has a constant value. 
""" + id: Optional[Any] tag_name: str value: Optional[Union[float, str]] From ff3c0d9822670dec8440a87c7060023c6fc867ca Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:33:28 -0400 Subject: [PATCH 060/117] feat: Add type hints to cvec.py --- src/cvec/cvec.py | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 2745c14..313ce9e 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -1,4 +1,6 @@ import os +from datetime import datetime +from typing import Any, List, Optional import pandas as pd import psycopg @@ -11,15 +13,20 @@ class CVec: """ CVec API Client """ + host: Optional[str] + tenant: Optional[str] + api_key: Optional[str] + default_start_at: Optional[datetime] + default_end_at: Optional[datetime] def __init__( self, - host=None, - tenant=None, - api_key=None, - default_start_at=None, - default_end_at=None, - ): + host: Optional[str] = None, + tenant: Optional[str] = None, + api_key: Optional[str] = None, + default_start_at: Optional[datetime] = None, + default_end_at: Optional[datetime] = None, + ) -> None: """ Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, @@ -46,7 +53,7 @@ def __init__( "CVEC_API_KEY must be set either as an argument or environment variable" ) - def _get_db_connection(self): + def _get_db_connection(self) -> psycopg.Connection: """Helper method to establish a database connection.""" return psycopg.connect( user=self.tenant, @@ -56,7 +63,13 @@ def _get_db_connection(self): row_factory=dict_row, ) - def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): + def get_spans( + self, + tag_name: str, + start_at: Optional[datetime] = None, + end_at: Optional[datetime] = None, + limit: Optional[int] = None, + ) -> List[Span]: """ Return time spans for a tag. 
Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). @@ -143,7 +156,12 @@ def get_spans(self, tag_name, start_at=None, end_at=None, limit=None): if conn: conn.close() - def get_metric_data(self, tag_names=None, start_at=None, end_at=None): + def get_metric_data( + self, + tag_names: Optional[List[str]] = None, + start_at: Optional[datetime] = None, + end_at: Optional[datetime] = None, + ) -> pd.DataFrame: """ Return all data-points within a given [start_at, end_at) interval, optionally selecting a given list of tags. @@ -153,7 +171,9 @@ def get_metric_data(self, tag_names=None, start_at=None, end_at=None): # Implementation to be added return pd.DataFrame(columns=["tag_name", "time", "value"]) - def get_tags(self, start_at=None, end_at=None): + def get_tags( + self, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None + ) -> List[Any]: # TODO: Define a Tag TypedDict or class """ Return a list of tags that had at least one transition in the given [start_at, end_at) interval. All tags are returned if no start_at and end_at are given. 
From 58c55c2dc3217857a9a9045e5a63be9d2eec90e6 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:33:29 -0400 Subject: [PATCH 061/117] style: Run linter on cvec.py --- src/cvec/cvec.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 313ce9e..dbaecda 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -13,6 +13,7 @@ class CVec: """ CVec API Client """ + host: Optional[str] tenant: Optional[str] api_key: Optional[str] @@ -138,7 +139,11 @@ def get_spans( raw_end_at = None for row in db_rows: raw_start_at = row["tag_value_changed_at"] - value = row["value_double"] if row["value_double"] is not None else row["value_string"] + value = ( + row["value_double"] + if row["value_double"] is not None + else row["value_string"] + ) spans.append( Span( id=None, From 293056cb447a7b6adc007bfb40f82ec7ea7c4dc6 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:36:33 -0400 Subject: [PATCH 062/117] refactor: Use "with" statement for db connection in get_spans --- src/cvec/cvec.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index dbaecda..a18aad8 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -98,9 +98,7 @@ def get_spans( _start_at = start_at or self.default_start_at _end_at = end_at or self.default_end_at - conn = None - try: - conn = self._get_db_connection() + with self._get_db_connection() as conn: with conn.cursor() as cur: query_params = { "tag_name": tag_name, @@ -157,9 +155,6 @@ def get_spans( raw_end_at = raw_start_at return spans - finally: - if conn: - conn.close() def get_metric_data( self, From d84f61883869102c2b3ff47cfed27089e3337748 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:47:18 -0400 Subject: [PATCH 063/117] refactor: Use tuple-based rows instead of dict_row in psycopg --- src/cvec/cvec.py | 10 ++++------ 1 file changed, 4 
insertions(+), 6 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index a18aad8..edcb4bc 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -4,7 +4,6 @@ import pandas as pd import psycopg -from psycopg.rows import dict_row from .span import Span @@ -61,7 +60,6 @@ def _get_db_connection(self) -> psycopg.Connection: password=self.api_key, host=self.host, dbname=self.tenant, - row_factory=dict_row, ) def get_spans( @@ -136,11 +134,11 @@ def get_spans( # the query period. raw_end_at = None for row in db_rows: - raw_start_at = row["tag_value_changed_at"] + raw_start_at = row[0] # tag_value_changed_at value = ( - row["value_double"] - if row["value_double"] is not None - else row["value_string"] + row[1] # value_double + if row[1] is not None + else row[2] # value_string ) spans.append( Span( From 8105e12a35f294e4fb38d74924e6ec2545d47767 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:47:19 -0400 Subject: [PATCH 064/117] style: Apply linting to cvec.py --- src/cvec/cvec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index edcb4bc..b779ccd 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -137,7 +137,7 @@ def get_spans( raw_start_at = row[0] # tag_value_changed_at value = ( row[1] # value_double - if row[1] is not None + if row[1] is not None else row[2] # value_string ) spans.append( From 454c37f5f6ce82240b1c3cad7612f61dae5505a1 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:48:52 -0400 Subject: [PATCH 065/117] refactor: Unpack db_rows tuple directly into named variables. --- src/cvec/cvec.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index b779ccd..5cbcbef 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -133,12 +133,12 @@ def get_spans( # None indicates that the end time is not known; the span extends beyond # the query period. 
raw_end_at = None - for row in db_rows: - raw_start_at = row[0] # tag_value_changed_at + for tag_value_changed_at, value_double, value_string in db_rows: + raw_start_at = tag_value_changed_at value = ( - row[1] # value_double - if row[1] is not None - else row[2] # value_string + value_double + if value_double is not None + else value_string ) spans.append( Span( From 878aab1c5e47a91417ee49d937476dc7d377e5ac Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:48:54 -0400 Subject: [PATCH 066/117] style: Apply linter to cvec.py --- src/cvec/cvec.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 5cbcbef..70741e3 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -135,11 +135,7 @@ def get_spans( raw_end_at = None for tag_value_changed_at, value_double, value_string in db_rows: raw_start_at = tag_value_changed_at - value = ( - value_double - if value_double is not None - else value_string - ) + value = value_double if value_double is not None else value_string spans.append( Span( id=None, From c642227a18a3a93a69184480d78e5cf9623586db Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:50:55 -0400 Subject: [PATCH 067/117] refactor: Use CTE to simplify and optimize get_spans SQL query --- src/cvec/cvec.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 70741e3..27585ab 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -108,22 +108,31 @@ def get_spans( } combined_query = f""" + WITH combined_tag_data AS ( + SELECT + tag_name_id, + tag_value_changed_at, + tag_value AS value_double, + NULL::text AS value_string + FROM {self.tenant}.tag_data + UNION ALL + SELECT + tag_name_id, + tag_value_changed_at, + NULL::double precision AS value_double, + tag_value AS value_string + FROM {self.tenant}.tag_data_str + ) SELECT - td.tag_value_changed_at, - 
td.tag_value AS value_double, - NULL::text AS value_string - FROM tag_data td - JOIN tag_names tn ON td.tag_name_id = tn.id - WHERE tn.normalized_name = %(tag_name)s AND (td.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (td.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) - UNION ALL - SELECT - tds.tag_value_changed_at, - NULL::double precision AS value_double, - tds.tag_value AS value_string - FROM tag_data_str tds - JOIN tag_names tn ON tds.tag_name_id = tn.id - WHERE tn.normalized_name = %(tag_name)s AND (tds.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (tds.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) - ORDER BY tag_value_changed_at DESC + ctd.tag_value_changed_at, + ctd.value_double, + ctd.value_string + FROM combined_tag_data ctd + JOIN {self.tenant}.tag_names tn ON ctd.tag_name_id = tn.id + WHERE tn.normalized_name = %(tag_name)s + AND (ctd.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) + AND (ctd.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) + ORDER BY ctd.tag_value_changed_at DESC LIMIT %(limit)s """ cur.execute(combined_query, query_params) From 590c4bb1a5b691b7309eca9a7b620b80ba861d82 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:54:04 -0400 Subject: [PATCH 068/117] fix: Resolve mypy type checking errors in tests and cvec module --- src/cvec/py.typed | 1 + tests/test_cvec.py | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 src/cvec/py.typed diff --git a/src/cvec/py.typed b/src/cvec/py.typed new file mode 100644 index 0000000..7632ecf --- /dev/null +++ b/src/cvec/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561 diff --git a/tests/test_cvec.py b/tests/test_cvec.py index d669a7f..cee2759 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -5,7 +5,7 @@ class TestCVecConstructor: - def test_constructor_with_arguments(self): + def test_constructor_with_arguments(self) -> None: """Test CVec 
constructor with all arguments provided.""" client = CVec( host="test_host", @@ -29,7 +29,7 @@ def test_constructor_with_arguments(self): }, clear=True, ) - def test_constructor_with_env_vars(self): + def test_constructor_with_env_vars(self) -> None: """Test CVec constructor with environment variables.""" client = CVec(default_start_at="env_start", default_end_at="env_end") assert client.host == "env_host" @@ -39,7 +39,7 @@ def test_constructor_with_env_vars(self): assert client.default_end_at == "env_end" @patch.dict(os.environ, {}, clear=True) - def test_constructor_missing_host_raises_value_error(self): + def test_constructor_missing_host_raises_value_error(self) -> None: """Test CVec constructor raises ValueError if host is missing.""" with pytest.raises( ValueError, @@ -48,7 +48,7 @@ def test_constructor_missing_host_raises_value_error(self): CVec(tenant="test_tenant", api_key="test_api_key") @patch.dict(os.environ, {}, clear=True) - def test_constructor_missing_tenant_raises_value_error(self): + def test_constructor_missing_tenant_raises_value_error(self) -> None: """Test CVec constructor raises ValueError if tenant is missing.""" with pytest.raises( ValueError, @@ -57,7 +57,7 @@ def test_constructor_missing_tenant_raises_value_error(self): CVec(host="test_host", api_key="test_api_key") @patch.dict(os.environ, {}, clear=True) - def test_constructor_missing_api_key_raises_value_error(self): + def test_constructor_missing_api_key_raises_value_error(self) -> None: """Test CVec constructor raises ValueError if api_key is missing.""" with pytest.raises( ValueError, @@ -74,7 +74,7 @@ def test_constructor_missing_api_key_raises_value_error(self): }, clear=True, ) - def test_constructor_missing_tenant_env_var_raises_value_error(self): + def test_constructor_missing_tenant_env_var_raises_value_error(self) -> None: """Test CVec constructor raises ValueError if CVEC_TENANT env var is missing.""" with pytest.raises( ValueError, @@ -82,7 +82,7 @@ def 
test_constructor_missing_tenant_env_var_raises_value_error(self): ): CVec() - def test_constructor_args_override_env_vars(self): + def test_constructor_args_override_env_vars(self) -> None: """Test CVec constructor arguments override environment variables.""" with patch.dict( os.environ, From 766b82bb34be520714cd81b00484bc652331b7d8 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Tue, 13 May 2025 20:55:26 -0400 Subject: [PATCH 069/117] chore: Remove Python 3.9 from CI matrix --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index aac22cd..b9bc233 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From 27a0f125ef68cd7db51cb293f025ba67aecf2dba Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:55:27 -0400 Subject: [PATCH 070/117] ci: Run mypy type checker in CI workflow --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9bc233..886f056 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,6 +45,10 @@ jobs: - name: Run linters run: | poetry run black --check . + + - name: Run type checker + run: | + poetry run mypy --strict . 
- name: Run tests run: | From 22ef3ccf9605c46a41147d6e996a0e9e59ec7f82 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:58:37 -0400 Subject: [PATCH 071/117] fix: Resolve mypy errors in tests by using datetime objects --- tests/test_cvec.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index cee2759..9e5b08c 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -1,6 +1,7 @@ import pytest import os from unittest.mock import patch +from datetime import datetime from cvec import CVec @@ -11,14 +12,14 @@ def test_constructor_with_arguments(self) -> None: host="test_host", tenant="test_tenant", api_key="test_api_key", - default_start_at="test_start", - default_end_at="test_end", + default_start_at=datetime(2023, 1, 1, 0, 0, 0), + default_end_at=datetime(2023, 1, 2, 0, 0, 0), ) assert client.host == "test_host" assert client.tenant == "test_tenant" assert client.api_key == "test_api_key" - assert client.default_start_at == "test_start" - assert client.default_end_at == "test_end" + assert client.default_start_at == datetime(2023, 1, 1, 0, 0, 0) + assert client.default_end_at == datetime(2023, 1, 2, 0, 0, 0) @patch.dict( os.environ, @@ -31,12 +32,12 @@ def test_constructor_with_arguments(self) -> None: ) def test_constructor_with_env_vars(self) -> None: """Test CVec constructor with environment variables.""" - client = CVec(default_start_at="env_start", default_end_at="env_end") + client = CVec(default_start_at=datetime(2023, 2, 1, 0, 0, 0), default_end_at=datetime(2023, 2, 2, 0, 0, 0)) assert client.host == "env_host" assert client.tenant == "env_tenant" assert client.api_key == "env_api_key" - assert client.default_start_at == "env_start" - assert client.default_end_at == "env_end" + assert client.default_start_at == datetime(2023, 2, 1, 0, 0, 0) + assert client.default_end_at == datetime(2023, 2, 2, 0, 0, 0) @patch.dict(os.environ, {}, clear=True) 
def test_constructor_missing_host_raises_value_error(self) -> None: @@ -97,11 +98,11 @@ def test_constructor_args_override_env_vars(self) -> None: host="arg_host", tenant="arg_tenant", api_key="arg_api_key", - default_start_at="arg_start", - default_end_at="arg_end", + default_start_at=datetime(2023, 3, 1, 0, 0, 0), + default_end_at=datetime(2023, 3, 2, 0, 0, 0), ) assert client.host == "arg_host" assert client.tenant == "arg_tenant" assert client.api_key == "arg_api_key" - assert client.default_start_at == "arg_start" - assert client.default_end_at == "arg_end" + assert client.default_start_at == datetime(2023, 3, 1, 0, 0, 0) + assert client.default_end_at == datetime(2023, 3, 2, 0, 0, 0) From fdf6b12b31e9a9f9df9d2b2d76aa8c5e58509d3b Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Tue, 13 May 2025 20:58:38 -0400 Subject: [PATCH 072/117] style: Apply linter formatting to test_cvec.py --- tests/test_cvec.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 9e5b08c..31b2558 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -32,7 +32,10 @@ def test_constructor_with_arguments(self) -> None: ) def test_constructor_with_env_vars(self) -> None: """Test CVec constructor with environment variables.""" - client = CVec(default_start_at=datetime(2023, 2, 1, 0, 0, 0), default_end_at=datetime(2023, 2, 2, 0, 0, 0)) + client = CVec( + default_start_at=datetime(2023, 2, 1, 0, 0, 0), + default_end_at=datetime(2023, 2, 2, 0, 0, 0), + ) assert client.host == "env_host" assert client.tenant == "env_tenant" assert client.api_key == "env_api_key" From 2784ae4915e68661ecbc04da566db51b552d97fd Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Wed, 14 May 2025 13:38:17 -0400 Subject: [PATCH 073/117] build: Add mypy and pandas-stubs, enforce strict type checking --- .aider.conf.yml | 1 + poetry.lock | 168 +++++++++++++++++++++++++---------------------- pyproject.toml | 4 +- src/cvec/cvec.py | 8 +-- 4 
files changed, 96 insertions(+), 85 deletions(-) diff --git a/.aider.conf.yml b/.aider.conf.yml index 9be80fb..227d414 100644 --- a/.aider.conf.yml +++ b/.aider.conf.yml @@ -1,5 +1,6 @@ lint-cmd: - "python: poetry run black" + - "python: poetry run mypy --strict" auto-lint: true test-cmd: poetry run pytest auto-test: true diff --git a/poetry.lock b/poetry.lock index 9146fba..d8158e8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -47,22 +47,6 @@ d = ["aiohttp (>=3.10)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] -[[package]] -name = "click" -version = "8.1.8" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -groups = ["dev"] -markers = "python_version < \"3.11\"" -files = [ - {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, - {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - [[package]] name = "click" version = "8.2.0" @@ -70,7 +54,6 @@ description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" groups = ["dev"] -markers = "python_version >= \"3.11\"" files = [ {file = "click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c"}, {file = "click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d"}, @@ -99,7 +82,7 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10"}, {file = "exceptiongroup-1.3.0.tar.gz", hash = 
"sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88"}, @@ -123,6 +106,60 @@ files = [ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] +[[package]] +name = "mypy" +version = "1.15.0" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "mypy-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:979e4e1a006511dacf628e36fadfecbcc0160a8af6ca7dad2f5025529e082c13"}, + {file = "mypy-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c4bb0e1bd29f7d34efcccd71cf733580191e9a264a2202b0239da95984c5b559"}, + {file = "mypy-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be68172e9fd9ad8fb876c6389f16d1c1b5f100ffa779f77b1fb2176fcc9ab95b"}, + {file = "mypy-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7be1e46525adfa0d97681432ee9fcd61a3964c2446795714699a998d193f1a3"}, + {file = "mypy-1.15.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2e2c2e6d3593f6451b18588848e66260ff62ccca522dd231cd4dd59b0160668b"}, + {file = "mypy-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:6983aae8b2f653e098edb77f893f7b6aca69f6cffb19b2cc7443f23cce5f4828"}, + {file = "mypy-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2922d42e16d6de288022e5ca321cd0618b238cfc5570e0263e5ba0a77dbef56f"}, + {file = "mypy-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ee2d57e01a7c35de00f4634ba1bbf015185b219e4dc5909e281016df43f5ee5"}, + {file = "mypy-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:973500e0774b85d9689715feeffcc980193086551110fd678ebe1f4342fb7c5e"}, + {file = "mypy-1.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:5a95fb17c13e29d2d5195869262f8125dfdb5c134dc8d9a9d0aecf7525b10c2c"}, + {file = "mypy-1.15.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1905f494bfd7d85a23a88c5d97840888a7bd516545fc5aaedff0267e0bb54e2f"}, + {file = "mypy-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:c9817fa23833ff189db061e6d2eff49b2f3b6ed9856b4a0a73046e41932d744f"}, + {file = "mypy-1.15.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:aea39e0583d05124836ea645f412e88a5c7d0fd77a6d694b60d9b6b2d9f184fd"}, + {file = "mypy-1.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2f2147ab812b75e5b5499b01ade1f4a81489a147c01585cda36019102538615f"}, + {file = "mypy-1.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ce436f4c6d218a070048ed6a44c0bbb10cd2cc5e272b29e7845f6a2f57ee4464"}, + {file = "mypy-1.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8023ff13985661b50a5928fc7a5ca15f3d1affb41e5f0a9952cb68ef090b31ee"}, + {file = "mypy-1.15.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1124a18bc11a6a62887e3e137f37f53fbae476dc36c185d549d4f837a2a6a14e"}, + {file = "mypy-1.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:171a9ca9a40cd1843abeca0e405bc1940cd9b305eaeea2dda769ba096932bb22"}, + {file = "mypy-1.15.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:93faf3fdb04768d44bf28693293f3904bbb555d076b781ad2530214ee53e3445"}, + {file = "mypy-1.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:811aeccadfb730024c5d3e326b2fbe9249bb7413553f15499a4050f7c30e801d"}, + {file = "mypy-1.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98b7b9b9aedb65fe628c62a6dc57f6d5088ef2dfca37903a7d9ee374d03acca5"}, + {file = "mypy-1.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c43a7682e24b4f576d93072216bf56eeff70d9140241f9edec0c104d0c515036"}, + {file = 
"mypy-1.15.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:baefc32840a9f00babd83251560e0ae1573e2f9d1b067719479bfb0e987c6357"}, + {file = "mypy-1.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:b9378e2c00146c44793c98b8d5a61039a048e31f429fb0eb546d93f4b000bedf"}, + {file = "mypy-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e601a7fa172c2131bff456bb3ee08a88360760d0d2f8cbd7a75a65497e2df078"}, + {file = "mypy-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:712e962a6357634fef20412699a3655c610110e01cdaa6180acec7fc9f8513ba"}, + {file = "mypy-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95579473af29ab73a10bada2f9722856792a36ec5af5399b653aa28360290a5"}, + {file = "mypy-1.15.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8f8722560a14cde92fdb1e31597760dc35f9f5524cce17836c0d22841830fd5b"}, + {file = "mypy-1.15.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1fbb8da62dc352133d7d7ca90ed2fb0e9d42bb1a32724c287d3c76c58cbaa9c2"}, + {file = "mypy-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:d10d994b41fb3497719bbf866f227b3489048ea4bbbb5015357db306249f7980"}, + {file = "mypy-1.15.0-py3-none-any.whl", hash = "sha256:5469affef548bd1895d86d3bf10ce2b44e33d86923c29e4d675b3e323437ea3e"}, + {file = "mypy-1.15.0.tar.gz", hash = "sha256:404534629d51d3efea5c800ee7c42b72a6554d6c400e6a79eafe15d11341fd43"}, +] + +[package.dependencies] +mypy_extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing_extensions = ">=4.6.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +faster-cache = ["orjson"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -135,70 +172,13 @@ files = [ {file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"}, ] -[[package]] -name = "numpy" 
-version = "2.0.2" -description = "Fundamental package for array computing in Python" -optional = false -python-versions = ">=3.9" -groups = ["main"] -markers = "python_version < \"3.11\"" -files = [ - {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, - {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, - {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66"}, - {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b"}, - {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd"}, - {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318"}, - {file = "numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8"}, - {file = "numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326"}, - {file = "numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97"}, - {file = "numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195"}, - {file = 
"numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a"}, - {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669"}, - {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951"}, - {file = "numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9"}, - {file = "numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15"}, - {file = "numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4"}, - {file = "numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c"}, - {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692"}, - {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a"}, - {file = "numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c"}, - {file = "numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded"}, - {file = "numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5"}, - {file = "numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729"}, - {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1"}, - {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd"}, - {file = "numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d"}, - {file = "numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d"}, - {file = "numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa"}, - {file = "numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = 
"sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385"}, - {file = "numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78"}, -] - [[package]] name = "numpy" version = "2.2.5" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" -groups = ["main"] -markers = "python_version >= \"3.11\"" +groups = ["main", "dev"] files = [ {file = "numpy-2.2.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f4a922da1729f4c40932b2af4fe84909c7a6e167e6e99f71838ce3a29f3fe26"}, {file = "numpy-2.2.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6f91524d31b34f4a5fee24f5bc16dcd1491b668798b6d85585d836c1e633a6a"}, @@ -356,6 +336,22 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pandas-stubs" +version = "2.2.3.250308" +description = "Type annotations for pandas" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "pandas_stubs-2.2.3.250308-py3-none-any.whl", hash = "sha256:a377edff3b61f8b268c82499fdbe7c00fdeed13235b8b71d6a1dc347aeddc74d"}, + {file = "pandas_stubs-2.2.3.250308.tar.gz", hash = "sha256:3a6e9daf161f00b85c83772ed3d5cff9522028f07a94817472c07b91f46710fd"}, +] + 
+[package.dependencies] +numpy = ">=1.23.5" +types-pytz = ">=2022.1.1" + [[package]] name = "pathspec" version = "0.12.1" @@ -494,7 +490,7 @@ description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["dev"] -markers = "python_version < \"3.11\"" +markers = "python_version == \"3.10\"" files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -530,6 +526,18 @@ files = [ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] +[[package]] +name = "types-pytz" +version = "2025.2.0.20250326" +description = "Typing stubs for pytz" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "types_pytz-2025.2.0.20250326-py3-none-any.whl", hash = "sha256:3c397fd1b845cd2b3adc9398607764ced9e578a98a5d1fbb4a9bc9253edfb162"}, + {file = "types_pytz-2025.2.0.20250326.tar.gz", hash = "sha256:deda02de24f527066fc8d6a19e284ab3f3ae716a42b4adb6b40e75e408c08d36"}, +] + [[package]] name = "typing-extensions" version = "4.13.2" @@ -541,7 +549,7 @@ files = [ {file = "typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c"}, {file = "typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef"}, ] -markers = {main = "python_version < \"3.13\"", dev = "python_version < \"3.11\""} +markers = {main = "python_version < \"3.13\""} [[package]] name = "tzdata" @@ -557,5 +565,5 @@ files = [ [metadata] lock-version = "2.1" -python-versions = ">=3.9" -content-hash = "cf56356ec0efef18d3fe40d3434d59baf4795b37f1a2c2880a67aa5c55595c31" +python-versions = ">=3.10" +content-hash = 
"fbbbfabb5ce91021c56f7ef13363c3d0ab49d09d41c9a3f987a4fc851dfb002e" diff --git a/pyproject.toml b/pyproject.toml index f9f67c7..369e324 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ {name = "Joshua Napoli",email = "jnapoli@cvector.energy"} ] readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" dependencies = [ "pandas (>=2.2.3,<3.0.0)", "psycopg (>=3.1.0,<4.0.0)" # Assuming a recent version of psycopg3 @@ -19,6 +19,8 @@ packages = [{include = "cvec", from = "src"}] [tool.poetry.group.dev.dependencies] black = "^25.1.0" pytest = "^8.3.5" +mypy = "^1.15.0" +pandas-stubs = "^2.2.3.250308" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 27585ab..36e8c39 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -107,28 +107,28 @@ def get_spans( "limit": limit, } - combined_query = f""" + combined_query = """ WITH combined_tag_data AS ( SELECT tag_name_id, tag_value_changed_at, tag_value AS value_double, NULL::text AS value_string - FROM {self.tenant}.tag_data + FROM tag_data UNION ALL SELECT tag_name_id, tag_value_changed_at, NULL::double precision AS value_double, tag_value AS value_string - FROM {self.tenant}.tag_data_str + FROM tag_data_str ) SELECT ctd.tag_value_changed_at, ctd.value_double, ctd.value_string FROM combined_tag_data ctd - JOIN {self.tenant}.tag_names tn ON ctd.tag_name_id = tn.id + JOIN tag_names tn ON ctd.tag_name_id = tn.id WHERE tn.normalized_name = %(tag_name)s AND (ctd.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) AND (ctd.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) From 078283c40796bf632fd5e77ccbb0c743abc5f646 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Wed, 14 May 2025 14:20:30 -0400 Subject: [PATCH 074/117] feat: Use metric_data view in get_metric and add view definition --- README.md | 25 +++++++++++++++++++++++++ src/cvec/cvec.py | 38 +++++++++++--------------------------- 2 files changed, 36 
insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 01e4fa9..3147a86 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,31 @@ CREATE VIEW metrics AS JOIN tag_names tn ON td.tag_name_id = tn.id; ``` +## metric_data + +The metric_data view unifies the tag_data and tag_data_str tables and joins the tag name. + +```sql +CREATE OR REPLACE VIEW metric_data AS + ( + WITH td AS (SELECT + tag_name_id, + tag_value_changed_at AS time, + tag_value AS value_double, + NULL::text AS value_string + FROM tag_data + UNION ALL + SELECT + tag_name_id, + tag_value_changed_at AS time, + NULL::double precision AS value_double, + tag_value AS value_string + FROM tag_data_str) + SELECT time, value_double, value_string, tag_name_id AS metric_id, normalized_name AS metric FROM td + JOIN tag_names ON tag_name_id = tag_names.id + ); +``` + # CVec Class The SDK provides an API client class named `CVec` with the following functions. diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 36e8c39..7b319e8 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -99,7 +99,7 @@ def get_spans( with self._get_db_connection() as conn: with conn.cursor() as cur: query_params = { - "tag_name": tag_name, + "metric": tag_name, "start_at": _start_at, "end_at": _end_at, # Fetch up to 'limit' points. 
If limit is None, then the `LIMIT NULL` clause @@ -108,31 +108,15 @@ def get_spans( } combined_query = """ - WITH combined_tag_data AS ( - SELECT - tag_name_id, - tag_value_changed_at, - tag_value AS value_double, - NULL::text AS value_string - FROM tag_data - UNION ALL - SELECT - tag_name_id, - tag_value_changed_at, - NULL::double precision AS value_double, - tag_value AS value_string - FROM tag_data_str - ) SELECT - ctd.tag_value_changed_at, - ctd.value_double, - ctd.value_string - FROM combined_tag_data ctd - JOIN tag_names tn ON ctd.tag_name_id = tn.id - WHERE tn.normalized_name = %(tag_name)s - AND (ctd.tag_value_changed_at >= %(start_at)s OR %(start_at)s IS NULL) - AND (ctd.tag_value_changed_at < %(end_at)s OR %(end_at)s IS NULL) - ORDER BY ctd.tag_value_changed_at DESC + time, + value_double, + value_string + FROM metric_data + WHERE metric = %(metric)s + AND (time >= %(start_at)s OR %(start_at)s IS NULL) + AND (time < %(end_at)s OR %(end_at)s IS NULL) + ORDER BY time DESC LIMIT %(limit)s """ cur.execute(combined_query, query_params) @@ -142,8 +126,8 @@ def get_spans( # None indicates that the end time is not known; the span extends beyond # the query period. 
raw_end_at = None - for tag_value_changed_at, value_double, value_string in db_rows: - raw_start_at = tag_value_changed_at + for time, value_double, value_string in db_rows: + raw_start_at = time value = value_double if value_double is not None else value_string spans.append( Span( From 4820b52a69304e7c8d139c0fccb9aeb0e1882d83 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:22:52 -0400 Subject: [PATCH 075/117] test: Add unit test for get_spans method in CVec class --- tests/test_cvec.py | 65 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 31b2558..8a059d2 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -1,8 +1,8 @@ import pytest import os -from unittest.mock import patch +from unittest.mock import patch, MagicMock from datetime import datetime -from cvec import CVec +from cvec import CVec, Span class TestCVecConstructor: @@ -109,3 +109,64 @@ def test_constructor_args_override_env_vars(self) -> None: assert client.api_key == "arg_api_key" assert client.default_start_at == datetime(2023, 3, 1, 0, 0, 0) assert client.default_end_at == datetime(2023, 3, 2, 0, 0, 0) + + +class TestCVecGetSpans: + @patch("cvec.cvec.psycopg.connect") + def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: + """Test get_spans with a few data points.""" + # Setup mock connection and cursor + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + # Sample data (time, value_double, value_string) - newest first + time1 = datetime(2023, 1, 1, 10, 0, 0) + time2 = datetime(2023, 1, 1, 11, 0, 0) + time3 = datetime(2023, 1, 1, 12, 0, 0) + mock_db_rows = [ + (time3, 30.0, None), # Newest + (time2, None, "val2"), + (time1, 10.0, None), # Oldest + ] + mock_cur.fetchall.return_value = mock_db_rows + + client = 
CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + tag_name = "test_tag" + spans = client.get_spans(tag_name=tag_name) + + assert len(spans) == 3 + mock_cur.execute.assert_called_once() + + # Verify query parameters (optional, but good for sanity check) + # args, kwargs = mock_cur.execute.call_args + # assert kwargs['params']['metric'] == tag_name + # assert kwargs['params']['limit'] is None # Default limit + + # Span 1 (from newest data point: time3) + # Based on current implementation, raw_end_at for the first span is None + assert spans[0].tag_name == tag_name + assert spans[0].value == 30.0 + assert spans[0].raw_start_at == time3 + assert spans[0].raw_end_at is None + + # Span 2 (from data point: time2) + assert spans[1].tag_name == tag_name + assert spans[1].value == "val2" + assert spans[1].raw_start_at == time2 + assert spans[1].raw_end_at == time3 + + # Span 3 (from oldest data point: time1) + assert spans[2].tag_name == tag_name + assert spans[2].value == 10.0 + assert spans[2].raw_start_at == time1 + assert spans[2].raw_end_at == time2 + + # TODO: Add more tests for get_spans: + # - No data points + # - One data point + # - With limit parameter + # - With start_at/end_at parameters affecting results + # - When _end_at is provided to get_spans (to see its effect on the first span's raw_end_at, + # once the suspected bug is addressed or confirmed as intended behavior) From 0ffdf2b7212631faefd529abd4d6c3b7b2db3b4a Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:22:54 -0400 Subject: [PATCH 076/117] style: Apply linter to test_cvec.py --- tests/test_cvec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 8a059d2..78129af 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -138,7 +138,7 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert len(spans) == 3 mock_cur.execute.assert_called_once() - + # Verify 
query parameters (optional, but good for sanity check) # args, kwargs = mock_cur.execute.call_args # assert kwargs['params']['metric'] == tag_name @@ -149,7 +149,7 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert spans[0].tag_name == tag_name assert spans[0].value == 30.0 assert spans[0].raw_start_at == time3 - assert spans[0].raw_end_at is None + assert spans[0].raw_end_at is None # Span 2 (from data point: time2) assert spans[1].tag_name == tag_name From fc10d7146a41172d232555c6542204d52b8caa80 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:26:28 -0400 Subject: [PATCH 077/117] test: Add unit test for get_spans with end_at parameter --- tests/test_cvec.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 78129af..afc6bfd 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -166,7 +166,64 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: # TODO: Add more tests for get_spans: # - No data points # - One data point + # - No data points + # - One data point # - With limit parameter # - With start_at/end_at parameters affecting results # - When _end_at is provided to get_spans (to see its effect on the first span's raw_end_at, # once the suspected bug is addressed or confirmed as intended behavior) + + @patch("cvec.cvec.psycopg.connect") + def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: + """Test get_spans when an end_at parameter is provided.""" + # Setup mock connection and cursor + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + # Sample data (time, value_double, value_string) - newest first + time1 = datetime(2023, 1, 1, 10, 0, 0) + time2 = datetime(2023, 1, 1, 11, 0, 0) + time3 = datetime(2023, 1, 1, 12, 0, 0) + 
mock_db_rows = [ + (time3, 30.0, None), # Newest + (time2, None, "val2"), + (time1, 10.0, None), # Oldest + ] + mock_cur.fetchall.return_value = mock_db_rows + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + tag_name = "test_tag" + # Provide an end_at time that is after all sample data points + query_end_at = datetime(2023, 1, 1, 13, 0, 0) + spans = client.get_spans(tag_name=tag_name, end_at=query_end_at) + + assert len(spans) == 3 + mock_cur.execute.assert_called_once() + + # Verify query parameters + _args, kwargs = mock_cur.execute.call_args + assert kwargs["params"]["metric"] == tag_name + assert kwargs["params"]["end_at"] == query_end_at + assert kwargs["params"]["limit"] is None # Default limit + + # Span 1 (from newest data point: time3) + # Current implementation results in raw_end_at being None for the newest span, + # regardless of the _end_at query parameter. + assert spans[0].tag_name == tag_name + assert spans[0].value == 30.0 + assert spans[0].raw_start_at == time3 + assert spans[0].raw_end_at is None + + # Span 2 (from data point: time2) + assert spans[1].tag_name == tag_name + assert spans[1].value == "val2" + assert spans[1].raw_start_at == time2 + assert spans[1].raw_end_at == time3 + + # Span 3 (from oldest data point: time1) + assert spans[2].tag_name == tag_name + assert spans[2].value == 10.0 + assert spans[2].raw_start_at == time1 + assert spans[2].raw_end_at == time2 From 32d91cdd29a90629aa328fdc7391f99fb6928517 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Wed, 14 May 2025 14:37:03 -0400 Subject: [PATCH 078/117] test: Verify psycopg query parameters in get_spans tests --- tests/test_cvec.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index afc6bfd..7c73b1e 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -139,10 +139,11 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert 
len(spans) == 3 mock_cur.execute.assert_called_once() - # Verify query parameters (optional, but good for sanity check) - # args, kwargs = mock_cur.execute.call_args - # assert kwargs['params']['metric'] == tag_name - # assert kwargs['params']['limit'] is None # Default limit + # Verify psycopg query parameters + (_sql, params), _kwargs = mock_cur.execute.call_args + assert params["metric"] == tag_name + assert params["end_at"] is None # Default end_at + assert params["limit"] is None # Default limit # Span 1 (from newest data point: time3) # Based on current implementation, raw_end_at for the first span is None @@ -170,8 +171,6 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: # - One data point # - With limit parameter # - With start_at/end_at parameters affecting results - # - When _end_at is provided to get_spans (to see its effect on the first span's raw_end_at, - # once the suspected bug is addressed or confirmed as intended behavior) @patch("cvec.cvec.psycopg.connect") def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: @@ -202,15 +201,14 @@ def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: assert len(spans) == 3 mock_cur.execute.assert_called_once() - # Verify query parameters - _args, kwargs = mock_cur.execute.call_args - assert kwargs["params"]["metric"] == tag_name - assert kwargs["params"]["end_at"] == query_end_at - assert kwargs["params"]["limit"] is None # Default limit + # Verify psycopg query parameters + (_sql, params), _kwargs = mock_cur.execute.call_args + assert params["metric"] == tag_name + assert params["end_at"] == query_end_at + assert params["limit"] is None # Default limit # Span 1 (from newest data point: time3) - # Current implementation results in raw_end_at being None for the newest span, - # regardless of the _end_at query parameter. + # The raw_end_at is None for the newest span, regardless of the _end_at query parameter. 
assert spans[0].tag_name == tag_name assert spans[0].value == 30.0 assert spans[0].raw_start_at == time3 From 778918cd362a705cb54ab40b357f40256bdb8616 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:37:32 -0400 Subject: [PATCH 079/117] feat: Add unit test for get_spans with no data points --- tests/test_cvec.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 7c73b1e..428913a 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -165,13 +165,34 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert spans[2].raw_end_at == time2 # TODO: Add more tests for get_spans: - # - No data points - # - One data point - # - No data points # - One data point # - With limit parameter # - With start_at/end_at parameters affecting results + @patch("cvec.cvec.psycopg.connect") + def test_get_spans_no_data_points(self, mock_connect: MagicMock) -> None: + """Test get_spans when no data points are returned from the database.""" + # Setup mock connection and cursor + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + mock_cur.fetchall.return_value = [] # No data points + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + tag_name = "test_tag_no_data" + spans = client.get_spans(tag_name=tag_name) + + assert len(spans) == 0 + mock_cur.execute.assert_called_once() + + # Verify psycopg query parameters + (_sql, params) = mock_cur.execute.call_args.args + assert params["metric"] == tag_name + assert params["end_at"] is None # Default end_at + assert params["limit"] is None # Default limit + @patch("cvec.cvec.psycopg.connect") def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: """Test get_spans when an end_at parameter is provided.""" From 
a5d2d1d4a292e9444d73e8d25dcd209fb36899a1 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:38:38 -0400 Subject: [PATCH 080/117] test: Add unit test for get_spans with limit parameter --- tests/test_cvec.py | 49 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 428913a..810a64d 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -166,7 +166,6 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: # TODO: Add more tests for get_spans: # - One data point - # - With limit parameter # - With start_at/end_at parameters affecting results @patch("cvec.cvec.psycopg.connect") @@ -193,6 +192,54 @@ def test_get_spans_no_data_points(self, mock_connect: MagicMock) -> None: assert params["end_at"] is None # Default end_at assert params["limit"] is None # Default limit + @patch("cvec.cvec.psycopg.connect") + def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: + """Test get_spans when a limit parameter is provided.""" + # Setup mock connection and cursor + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + # Sample data (time, value_double, value_string) - newest first + time1 = datetime(2023, 1, 1, 10, 0, 0) + time2 = datetime(2023, 1, 1, 11, 0, 0) + time3 = datetime(2023, 1, 1, 12, 0, 0) + # Provide more rows than the limit to test truncation + mock_db_rows = [ + (time3, 30.0, None), # Newest + (time2, None, "val2"), + (time1, 10.0, None), # Oldest + ] + mock_cur.fetchall.return_value = mock_db_rows + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + tag_name = "test_tag_limited" + query_limit = 2 + spans = client.get_spans(tag_name=tag_name, limit=query_limit) + + assert len(spans) == query_limit + 
mock_cur.execute.assert_called_once() + + # Verify psycopg query parameters + (_sql, params), _kwargs = mock_cur.execute.call_args + assert params["metric"] == tag_name + assert params["limit"] == query_limit + + # Span 1 (from newest data point: time3) + # The raw_end_at is None for the newest span. + assert spans[0].tag_name == tag_name + assert spans[0].value == 30.0 + assert spans[0].raw_start_at == time3 + assert spans[0].raw_end_at is None + + # Span 2 (from data point: time2) + assert spans[1].tag_name == tag_name + assert spans[1].value == "val2" + assert spans[1].raw_start_at == time2 + assert spans[1].raw_end_at == time3 + # The third data point (time1) should not be processed into a span + @patch("cvec.cvec.psycopg.connect") def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: """Test get_spans when an end_at parameter is provided.""" From 3b8ac18dcc1e868e1277d244e11ee6d677901cd3 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Wed, 14 May 2025 14:40:23 -0400 Subject: [PATCH 081/117] test: Remove redundant assertions in TestCVecGetSpans --- tests/test_cvec.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 810a64d..d1fcf3f 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -146,7 +146,7 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert params["limit"] is None # Default limit # Span 1 (from newest data point: time3) - # Based on current implementation, raw_end_at for the first span is None + # The raw_end_at is None for the newest span, because the span is still open. 
assert spans[0].tag_name == tag_name assert spans[0].value == 30.0 assert spans[0].raw_start_at == time3 @@ -218,7 +218,6 @@ def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: query_limit = 2 spans = client.get_spans(tag_name=tag_name, limit=query_limit) - assert len(spans) == query_limit mock_cur.execute.assert_called_once() # Verify psycopg query parameters @@ -226,20 +225,6 @@ def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: assert params["metric"] == tag_name assert params["limit"] == query_limit - # Span 1 (from newest data point: time3) - # The raw_end_at is None for the newest span. - assert spans[0].tag_name == tag_name - assert spans[0].value == 30.0 - assert spans[0].raw_start_at == time3 - assert spans[0].raw_end_at is None - - # Span 2 (from data point: time2) - assert spans[1].tag_name == tag_name - assert spans[1].value == "val2" - assert spans[1].raw_start_at == time2 - assert spans[1].raw_end_at == time3 - # The third data point (time1) should not be processed into a span - @patch("cvec.cvec.psycopg.connect") def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: """Test get_spans when an end_at parameter is provided.""" From c86a9255fc361f5fc3814d1f0dc947fae83635f1 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:42:38 -0400 Subject: [PATCH 082/117] feat: Implement get_metric_data to fetch metric data as DataFrame --- src/cvec/cvec.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 7b319e8..f9454d8 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -155,8 +155,58 @@ def get_metric_data( The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. 
""" - # Implementation to be added - return pd.DataFrame(columns=["tag_name", "time", "value"]) + _start_at = start_at or self.default_start_at + _end_at = end_at or self.default_end_at + + # Base query selecting from the metric_data view + # The view provides: time, value_double, value_string, metric (as tag name) + sql_query_base = """ + SELECT metric AS tag_name, time, value_double, value_string + FROM metric_data + """ + + conditions = [] + params = {} + + # Add time-based conditions + # The (condition OR param IS NULL) pattern handles cases where _start_at or _end_at might be None + conditions.append("(time >= %(start_at)s OR %(start_at)s IS NULL)") + params["start_at"] = _start_at + + conditions.append("(time < %(end_at)s OR %(end_at)s IS NULL)") + params["end_at"] = _end_at + + # Add tag_names filter if tag_names is provided (not None) + # If tag_names is an empty list, metric = ANY('{}') will correctly yield no results for this part. + if tag_names is not None: + conditions.append("metric = ANY(%(tag_names)s)") + params["tag_names"] = tuple(tag_names) + + # Construct the full query + if conditions: + sql_query_full = sql_query_base + " WHERE " + " AND ".join(conditions) + else: + sql_query_full = sql_query_base + + sql_query_full += " ORDER BY tag_name, time ASC" + + with self._get_db_connection() as conn: + with conn.cursor() as cur: + cur.execute(sql_query_full, params) + rows = cur.fetchall() + + if not rows: + return pd.DataFrame(columns=["tag_name", "time", "value"]) + + # Create DataFrame from fetched rows + df = pd.DataFrame(rows, columns=["tag_name", "time", "value_double", "value_string"]) + + # Combine value_double and value_string into a single 'value' column + # NaNs in value_double (where it was NULL in DB) will be filled by values from value_string + df["value"] = df["value_double"].combine_first(df["value_string"]) + + # Return the DataFrame with the required columns + return df[["tag_name", "time", "value"]] def get_tags( self, start_at: 
Optional[datetime] = None, end_at: Optional[datetime] = None From bd41ade5ebddeefdf51fd6a2d4c6834ddc426d84 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 14:42:39 -0400 Subject: [PATCH 083/117] style: Apply linting to cvec.py --- src/cvec/cvec.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index f9454d8..cd1dd18 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -199,7 +199,9 @@ def get_metric_data( return pd.DataFrame(columns=["tag_name", "time", "value"]) # Create DataFrame from fetched rows - df = pd.DataFrame(rows, columns=["tag_name", "time", "value_double", "value_string"]) + df = pd.DataFrame( + rows, columns=["tag_name", "time", "value_double", "value_string"] + ) # Combine value_double and value_string into a single 'value' column # NaNs in value_double (where it was NULL in DB) will be filled by values from value_string From 0700a65558b9e43ae621bbd1f6ca88f229f7ff5e Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Wed, 14 May 2025 15:00:49 -0400 Subject: [PATCH 084/117] refactor: Statically include conditions in get_metric_data query --- src/cvec/cvec.py | 45 ++++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index cd1dd18..52189d6 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -158,41 +158,28 @@ def get_metric_data( _start_at = start_at or self.default_start_at _end_at = end_at or self.default_end_at - # Base query selecting from the metric_data view - # The view provides: time, value_double, value_string, metric (as tag name) - sql_query_base = """ + params = { + "start_at": _start_at, + "end_at": _end_at, + "tag_names_is_null": tag_names is None, + # Pass an empty tuple if tag_names is None or empty, otherwise the tuple of tag_names. + # ANY(%(empty_tuple)s) will correctly result in no matches if tag_names is empty. 
+ # If tag_names is None, the tag_names_is_null condition handles it. + "tag_names_list": tuple(tag_names) if tag_names else (), + } + + sql_query = """ SELECT metric AS tag_name, time, value_double, value_string FROM metric_data + WHERE (time >= %(start_at)s OR %(start_at)s IS NULL) + AND (time < %(end_at)s OR %(end_at)s IS NULL) + AND (%(tag_names_is_null)s IS TRUE OR metric = ANY(%(tag_names_list)s)) + ORDER BY tag_name, time ASC """ - conditions = [] - params = {} - - # Add time-based conditions - # The (condition OR param IS NULL) pattern handles cases where _start_at or _end_at might be None - conditions.append("(time >= %(start_at)s OR %(start_at)s IS NULL)") - params["start_at"] = _start_at - - conditions.append("(time < %(end_at)s OR %(end_at)s IS NULL)") - params["end_at"] = _end_at - - # Add tag_names filter if tag_names is provided (not None) - # If tag_names is an empty list, metric = ANY('{}') will correctly yield no results for this part. - if tag_names is not None: - conditions.append("metric = ANY(%(tag_names)s)") - params["tag_names"] = tuple(tag_names) - - # Construct the full query - if conditions: - sql_query_full = sql_query_base + " WHERE " + " AND ".join(conditions) - else: - sql_query_full = sql_query_base - - sql_query_full += " ORDER BY tag_name, time ASC" - with self._get_db_connection() as conn: with conn.cursor() as cur: - cur.execute(sql_query_full, params) + cur.execute(sql_query, params) rows = cur.fetchall() if not rows: From f9b4ea697642df21ef06567d1c2df2260b711349 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 08:52:49 -0400 Subject: [PATCH 085/117] test: Add unit tests for get_metric_data method --- tests/test_cvec.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index d1fcf3f..6a58e6a 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -2,6 +2,8 @@ import os from unittest.mock import patch, MagicMock from datetime import datetime +import pandas 
as pd +from pandas.testing import assert_frame_equal from cvec import CVec, Span From 569cc01163affdedb0bdae7c48a17da88278f9b9 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 08:53:10 -0400 Subject: [PATCH 086/117] feat: Add tests for get_metric_data endpoint --- tests/test_cvec.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 6a58e6a..cbb721c 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -166,6 +166,76 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert spans[2].raw_start_at == time1 assert spans[2].raw_end_at == time2 + +class TestCVecGetMetricData: + @patch("cvec.cvec.psycopg.connect") + def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: + """Test get_metric_data with a few data points for multiple tags.""" + # Setup mock connection and cursor + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + # Sample data (metric, time, value_double, value_string) + time1 = datetime(2023, 1, 1, 10, 0, 0) + time2 = datetime(2023, 1, 1, 11, 0, 0) + time3 = datetime(2023, 1, 1, 12, 0, 0) + mock_db_rows = [ + ("tag1", time1, 10.0, None), + ("tag1", time2, 20.0, None), + ("tag2", time3, None, "val_str"), + ] + mock_cur.fetchall.return_value = mock_db_rows + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + tag_names_to_query = ["tag1", "tag2"] + df = client.get_metric_data(tag_names=tag_names_to_query) + + mock_cur.execute.assert_called_once() + (_sql, params), _kwargs = mock_cur.execute.call_args + assert params["tag_names_is_null"] is False + assert params["tag_names_list"] == tuple(tag_names_to_query) + assert params["start_at"] is None # Default start_at + assert params["end_at"] is None # Default end_at + + + expected_data 
= { + "tag_name": ["tag1", "tag1", "tag2"], + "time": [time1, time2, time3], + "value": [10.0, 20.0, "val_str"], + } + expected_df = pd.DataFrame(expected_data) + # Convert 'value' column to object to handle mixed types for comparison + expected_df["value"] = expected_df["value"].astype(object) + df["value"] = df["value"].astype(object) + + + assert_frame_equal(df, expected_df, check_dtype=False) + + @patch("cvec.cvec.psycopg.connect") + def test_get_metric_data_no_data_points(self, mock_connect: MagicMock) -> None: + """Test get_metric_data when no data points are returned.""" + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + mock_cur.fetchall.return_value = [] + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + df = client.get_metric_data(tag_names=["non_existent_tag"]) + + mock_cur.execute.assert_called_once() + expected_df = pd.DataFrame(columns=["tag_name", "time", "value"]) + assert_frame_equal(df, expected_df, check_dtype=False) + + # TODO: Add more tests for get_metric_data: + # - With start_at/end_at parameters + # - With tag_names=None (all tags) + # - With tag_names=[] (empty list, should return no data based on current query logic) + # - Mixed numeric and string values for the same tag (if applicable/possible) + # TODO: Add more tests for get_spans: # - One data point # - With start_at/end_at parameters affecting results From 07a4be1e1055fec05a85bd08560f1a69ddc85abd Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 08:53:11 -0400 Subject: [PATCH 087/117] style: Apply linter to tests/test_cvec.py --- tests/test_cvec.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index cbb721c..e64e2a8 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -196,9 +196,8 @@ def 
test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: (_sql, params), _kwargs = mock_cur.execute.call_args assert params["tag_names_is_null"] is False assert params["tag_names_list"] == tuple(tag_names_to_query) - assert params["start_at"] is None # Default start_at - assert params["end_at"] is None # Default end_at - + assert params["start_at"] is None # Default start_at + assert params["end_at"] is None # Default end_at expected_data = { "tag_name": ["tag1", "tag1", "tag2"], @@ -210,7 +209,6 @@ def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: expected_df["value"] = expected_df["value"].astype(object) df["value"] = df["value"].astype(object) - assert_frame_equal(df, expected_df, check_dtype=False) @patch("cvec.cvec.psycopg.connect") From 781ab6fe120264955037ba1dfd01b7fc87db1b89 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 08:55:17 -0400 Subject: [PATCH 088/117] fix: Use list instead of tuple for tag_names_list in SQL query params --- src/cvec/cvec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 52189d6..711db93 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -165,7 +165,7 @@ def get_metric_data( # Pass an empty tuple if tag_names is None or empty, otherwise the tuple of tag_names. # ANY(%(empty_tuple)s) will correctly result in no matches if tag_names is empty. # If tag_names is None, the tag_names_is_null condition handles it. 
- "tag_names_list": tuple(tag_names) if tag_names else (), + "tag_names_list": tuple(tag_names) if tag_names else [], } sql_query = """ From 29e0c9e73e60249a498103bc39840e56ade690f6 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 08:55:17 -0400 Subject: [PATCH 089/117] feat: implement get_tags method to fetch tags from the database --- src/cvec/cvec.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 711db93..b6e9d5d 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -205,5 +205,49 @@ def get_tags( All tags are returned if no start_at and end_at are given. Each tag has {id, name, birth_at, death_at}. """ - # Implementation to be added - return [] + sql_query: str + params: Optional[dict[str, Any]] + + if start_at is None and end_at is None: + # Case 1: No time interval specified by arguments, return all tags + sql_query = f""" + SELECT id, normalized_name AS name, birth_at, death_at + FROM {self.tenant}.tag_names + ORDER BY name ASC; + """ + params = None + else: + # Case 2: Time interval specified, find tags with transitions in the interval + _start_at = start_at or self.default_start_at + _end_at = end_at or self.default_end_at + + params = {"start_at_param": _start_at, "end_at_param": _end_at} + sql_query = f""" + SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, tn.death_at + FROM {self.tenant}.tag_names tn + JOIN ( + SELECT tag_name_id, tag_value_changed_at AS time FROM {self.tenant}.tag_data + UNION ALL + SELECT tag_name_id, tag_value_changed_at AS time FROM {self.tenant}.tag_data_str + ) AS transitions ON tn.id = transitions.tag_name_id + WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL) + AND (transitions.time < %(end_at_param)s OR %(end_at_param)s IS NULL) + ORDER BY name ASC; + """ + + with self._get_db_connection() as conn: + with conn.cursor() as cur: + 
cur.execute(sql_query, params) + rows = cur.fetchall() + + # Format rows into list of dictionaries + tags_list = [ + { + "id": row[0], + "name": row[1], + "birth_at": row[2], + "death_at": row[3], + } + for row in rows + ] + return tags_list From 83ccc21f3371e568d11307b6ebd1d06c666a3ff8 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 08:58:02 -0400 Subject: [PATCH 090/117] refactor: Remove tenant from SQL queries in get_tags --- src/cvec/cvec.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index b6e9d5d..81f2833 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -209,26 +209,26 @@ def get_tags( params: Optional[dict[str, Any]] if start_at is None and end_at is None: - # Case 1: No time interval specified by arguments, return all tags + # No time interval specified by arguments, return all tags sql_query = f""" SELECT id, normalized_name AS name, birth_at, death_at - FROM {self.tenant}.tag_names + FROM tag_names ORDER BY name ASC; """ params = None else: - # Case 2: Time interval specified, find tags with transitions in the interval + # Time interval specified, find tags with transitions in the interval _start_at = start_at or self.default_start_at _end_at = end_at or self.default_end_at params = {"start_at_param": _start_at, "end_at_param": _end_at} sql_query = f""" SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, tn.death_at - FROM {self.tenant}.tag_names tn + FROM tag_names tn JOIN ( - SELECT tag_name_id, tag_value_changed_at AS time FROM {self.tenant}.tag_data + SELECT tag_name_id, tag_value_changed_at AS time FROM tag_data UNION ALL - SELECT tag_name_id, tag_value_changed_at AS time FROM {self.tenant}.tag_data_str + SELECT tag_name_id, tag_value_changed_at AS time FROM tag_data_str ) AS transitions ON tn.id = transitions.tag_name_id WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL) AND (transitions.time < %(end_at_param)s OR 
%(end_at_param)s IS NULL) From e6fe12cf4364a8b473ce208b7cb90d847f6ace96 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 08:58:03 -0400 Subject: [PATCH 091/117] refactor: Rename get_tags to get_metrics and update documentation --- README.md | 4 ++-- src/cvec/cvec.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 3147a86..c8014fb 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,6 @@ If no relevant value changes are found, an empty list is returned. Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of tags. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. -## `get_tags(?start_at, ?end_at)` +## `get_metrics(?start_at, ?end_at)` -Return a list of tags that had at least one transition in the given [`start_at`, `end_at`) interval. All tags are returned if no `start_at` and `end_at` are given. Each tag has {id, name, birth_at, death_at}. +Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given. Each metric has {id, name, birth_at, death_at}. diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 81f2833..03d4269 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -197,13 +197,13 @@ def get_metric_data( # Return the DataFrame with the required columns return df[["tag_name", "time", "value"]] - def get_tags( + def get_metrics( self, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None - ) -> List[Any]: # TODO: Define a Tag TypedDict or class + ) -> List[Any]: # TODO: Define a Metric TypedDict or class """ - Return a list of tags that had at least one transition in the given [start_at, end_at) interval. - All tags are returned if no start_at and end_at are given. - Each tag has {id, name, birth_at, death_at}. 
+ Return a list of metrics that had at least one transition in the given [start_at, end_at) interval. + All metrics are returned if no start_at and end_at are given. + Each metric has {id, name, birth_at, death_at}. """ sql_query: str params: Optional[dict[str, Any]] From 0f27ba0a56c3e6b666c42e04b56b1c1a04dbbf1d Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 09:00:08 -0400 Subject: [PATCH 092/117] feat: Introduce Metric class and update get_metrics to return Metric list --- src/cvec/__init__.py | 3 ++- src/cvec/cvec.py | 21 +++++++++++---------- src/cvec/metric.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 src/cvec/metric.py diff --git a/src/cvec/__init__.py b/src/cvec/__init__.py index a8d6f75..30c8e17 100644 --- a/src/cvec/__init__.py +++ b/src/cvec/__init__.py @@ -1,4 +1,5 @@ from .cvec import CVec from .span import Span +from .metric import Metric -__all__ = ["CVec", "Span"] +__all__ = ["CVec", "Span", "Metric"] diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 03d4269..e1b8198 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -6,6 +6,7 @@ import psycopg from .span import Span +from .metric import Metric class CVec: @@ -199,7 +200,7 @@ def get_metric_data( def get_metrics( self, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None - ) -> List[Any]: # TODO: Define a Metric TypedDict or class + ) -> List[Metric]: """ Return a list of metrics that had at least one transition in the given [start_at, end_at) interval. All metrics are returned if no start_at and end_at are given. 
@@ -240,14 +241,14 @@ def get_metrics( cur.execute(sql_query, params) rows = cur.fetchall() - # Format rows into list of dictionaries - tags_list = [ - { - "id": row[0], - "name": row[1], - "birth_at": row[2], - "death_at": row[3], - } + # Format rows into list of Metric objects + metrics_list = [ + Metric( + id=row[0], + name=row[1], + birth_at=row[2], + death_at=row[3], + ) for row in rows ] - return tags_list + return metrics_list diff --git a/src/cvec/metric.py b/src/cvec/metric.py new file mode 100644 index 0000000..45d2da6 --- /dev/null +++ b/src/cvec/metric.py @@ -0,0 +1,31 @@ +from datetime import datetime +from typing import Any, Optional + + +class Metric: + """ + Represents metadata for a metric. + """ + + id: int + name: str + birth_at: Optional[datetime] + death_at: Optional[datetime] + + def __init__( + self, + id: int, + name: str, + birth_at: Optional[datetime], + death_at: Optional[datetime], + ): + self.id = id + self.name = name + self.birth_at = birth_at + self.death_at = death_at + + def __repr__(self) -> str: + return ( + f"Metric(id={self.id!r}, name={self.name!r}, " + f"birth_at={self.birth_at!r}, death_at={self.death_at!r})" + ) From e07ee14e380a34c8de3ad9694caea1bb8ee5d95f Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 09:03:49 -0400 Subject: [PATCH 093/117] test: Add unit tests for get_metrics method in CVec class --- tests/test_cvec.py | 97 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index e64e2a8..94f1460 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -4,7 +4,7 @@ from datetime import datetime import pandas as pd from pandas.testing import assert_frame_equal -from cvec import CVec, Span +from cvec import CVec, Span, Metric class TestCVecConstructor: @@ -167,6 +167,101 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: assert spans[2].raw_end_at == time2 +class 
TestCVecGetMetrics: + @patch("cvec.cvec.psycopg.connect") + def test_get_metrics_no_interval(self, mock_connect: MagicMock) -> None: + """Test get_metrics when no start_at or end_at is provided (fetches all metrics).""" + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + time_birth1 = datetime(2023, 1, 1, 0, 0, 0) + time_death1 = datetime(2023, 1, 10, 0, 0, 0) + time_birth2 = datetime(2023, 2, 1, 0, 0, 0) + mock_db_rows = [ + (1, "metric1", time_birth1, time_death1), + (2, "metric2", time_birth2, None), + ] + mock_cur.fetchall.return_value = mock_db_rows + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + metrics = client.get_metrics() + + mock_cur.execute.assert_called_once() + sql_query, params = mock_cur.execute.call_args.args + assert "SELECT id, normalized_name AS name, birth_at, death_at" in sql_query + assert "FROM tag_names" in sql_query + assert "ORDER BY name ASC" in sql_query + assert params is None # No params when fetching all + + assert len(metrics) == 2 + assert isinstance(metrics[0], Metric) + assert metrics[0].id == 1 + assert metrics[0].name == "metric1" + assert metrics[0].birth_at == time_birth1 + assert metrics[0].death_at == time_death1 + + assert isinstance(metrics[1], Metric) + assert metrics[1].id == 2 + assert metrics[1].name == "metric2" + assert metrics[1].birth_at == time_birth2 + assert metrics[1].death_at is None + + @patch("cvec.cvec.psycopg.connect") + def test_get_metrics_with_interval(self, mock_connect: MagicMock) -> None: + """Test get_metrics when a start_at and end_at interval is provided.""" + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + time_birth1 = datetime(2023, 1, 1, 0, 0, 0) + mock_db_rows = [ + (1, "metric_in_interval", 
time_birth1, None), + ] + mock_cur.fetchall.return_value = mock_db_rows + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + start_query = datetime(2023, 1, 5, 0, 0, 0) + end_query = datetime(2023, 1, 15, 0, 0, 0) + metrics = client.get_metrics(start_at=start_query, end_at=end_query) + + mock_cur.execute.assert_called_once() + sql_query, params = mock_cur.execute.call_args.args + assert "SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, tn.death_at" in sql_query + assert "FROM tag_names tn" in sql_query + assert "JOIN (" in sql_query # Check for join with transitions + assert "WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL)" in sql_query + assert "AND (transitions.time < %(end_at_param)s OR %(end_at_param)s IS NULL)" in sql_query + assert params is not None + assert params["start_at_param"] == start_query + assert params["end_at_param"] == end_query + + assert len(metrics) == 1 + assert isinstance(metrics[0], Metric) + assert metrics[0].id == 1 + assert metrics[0].name == "metric_in_interval" + assert metrics[0].birth_at == time_birth1 + assert metrics[0].death_at is None + + @patch("cvec.cvec.psycopg.connect") + def test_get_metrics_no_data_found(self, mock_connect: MagicMock) -> None: + """Test get_metrics when no metrics are found for the given criteria.""" + mock_conn = MagicMock() + mock_cur = MagicMock() + mock_connect.return_value.__enter__.return_value = mock_conn + mock_conn.cursor.return_value.__enter__.return_value = mock_cur + + mock_cur.fetchall.return_value = [] # No rows returned + + client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") + metrics = client.get_metrics(start_at=datetime(2024,1,1), end_at=datetime(2024,1,2)) + + mock_cur.execute.assert_called_once() + assert len(metrics) == 0 + + class TestCVecGetMetricData: @patch("cvec.cvec.psycopg.connect") def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: From 
4c52bbd36319655b3297aa409fb1c72f6f42f9bf Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 09:03:50 -0400 Subject: [PATCH 094/117] style: Apply linter to tests/test_cvec.py --- tests/test_cvec.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 94f1460..cd20268 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -229,11 +229,20 @@ def test_get_metrics_with_interval(self, mock_connect: MagicMock) -> None: mock_cur.execute.assert_called_once() sql_query, params = mock_cur.execute.call_args.args - assert "SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, tn.death_at" in sql_query + assert ( + "SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, tn.death_at" + in sql_query + ) assert "FROM tag_names tn" in sql_query - assert "JOIN (" in sql_query # Check for join with transitions - assert "WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL)" in sql_query - assert "AND (transitions.time < %(end_at_param)s OR %(end_at_param)s IS NULL)" in sql_query + assert "JOIN (" in sql_query # Check for join with transitions + assert ( + "WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL)" + in sql_query + ) + assert ( + "AND (transitions.time < %(end_at_param)s OR %(end_at_param)s IS NULL)" + in sql_query + ) assert params is not None assert params["start_at_param"] == start_query assert params["end_at_param"] == end_query @@ -253,10 +262,12 @@ def test_get_metrics_no_data_found(self, mock_connect: MagicMock) -> None: mock_connect.return_value.__enter__.return_value = mock_conn mock_conn.cursor.return_value.__enter__.return_value = mock_cur - mock_cur.fetchall.return_value = [] # No rows returned + mock_cur.fetchall.return_value = [] # No rows returned client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") - metrics = 
client.get_metrics(start_at=datetime(2024,1,1), end_at=datetime(2024,1,2)) + metrics = client.get_metrics( + start_at=datetime(2024, 1, 1), end_at=datetime(2024, 1, 2) + ) mock_cur.execute.assert_called_once() assert len(metrics) == 0 From 0b3e6660eeaa489ba1f098b1cc692ab625557ec4 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 09:11:40 -0400 Subject: [PATCH 095/117] feat: Add birth_at and death_at to metric_data view --- README.md | 2 +- tests/test_cvec.py | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/README.md b/README.md index c8014fb..6cd8d1a 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ CREATE OR REPLACE VIEW metric_data AS NULL::double precision AS value_double, tag_value AS value_string FROM tag_data_str) - SELECT time, value_double, value_string, tag_name_id AS metric_id, normalized_name AS metric FROM td + SELECT time, value_double, value_string, tag_name_id AS metric_id, normalized_name AS metric, birth_at, death_at FROM td JOIN tag_names ON tag_name_id = tag_names.id ); ``` diff --git a/tests/test_cvec.py b/tests/test_cvec.py index cd20268..3333dfd 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -334,16 +334,6 @@ def test_get_metric_data_no_data_points(self, mock_connect: MagicMock) -> None: expected_df = pd.DataFrame(columns=["tag_name", "time", "value"]) assert_frame_equal(df, expected_df, check_dtype=False) - # TODO: Add more tests for get_metric_data: - # - With start_at/end_at parameters - # - With tag_names=None (all tags) - # - With tag_names=[] (empty list, should return no data based on current query logic) - # - Mixed numeric and string values for the same tag (if applicable/possible) - - # TODO: Add more tests for get_spans: - # - One data point - # - With start_at/end_at parameters affecting results - @patch("cvec.cvec.psycopg.connect") def test_get_spans_no_data_points(self, mock_connect: MagicMock) -> None: """Test get_spans when no data points are returned from the 
database.""" From 23a460430ff851777c8a3afb3329c8bd04152105 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 09:12:28 -0400 Subject: [PATCH 096/117] refactor: Use metric_data view in get_metrics for time intervals --- src/cvec/cvec.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index e1b8198..39ed5ff 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -224,15 +224,10 @@ def get_metrics( params = {"start_at_param": _start_at, "end_at_param": _end_at} sql_query = f""" - SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, tn.death_at - FROM tag_names tn - JOIN ( - SELECT tag_name_id, tag_value_changed_at AS time FROM tag_data - UNION ALL - SELECT tag_name_id, tag_value_changed_at AS time FROM tag_data_str - ) AS transitions ON tn.id = transitions.tag_name_id - WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL) - AND (transitions.time < %(end_at_param)s OR %(end_at_param)s IS NULL) + SELECT DISTINCT metric_id AS id, metric AS name, birth_at, death_at + FROM {self.tenant}.metric_data + WHERE (time >= %(start_at_param)s OR %(start_at_param)s IS NULL) + AND (time < %(end_at_param)s OR %(end_at_param)s IS NULL) ORDER BY name ASC; """ From 85fc9f7f35f8a1959514f4cf66c23ac884f14652 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 09:13:03 -0400 Subject: [PATCH 097/117] fix: Update assertions in test_get_metrics_with_interval --- tests/test_cvec.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 3333dfd..97583dc 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -230,17 +230,16 @@ def test_get_metrics_with_interval(self, mock_connect: MagicMock) -> None: mock_cur.execute.assert_called_once() sql_query, params = mock_cur.execute.call_args.args assert ( - "SELECT DISTINCT tn.id, tn.normalized_name AS name, tn.birth_at, 
tn.death_at" + "SELECT DISTINCT metric_id AS id, metric AS name, birth_at, death_at" in sql_query ) - assert "FROM tag_names tn" in sql_query - assert "JOIN (" in sql_query # Check for join with transitions + assert f"FROM {client.tenant}.metric_data" in sql_query assert ( - "WHERE (transitions.time >= %(start_at_param)s OR %(start_at_param)s IS NULL)" + "WHERE (time >= %(start_at_param)s OR %(start_at_param)s IS NULL)" in sql_query ) assert ( - "AND (transitions.time < %(end_at_param)s OR %(end_at_param)s IS NULL)" + "AND (time < %(end_at_param)s OR %(end_at_param)s IS NULL)" in sql_query ) assert params is not None From 420f76d6482bc0222f1c0095f86d8bbc8dd1c023 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 09:13:05 -0400 Subject: [PATCH 098/117] style: Apply linter fixes to test_cvec.py --- tests/test_cvec.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 97583dc..d055c0b 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -238,10 +238,7 @@ def test_get_metrics_with_interval(self, mock_connect: MagicMock) -> None: "WHERE (time >= %(start_at_param)s OR %(start_at_param)s IS NULL)" in sql_query ) - assert ( - "AND (time < %(end_at_param)s OR %(end_at_param)s IS NULL)" - in sql_query - ) + assert "AND (time < %(end_at_param)s OR %(end_at_param)s IS NULL)" in sql_query assert params is not None assert params["start_at_param"] == start_query assert params["end_at_param"] == end_query From d26ea33cca85e1dd84014c7ce99ec8013a9869c4 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 09:17:19 -0400 Subject: [PATCH 099/117] refactor: Simplify metric_data view creation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6cd8d1a..c9e81a5 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ CREATE VIEW metrics AS The metric_data view unifies the tag_data and tag_data_str tables and joins the tag 
name. ```sql -CREATE OR REPLACE VIEW metric_data AS +CREATE VIEW metric_data AS ( WITH td AS (SELECT tag_name_id, From ecb633d2bb0ba0b34cd2df17e12623b8dce7051f Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 10:19:03 -0400 Subject: [PATCH 100/117] docs: Clarify raw_end_at description in Span object documentation --- README.md | 2 +- src/cvec/cvec.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c9e81a5..5bc197f 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ Each `Span` object in the returned list represents a period where the tag's valu - `value`: The tag's value during the span. - `tag_name`: The name of the tag. - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. -- `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span (first in the returned list), this is the query's `end_at` parameter (if specified, otherwise `None`). For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. If the query's `end_at` is not specified and it's the newest span based on available data, this field will be `None`, indicating the span continues indefinitely. +- `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. - `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. - `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. 
diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 39ed5ff..88d4279 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -82,10 +82,8 @@ def get_spans( - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be >= `_start_at` if `_start_at` was specified. - `raw_end_at`: The timestamp marking the end of this span's constant value. - For the newest span, this is the query's `_end_at` (if specified, else `None`). + For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the next newer span. - If `_end_at` is `None` and it's the newest span based on available data, - this field will be `None`, indicating the span continues indefinitely. - `id`: Currently `None`. - `metadata`: Currently `None`. From 81d2891414db86b1fa2d0cc299f33b311c84b7c8 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:01:17 -0400 Subject: [PATCH 101/117] feat: Add lint script to run black and mypy --- scripts/lint.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 scripts/lint.sh diff --git a/scripts/lint.sh b/scripts/lint.sh new file mode 100644 index 0000000..f4a0752 --- /dev/null +++ b/scripts/lint.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# This script runs black and mypy on the specified file or directory. + +# Exit immediately if a command exits with a non-zero status. +set -e + +TARGET=${1:-.} + +echo "Running black..." +poetry run black "$TARGET" + +echo "Running mypy..." +poetry run mypy --strict "$TARGET" + +echo "Linting complete." 
From 570ad6815e69e142d4c60987da1eb446356a4336 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:08:10 -0400 Subject: [PATCH 102/117] refactor: Rename tag_name to name in Span and get_spans, update docs --- tests/test_cvec.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index d055c0b..163e86e 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -136,7 +136,7 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") tag_name = "test_tag" - spans = client.get_spans(tag_name=tag_name) + spans = client.get_spans(name=tag_name) assert len(spans) == 3 mock_cur.execute.assert_called_once() @@ -149,19 +149,19 @@ def test_get_spans_basic_case(self, mock_connect: MagicMock) -> None: # Span 1 (from newest data point: time3) # The raw_end_at is None for the newest span, because the span is still open. 
- assert spans[0].tag_name == tag_name + assert spans[0].name == tag_name assert spans[0].value == 30.0 assert spans[0].raw_start_at == time3 assert spans[0].raw_end_at is None # Span 2 (from data point: time2) - assert spans[1].tag_name == tag_name + assert spans[1].name == tag_name assert spans[1].value == "val2" assert spans[1].raw_start_at == time2 assert spans[1].raw_end_at == time3 # Span 3 (from oldest data point: time1) - assert spans[2].tag_name == tag_name + assert spans[2].name == tag_name assert spans[2].value == 10.0 assert spans[2].raw_start_at == time1 assert spans[2].raw_end_at == time2 @@ -343,7 +343,7 @@ def test_get_spans_no_data_points(self, mock_connect: MagicMock) -> None: client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") tag_name = "test_tag_no_data" - spans = client.get_spans(tag_name=tag_name) + spans = client.get_spans(name=tag_name) assert len(spans) == 0 mock_cur.execute.assert_called_once() @@ -378,7 +378,7 @@ def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") tag_name = "test_tag_limited" query_limit = 2 - spans = client.get_spans(tag_name=tag_name, limit=query_limit) + spans = client.get_spans(name=tag_name, limit=query_limit) mock_cur.execute.assert_called_once() @@ -411,7 +411,7 @@ def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: tag_name = "test_tag" # Provide an end_at time that is after all sample data points query_end_at = datetime(2023, 1, 1, 13, 0, 0) - spans = client.get_spans(tag_name=tag_name, end_at=query_end_at) + spans = client.get_spans(name=tag_name, end_at=query_end_at) assert len(spans) == 3 mock_cur.execute.assert_called_once() @@ -424,19 +424,19 @@ def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: # Span 1 (from newest data point: time3) # The raw_end_at is None for the newest span, regardless of the _end_at query 
parameter. - assert spans[0].tag_name == tag_name + assert spans[0].name == tag_name assert spans[0].value == 30.0 assert spans[0].raw_start_at == time3 assert spans[0].raw_end_at is None # Span 2 (from data point: time2) - assert spans[1].tag_name == tag_name + assert spans[1].name == tag_name assert spans[1].value == "val2" assert spans[1].raw_start_at == time2 assert spans[1].raw_end_at == time3 # Span 3 (from oldest data point: time1) - assert spans[2].tag_name == tag_name + assert spans[2].name == tag_name assert spans[2].value == 10.0 assert spans[2].raw_start_at == time1 assert spans[2].raw_end_at == time2 From d737b78c34c96aec66e747b69b8a7caacd3a841f Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:09:14 -0400 Subject: [PATCH 103/117] refactor: Rename tag_name to name for spans and get_spans function --- README.md | 10 +++++----- src/cvec/cvec.py | 14 +++++++------- src/cvec/span.py | 10 +++++----- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 5bc197f..137218d 100644 --- a/README.md +++ b/README.md @@ -97,14 +97,14 @@ The SDK provides an API client class named `CVec` with the following functions. Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The `default_start_at` and `default_end_at` constrain most API calls, and can be overridden by the `start_at` and `end_at` arguments to each API function. -## `get_spans(tag_name, ?start_at, ?end_at, ?limit)` +## `get_spans(name, ?start_at, ?end_at, ?limit)` -Return time spans for a tag. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). +Return time spans for a metric. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). 
If `start_at` is `None` (e.g., not provided as an argument and no class default `default_start_at` is set), the query for value changes is unbounded at the start. Similarly, if `end_at` is `None`, the query is unbounded at the end. -Each `Span` object in the returned list represents a period where the tag's value is constant and has the following attributes: -- `value`: The tag's value during the span. -- `tag_name`: The name of the tag. +Each `Span` object in the returned list represents a period where the metric's value is constant and has the following attributes: +- `value`: The metric's value during the span. +- `name`: The name of the metric. - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. - `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. - `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 88d4279..a4d0756 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -65,20 +65,20 @@ def _get_db_connection(self) -> psycopg.Connection: def get_spans( self, - tag_name: str, + name: str, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None, limit: Optional[int] = None, ) -> List[Span]: """ - Return time spans for a tag. Spans are generated from value changes + Return time spans for a metric. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). If `start_at` is `None` (e.g., not provided via argument or class default), the query is unbounded at the start. If `end_at` is `None`, it's unbounded at the end. - Each span represents a period where the tag's value is constant. 
- - `value`: The tag's value during the span. - - `tag_name`: The name of the tag. + Each span represents a period where the metric's value is constant. + - `value`: The metric's value during the span. + - `name`: The name of the metric. - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be >= `_start_at` if `_start_at` was specified. - `raw_end_at`: The timestamp marking the end of this span's constant value. @@ -98,7 +98,7 @@ def get_spans( with self._get_db_connection() as conn: with conn.cursor() as cur: query_params = { - "metric": tag_name, + "metric": name, "start_at": _start_at, "end_at": _end_at, # Fetch up to 'limit' points. If limit is None, then the `LIMIT NULL` clause @@ -131,7 +131,7 @@ def get_spans( spans.append( Span( id=None, - tag_name=tag_name, + name=name, value=value, raw_start_at=raw_start_at, raw_end_at=raw_end_at, diff --git a/src/cvec/span.py b/src/cvec/span.py index aaaf33e..3ae2581 100644 --- a/src/cvec/span.py +++ b/src/cvec/span.py @@ -4,11 +4,11 @@ class Span: """ - Represents a time span where a tag has a constant value. + Represents a time span where a metric has a constant value. 
""" id: Optional[Any] - tag_name: str + name: str value: Optional[Union[float, str]] raw_start_at: datetime raw_end_at: Optional[datetime] @@ -17,14 +17,14 @@ class Span: def __init__( self, id: Optional[Any], - tag_name: str, + name: str, value: Optional[Union[float, str]], raw_start_at: datetime, raw_end_at: Optional[datetime], metadata: Optional[Any], ): self.id = id - self.tag_name = tag_name + self.name = name self.value = value self.raw_start_at = raw_start_at self.raw_end_at = raw_end_at @@ -32,7 +32,7 @@ def __init__( def __repr__(self) -> str: return ( - f"Span(id={self.id!r}, tag_name={self.tag_name!r}, value={self.value!r}, " + f"Span(id={self.id!r}, name={self.name!r}, value={self.value!r}, " f"raw_start_at={self.raw_start_at!r}, raw_end_at={self.raw_end_at!r}, " f"metadata={self.metadata!r})" ) From e872c72ef4c1d8da9917fa9228e843483d7da92e Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 11:09:34 -0400 Subject: [PATCH 104/117] chore: Use lint.sh script for linting commands --- .aider.conf.yml | 3 +-- scripts/lint.sh | 0 2 files changed, 1 insertion(+), 2 deletions(-) mode change 100644 => 100755 scripts/lint.sh diff --git a/.aider.conf.yml b/.aider.conf.yml index 227d414..a34ab62 100644 --- a/.aider.conf.yml +++ b/.aider.conf.yml @@ -1,6 +1,5 @@ lint-cmd: - - "python: poetry run black" - - "python: poetry run mypy --strict" + - "python: scripts/lint.sh" auto-lint: true test-cmd: poetry run pytest auto-test: true diff --git a/scripts/lint.sh b/scripts/lint.sh old mode 100644 new mode 100755 From 92859096404a084c8efae4dbfd7a9b00d53ec769 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:11:24 -0400 Subject: [PATCH 105/117] refactor: Rename tag_names to names in get_metric_data method --- README.md | 4 ++-- src/cvec/cvec.py | 14 +++++++------- tests/test_cvec.py | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 137218d..aca7ad2 100644 --- a/README.md 
+++ b/README.md @@ -113,9 +113,9 @@ Each `Span` object in the returned list represents a period where the metric's v Returns a list of `Span` objects, sorted in descending chronological order (newest span first). If no relevant value changes are found, an empty list is returned. -## `get_metric_data(?tag_names, ?start_at, ?end_at)` +## `get_metric_data(?names, ?start_at, ?end_at)` -Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of tags. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. +Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. ## `get_metrics(?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index a4d0756..4dbb891 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -144,13 +144,13 @@ def get_spans( def get_metric_data( self, - tag_names: Optional[List[str]] = None, + names: Optional[List[str]] = None, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None, ) -> pd.DataFrame: """ Return all data-points within a given [start_at, end_at) interval, - optionally selecting a given list of tags. + optionally selecting a given list of metric names. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. """ @@ -160,11 +160,11 @@ def get_metric_data( params = { "start_at": _start_at, "end_at": _end_at, - "tag_names_is_null": tag_names is None, - # Pass an empty tuple if tag_names is None or empty, otherwise the tuple of tag_names. - # ANY(%(empty_tuple)s) will correctly result in no matches if tag_names is empty. - # If tag_names is None, the tag_names_is_null condition handles it. 
- "tag_names_list": tuple(tag_names) if tag_names else [], + "tag_names_is_null": names is None, + # Pass an empty tuple if names is None or empty, otherwise the tuple of names. + # ANY(%(empty_tuple)s) will correctly result in no matches if names is empty. + # If names is None, the tag_names_is_null condition handles it. + "tag_names_list": tuple(names) if names else [], } sql_query = """ diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 163e86e..46ca11d 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -291,13 +291,13 @@ def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: mock_cur.fetchall.return_value = mock_db_rows client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") - tag_names_to_query = ["tag1", "tag2"] - df = client.get_metric_data(tag_names=tag_names_to_query) + names_to_query = ["tag1", "tag2"] + df = client.get_metric_data(names=names_to_query) mock_cur.execute.assert_called_once() (_sql, params), _kwargs = mock_cur.execute.call_args assert params["tag_names_is_null"] is False - assert params["tag_names_list"] == tuple(tag_names_to_query) + assert params["tag_names_list"] == tuple(names_to_query) assert params["start_at"] is None # Default start_at assert params["end_at"] is None # Default end_at @@ -324,7 +324,7 @@ def test_get_metric_data_no_data_points(self, mock_connect: MagicMock) -> None: mock_cur.fetchall.return_value = [] client = CVec(host="test_host", tenant="test_tenant", api_key="test_api_key") - df = client.get_metric_data(tag_names=["non_existent_tag"]) + df = client.get_metric_data(names=["non_existent_tag"]) mock_cur.execute.assert_called_once() expected_df = pd.DataFrame(columns=["tag_name", "time", "value"]) From 3f069559941f362f9c20dccbf36863337e1be998 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:12:34 -0400 Subject: [PATCH 106/117] feat: Rename tag_name column to name in get_metric_data output --- README.md | 2 +- src/cvec/cvec.py | 
12 ++++++------ tests/test_cvec.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index aca7ad2..c6bd5ea 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ If no relevant value changes are found, an empty list is returned. ## `get_metric_data(?names, ?start_at, ?end_at)` -Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with three columns: tag_name, time, value. One row is returned for each tag value transition. +Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with three columns: name, time, value. One row is returned for each tag value transition. ## `get_metrics(?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 4dbb891..1a98f3a 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -151,7 +151,7 @@ def get_metric_data( """ Return all data-points within a given [start_at, end_at) interval, optionally selecting a given list of metric names. - The return value is a Pandas DataFrame with three columns: tag_name, time, value. + The return value is a Pandas DataFrame with three columns: name, time, value. One row is returned for each tag value transition. 
""" _start_at = start_at or self.default_start_at @@ -168,12 +168,12 @@ def get_metric_data( } sql_query = """ - SELECT metric AS tag_name, time, value_double, value_string + SELECT metric AS name, time, value_double, value_string FROM metric_data WHERE (time >= %(start_at)s OR %(start_at)s IS NULL) AND (time < %(end_at)s OR %(end_at)s IS NULL) AND (%(tag_names_is_null)s IS TRUE OR metric = ANY(%(tag_names_list)s)) - ORDER BY tag_name, time ASC + ORDER BY name, time ASC """ with self._get_db_connection() as conn: @@ -182,11 +182,11 @@ def get_metric_data( rows = cur.fetchall() if not rows: - return pd.DataFrame(columns=["tag_name", "time", "value"]) + return pd.DataFrame(columns=["name", "time", "value"]) # Create DataFrame from fetched rows df = pd.DataFrame( - rows, columns=["tag_name", "time", "value_double", "value_string"] + rows, columns=["name", "time", "value_double", "value_string"] ) # Combine value_double and value_string into a single 'value' column @@ -194,7 +194,7 @@ def get_metric_data( df["value"] = df["value_double"].combine_first(df["value_string"]) # Return the DataFrame with the required columns - return df[["tag_name", "time", "value"]] + return df[["name", "time", "value"]] def get_metrics( self, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 46ca11d..c15af7e 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -302,7 +302,7 @@ def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: assert params["end_at"] is None # Default end_at expected_data = { - "tag_name": ["tag1", "tag1", "tag2"], + "name": ["tag1", "tag1", "tag2"], "time": [time1, time2, time3], "value": [10.0, 20.0, "val_str"], } @@ -327,7 +327,7 @@ def test_get_metric_data_no_data_points(self, mock_connect: MagicMock) -> None: df = client.get_metric_data(names=["non_existent_tag"]) mock_cur.execute.assert_called_once() - expected_df = pd.DataFrame(columns=["tag_name", "time", 
"value"]) + expected_df = pd.DataFrame(columns=["name", "time", "value"]) assert_frame_equal(df, expected_df, check_dtype=False) @patch("cvec.cvec.psycopg.connect") From c918602b7051b3f26425a4ebdf8d437095c5b1fe Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:14:49 -0400 Subject: [PATCH 107/117] feat: Return value_double and value_string columns in get_metric_data --- README.md | 2 +- src/cvec/cvec.py | 10 +++------- tests/test_cvec.py | 37 ++++++++++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index c6bd5ea..f4e6572 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ If no relevant value changes are found, an empty list is returned. ## `get_metric_data(?names, ?start_at, ?end_at)` -Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with three columns: name, time, value. One row is returned for each tag value transition. +Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. One row is returned for each tag value transition. ## `get_metrics(?start_at, ?end_at)` diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 1a98f3a..247b9f5 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -151,7 +151,7 @@ def get_metric_data( """ Return all data-points within a given [start_at, end_at) interval, optionally selecting a given list of metric names. - The return value is a Pandas DataFrame with three columns: name, time, value. + The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. One row is returned for each tag value transition. 
""" _start_at = start_at or self.default_start_at @@ -182,19 +182,15 @@ def get_metric_data( rows = cur.fetchall() if not rows: - return pd.DataFrame(columns=["name", "time", "value"]) + return pd.DataFrame(columns=["name", "time", "value_double", "value_string"]) # Create DataFrame from fetched rows df = pd.DataFrame( rows, columns=["name", "time", "value_double", "value_string"] ) - # Combine value_double and value_string into a single 'value' column - # NaNs in value_double (where it was NULL in DB) will be filled by values from value_string - df["value"] = df["value_double"].combine_first(df["value_string"]) - # Return the DataFrame with the required columns - return df[["name", "time", "value"]] + return df[["name", "time", "value_double", "value_string"]] def get_metrics( self, start_at: Optional[datetime] = None, end_at: Optional[datetime] = None diff --git a/tests/test_cvec.py b/tests/test_cvec.py index c15af7e..86547a0 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -304,14 +304,18 @@ def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: expected_data = { "name": ["tag1", "tag1", "tag2"], "time": [time1, time2, time3], - "value": [10.0, 20.0, "val_str"], + "value_double": [10.0, 20.0, pd.NA], # Use pd.NA for missing float + "value_string": [pd.NA, pd.NA, "val_str"], # Use pd.NA for missing string } expected_df = pd.DataFrame(expected_data) - # Convert 'value' column to object to handle mixed types for comparison - expected_df["value"] = expected_df["value"].astype(object) - df["value"] = df["value"].astype(object) - assert_frame_equal(df, expected_df, check_dtype=False) + # Ensure correct dtypes for comparison, especially for NA handling + expected_df = expected_df.astype( + {"value_double": "float64", "value_string": "object"} + ) + df = df.astype({"value_double": "float64", "value_string": "object"}) + + assert_frame_equal(df, expected_df, check_dtype=True) @patch("cvec.cvec.psycopg.connect") def 
test_get_metric_data_no_data_points(self, mock_connect: MagicMock) -> None: @@ -327,8 +331,27 @@ def test_get_metric_data_no_data_points(self, mock_connect: MagicMock) -> None: df = client.get_metric_data(names=["non_existent_tag"]) mock_cur.execute.assert_called_once() - expected_df = pd.DataFrame(columns=["name", "time", "value"]) - assert_frame_equal(df, expected_df, check_dtype=False) + expected_df = pd.DataFrame( + columns=["name", "time", "value_double", "value_string"] + ) + # Ensure correct dtypes for empty DataFrame comparison + expected_df = expected_df.astype( + { + "name": "object", + "time": "datetime64[ns]", + "value_double": "float64", + "value_string": "object", + } + ) + df = df.astype( + { + "name": "object", + "time": "datetime64[ns]", + "value_double": "float64", + "value_string": "object", + } + ) + assert_frame_equal(df, expected_df, check_dtype=True) @patch("cvec.cvec.psycopg.connect") def test_get_spans_no_data_points(self, mock_connect: MagicMock) -> None: From 664bc12efc45d0b39d6e7752884d7c5de152bded Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:14:54 -0400 Subject: [PATCH 108/117] style: Apply linter to fix code formatting issues --- src/cvec/cvec.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 247b9f5..c84f1a1 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -182,7 +182,9 @@ def get_metric_data( rows = cur.fetchall() if not rows: - return pd.DataFrame(columns=["name", "time", "value_double", "value_string"]) + return pd.DataFrame( + columns=["name", "time", "value_double", "value_string"] + ) # Create DataFrame from fetched rows df = pd.DataFrame( From d8e883a23c83478546a3ed6a52b9a792b5bb5d88 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:15:41 -0400 Subject: [PATCH 109/117] fix: Correctly handle missing values in get_metric_data test --- tests/test_cvec.py | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/tests/test_cvec.py b/tests/test_cvec.py index 86547a0..d218fdc 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -3,6 +3,7 @@ from unittest.mock import patch, MagicMock from datetime import datetime import pandas as pd +import numpy as np from pandas.testing import assert_frame_equal from cvec import CVec, Span, Metric @@ -304,8 +305,8 @@ def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: expected_data = { "name": ["tag1", "tag1", "tag2"], "time": [time1, time2, time3], - "value_double": [10.0, 20.0, pd.NA], # Use pd.NA for missing float - "value_string": [pd.NA, pd.NA, "val_str"], # Use pd.NA for missing string + "value_double": [10.0, 20.0, np.nan], # Use np.nan for missing float + "value_string": [None, None, "val_str"], # Use None for missing string } expected_df = pd.DataFrame(expected_data) From 2e5ccd3a4d2d2ff13c17e38ffffbfeaf7792027d Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Thu, 15 May 2025 11:51:23 -0400 Subject: [PATCH 110/117] feat: Format Span timestamps in RFC 3339 format in repr --- src/cvec/span.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/cvec/span.py b/src/cvec/span.py index 3ae2581..55078bc 100644 --- a/src/cvec/span.py +++ b/src/cvec/span.py @@ -31,8 +31,12 @@ def __init__( self.metadata = metadata def __repr__(self) -> str: + raw_start_at_repr = ( + self.raw_start_at.isoformat() if self.raw_start_at else "None" + ) + raw_end_at_repr = self.raw_end_at.isoformat() if self.raw_end_at else "None" return ( f"Span(id={self.id!r}, name={self.name!r}, value={self.value!r}, " - f"raw_start_at={self.raw_start_at!r}, raw_end_at={self.raw_end_at!r}, " + f"raw_start_at={raw_start_at_repr}, raw_end_at={raw_end_at_repr}, " f"metadata={self.metadata!r})" ) From 06009c4c6b27422012dea222820f47f0610ce4e8 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Thu, 15 May 2025 16:01:27 -0400 Subject: [PATCH 111/117] docs: Add example usage 
and class documentation to README --- README.md | 101 +++++++++++++++++++++++++++++++---------------- src/cvec/cvec.py | 3 +- 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index f4e6572..1b2b314 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,69 @@ # CVec Client Library -# Data Model +# Example Usage + +Install the cvec package: + +``` +pip install cvec +``` + +Here is an example of using the package: + +``` +import cvec +from datetime import datetime +``` + +Create the connection. The host, tenant, and api_key can be given through parameters to the constructor or from the environment variables CVEC_HOST, CVEC_TENANT, and CVEC_API_KEY: + +``` +client = cvec.CVec() +``` + +List the spans on a metric: + +``` +for item in client.get_spans("my_tag_name", start_at=datetime(2025, 5, 14, 10, 0, 0)): + print(item) +``` + + + + +# CVec Class + +The SDK provides an API client class named `CVec` with the following functions. + +## `__init__(?host, ?tenant, ?api_key, ?default_start_at, ?default_end_at)` + +Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The `default_start_at` and `default_end_at` constrain most API calls, and can be overridden by the `start_at` and `end_at` arguments to each API function. + +## `get_spans(name, ?start_at, ?end_at, ?limit)` + +Return time spans for a metric. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). +If `start_at` is `None` (e.g., not provided as an argument and no class default `default_start_at` is set), the query for value changes is unbounded at the start. Similarly, if `end_at` is `None`, the query is unbounded at the end. 
+ +Each `Span` object in the returned list represents a period where the metric's value is constant and has the following attributes: +- `value`: The metric's value during the span. +- `name`: The name of the metric. +- `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. +- `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. +- `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. +- `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. + +Returns a list of `Span` objects, sorted in descending chronological order (newest span first). +If no relevant value changes are found, an empty list is returned. + +## `get_metric_data(?names, ?start_at, ?end_at)` + +Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. One row is returned for each tag value transition. + +## `get_metrics(?start_at, ?end_at)` + +Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given. Each metric has {id, name, birth_at, death_at}. + +# Schema This SDK integrates directly with CVector's database. Each tenant has a schema and a database user, both named for the tenant. The API Key is the password of the user. The database user is restricted to only have access to the tenant's schema. 
Here are the available database tables: @@ -16,7 +79,7 @@ CREATE TABLE tag_data ( ) SELECT create_hypertable( - '${schema_name}.tag_data', + 'tag_data', 'tag_value_changed_at', chunk_time_interval => INTERVAL '1 hour', if_not_exists => TRUE @@ -35,7 +98,7 @@ CREATE TABLE tag_data_str ( ); SELECT create_hypertable( - '${schema_name}.tag_data_str', + 'tag_data_str', 'tag_value_changed_at', chunk_time_interval => INTERVAL '1 hour', if_not_exists => TRUE @@ -88,35 +151,3 @@ CREATE VIEW metric_data AS JOIN tag_names ON tag_name_id = tag_names.id ); ``` - -# CVec Class - -The SDK provides an API client class named `CVec` with the following functions. - -## `__init__(?host, ?tenant, ?api_key, ?default_start_at, ?default_end_at)` - -Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The `default_start_at` and `default_end_at` constrain most API calls, and can be overridden by the `start_at` and `end_at` arguments to each API function. - -## `get_spans(name, ?start_at, ?end_at, ?limit)` - -Return time spans for a metric. Spans are generated from value changes that occur after `start_at` (if specified) and before `end_at` (if specified). -If `start_at` is `None` (e.g., not provided as an argument and no class default `default_start_at` is set), the query for value changes is unbounded at the start. Similarly, if `end_at` is `None`, the query is unbounded at the end. - -Each `Span` object in the returned list represents a period where the metric's value is constant and has the following attributes: -- `value`: The metric's value during the span. -- `name`: The name of the metric. -- `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. -- `raw_end_at`: The timestamp marking the end of this span's constant value. 
For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. -- `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. -- `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. - -Returns a list of `Span` objects, sorted in descending chronological order (newest span first). -If no relevant value changes are found, an empty list is returned. - -## `get_metric_data(?names, ?start_at, ?end_at)` - -Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. One row is returned for each tag value transition. - -## `get_metrics(?start_at, ?end_at)` - -Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given. Each metric has {id, name, birth_at, death_at}. diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index c84f1a1..26c9a28 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -82,8 +82,7 @@ def get_spans( - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be >= `_start_at` if `_start_at` was specified. - `raw_end_at`: The timestamp marking the end of this span's constant value. - For the newest span, the value is `None`. - For other spans, it's the `raw_start_at` of the next newer span. + For the newest span, the value is `None`. For other spans, it's the raw_start_at of the immediately preceding span in the returned list (which, being sorted newest-first, is the next chronologically newer span). This timestamp marks the end of the current span's constant value. - `id`: Currently `None`. - `metadata`: Currently `None`. 
From a13d8eb2632a1f843e48a1c2c1082c369545d8df Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Fri, 16 May 2025 21:24:28 -0400 Subject: [PATCH 112/117] chore: use ruff linter --- .github/workflows/ci.yml | 10 +++- README.md | 40 ++++++++++--- poetry.lock | 123 ++++++++++----------------------------- pyproject.toml | 2 +- scripts/lint.sh | 10 +--- src/cvec/cvec.py | 2 +- src/cvec/metric.py | 2 +- tests/test_cvec.py | 10 ++-- 8 files changed, 80 insertions(+), 119 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 886f056..642db7a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,11 +42,15 @@ jobs: - name: Install project run: poetry install --no-interaction - - name: Run linters + - name: Ruff lint check run: | - poetry run black --check . + poetry run ruff check . - - name: Run type checker + - name: Ruff format check + run: | + poetry run ruff format --check . + + - name: Type check run: | poetry run mypy --strict . diff --git a/README.md b/README.md index 1b2b314..b61539b 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,61 @@ # CVec Client Library -# Example Usage +The "cvec" package is the Python SDK for CVector Energy. -Install the cvec package: +# Getting Started + +## Installation + +Assuming that you have a supported version of Python installed, you can first create a venv with: + +``` +python -m venv .venv +``` + +Then, activate the venv: + +``` +. .venv/bin/activate +``` + +Then, you can install cvec from PyPI with: ``` pip install cvec ``` -Here is an example of using the package: +## Using cvec + +Import the cvec package. We will also use the datetime module. ``` import cvec from datetime import datetime ``` -Create the connection. The host, tenant, and api_key can be given through parameters to the constructor or from the environment variables CVEC_HOST, CVEC_TENANT, and CVEC_API_KEY: +Construct the CVec client. 
The host, tenant, and api_key can be given through parameters to the constructor or from the environment variables CVEC_HOST, CVEC_TENANT, and CVEC_API_KEY: ``` client = cvec.CVec() ``` -List the spans on a metric: +### Spans + +A span is a period of interest, such as an experiment, a baseline recording session, or an alarm. The initial state of a Span is implicitly defined by a period where a given metric has a constant value. + +The newest span for a metric does not have an end time, since it has not ended yet (or has not ended by the finish of the queried period). + +To get the spans on `my_tag_name` since 2025-05-14 10am, run: ``` -for item in client.get_spans("my_tag_name", start_at=datetime(2025, 5, 14, 10, 0, 0)): - print(item) +spans = client.get_spans("my_tag_name", start_at=datetime(2025, 5, 14, 10, 0, 0)) ``` +### Metrics +A metric is a named set of time-series data points pertaining to a particular resource (for example, the value reported by a sensor). A metric has a lifecycle of being activated or added to the system (birth_at) and later removed from the system (death_at). Metrics can have numeric or string values. Boolean values are mapped to 0 and 1. +### Metric Data # CVec Class diff --git a/poetry.lock b/poetry.lock index d8158e8..2cd0855 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,67 +1,5 @@ # This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. -[[package]] -name = "black" -version = "25.1.0" -description = "The uncompromising code formatter." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32"}, - {file = "black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da"}, - {file = "black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7"}, - {file = "black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9"}, - {file = "black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0"}, - {file = "black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299"}, - {file = "black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096"}, - {file = "black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2"}, - {file = "black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b"}, - {file = "black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc"}, - {file = "black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f"}, - {file = "black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba"}, - {file = "black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f"}, - {file = "black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3"}, - {file = "black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171"}, - {file = "black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18"}, - {file = "black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0"}, - {file = "black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f"}, - {file = "black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e"}, - {file = "black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355"}, - {file = "black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717"}, - {file = "black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "click" -version = "8.2.0" -description = "Composable command line interface toolkit" -optional = false -python-versions 
= ">=3.10" -groups = ["dev"] -files = [ - {file = "click-8.2.0-py3-none-any.whl", hash = "sha256:6b303f0b2aa85f1cb4e5303078fadcbcd4e476f114fab9b5007005711839325c"}, - {file = "click-8.2.0.tar.gz", hash = "sha256:f5452aeddd9988eefa20f90f05ab66f17fce1ee2a36907fd30b05bbb5953814d"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - [[package]] name = "colorama" version = "0.4.6" @@ -69,7 +7,7 @@ description = "Cross-platform colored terminal text." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" groups = ["dev"] -markers = "platform_system == \"Windows\" or sys_platform == \"win32\"" +markers = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, @@ -352,35 +290,6 @@ files = [ numpy = ">=1.23.5" types-pytz = ">=2022.1.1" -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - -[[package]] -name = "platformdirs" -version = "4.3.8" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
-optional = false -python-versions = ">=3.9" -groups = ["dev"] -files = [ - {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"}, - {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.4)", "pytest-cov (>=6)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.14.1)"] - [[package]] name = "pluggy" version = "1.5.0" @@ -471,6 +380,34 @@ files = [ {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, ] +[[package]] +name = "ruff" +version = "0.11.10" +description = "An extremely fast Python linter and code formatter, written in Rust." +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "ruff-0.11.10-py3-none-linux_armv6l.whl", hash = "sha256:859a7bfa7bc8888abbea31ef8a2b411714e6a80f0d173c2a82f9041ed6b50f58"}, + {file = "ruff-0.11.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:968220a57e09ea5e4fd48ed1c646419961a0570727c7e069842edd018ee8afed"}, + {file = "ruff-0.11.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:1067245bad978e7aa7b22f67113ecc6eb241dca0d9b696144256c3a879663bca"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4854fd09c7aed5b1590e996a81aeff0c9ff51378b084eb5a0b9cd9518e6cff2"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b4564e9f99168c0f9195a0fd5fa5928004b33b377137f978055e40008a082c5"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b6a9cc5b62c03cc1fea0044ed8576379dbaf751d5503d718c973d5418483641"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash 
= "sha256:607ecbb6f03e44c9e0a93aedacb17b4eb4f3563d00e8b474298a201622677947"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b3a522fa389402cd2137df9ddefe848f727250535c70dafa840badffb56b7a4"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f071b0deed7e9245d5820dac235cbdd4ef99d7b12ff04c330a241ad3534319f"}, + {file = "ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a60e3a0a617eafba1f2e4186d827759d65348fa53708ca547e384db28406a0b"}, + {file = "ruff-0.11.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:da8ec977eaa4b7bf75470fb575bea2cb41a0e07c7ea9d5a0a97d13dbca697bf2"}, + {file = "ruff-0.11.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ddf8967e08227d1bd95cc0851ef80d2ad9c7c0c5aab1eba31db49cf0a7b99523"}, + {file = "ruff-0.11.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5a94acf798a82db188f6f36575d80609072b032105d114b0f98661e1679c9125"}, + {file = "ruff-0.11.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3afead355f1d16d95630df28d4ba17fb2cb9c8dfac8d21ced14984121f639bad"}, + {file = "ruff-0.11.10-py3-none-win32.whl", hash = "sha256:dc061a98d32a97211af7e7f3fa1d4ca2fcf919fb96c28f39551f35fc55bdbc19"}, + {file = "ruff-0.11.10-py3-none-win_amd64.whl", hash = "sha256:5cc725fbb4d25b0f185cb42df07ab6b76c4489b4bfb740a175f3a59c70e8a224"}, + {file = "ruff-0.11.10-py3-none-win_arm64.whl", hash = "sha256:ef69637b35fb8b210743926778d0e45e1bffa850a7c61e428c6b971549b5f5d1"}, + {file = "ruff-0.11.10.tar.gz", hash = "sha256:d522fb204b4959909ecac47da02830daec102eeb100fb50ea9554818d47a5fa6"}, +] + [[package]] name = "six" version = "1.17.0" @@ -566,4 +503,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.10" -content-hash = "fbbbfabb5ce91021c56f7ef13363c3d0ab49d09d41c9a3f987a4fc851dfb002e" +content-hash = "88c626506f796301947928a0229fda5f5656dffdd6d3e79a8426a85643e65002" diff --git a/pyproject.toml 
b/pyproject.toml index 369e324..71f75b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,10 +17,10 @@ packages = [{include = "cvec", from = "src"}] [tool.poetry.group.dev.dependencies] -black = "^25.1.0" pytest = "^8.3.5" mypy = "^1.15.0" pandas-stubs = "^2.2.3.250308" +ruff = "^0.11.10" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] diff --git a/scripts/lint.sh b/scripts/lint.sh index f4a0752..ae674b4 100755 --- a/scripts/lint.sh +++ b/scripts/lint.sh @@ -1,15 +1,11 @@ #!/bin/bash -# This script runs black and mypy on the specified file or directory. +# This script runs ruff and mypy on the specified file or directory. # Exit immediately if a command exits with a non-zero status. set -e TARGET=${1:-.} -echo "Running black..." -poetry run black "$TARGET" - -echo "Running mypy..." +poetry run ruff check --fix "$TARGET" +poetry run ruff format "$TARGET" poetry run mypy --strict "$TARGET" - -echo "Linting complete." diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 26c9a28..51ae54b 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -206,7 +206,7 @@ def get_metrics( if start_at is None and end_at is None: # No time interval specified by arguments, return all tags - sql_query = f""" + sql_query = """ SELECT id, normalized_name AS name, birth_at, death_at FROM tag_names ORDER BY name ASC; diff --git a/src/cvec/metric.py b/src/cvec/metric.py index 45d2da6..5a392b8 100644 --- a/src/cvec/metric.py +++ b/src/cvec/metric.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Any, Optional +from typing import Optional class Metric: diff --git a/tests/test_cvec.py b/tests/test_cvec.py index d218fdc..c3d0162 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np from pandas.testing import assert_frame_equal -from cvec import CVec, Span, Metric +from cvec import CVec, Metric class TestCVecConstructor: @@ -381,7 +381,6 @@ def test_get_spans_no_data_points(self, mock_connect: MagicMock) 
-> None: @patch("cvec.cvec.psycopg.connect") def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: """Test get_spans when a limit parameter is provided.""" - # Setup mock connection and cursor mock_conn = MagicMock() mock_cur = MagicMock() mock_connect.return_value.__enter__.return_value = mock_conn @@ -390,11 +389,8 @@ def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: # Sample data (time, value_double, value_string) - newest first time1 = datetime(2023, 1, 1, 10, 0, 0) time2 = datetime(2023, 1, 1, 11, 0, 0) - time3 = datetime(2023, 1, 1, 12, 0, 0) - # Provide more rows than the limit to test truncation mock_db_rows = [ - (time3, 30.0, None), # Newest - (time2, None, "val2"), + (time2, None, "val2"), # Newest (time1, 10.0, None), # Oldest ] mock_cur.fetchall.return_value = mock_db_rows @@ -411,6 +407,8 @@ def test_get_spans_with_limit_parameter(self, mock_connect: MagicMock) -> None: assert params["metric"] == tag_name assert params["limit"] == query_limit + assert len(spans) == 2 + @patch("cvec.cvec.psycopg.connect") def test_get_spans_with_end_at_parameter(self, mock_connect: MagicMock) -> None: """Test get_spans when an end_at parameter is provided.""" From 7ab76977126856adb78db8814a7e243e4baa1952 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Mon, 19 May 2025 13:54:00 -0400 Subject: [PATCH 113/117] docs: Update README and pyproject metadata --- README.md | 89 -------------------------------------------------- pyproject.toml | 4 +-- 2 files changed, 2 insertions(+), 91 deletions(-) diff --git a/README.md b/README.md index b61539b..12e5b8f 100644 --- a/README.md +++ b/README.md @@ -88,92 +88,3 @@ Return all data-points within a given [`start_at`, `end_at`) interval, optionall ## `get_metrics(?start_at, ?end_at)` Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given. 
Each metric has {id, name, birth_at, death_at}. - -# Schema - -This SDK integrates directly with CVector's database. Each tenant has a schema and a database user, both named for the tenant. The API Key is the password of the user. The database user is restricted to only have access to the tenant's schema. Here are the available database tables: - -## tag_data - -The tag_data table is a Timescale hypertable. Boolean tags are represented within this table using value 0 and 1. The table uses a "report by exception" approach; a row is inserted only when the value of a metric changes. - -```sql -CREATE TABLE tag_data ( - tag_name_id INTEGER NOT NULL, - tag_value_changed_at TIMESTAMP WITH TIME ZONE, - tag_value DOUBLE PRECISION -) - -SELECT create_hypertable( - 'tag_data', - 'tag_value_changed_at', - chunk_time_interval => INTERVAL '1 hour', - if_not_exists => TRUE -); -``` - -## tag_data_str - -The tag_data_str table is a Timescale hypertable, similar to tag_data for string-valued tags. - -```sql -CREATE TABLE tag_data_str ( - tag_name_id INTEGER NOT NULL, - tag_value_changed_at timestamptz NOT NULL, - tag_value text -); - -SELECT create_hypertable( - 'tag_data_str', - 'tag_value_changed_at', - chunk_time_interval => INTERVAL '1 hour', - if_not_exists => TRUE -); -``` - -## tag_names - -```sql -CREATE TABLE tag_names ( - id SERIAL PRIMARY KEY, - normalized_name VARCHAR NOT NULL, - birth_at TIMESTAMPTZ NULL, - death_at TIMESTAMPTZ NULL -); -``` - -## metrics - -```sql -CREATE VIEW metrics AS - SELECT td.tag_value AS value, - td.tag_value_changed_at AS "time", - tn.normalized_name AS metric - FROM tag_data td - JOIN tag_names tn ON td.tag_name_id = tn.id; -``` - -## metric_data - -The metric_data view unifies the tag_data and tag_data_str tables and joins the tag name. 
- -```sql -CREATE VIEW metric_data AS - ( - WITH td AS (SELECT - tag_name_id, - tag_value_changed_at AS time, - tag_value AS value_double, - NULL::text AS value_string - FROM tag_data - UNION ALL - SELECT - tag_name_id, - tag_value_changed_at AS time, - NULL::double precision AS value_double, - tag_value AS value_string - FROM tag_data_str) - SELECT time, value_double, value_string, tag_name_id AS metric_id, normalized_name AS metric, birth_at, death_at FROM td - JOIN tag_names ON tag_name_id = tag_names.id - ); -``` diff --git a/pyproject.toml b/pyproject.toml index 71f75b7..8a34276 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "cvec" version = "0.1.0" -description = "" +description = "SDK for CVector Energy" authors = [ - {name = "Joshua Napoli",email = "jnapoli@cvector.energy"} + {name = "CVector",email = "support@cvector.energy"} ] readme = "README.md" requires-python = ">=3.10" From 8d3e14a6b28d067726adf0afed304eb2eaeefcb2 Mon Sep 17 00:00:00 2001 From: "Joshua Napoli (aider)" Date: Mon, 19 May 2025 14:24:40 -0400 Subject: [PATCH 114/117] docs: Complete Metrics documentation in the cvec SDK. --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 12e5b8f..fa9ed1e 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,13 @@ spans = client.get_spans("my_tag_name", start_at=datetime(2025, 5, 14, 10, 0, 0) ### Metrics -A metric is a named set of time-series data points pertaining to a particular resource (for example, the value reported by a sensor). A metric has a lifecycle of being activated or added to the system (birth_at) and later removed from the system (death_at). Metrics can have numeric or string values. Boolean values are mapped to 0 and 1. +A metric is a named set of time-series data points pertaining to a particular resource (for example, the value reported by a sensor). Metrics can have numeric or string values. Boolean values are mapped to 0 and 1. 
+ +Each `Metric` object, as returned by `get_metrics`, has the following attributes: +- `id`: The unique integer identifier for the metric. +- `name`: The string name of the metric. +- `birth_at`: A `datetime` object indicating when the metric was first activated or added to the system. This can be `None` if the birth time is unknown. +- `death_at`: A `datetime` object indicating when the metric was removed or deactivated from the system. This is `None` if the metric is still active or its death time is unknown. ### Metric Data From 6a1189880b34afdcfcb803d48ff5f9cb7219a93b Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Mon, 19 May 2025 14:34:23 -0400 Subject: [PATCH 115/117] docs: Add documentation for get_metrics and get_metric_data functions --- README.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index fa9ed1e..e3a52bf 100644 --- a/README.md +++ b/README.md @@ -53,16 +53,24 @@ spans = client.get_spans("my_tag_name", start_at=datetime(2025, 5, 14, 10, 0, 0) ### Metrics -A metric is a named set of time-series data points pertaining to a particular resource (for example, the value reported by a sensor). Metrics can have numeric or string values. Boolean values are mapped to 0 and 1. +A metric is a named set of time-series data points pertaining to a particular resource (for example, the value reported by a sensor). Metrics can have numeric or string values. Boolean values are mapped to 0 and 1. The get_metrics function returns a list of metric metadata. -Each `Metric` object, as returned by `get_metrics`, has the following attributes: -- `id`: The unique integer identifier for the metric. -- `name`: The string name of the metric. -- `birth_at`: A `datetime` object indicating when the metric was first activated or added to the system. This can be `None` if the birth time is unknown. -- `death_at`: A `datetime` object indicating when the metric was removed or deactivated from the system. 
This is `None` if the metric is still active or its death time is unknown. +To get all of the metrics that changed value at 10am on 2025-05-14, run: + +``` +client.get_metrics(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)) +``` ### Metric Data +The main content for a metric is a set of points where the metric value changed. These are returned as a Pandas Dataframe with columns for name, time, value_double, value_string. + +To get all of the value changes for all metrics at 10am on 2025-05-14, run: + +``` +client.get_metric_data(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)) +``` + # CVec Class The SDK provides an API client class named `CVec` with the following functions. From 364451b1b08e0db383c5c013c043348f74a77c86 Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Mon, 19 May 2025 15:03:20 -0400 Subject: [PATCH 116/117] feat: Update SDK documentation and examples, fix minor issues --- README.md | 56 ++++++++++++++++++++++++++++++++++++++++++------ src/cvec/cvec.py | 8 +++---- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e3a52bf..07d6dee 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,18 @@ The newest span for a metric does not have an end time, since it has not ended y To get the spans on `my_tag_name` since 2025-05-14 10am, run: ``` -spans = client.get_spans("my_tag_name", start_at=datetime(2025, 5, 14, 10, 0, 0)) +for span in client.get_spans("mygroup/myedge/mode", start_at=datetime(2025, 5, 14, 10, 0, 0)): + print("%s\t%s" % (span.value, span.raw_start_at)) +``` + +The output will be like: + +``` +offline 2025-05-19 16:28:02.130000+00:00 +starting 2025-05-19 16:28:01.107000+00:00 +running 2025-05-19 15:29:28.795000+00:00 +stopping 2025-05-19 15:29:27.788000+00:00 +offline 2025-05-19 14:14:43.752000+00:00 ``` ### Metrics @@ -58,7 +69,21 @@ A metric is a named set of time-series data points pertaining to a particular re To get all of the 
metrics that changed value at 10am on 2025-05-14, run: ``` -client.get_metrics(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)) +for item in client.get_metrics(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)): + print(item.name) +``` + +Example output: + +``` +mygroup/myedge/compressor01/status +mygroup/myedge/compressor01/interlocks/emergency_stop +mygroup/myedge/compressor01/stage1/pressure_out/psig +mygroup/myedge/compressor01/stage1/temp_out/c +mygroup/myedge/compressor01/stage2/pressure_out/psig +mygroup/myedge/compressor01/stage2/temp_out/c +mygroup/myedge/compressor01/motor/current/a +mygroup/myedge/compressor01/motor/power_kw ``` ### Metric Data @@ -71,13 +96,32 @@ To get all of the value changes for all metrics at 10am on 2025-05-14, run: client.get_metric_data(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)) ``` +Example output: + +``` + name time value_double value_string +0 mygroup/myedge/mode 2025-05-14 10:10:41.949000+00:00 24.900000 starting +1 mygroup/myedge/compressor01/interlocks/emergency_stop 2025-05-14 10:27:24.899000+00:00 0.0000000 None +2 mygroup/myedge/compressor01/stage1/pressure_out/psig 2025-05-14 10:43:38.282000+00:00 123.50000 None +3 mygroup/myedge/compressor01/stage1/temp_out/c 2025-05-14 10:10:41.948000+00:00 24.900000 None +4 mygroup/myedge/compressor01/motor/current/a 2025-05-14 10:27:24.897000+00:00 12.000000 None +... ... ... ... ... 
+46253 mygroup/myedge/compressor01/stage1/temp_out/c 2025-05-14 10:59:55.725000+00:00 25.300000 None +46254 mygroup/myedge/compressor01/stage2/pressure_out/psig 2025-05-14 10:59:56.736000+00:00 250.00000 None +46255 mygroup/myedge/compressor01/stage2/temp_out/c 2025-05-14 10:59:57.746000+00:00 12.700000 None +46256 mygroup/myedge/compressor01/motor/current/a 2025-05-14 10:59:58.752000+00:00 11.300000 None +46257 mygroup/myedge/compressor01/motor/power_kw 2025-05-14 10:59:59.760000+00:00 523.40000 None + +[46257 rows x 4 columns] +``` + # CVec Class The SDK provides an API client class named `CVec` with the following functions. ## `__init__(?host, ?tenant, ?api_key, ?default_start_at, ?default_end_at)` -Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The `default_start_at` and `default_end_at` constrain most API calls, and can be overridden by the `start_at` and `end_at` arguments to each API function. +Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. The `default_start_at` and `default_end_at` can provide a default query time interval for API methods. ## `get_spans(name, ?start_at, ?end_at, ?limit)` @@ -88,7 +132,7 @@ Each `Span` object in the returned list represents a period where the metric's v - `value`: The metric's value during the span. - `name`: The name of the metric. - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be greater than or equal to the query's `start_at` if one was specified. -- `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the chronologically newer preceding span in the list. 
+- `raw_end_at`: The timestamp marking the end of this span's constant value. For the newest span, the value is `None`. For other spans, it's the `raw_start_at` of the immediately newer span, which is the preceding span in the list (spans are returned newest-first). - `id`: Currently `None`. In a future version of the SDK, this will be the span's unique identifier. - `metadata`: Currently `None`. In a future version, this can be used to store annotations or other metadata related to the span. @@ -97,8 +141,8 @@ If no relevant value changes are found, an empty list is returned. ## `get_metric_data(?names, ?start_at, ?end_at)` -Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. One row is returned for each tag value transition. +Return all data-points within a given [`start_at`, `end_at`) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. One row is returned for each metric value transition. ## `get_metrics(?start_at, ?end_at)` -Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given. Each metric has {id, name, birth_at, death_at}. +Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given. diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 51ae54b..2c69d62 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -32,8 +32,7 @@ def __init__( Setup the SDK with the given host and API Key. The host and API key are loaded from environment variables CVEC_HOST, CVEC_TENANT, CVEC_API_KEY, if they are not given as arguments to the constructor. 
- The default_start_at and default_end_at constrain most API keys, and can be overridden - by the start_at and end_at arguments to each API function. + The default_start_at and default_end_at can provide a default query time interval for API methods. """ self.host = host or os.environ.get("CVEC_HOST") self.tenant = tenant or os.environ.get("CVEC_TENANT") @@ -82,7 +81,7 @@ def get_spans( - `raw_start_at`: The timestamp of the value change that initiated this span's value. This will be >= `_start_at` if `_start_at` was specified. - `raw_end_at`: The timestamp marking the end of this span's constant value. - For the newest span, the value is `None`. For other spans, it's the raw_start_at of the immediately preceding span in the returned list (which, being sorted newest-first, is the next chronologically newer span). This timestamp marks the end of the current span's constant value. + For the newest span, the value is `None`. For other spans, it's the raw_start_at of the immediately newer span, which is the preceding span in the returned list (sorted newest-first). - `id`: Currently `None`. - `metadata`: Currently `None`. @@ -151,7 +150,7 @@ def get_metric_data( Return all data-points within a given [start_at, end_at) interval, optionally selecting a given list of metric names. The return value is a Pandas DataFrame with four columns: name, time, value_double, value_string. - One row is returned for each tag value transition. + One row is returned for each metric value transition. """ _start_at = start_at or self.default_start_at _end_at = end_at or self.default_end_at @@ -199,7 +198,6 @@ def get_metrics( """ Return a list of metrics that had at least one transition in the given [start_at, end_at) interval. All metrics are returned if no start_at and end_at are given. - Each metric has {id, name, birth_at, death_at}. 
""" sql_query: str params: Optional[dict[str, Any]] From 56f10f6ea9cdb3584151576a562e7163b9f95a7f Mon Sep 17 00:00:00 2001 From: Joshua Napoli Date: Mon, 19 May 2025 17:28:52 -0400 Subject: [PATCH 117/117] fix: psycopg requires list rather than tuple --- README.md | 8 ++++---- src/cvec/cvec.py | 2 +- tests/test_cvec.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 07d6dee..2e5413d 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ from datetime import datetime Construct the CVec client. The host, tenant, and api_key can be given through parameters to the constructor or from the environment variables CVEC_HOST, CVEC_TENANT, and CVEC_API_KEY: ``` -client = cvec.CVec() +cvec = cvec.CVec() ``` ### Spans @@ -48,7 +48,7 @@ The newest span for a metric does not have an end time, since it has not ended y To get the spans on `my_tag_name` since 2025-05-14 10am, run: ``` -for span in client.get_spans("mygroup/myedge/mode", start_at=datetime(2025, 5, 14, 10, 0, 0)): +for span in cvec.get_spans("mygroup/myedge/mode", start_at=datetime(2025, 5, 14, 10, 0, 0)): print("%s\t%s" % (span.value, span.raw_start_at)) ``` @@ -69,7 +69,7 @@ A metric is a named set of time-series data points pertaining to a particular re To get all of the metrics that changed value at 10am on 2025-05-14, run: ``` -for item in client.get_metrics(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)): +for item in cvec.get_metrics(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)): print(item.name) ``` @@ -93,7 +93,7 @@ The main content for a metric is a set of points where the metric value changed. 
To get all of the value changes for all metrics at 10am on 2025-05-14, run: ``` -client.get_metric_data(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)) +cvec.get_metric_data(start_at=datetime(2025, 5, 14, 10, 0, 0), end_at=datetime(2025, 5, 14, 11, 0, 0)) ``` Example output: diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py index 2c69d62..062a143 100644 --- a/src/cvec/cvec.py +++ b/src/cvec/cvec.py @@ -162,7 +162,7 @@ def get_metric_data( # Pass an empty tuple if names is None or empty, otherwise the tuple of names. # ANY(%(empty_tuple)s) will correctly result in no matches if names is empty. # If names is None, the tag_names_is_null condition handles it. - "tag_names_list": tuple(names) if names else [], + "tag_names_list": names if names else [], } sql_query = """ diff --git a/tests/test_cvec.py b/tests/test_cvec.py index c3d0162..eb31985 100644 --- a/tests/test_cvec.py +++ b/tests/test_cvec.py @@ -298,7 +298,7 @@ def test_get_metric_data_basic_case(self, mock_connect: MagicMock) -> None: mock_cur.execute.assert_called_once() (_sql, params), _kwargs = mock_cur.execute.call_args assert params["tag_names_is_null"] is False - assert params["tag_names_list"] == tuple(names_to_query) + assert params["tag_names_list"] == names_to_query assert params["start_at"] is None # Default start_at assert params["end_at"] is None # Default end_at