Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
0e491fa
add prelim implementation for reproduction blob
echoumcp1 Mar 5, 2026
c0992c3
reduce code dupe
echoumcp1 Mar 5, 2026
b537c0e
remove extraneous error msg
echoumcp1 Mar 5, 2026
976d602
fix reference client to raise error when failure does not repro
echoumcp1 Mar 5, 2026
3f7ebdf
fix lint
echoumcp1 Mar 5, 2026
f4a5283
update flake.lock
echoumcp1 Mar 5, 2026
5c63cd0
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Mar 5, 2026
ccdd496
update uv lock
echoumcp1 Mar 5, 2026
900fdfd
release.md
echoumcp1 Mar 6, 2026
300d67a
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Mar 10, 2026
344a99d
remove database
echoumcp1 Mar 10, 2026
70ddc5f
linting
echoumcp1 Mar 10, 2026
c34c0aa
remove buffer hardcoding
echoumcp1 Mar 10, 2026
b775111
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Mar 11, 2026
e53e612
remove run_test name param
echoumcp1 Mar 11, 2026
3dfd026
fix lint
echoumcp1 Mar 11, 2026
061cd7b
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Mar 11, 2026
806b4b6
failure blob handling code dedupe
echoumcp1 Mar 11, 2026
e4fd9b3
fix mypy type check
echoumcp1 Mar 11, 2026
de5b734
fix mypy error
echoumcp1 Mar 11, 2026
0d9e45f
send multiple failure blobs
echoumcp1 Mar 12, 2026
d5ad5ca
fix linting
echoumcp1 Mar 12, 2026
bb0287c
support python 3.10
echoumcp1 Mar 13, 2026
39dbb2a
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Mar 16, 2026
81257af
lint
echoumcp1 Mar 16, 2026
0f2f906
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Mar 26, 2026
a7d7929
Update RELEASE.md
echoumcp1 Mar 26, 2026
2cab92b
better RELEASE.md msg
echoumcp1 Mar 31, 2026
507fcdc
remove extra print_blob arg
echoumcp1 Mar 31, 2026
9c315e1
Merge branch 'main' into echoumcp1/add-repro-support
echoumcp1 Apr 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/scripts/release.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import re
import subprocess
from datetime import datetime, timezone
from datetime import datetime
from pathlib import Path

SOURCE_DIRS = ["src/"]
Expand Down Expand Up @@ -61,7 +61,7 @@ def set_version(pyproject: Path, new_version: str) -> None:


def add_changelog(path: Path, *, version: str, content: str) -> None:
date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
date = datetime.now(datetime.UTC).strftime("%Y-%m-%d")
entry = f"## {version} - {date}\n\n{content}"

existing = path.read_text()
Expand Down
3 changes: 3 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

Add protocol support for reporting failure blobs back to the client. A failure blob is an opaque byte string that can be sent back with a later test run to reproduce a specific failure exactly.
24 changes: 12 additions & 12 deletions nix/flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 47 additions & 15 deletions src/hegel/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import cbor2
from hypothesis import HealthCheck, settings
from hypothesis.control import BuildContext
from hypothesis.core import decode_failure, encode_failure
from hypothesis.database import DirectoryBasedExampleDatabase
from hypothesis.errors import (
FailedHealthCheck,
Expand Down Expand Up @@ -297,6 +298,7 @@ def run_server_on_connection(connection: Connection) -> None:
test_cases=message["test_cases"],
database_key=message.get("database_key"),
seed=message.get("seed"),
failure_blob=message.get("failure_blob"),
suppress_health_check=message.get(
"suppress_health_check", []
),
Expand Down Expand Up @@ -328,6 +330,7 @@ def _run_test(
test_cases: int,
database_key: bytes | None,
seed: int | None,
failure_blob: bytes | None = None,
suppress_health_check: list[str] | None,
derandomize: bool,
database: str | UniqueIdentifier | None,
Expand Down Expand Up @@ -398,7 +401,47 @@ def _run_test(
database_key=database_key,
)
try:
runner.run()
if failure_blob is not None:
choices = decode_failure(failure_blob)
data = ConjectureData.for_choices(choices)
with contextlib.suppress(StopTest):
state.test_function(data)

is_interesting = data.status is Status.INTERESTING
result = {
"passed": not is_interesting,
"test_cases": 1,
"valid_test_cases": 0,
"invalid_test_cases": 0,
"interesting_test_cases": int(is_interesting),
}
if is_interesting:
result["failure_blobs"] = [failure_blob]
interesting_choices = [choices]
else:
result["failure_blobs"] = []
interesting_choices = []
else:
runner.run()

result = {
"passed": len(runner.interesting_examples) == 0,
"test_cases": runner.call_count,
"valid_test_cases": runner.valid_examples,
"invalid_test_cases": runner.invalid_examples,
"interesting_test_cases": len(runner.interesting_examples),
"seed": str(seed),
}
interesting_examples = sorted(
runner.interesting_examples.values(),
key=lambda d: sort_key(d.nodes),
)

interesting_choices = [v.choices for v in interesting_examples]

result["failure_blobs"] = [
encode_failure(choices) for choices in interesting_choices
]
except FailedHealthCheck as e:
result = {
"passed": False,
Expand All @@ -416,15 +459,6 @@ def _run_test(
channel.send_request({"event": "test_done", "results": result}).get()
return result

result = {
"passed": len(runner.interesting_examples) == 0,
"test_cases": runner.call_count,
"valid_test_cases": runner.valid_examples,
"invalid_test_cases": runner.invalid_examples,
"interesting_test_cases": len(runner.interesting_examples),
"seed": str(seed),
}

# Check for flaky behavior detected during test execution
flaky_error = state.flaky_error
if flaky_error is not None:
Expand All @@ -435,14 +469,12 @@ def _run_test(
result["flaky"] = FLAKY_TEST_RESULT_MSG

channel.send_request({"event": "test_done", "results": result}).get()

final_state = HegelState(connection, channel, is_final=True)

for v in sorted(
runner.interesting_examples.values(),
key=lambda d: sort_key(d.nodes),
):
for choices in interesting_choices:
with contextlib.suppress(StopTest):
final_state.test_function(ConjectureData.for_choices(v.choices))
final_state.test_function(ConjectureData.for_choices(choices))

return result
except Exception:
Expand Down
6 changes: 6 additions & 0 deletions tests/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,15 @@ def __init__(self, connection: ClientConnection):
self.connection = connection
self._control = connection.control_channel
self.__lock = threading.Lock()
self.last_result: dict | None = None

def run_test(
self,
test_fn: Callable[[], None],
*,
test_cases: int = 100,
seed: int | None = None,
failure_blob: bytes | None = None,
suppress_health_check: list[str] | None = None,
database_key: bytes | None = None,
derandomize: bool = False,
Expand All @@ -72,6 +74,7 @@ def run_test(
"channel_id": test_channel.channel_id,
"database_key": database_key,
"derandomize": derandomize,
"failure_blob": failure_blob,
}
if database is not not_set:
message["database"] = database
Expand Down Expand Up @@ -105,6 +108,7 @@ def run_test(
)

assert result_data is not None
self.last_result = result_data

if "error" in result_data:
raise ValueError(result_data["error"])
Expand All @@ -117,6 +121,8 @@ def run_test(

n_interesting = result_data["interesting_test_cases"]

if result_data["passed"] and failure_blob:
raise AssertionError("failure blob did not reproduce")
if n_interesting == 0:
return

Expand Down
63 changes: 63 additions & 0 deletions tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
)
from tests.client.client import _request

try:
ExceptionGroup
except NameError: # pragma: no cover
from exceptiongroup import ExceptionGroup


def test_start_and_stop_span(client):
def test():
Expand Down Expand Up @@ -323,6 +328,64 @@ def test():
client.run_test(test, test_cases=10)


def test_reproduce_failure(client):
    """Replaying a reported failure blob makes the same test fail again."""

    def test():
        schema = {"type": "integer", "min_value": 0, "max_value": 1000}
        drawn = generate_from_schema(schema)
        assert drawn <= 10

    # First run: a normal search, which is expected to find a failing input.
    with pytest.raises(AssertionError):
        client.run_test(test, test_cases=100)

    reported_blobs = client.last_result["failure_blobs"]
    blob = reported_blobs[0]
    assert isinstance(blob, bytes)

    # Second run: replay only the recorded blob; the failure must surface again.
    with pytest.raises(AssertionError):
        client.run_test(test, failure_blob=blob)


def test_reproduce_failure_blob_no_longer_fails(client):
    """A blob that no longer reproduces makes the client raise AssertionError.

    The blob is recorded from a failing test and then replayed against a test
    body that always passes; the client is expected to report this with the
    message "failure blob did not reproduce".
    """

    def failing_test():
        schema = {"type": "integer", "min_value": 0, "max_value": 1000}
        drawn = generate_from_schema(schema)
        assert drawn <= 10

    # Record a blob from a run that is expected to fail.
    with pytest.raises(AssertionError):
        client.run_test(failing_test, test_cases=100)

    blob = client.last_result["failure_blobs"][0]

    # The blob was for failing_test, but we replay with a test that always passes.
    with pytest.raises(AssertionError, match="failure blob did not reproduce"):
        client.run_test(lambda: None, failure_blob=blob)


def test_reproduce_failure_result_not_in_passing_test(client):
    """A run in which every test case passes reports no failure blobs."""

    def test():
        schema = {"type": "integer", "min_value": 0, "max_value": 100}
        drawn = generate_from_schema(schema)
        assert drawn >= 0

    client.run_test(test, test_cases=50)

    reported_blobs = client.last_result["failure_blobs"]
    assert reported_blobs == []


def test_multiple_blobs(client):
    """A run that hits two distinct failures reports two failure blobs."""

    def test():
        first_schema = {"type": "integer", "min_value": 0, "max_value": 100}
        x = generate_from_schema(first_schema)
        assert x <= 10

        # The requested range is entirely negative, so this second assertion
        # is the one that fails whenever the first one passes.
        second_schema = {"type": "integer", "min_value": -10, "max_value": -1}
        y = generate_from_schema(second_schema)
        assert y >= 0

    with pytest.raises(ExceptionGroup):
        client.run_test(test, test_cases=50)

    # Checked after the context manager exits: a statement placed after the
    # raising call inside the ``with`` body would never execute.
    reported_blobs = client.last_result["failure_blobs"]
    assert len(reported_blobs) == 2


def test_derandomize_with_database_key(client):
"""Tests that derandomize=True derives seed from database_key."""

Expand Down
Loading