Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/core/balancer/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def select_account(
if state.status == AccountStatus.RATE_LIMITED:
if state.reset_at and current >= state.reset_at:
state.status = AccountStatus.ACTIVE
state.used_percent = 0.0
state.error_count = 0
state.reset_at = None
else:
Expand All @@ -71,6 +72,7 @@ def select_account(
if state.reset_at and current >= state.reset_at:
state.status = AccountStatus.ACTIVE
state.used_percent = 0.0
state.secondary_used_percent = 0.0
state.reset_at = None
else:
continue
Expand Down
14 changes: 14 additions & 0 deletions app/modules/proxy/load_balancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,20 @@ def _state_from_account(
secondary_used = effective_secondary_entry.used_percent if effective_secondary_entry else None
secondary_reset = effective_secondary_entry.reset_at if effective_secondary_entry else None

# If the usage window has reset (reset_at is in the past) but the last
# recorded sample still shows 100 % usage, the data is stale. Zero it
# out so the account is not incorrectly blocked or deprioritised while
# waiting for the next usage refresh to fetch fresh numbers.
now_epoch = int(time.time())
if primary_used is not None and primary_used >= 100.0:
if primary_reset is not None and primary_reset <= now_epoch:
primary_used = 0.0
primary_reset = None
if secondary_used is not None and secondary_used >= 100.0:
if secondary_reset is not None and secondary_reset <= now_epoch:
secondary_used = 0.0
secondary_reset = None

# Use account.reset_at from DB as the authoritative source for runtime reset
# and to survive process restarts.
db_reset_at = float(account.reset_at) if account.reset_at else None
Expand Down
34 changes: 32 additions & 2 deletions app/modules/usage/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,11 @@ async def refresh_accounts(
continue
latest = latest_usage.get(account.id)
if _latest_usage_is_fresh(latest, now=now, interval_seconds=interval):
continue
# Still force a refresh when the account is blocked with an
# expired reset — the fresh primary entry does not capture
# that the upstream window has turned over.
if not _account_needs_post_reset_refresh(account, now):
continue
Comment on lines +191 to +195
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Bypass stale gate when forcing post-reset usage refresh

This branch intends to force a refresh for blocked accounts whose reset_at has already expired, but it still falls through to _refresh_account_if_stale(), which re-checks primary freshness and can return early without fetching. In the secondary-window rollover scenario (account remains QUOTA_EXCEEDED/RATE_LIMITED, primary row still fresh), the new _account_needs_post_reset_refresh path does not actually trigger an upstream refresh, so recovery is delayed until the normal primary interval expires.

Useful? React with 👍 / 👎.

# Additional-only accounts have no main UsageHistory entry.
# Check DB-backed freshness (works across workers/restarts)
# with process-local cache as a fast path.
Expand Down Expand Up @@ -636,7 +640,18 @@ def _latest_usage_is_fresh(
now: datetime,
interval_seconds: int,
) -> bool:
return latest is not None and (now - latest.recorded_at).total_seconds() < interval_seconds
if latest is None:
return False
if (now - latest.recorded_at).total_seconds() >= interval_seconds:
return False
# If the usage window has reset since this entry was recorded, the
# recorded percentage is stale — force a re-fetch so the scheduler
# picks up the new (lower) usage from the upstream API promptly.
if latest.reset_at is not None:
now_epoch = int(now.replace(tzinfo=timezone.utc).timestamp())
if now_epoch >= latest.reset_at:
return False
return True


def _parse_credits_balance(value: str | int | float | None) -> float | None:
Expand All @@ -652,6 +667,21 @@ def _parse_credits_balance(value: str | int | float | None) -> float | None:
return None


def _account_needs_post_reset_refresh(account: Account, now: datetime) -> bool:
"""Return True when the account is blocked with an expired reset time.

This catches the case where only the *secondary* window has reset (the
primary entry may still look fresh) but the account is still persisted as
RATE_LIMITED or QUOTA_EXCEEDED from the previous cycle.
"""
if account.status not in (AccountStatus.RATE_LIMITED, AccountStatus.QUOTA_EXCEEDED):
return False
if account.reset_at is None:
return False
now_epoch = int(now.replace(tzinfo=timezone.utc).timestamp())
return now_epoch >= account.reset_at


def _window_minutes(limit_seconds: int | None) -> int | None:
if not limit_seconds or limit_seconds <= 0:
return None
Expand Down
61 changes: 61 additions & 0 deletions tests/unit/test_load_balancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,64 @@ def test_error_backoff_expired_account_does_not_immediately_relock():
result2 = select_account([state], now=now + 2)
assert result2.account is not None
assert result2.account.account_id == "a"


def test_select_account_resets_used_percent_when_rate_limit_expires():
"""When a rate-limited account's reset_at expires, used_percent must be
cleared so the account is not deprioritised in the usage-weighted sort."""
now = 1_700_000_000.0
state = AccountState(
"a",
AccountStatus.RATE_LIMITED,
used_percent=100.0,
reset_at=now - 10,
)
result = select_account([state], now=now)
assert result.account is not None
assert state.status == AccountStatus.ACTIVE
assert state.used_percent == 0.0
assert state.reset_at is None


def test_select_account_resets_secondary_used_percent_when_quota_exceeded_expires():
"""When a quota-exceeded account's reset_at expires, secondary_used_percent
must be cleared so the account is not deprioritised in the sort key."""
now = 1_700_000_000.0
state = AccountState(
"a",
AccountStatus.QUOTA_EXCEEDED,
used_percent=100.0,
secondary_used_percent=100.0,
reset_at=now - 10,
)
result = select_account([state], now=now)
assert result.account is not None
assert state.status == AccountStatus.ACTIVE
assert state.used_percent == 0.0
assert state.secondary_used_percent == 0.0
assert state.reset_at is None


def test_select_account_prefers_reset_account_over_high_usage():
"""An account whose quota just reset (via the selection-time clear) should
be preferred over an active account with moderate usage."""
now = 1_700_000_000.0
states = [
AccountState(
"a",
AccountStatus.QUOTA_EXCEEDED,
used_percent=100.0,
secondary_used_percent=100.0,
reset_at=now - 10,
),
AccountState(
"b",
AccountStatus.ACTIVE,
used_percent=60.0,
secondary_used_percent=60.0,
),
]
result = select_account(states, now=now)
assert result.account is not None
# Account "a" should be preferred because after reset its usage is 0%
assert result.account.account_id == "a"
113 changes: 112 additions & 1 deletion tests/unit/test_usage_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
from app.core.usage.models import UsagePayload
from app.db.models import Account, AccountStatus, UsageHistory
from app.modules.usage.additional_quota_keys import canonicalize_additional_quota_key
from app.modules.usage.updater import UsageUpdater, _last_successful_refresh
from app.modules.usage.updater import (
UsageUpdater,
_account_needs_post_reset_refresh,
_last_successful_refresh,
_latest_usage_is_fresh,
)

pytestmark = pytest.mark.unit

Expand Down Expand Up @@ -1521,3 +1526,109 @@ async def stub_fetch_usage(**_: Any) -> UsagePayload:
assert not hasattr(entry, "credits_has")
assert not hasattr(entry, "credits_unlimited")
assert not hasattr(entry, "credits_balance")


# ---------------------------------------------------------------------------
# _latest_usage_is_fresh — reset boundary awareness
# ---------------------------------------------------------------------------


def test_latest_usage_is_fresh_returns_true_when_recent_no_reset():
now = datetime(2024, 6, 1, 12, 0, 30)
entry = UsageHistory(
id=1,
account_id="a",
used_percent=50.0,
recorded_at=datetime(2024, 6, 1, 12, 0, 0),
window="primary",
reset_at=None,
window_minutes=300,
)
assert _latest_usage_is_fresh(entry, now=now, interval_seconds=60) is True


def test_latest_usage_is_fresh_returns_false_when_old():
now = datetime(2024, 6, 1, 12, 1, 30)
entry = UsageHistory(
id=1,
account_id="a",
used_percent=50.0,
recorded_at=datetime(2024, 6, 1, 12, 0, 0),
window="primary",
reset_at=None,
window_minutes=300,
)
assert _latest_usage_is_fresh(entry, now=now, interval_seconds=60) is False


def test_latest_usage_is_fresh_returns_false_when_reset_at_has_passed():
"""Data recorded recently but the usage window has already reset."""
# recorded_at is 10s ago → within interval. But reset_at was 5s ago.
reset_epoch = int(datetime(2024, 6, 1, 12, 0, 25).replace(tzinfo=timezone.utc).timestamp())
now = datetime(2024, 6, 1, 12, 0, 30)
entry = UsageHistory(
id=1,
account_id="a",
used_percent=100.0,
recorded_at=datetime(2024, 6, 1, 12, 0, 20),
window="primary",
reset_at=reset_epoch,
window_minutes=300,
)
assert _latest_usage_is_fresh(entry, now=now, interval_seconds=60) is False


def test_latest_usage_is_fresh_returns_true_when_reset_at_is_in_future():
"""Data recorded recently and the reset hasn't happened yet."""
reset_epoch = int(datetime(2024, 6, 1, 12, 5, 0).replace(tzinfo=timezone.utc).timestamp())
now = datetime(2024, 6, 1, 12, 0, 30)
entry = UsageHistory(
id=1,
account_id="a",
used_percent=100.0,
recorded_at=datetime(2024, 6, 1, 12, 0, 20),
window="primary",
reset_at=reset_epoch,
window_minutes=300,
)
assert _latest_usage_is_fresh(entry, now=now, interval_seconds=60) is True


# ---------------------------------------------------------------------------
# _account_needs_post_reset_refresh
# ---------------------------------------------------------------------------


def test_account_needs_refresh_active_account_returns_false():
acc = _make_account("a", "ws_a")
acc.status = AccountStatus.ACTIVE
acc.reset_at = int(datetime(2024, 6, 1, 11, 0, 0).replace(tzinfo=timezone.utc).timestamp())
assert _account_needs_post_reset_refresh(acc, datetime(2024, 6, 1, 12, 0, 0)) is False


def test_account_needs_refresh_quota_exceeded_future_reset_returns_false():
acc = _make_account("a", "ws_a")
acc.status = AccountStatus.QUOTA_EXCEEDED
acc.reset_at = int(datetime(2024, 6, 1, 13, 0, 0).replace(tzinfo=timezone.utc).timestamp())
assert _account_needs_post_reset_refresh(acc, datetime(2024, 6, 1, 12, 0, 0)) is False


def test_account_needs_refresh_quota_exceeded_past_reset_returns_true():
acc = _make_account("a", "ws_a")
acc.status = AccountStatus.QUOTA_EXCEEDED
acc.reset_at = int(datetime(2024, 6, 1, 11, 0, 0).replace(tzinfo=timezone.utc).timestamp())
assert _account_needs_post_reset_refresh(acc, datetime(2024, 6, 1, 12, 0, 0)) is True


def test_account_needs_refresh_rate_limited_past_reset_returns_true():
acc = _make_account("a", "ws_a")
acc.status = AccountStatus.RATE_LIMITED
acc.reset_at = int(datetime(2024, 6, 1, 11, 0, 0).replace(tzinfo=timezone.utc).timestamp())
assert _account_needs_post_reset_refresh(acc, datetime(2024, 6, 1, 12, 0, 0)) is True


def test_account_needs_refresh_no_reset_at_returns_false():
acc = _make_account("a", "ws_a")
acc.status = AccountStatus.QUOTA_EXCEEDED
acc.reset_at = None
assert _account_needs_post_reset_refresh(acc, datetime(2024, 6, 1, 12, 0, 0)) is False
Loading