|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import logging |
| 4 | +from collections import defaultdict |
| 5 | +from collections.abc import Sequence |
| 6 | +from datetime import datetime, timedelta, timezone |
| 7 | + |
| 8 | +from google.protobuf.timestamp_pb2 import Timestamp |
| 9 | +from sentry_protos.billing.v1.date_pb2 import Date |
| 10 | +from sentry_protos.billing.v1.services.usage.v1.endpoint_usage_pb2 import ( |
| 11 | + CategoryUsage, |
| 12 | + DailyUsage, |
| 13 | + GetUsageRequest, |
| 14 | + GetUsageResponse, |
| 15 | +) |
| 16 | +from sentry_protos.billing.v1.usage_data_pb2 import UsageData |
| 17 | +from snuba_sdk import ( |
| 18 | + Column, |
| 19 | + Condition, |
| 20 | + Entity, |
| 21 | + Function, |
| 22 | + Granularity, |
| 23 | + Limit, |
| 24 | + Op, |
| 25 | + OrderBy, |
| 26 | + Query, |
| 27 | + Request, |
| 28 | +) |
| 29 | +from snuba_sdk.orderby import Direction |
| 30 | + |
| 31 | +from sentry.billing.platform.services.usage._category_mapping import proto_to_relay_category |
| 32 | +from sentry.snuba.referrer import Referrer |
| 33 | +from sentry.utils import metrics |
| 34 | +from sentry.utils.outcomes import Outcome |
| 35 | +from sentry.utils.snuba import raw_snql_query |
| 36 | + |
logger = logging.getLogger(__name__)

# Attribution string Snuba records for queries issued by this module.
_REFERRER = Referrer.BILLING_USAGE_SERVICE_CLICKHOUSE.value
_APP_ID = "billing"
_DATASET = "outcomes"
# Seconds per day — buckets ClickHouse rows into daily totals.
_DAILY_GRANULARITY = 86400
# Max rows returned per query; hitting it implies truncation (see warning
# in query_outcomes_usage).
_QUERY_LIMIT = 10000

# Outcomes stored in PG BillingMetricUsage (getsentry outcomes consumer
# filters to these three at ingest). The CH outcomes table also has
# INVALID, ABUSE, CLIENT_DISCARD, and CARDINALITY_LIMITED.
_BILLABLE_OUTCOMES = [Outcome.ACCEPTED, Outcome.FILTERED, Outcome.RATE_LIMITED]
| 49 | + |
| 50 | + |
def query_outcomes_usage(request: GetUsageRequest) -> GetUsageResponse:
    """Fetch daily, per-category outcome usage for an org from ClickHouse."""
    organization_id = request.organization_id
    window_start = _timestamp_to_datetime(request.start)
    # The proto contract defines `end` as inclusive (midnight of the last
    # included day), while Snuba queries use a half-open interval
    # [start, end). Shift one day forward to convert inclusive -> exclusive;
    # without this every hourly row on the final day would be excluded.
    window_end = _timestamp_to_datetime(request.end) + timedelta(days=1)
    # Proto categories use different int values from Relay/ClickHouse
    # (e.g., proto ATTACHMENT=3 vs Relay ATTACHMENT=4). Convert before querying.
    relay_categories = [proto_to_relay_category(c) for c in request.categories]

    snuba_request = _build_query(
        organization_id,
        window_start,
        window_end,
        relay_categories,
        total_outcomes=_BILLABLE_OUTCOMES,
    )
    rows = raw_snql_query(snuba_request, referrer=_REFERRER)["data"]

    if len(rows) >= _QUERY_LIMIT:
        # Reaching the limit means the result set was likely truncated —
        # log and emit a metric so this is visible in monitoring.
        logger.warning(
            "billing.usage_query.truncated",
            extra={"org_id": organization_id, "row_count": len(rows)},
        )
        metrics.incr(
            "billing.usage_query.truncated",
            tags={"org_id": str(organization_id)},
            sample_rate=1.0,
        )

    return _build_response(rows)
| 79 | + |
| 80 | + |
def _build_query(
    org_id: int,
    start: datetime,
    end: datetime,
    categories: Sequence[int],
    *,
    total_outcomes: Sequence[int] | None = None,
) -> Request:
    """Build the Snuba request for daily per-category outcome aggregates.

    Returns one row per (category, day) with seven sumIf columns:
    total, accepted, dropped, filtered, over_quota, spike_protection,
    and dynamic_sampling.
    """
    # Half-open interval [start, end) — standard sentry.snuba.outcomes
    # convention. `end` has already been shifted +1 day in
    # query_outcomes_usage() to convert the proto's inclusive end into the
    # exclusive boundary Snuba expects.
    where = [
        Condition(Column("org_id"), Op.EQ, org_id),
        Condition(Column("timestamp"), Op.GTE, start),
        Condition(Column("timestamp"), Op.LT, end),
    ]
    if categories:
        where.append(Condition(Column("category"), Op.IN, categories))

    query = Query(
        match=Entity("outcomes"),
        select=[
            Column("category"),
            Column("time"),
            _total_function(total_outcomes),
            _sum_quantity_if(_outcome_is(Outcome.ACCEPTED), "accepted"),
            _sum_quantity_if(_outcome_is(Outcome.RATE_LIMITED), "dropped"),
            _sum_quantity_if(_outcome_is(Outcome.FILTERED), "filtered"),
            _sum_quantity_if(_over_quota_condition(), "over_quota"),
            # Rate-limited by the spike protection system specifically.
            _sum_quantity_if(
                Function(
                    "and",
                    [
                        _outcome_is(Outcome.RATE_LIMITED),
                        Function("equals", [Column("reason"), "smart_rate_limit"]),
                    ],
                ),
                "spike_protection",
            ),
            # Filtered by dynamic sampling (reason prefixed "Sampled:").
            _sum_quantity_if(
                Function(
                    "and",
                    [
                        _outcome_is(Outcome.FILTERED),
                        Function("startsWith", [Column("reason"), "Sampled:"]),
                    ],
                ),
                "dynamic_sampling",
            ),
        ],
        groupby=[Column("category"), Column("time")],
        where=where,
        orderby=[OrderBy(Column("time"), Direction.ASC)],
        granularity=Granularity(_DAILY_GRANULARITY),
        limit=Limit(_QUERY_LIMIT),
    )
    return Request(
        dataset=_DATASET,
        app_id=_APP_ID,
        query=query,
        tenant_ids={"organization_id": org_id},
    )


def _sum_quantity_if(condition: Function, alias: str) -> Function:
    """``sumIf(quantity, condition) AS alias`` — one aggregated usage column."""
    return Function("sumIf", [Column("quantity"), condition], alias)


def _outcome_is(outcome: Outcome) -> Function:
    """ClickHouse predicate ``outcome = <value>``."""
    return Function("equals", [Column("outcome"), outcome])
| 166 | + |
| 167 | + |
def _build_response(rows: list[dict]) -> GetUsageResponse:
    """Fold flat ClickHouse rows into the day-grouped GetUsageResponse shape."""
    # Two-level accumulator: per_day[day_str][category_id] -> usage fields.
    # Each row already contains all 7 sumIf-aggregated fields from ClickHouse.
    #
    # NOTE: CategoryUsage.category carries Relay/Sentry int values (not proto
    # DataCategory ints). The proto field is typed as DataCategory but every
    # existing consumer (getsentry postgres backend, shadow comparison,
    # UsagePricerService, customer_usage, projection, etc.) interprets it as a
    # Relay int. Converting to proto ints here would break all consumers and
    # the shadow comparison. See the TODO in getsentry's
    # usage_pricer/service.py for the planned migration.
    per_day: defaultdict[str, dict[int, dict[str, int]]] = defaultdict(dict)
    usage_fields = (
        "total",
        "accepted",
        "dropped",
        "filtered",
        "over_quota",
        "spike_protection",
        "dynamic_sampling",
    )

    for row in rows:
        per_day[row["time"]][int(row["category"])] = {
            name: int(row[name]) for name in usage_fields
        }

    daily = []
    for day_key in sorted(per_day):
        category_usage = [
            CategoryUsage(category=category_id, data=UsageData(**fields))  # type: ignore[arg-type]
            for category_id, fields in sorted(per_day[day_key].items())
        ]
        daily.append(DailyUsage(date=_parse_day(day_key), usage=category_usage))

    return GetUsageResponse(days=daily, seats=[])
| 204 | + |
| 205 | + |
def _total_function(outcomes: Sequence[int] | None) -> Function:
    """Build the ``total`` aggregate.

    When *outcomes* is provided, only those outcome types are counted
    (billing callers pass ``_BILLABLE_OUTCOMES``). When ``None``, every
    outcome is counted (useful for general-purpose usage queries).
    """
    if outcomes is None:
        return Function("sum", [Column("quantity")], "total")

    # outcome IN (<tuple of allowed outcomes>)
    membership = Function(
        "in",
        [Column("outcome"), Function("tuple", list(outcomes))],
    )
    return Function("sumIf", [Column("quantity"), membership], "total")
| 229 | + |
| 230 | + |
def _over_quota_condition() -> Function:
    """ClickHouse condition for over-quota rate limiting.

    Matches: outcome=RATE_LIMITED AND (reason ends with "_usage_exceeded"
    OR reason="usage_exceeded" OR reason="grace_period").
    """
    is_rate_limited = Function("equals", [Column("outcome"), Outcome.RATE_LIMITED])
    has_exceeded_suffix = Function("endsWith", [Column("reason"), "_usage_exceeded"])
    has_exact_reason = Function(
        "or",
        [
            Function("equals", [Column("reason"), "usage_exceeded"]),
            Function("equals", [Column("reason"), "grace_period"]),
        ],
    )
    reason_matches = Function("or", [has_exceeded_suffix, has_exact_reason])
    return Function("and", [is_rate_limited, reason_matches])
| 256 | + |
| 257 | + |
def _timestamp_to_datetime(ts: Timestamp) -> datetime:
    """Convert a protobuf Timestamp into a timezone-aware UTC datetime."""
    return ts.ToDatetime(tzinfo=timezone.utc)
| 260 | + |
| 261 | + |
def _parse_day(value: str) -> Date:
    """Parse an ISO date string from Snuba's ``time`` column into a proto Date."""
    parsed = datetime.fromisoformat(value)
    return Date(year=parsed.year, month=parsed.month, day=parsed.day)
0 commit comments