Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 116 additions & 37 deletions src/sentry/api/endpoints/organization_trace_item_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
TraceItemType as ProtoTraceItemType,
)
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
from sentry_protos.snuba.v1.trace_item_filter_pb2 import ExistsFilter, OrFilter, TraceItemFilter
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
ExistsFilter,
OrFilter,
TraceItemFilter,
)

from sentry import features, options
from sentry.api.api_owners import ApiOwner
Expand Down Expand Up @@ -54,7 +58,11 @@
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
from sentry.search.eap.trace_metrics.definitions import TRACE_METRICS_DEFINITIONS
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
from sentry.search.eap.types import (
AttributeSourceType,
SearchResolverConfig,
SupportedTraceItemType,
)
from sentry.search.eap.utils import (
can_expose_attribute,
get_secondary_aliases,
Expand All @@ -79,6 +87,10 @@
POSSIBLE_ATTRIBUTE_TYPES = ["string", "number", "boolean"]


class ProxyResolvedAttribute(ResolvedAttribute):
pass


class TraceItemAttributeKey(TypedDict):
key: str
name: str
Expand Down Expand Up @@ -218,6 +230,7 @@
name: str,
attr_type: Literal["string", "number", "boolean"],
item_type: SupportedTraceItemType,
is_proxy: bool = False,
) -> TraceItemAttributeKey:
public_key, public_name, attribute_source = translate_internal_to_public_alias(
name, attr_type, item_type
Expand All @@ -226,6 +239,8 @@

if public_key is not None and public_name is not None:
pass
elif is_proxy:
public_key = public_name = name
elif attr_type == "number":
public_key = f"tags[{name},number]"
public_name = name
Expand All @@ -237,7 +252,11 @@
public_name = name

serialized_source: dict[str, str | bool] = {
"source_type": attribute_source["source_type"].value
"source_type": (
attribute_source["source_type"].value
if not is_proxy
else AttributeSourceType.SENTRY.value
)
}
if attribute_source.get("is_transformed_alias"):
serialized_source["is_transformed_alias"] = True
Expand Down Expand Up @@ -378,18 +397,54 @@
all_aliased_attributes = []
# our aliases don't exist in the db, so filter over our aliases
# virtually page through defined aliases before we hit the db
if substring_match and offset <= len(column_definitions.columns):
for index, column in enumerate(column_definitions.columns.values()):
if (
column.proto_type == attr_type
and substring_match in column.public_alias
and not column.secondary_alias
and not column.private
):
all_aliased_attributes.append(column)
if offset <= len(column_definitions.columns) + len(column_definitions.contexts):
if substring_match:
for column in column_definitions.columns.values():
if (
column.proto_type == attr_type
and substring_match in column.public_alias
and not column.secondary_alias
and not column.private
):
all_aliased_attributes.append(column)
for (
public_label,
virtual_context,
) in column_definitions.contexts.items():
if (
substring_match in public_label
and virtual_context.search_type is not None
and not virtual_context.secondary_alias
and constants.TYPE_MAP[virtual_context.search_type] == attr_type
):
all_aliased_attributes.append(
ProxyResolvedAttribute(
public_alias=public_label,
internal_name=public_label,
search_type=virtual_context.search_type,
)
)
else:
for (
public_label,
virtual_context,
) in column_definitions.contexts.items():
if (
substring_match in public_label
and virtual_context.search_type is not None
and not virtual_context.secondary_alias
and constants.TYPE_MAP[virtual_context.search_type] == attr_type
Comment on lines +427 to +436
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: When substring_match is empty, the else branch in query_trace_attributes fails to iterate over column_definitions.columns, breaking pagination logic.
Severity: MEDIUM

Suggested Fix

Update the else block to mirror the if block's logic. It should iterate through column_definitions.columns and append them to the all_aliased_attributes list, ensuring consistent pagination behavior regardless of whether substring_match is provided.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/api/endpoints/organization_trace_item_attributes.py#L427-L436

Potential issue: In the `query_trace_attributes` method, when `substring_match` is an
empty string, the logic enters an `else` block that only processes
`column_definitions.contexts`. It omits iterating over `column_definitions.columns`,
unlike the `if` block which handles both. This omission results in an incomplete
`all_aliased_attributes` list, which is critical for pagination. Consequently,
pagination offset calculations will be incorrect, leading to skipped, repeated, or
duplicate attributes in the API response when no substring filter is applied.

):
all_aliased_attributes.append(
ProxyResolvedAttribute(
public_alias=public_label,
internal_name=public_label,
search_type=virtual_context.search_type,
)
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Identical context iteration duplicated across if/else branches

Low Severity

The context iteration in the else branch (lines 427–444) is an exact copy of the context iteration in the if substring_match branch (lines 410–426). Since substring_match is "" in the else branch, the substring_match in public_label check is always True, making the two blocks functionally identical. The context loop could be extracted outside the if/else to eliminate this duplication.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 53fd538. Configure here.

aliased_attributes = all_aliased_attributes[offset : offset + limit]
with sentry_sdk.start_span(op="query", name="attribute_names") as span:
if len(aliased_attributes) < limit - 1:
if len(aliased_attributes) < limit:
offset -= len(all_aliased_attributes) - len(aliased_attributes)
Comment on lines 445 to 448
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: When paginating without a substring_match, the offset calculation for the RPC call is incorrect because it fails to account for standard columns, leading to incorrect pagination results.
Severity: HIGH

Suggested Fix

Adjust the offset calculation to correctly account for the items that are not included in all_aliased_attributes when substring_match is falsy. The calculation should subtract the number of standard columns (len(column_definitions.columns)) from the offset if the pagination has moved past them.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/api/endpoints/organization_trace_item_attributes.py#L445-L448

Potential issue: In `query_trace_attributes`, when paginating without a
`substring_match` filter, the code incorrectly calculates the offset for the subsequent
RPC request. The logic populates `all_aliased_attributes` with only context attributes,
but the offset calculation `offset -= len(all_aliased_attributes) -
len(aliased_attributes)` implicitly assumes it also contains standard column attributes
that were conceptually skipped by the pagination offset. This results in an incorrect
offset being sent to the backend, leading to missing or duplicated attributes in the
paginated response.

limit -= len(aliased_attributes)
rpc_request = TraceItemAttributeNamesRequest(
Expand Down Expand Up @@ -419,6 +474,7 @@
include_internal,
substring_match,
aliased_attributes,
all_aliased_attributes,
)

sentry_sdk.set_context("api_response", {"attributes": attributes})
Expand All @@ -433,7 +489,8 @@
trace_item_type: SupportedTraceItemType,
include_internal: bool,
substring_match: str,
aliased_attributes: list[ResolvedAttribute],
aliased_attributes: list[ResolvedAttribute | ProxyResolvedAttribute],
exclude_attributes: list[ResolvedAttribute | ProxyResolvedAttribute],
) -> list[TraceItemAttributeKey]:
attribute_keys = {}
for attribute in rpc_response.attributes:
Expand All @@ -457,11 +514,21 @@
)

attribute_keys[attr_key["name"]] = attr_key
for aliased_attr in exclude_attributes:
attr_key = as_attribute_key(
aliased_attr.internal_name,
attribute_type,
trace_item_type,
is_proxy=isinstance(aliased_attr, ProxyResolvedAttribute),
)
if attr_key["name"] in attribute_keys:
del attribute_keys[attr_key["name"]]
for aliased_attr in aliased_attributes:
attr_key = as_attribute_key(
aliased_attr.internal_name,
attribute_type,
trace_item_type,
is_proxy=isinstance(aliased_attr, ProxyResolvedAttribute),
)
attribute_keys[attr_key["name"]] = attr_key

Expand All @@ -476,33 +543,42 @@
trace_item_type: SupportedTraceItemType,
include_internal: bool,
substring_match: str,
aliased_attributes: list[ResolvedAttribute],
aliased_attributes: list[ResolvedAttribute | ProxyResolvedAttribute],
exclude_attributes: list[ResolvedAttribute | ProxyResolvedAttribute],
) -> list[TraceItemAttributeKey]:
attributes = list(
filter(
lambda x: (
not is_sentry_convention_replacement_attribute(x["name"], trace_item_type)
attribute_keys = {}
for attribute in rpc_response.attributes:
if attribute.name and can_expose_attribute(
attribute.name,
trace_item_type,
include_internal=include_internal,
):
attr_key = as_attribute_key(
attribute.name,
attribute_type,
trace_item_type,
)
if (
not is_sentry_convention_replacement_attribute(
attr_key["name"], trace_item_type
)
# Remove anything where the public alias doesn't match the substring
# This can happen when the public alias is different, but that's handled by
# aliased_attributes
and (substring_match in x["name"] if substring_match else True)
),
[
as_attribute_key(
attribute.name,
attribute_type,
trace_item_type,
)
for attribute in rpc_response.attributes
if attribute.name
and can_expose_attribute(
attribute.name,
trace_item_type,
include_internal=include_internal,
)
],
and (substring_match in attr_key["name"] if substring_match else True)
):
attribute_keys[attr_key["key"]] = attr_key
# We need to exclude any aliased attributes here because, due to pagination, they might have already been seen
# earlier
for aliased_attr in exclude_attributes:
attr_key = as_attribute_key(
aliased_attr.internal_name,
attribute_type,
trace_item_type,
is_proxy=isinstance(aliased_attr, ProxyResolvedAttribute),
)
)
if attr_key["name"] in attribute_keys:
del attribute_keys[attr_key["name"]]

Check warning on line 581 in src/sentry/api/endpoints/organization_trace_item_attributes.py

View check run for this annotation

@sentry/warden / warden: sentry-backend-bugs

Dict key mismatch prevents attribute exclusion from working correctly

The `serialize_trace_attributes` method uses `attr_key["key"]` when adding attributes to `attribute_keys` dict (line 570, 594), but uses `attr_key["name"]` when checking for exclusion (line 580). Since `key` and `name` can differ (e.g., `key="tags[foo,number]"` vs `name="foo"`), the exclusion check will fail to find matching attributes, causing duplicates to appear in paginated results.
Comment on lines +580 to +581
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

exclude_attributes loop checks wrong dict key, exclusions never happen for tag attributes

The attribute_keys dict is populated using attr_key["key"] as the dictionary key (line 570), but the exclusion loop at lines 580-581 checks membership and deletes using attr_key["name"]. For tag attributes, these differ: "key" is like "tags[my_tag,number]" while "name" is just "my_tag". Since the membership check fails, the exclusion is silently skipped, causing duplicate attributes to appear across pagination pages.

Verification

Read as_attribute_key (lines 244-252) which shows key = f"tags[{name},number]" while name = name. Confirmed attribute_keys is populated with ["key"] at line 570. The membership check at line 580 uses ["name"] which won't match the stored keys for tag attributes.

Suggested fix: Use attr_key["key"] consistently for both dict key storage and deletion lookup

Suggested change
if attr_key["name"] in attribute_keys:
del attribute_keys[attr_key["name"]]
if attr_key["key"] in attribute_keys:
del attribute_keys[attr_key["key"]]

Identified by Warden sentry-backend-bugs · 3GY-QXY

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix attempt detected (commit 63b88cd)

The code was refactored to use a dictionary approach at line 570 with attr_key['key'] as the key, but the exclusion logic at lines 580-581 still incorrectly uses attr_key['name'] for both the membership check and deletion, so the fix was attempted but the core issue persists.

The original issue appears unresolved. Please review and try again.

Evaluated by Warden

Comment thread
sentry[bot] marked this conversation as resolved.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dictionary keyed by "key" but exclusion uses "name"

Medium Severity

In serialize_trace_attributes, the attribute_keys dict is keyed by attr_key["key"] (line 570 and 594), but the exclusion loop checks and deletes using attr_key["name"] (lines 580–581). For number/boolean attributes, "key" is tags[name,type] while "name" is just the raw name, so the lookup silently fails and exclusion doesn't work. The sibling function serialize_trace_attributes_using_sentry_conventions consistently uses attr_key["name"] throughout, so this appears to be an oversight.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 53fd538. Configure here.

for aliased_attr in aliased_attributes:
if can_expose_attribute(
aliased_attr.public_alias,
Expand All @@ -513,8 +589,11 @@
aliased_attr.internal_name,
attribute_type,
trace_item_type,
is_proxy=isinstance(aliased_attr, ProxyResolvedAttribute),
)
attributes.append(attr_key)
attribute_keys[attr_key["key"]] = attr_key
attributes = list(attribute_keys.values())
sentry_sdk.set_context("api_response", {"attributes": attributes})
return attributes


Expand Down
27 changes: 19 additions & 8 deletions src/sentry/search/eap/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ class AttributeArgumentDefinition(BaseArgumentDefinition):
@dataclass
class VirtualColumnDefinition:
constructor: Callable[[SnubaParams, Any], VirtualColumnContext]
# Need a type for the attributes endpoint
search_type: constants.SearchType
secondary_alias: bool = False
# Allows additional processing to the term after its been resolved
term_resolver: (
Callable[
Expand Down Expand Up @@ -255,7 +258,9 @@ class ResolvedTraceMetricAggregate(ResolvedFunction):
trace_filter: TraceItemFilter | None

@property
def proto_definition(self) -> AttributeAggregation | AttributeConditionalAggregation:
def proto_definition(
self,
) -> AttributeAggregation | AttributeConditionalAggregation:
if self.trace_metric is None and self.trace_filter is None:
return AttributeAggregation(
aggregate=self.internal_name,
Expand Down Expand Up @@ -460,9 +465,11 @@ def resolve(
),
key=cast(AttributeKey, resolved_attribute),
trace_metric=trace_metric,
trace_filter=cast(TraceItemFilter, resolved_arguments[0])
if isinstance(self, ConditionalTraceMetricAggregateDefinition)
else None,
trace_filter=(
cast(TraceItemFilter, resolved_arguments[0])
if isinstance(self, ConditionalTraceMetricAggregateDefinition)
else None
),
)


Expand Down Expand Up @@ -674,7 +681,9 @@ class ColumnDefinitions:
column_to_alias: Callable[[str], str | None] | None


def attribute_key_to_tuple(attribute_key: AttributeKey) -> tuple[str, AttributeKey.Type.ValueType]:
def attribute_key_to_tuple(
attribute_key: AttributeKey,
) -> tuple[str, AttributeKey.Type.ValueType]:
return (attribute_key.name, attribute_key.type)


Expand Down Expand Up @@ -731,9 +740,11 @@ def extract_trace_metric_aggregate_arguments(
return TraceMetric(
metric_name=cast(str, resolved_arguments[offset + 1]),
metric_type=cast(TraceMetricType, resolved_arguments[offset + 2]),
metric_unit=None
if resolved_arguments[offset + 3] == "-"
else cast(str, resolved_arguments[offset + 3]),
metric_unit=(
None
if resolved_arguments[offset + 3] == "-"
else cast(str, resolved_arguments[offset + 3])
),
)
elif all(resolved_argument == "" for resolved_argument in resolved_arguments[offset + 1 :]):
# no metrics were specified, assume we query all metrics
Expand Down
2 changes: 2 additions & 0 deletions src/sentry/search/eap/common_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
constructor=project_context_constructor(key),
term_resolver=project_term_resolver,
filter_column="project.id",
search_type="string",
secondary_alias=key != "project",
)
for key in constants.PROJECT_FIELDS
}
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/search/eap/spans/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,14 +682,17 @@ def is_starred_segment_context_constructor(
# TODO: need to change this so the VCC is using it too, but would require rewriting the term_resolver
default_value="Unknown",
sort_column="sentry.device.class",
search_type="string",
),
"span.module": VirtualColumnDefinition(
constructor=module_context_constructor,
search_type="string",
),
"is_starred_transaction": VirtualColumnDefinition(
constructor=is_starred_segment_context_constructor,
default_value="false",
processor=lambda x: True if x == "true" else False,
search_type="boolean",
),
**project_virtual_contexts(),
}
Expand Down
Loading
Loading