Skip to content

Commit bedcd41

Browse files
committed
feat(data-export): Exported attributes in data export should have public names
Attributes in the full export of trace items should be emitted under their public names or user-defined tags; no internal representation should be exposed.
1 parent af27c1d commit bedcd41

File tree

6 files changed

+106
-26
lines changed

6 files changed

+106
-26
lines changed

src/sentry/data_export/endpoints/data_export_details.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@
88
from sentry.api.base import cell_silo_endpoint
99
from sentry.api.bases.organization import OrganizationDataExportPermission, OrganizationEndpoint
1010
from sentry.api.serializers import serialize
11+
from sentry.data_export.models import ExportedData
1112
from sentry.models.organization import Organization
1213
from sentry.models.project import Project
1314
from sentry.utils import metrics
1415

15-
from ..models import ExportedData
16-
1716

1817
@cell_silo_endpoint
1918
class DataExportDetailsEndpoint(OrganizationEndpoint):

src/sentry/data_export/processors/discover.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from sentry_relay.consts import SPAN_STATUS_CODE_TO_NAME
55

66
from sentry.api.utils import get_date_range_from_params
7+
from sentry.data_export.base import ExportError
78
from sentry.models.environment import Environment
89
from sentry.models.group import Group
910
from sentry.models.organization import Organization
@@ -13,8 +14,6 @@
1314
from sentry.snuba import discover
1415
from sentry.snuba.utils import get_dataset
1516

16-
from ..base import ExportError
17-
1817
logger = logging.getLogger(__name__)
1918

2019

src/sentry/data_export/processors/explore.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@
2121
EAPResponse,
2222
FieldsACL,
2323
SearchResolverConfig,
24+
SupportedTraceItemType,
2425
)
26+
from sentry.search.eap.utils import build_internal_to_public_export_map
2527
from sentry.search.events.types import SAMPLING_MODES, SnubaParams
2628
from sentry.snuba.ourlogs import OurLogs
2729
from sentry.snuba.referrer import Referrer
@@ -76,6 +78,11 @@ def __init__(
7678
if dataset == "spans"
7779
else TraceItemType.TRACE_ITEM_TYPE_LOG
7880
)
81+
self._supported_trace_item_type = (
82+
SupportedTraceItemType.LOGS
83+
if self.trace_item_type == TraceItemType.TRACE_ITEM_TYPE_LOG
84+
else SupportedTraceItemType.SPANS
85+
)
7986

8087
use_aggregate_conditions = explore_query.get("allowAggregateConditions", "1") == "1"
8188
disable_extrapolation = explore_query.get("disableAggregateExtrapolation", "0") == "1"
@@ -178,6 +185,7 @@ def __init__(
178185
super().__init__(organization, explore_query, output_mode=output_mode)
179186
self.page_token = page_token
180187
self._last_emitted_item_id_hex: str | None = last_emitted_item_id_hex
188+
self._rename_mapping = build_internal_to_public_export_map(self._supported_trace_item_type)
181189

182190
@property
183191
def last_emitted_item_id_hex(self) -> str | None:
@@ -215,18 +223,24 @@ def run_query(self, _offset: int, limit: int) -> list[dict[str, Any]]:
215223
token.ParseFromString(self.page_token)
216224
request.page_token.CopyFrom(token)
217225
http_resp = export_logs_rpc(request)
218-
rows = list(iter_export_trace_items_rows(http_resp))
226+
rows = list(
227+
iter_export_trace_items_rows(
228+
http_resp,
229+
self._rename_mapping,
230+
self._supported_trace_item_type,
231+
)
232+
)
219233

220234
if self._last_emitted_item_id_hex is not None:
221-
while rows and rows[0].get("item_id") == self._last_emitted_item_id_hex:
235+
while rows and rows[0].get("id") == self._last_emitted_item_id_hex:
222236
rows = rows[1:]
223237

224238
self._sync_page_token_from_snuba_response(http_resp)
225239

226240
if not rows:
227241
return []
228242

229-
last_id = rows[-1].get("item_id")
243+
last_id = rows[-1].get("id")
230244
if isinstance(last_id, str):
231245
self._last_emitted_item_id_hex = last_id
232246
return rows

src/sentry/data_export/utils.py

Lines changed: 54 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,20 @@
1010
from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
1111
from sentry_protos.snuba.v1.trace_item_pb2 import AnyValue, TraceItem
1212

13+
from sentry.data_export.base import ExportError
14+
from sentry.search.eap.types import SupportedTraceItemType
15+
from sentry.search.eap.utils import can_expose_attribute
1316
from sentry.search.events.constants import TIMEOUT_ERROR_MESSAGE
1417
from sentry.snuba import discover
1518
from sentry.utils import metrics, snuba
1619
from sentry.utils.sdk import capture_exception
1720

18-
from .base import ExportError
21+
TRACE_ITEM_EXPORT_TOP_LEVEL_PUBLIC_KEYS: dict[str, str] = {
22+
"organization_id": "organization.id",
23+
"project_id": "project.id",
24+
"trace_id": "trace",
25+
"item_id": "id",
26+
}
1927

2028

2129
# Adapted into decorator from 'src/sentry/api/endpoints/organization_events.py'
@@ -94,7 +102,45 @@ def _ts_to_epoch(ts: Timestamp) -> float:
94102
return ts.seconds + ts.nanos / 1e9
95103

96104

97-
def trace_item_to_row(item: TraceItem) -> dict[str, Any]:
105+
def apply_public_names_to_trace_export_row(
    row: dict[str, Any],
    *,
    rename_map: dict[str, str],
    item_type: SupportedTraceItemType,
) -> dict[str, Any]:
    """
    Return a copy of ``row`` keyed by public attribute names.

    Keys listed in ``TRACE_ITEM_EXPORT_TOP_LEVEL_PUBLIC_KEYS`` are emitted first (in that
    mapping's order) under their public names; the remaining keys follow in the row's own
    order. A remaining key is renamed through ``rename_map`` when present there; otherwise
    it is kept unchanged only if ``can_expose_attribute`` accepts it, and dropped if not.

    When two source keys resolve to the same public name, the first value wins unless it
    is ``None`` and the later value is not.
    """
    top_level = TRACE_ITEM_EXPORT_TOP_LEVEL_PUBLIC_KEYS
    leading = [name for name in top_level if name in row]
    trailing = [name for name in row if name not in top_level]

    renamed: dict[str, Any] = {}
    for source_key in leading + trailing:
        if source_key in top_level:
            public_key = top_level[source_key]
        elif source_key in rename_map:
            public_key = rename_map[source_key]
        elif can_expose_attribute(source_key, item_type, include_internal=False):
            public_key = source_key
        else:
            # Neither a known column nor an exposable attribute: leave it out.
            continue

        value = row[source_key]
        # Only overwrite an earlier entry when it carried no value and this one does.
        if public_key not in renamed or (renamed[public_key] is None and value is not None):
            renamed[public_key] = value
    return renamed
136+
137+
138+
def trace_item_to_row(
139+
item: TraceItem,
140+
*,
141+
rename_mapping: dict[str, str],
142+
item_type: SupportedTraceItemType,
143+
) -> dict[str, Any]:
98144
row: dict[str, Any] = {}
99145
for key, av in item.attributes.items():
100146
row[key] = None if av.WhichOneof("value") is None else anyvalue_to_python(av)
@@ -111,12 +157,15 @@ def trace_item_to_row(item: TraceItem) -> dict[str, Any]:
111157
row["server_sample_rate"] = item.server_sample_rate
112158
row["retention_days"] = item.retention_days
113159
row["downsampled_retention_days"] = item.downsampled_retention_days
114-
115-
return row
160+
return apply_public_names_to_trace_export_row(
161+
row, rename_map=rename_mapping, item_type=item_type
162+
)
116163

117164

118165
def iter_export_trace_items_rows(
119166
resp: ExportTraceItemsResponse,
167+
rename_mapping: dict[str, str],
168+
item_type: SupportedTraceItemType,
120169
) -> Iterator[dict[str, Any]]:
121170
for item in resp.trace_items:
122-
yield trace_item_to_row(item)
171+
yield trace_item_to_row(item, rename_mapping=rename_mapping, item_type=item_type)

src/sentry/search/eap/utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,27 @@ def is_sentry_convention_replacement_attribute(
207207
def translate_to_sentry_conventions(public_alias: str, item_type: SupportedTraceItemType) -> str:
208208
mapping = SENTRY_CONVENTIONS_REPLACEMENT_MAPPINGS.get(item_type, {})
209209
return mapping.get(public_alias, public_alias)
210+
211+
212+
def build_internal_to_public_export_map(
    item_type: SupportedTraceItemType,
) -> dict[str, str]:
    """
    Build a lookup from internal attribute name to one public alias for ``item_type``.

    Definitions that are private, flagged as a secondary alias, or rejected by
    ``can_expose_attribute`` are ignored. If several remaining public aliases point at the
    same internal name, the lexicographically smallest alias is kept.
    """
    primary_alias: dict[str, str] = {}
    for definition in PUBLIC_ALIAS_TO_INTERNAL_MAPPING.get(item_type, {}).values():
        if definition.private or definition.secondary_alias:
            continue
        alias = definition.public_alias
        if not can_expose_attribute(alias, item_type, include_internal=False):
            continue

        internal_name = definition.internal_name
        if internal_name in primary_alias:
            # Stable tie-break when multiple public aliases share one internal
            # (e.g. project.id vs project_id).
            alias = min(primary_alias[internal_name], alias)
        primary_alias[internal_name] = alias

    return primary_alias

tests/sentry/data_export/test_tasks.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ def test_explore_logs_jsonl_format(self, emailer: MagicMock) -> None:
855855

856856
lines = [ln for ln in content.split(b"\n") if ln]
857857
assert len(lines) == rows_exported
858-
message_key = "log.body" if len(fields) else "sentry.body"
858+
message_key = "log.body" if len(fields) else "message"
859859
bodies = {json.loads(ln.decode("utf-8"))[message_key] for ln in lines}
860860
assert bodies == {
861861
"jsonl log message",
@@ -930,11 +930,11 @@ def test_explore_logs_jsonl_full_dataset_rich_fields(self, emailer: MagicMock) -
930930
"tags[code.line.number,number]": 148.0,
931931
"logger.name": "sentry.access.api",
932932
"origin": "auto.log.stdlib",
933-
"sentry.body": "api.access.alpha",
933+
"message": "api.access.alpha",
934934
"rate_limited": "False",
935-
"sentry.severity_text": "info",
935+
"severity": "info",
936936
"environment": "prod",
937-
"sentry.severity_number": 9.0,
937+
"severity_number": 9.0,
938938
"payload_size": 981.0,
939939
"tags[process.pid,number]": 6639.0,
940940
"response": "200",
@@ -948,13 +948,13 @@ def test_explore_logs_jsonl_full_dataset_rich_fields(self, emailer: MagicMock) -
948948
"tags[code.line.number,number]": 148.0,
949949
"logger.name": "sentry.access.api",
950950
"origin": "auto.log.stdlib",
951-
"sentry.body": "api.access.beta",
951+
"message": "api.access.beta",
952952
"rate_limited": "False",
953-
"sentry.severity_text": "info",
953+
"severity": "info",
954954
"environment": "prod",
955955
"payload_size": 2048.0,
956956
"tags[process.pid,number]": 6639.0,
957-
"sentry.severity_number": 9.0,
957+
"severity_number": 9.0,
958958
"response": "201",
959959
},
960960
]
@@ -967,15 +967,10 @@ def test_explore_logs_jsonl_full_dataset_rich_fields(self, emailer: MagicMock) -
967967
"observed_timestamp",
968968
"trace",
969969
"id",
970-
"item_id",
971-
"sentry.timestamp_precise",
972-
"sentry.observed_timestamp_nanos",
973-
"organization_id",
974-
"project_id",
975-
"trace_id",
970+
"organization.id",
971+
"project.id",
976972
"item_type",
977973
"timestamp",
978-
"sentry._internal.ingested_at",
979974
"client_sample_rate",
980975
"server_sample_rate",
981976
"retention_days",

0 commit comments

Comments
 (0)