-
-
Notifications
You must be signed in to change notification settings - Fork 4.7k
feat(data-explore): Add new Query type for full export for simpler request validation #112953
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,7 +67,10 @@ def _validate_dataset(self, query_type: str, query_info: dict[str, Any]) -> dict | |
| dataset = dataset or "discover" | ||
| if dataset not in SUPPORTED_DATASETS: | ||
| raise serializers.ValidationError(f"{dataset} is not supported for exports") | ||
| elif query_type == ExportQueryType.EXPLORE_STR: | ||
| elif query_type in ( | ||
| ExportQueryType.EXPLORE_STR, | ||
| ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR, | ||
| ): | ||
| if not dataset: | ||
| raise serializers.ValidationError( | ||
| f"Please specify dataset. Supported datasets for this query type are {str(SUPPORTED_TRACE_ITEM_DATASETS.keys())}." | ||
|
|
@@ -87,18 +90,13 @@ def _validate_query_info( | |
| elif not isinstance(base_fields, list): | ||
| base_fields = [base_fields] | ||
|
|
||
| is_jsonl_trace_item_full_export = ( | ||
| query_type == ExportQueryType.EXPLORE_STR | ||
| and export_format == OutputMode.JSONL.value | ||
| and len(base_fields) == 0 | ||
| ) | ||
| is_jsonl_trace_item_full_export = query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unused
|
||
|
|
||
| if len(base_fields) > MAX_FIELDS: | ||
| detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again." | ||
| raise serializers.ValidationError(detail) | ||
| elif len(base_fields) == 0: | ||
| if not is_jsonl_trace_item_full_export: | ||
| raise serializers.ValidationError("at least one field is required to export") | ||
| elif len(base_fields) == 0 and not is_jsonl_trace_item_full_export: | ||
| raise serializers.ValidationError("at least one field is required to export") | ||
|
|
||
| if "query" not in query_info: | ||
| if is_jsonl_trace_item_full_export: | ||
|
|
@@ -136,7 +134,10 @@ def _validate_query_info( | |
| query_info["start"] = start.isoformat() | ||
| query_info["end"] = end.isoformat() | ||
|
|
||
| if query_type == ExportQueryType.EXPLORE_STR: | ||
| if ( | ||
| query_type == ExportQueryType.EXPLORE_STR | ||
| or query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR | ||
| ): | ||
| sort = query_info.get("sort", []) | ||
| if sort and isinstance(sort, str): | ||
| sort = [sort] | ||
|
|
@@ -215,38 +216,43 @@ def validate(self, data: dict[str, Any]) -> dict[str, Any]: | |
| ) | ||
| query_info = self._validate_dataset(query_type, query_info) | ||
| explore_output_mode = OutputMode.from_value(export_format) | ||
| is_full_jsonl_trace_item_export = ( | ||
| export_format == OutputMode.JSONL.value and len(query_info.get("field", [])) == 0 | ||
| ) | ||
| if not is_full_jsonl_trace_item_export: | ||
| try: | ||
| explore_processor = ExploreProcessor( | ||
| explore_query=query_info, | ||
| organization=organization, | ||
| output_mode=explore_output_mode, | ||
| ) | ||
| sort = query_info.get("sort", []) | ||
| orderby = [sort] if isinstance(sort, str) else sort | ||
|
|
||
| explore_processor.validate_export_query( | ||
| rpc_dataset_common.TableQuery( | ||
| query_string=query_info["query"], | ||
| selected_columns=query_info["field"], | ||
| orderby=orderby, | ||
| offset=0, | ||
| limit=1, | ||
| referrer=Referrer.DATA_EXPORT_TASKS_EXPLORE, | ||
| sampling_mode=explore_processor.sampling_mode, | ||
| resolver=explore_processor.search_resolver, | ||
| equations=query_info.get("equations", []), | ||
| ) | ||
| try: | ||
| explore_processor = ExploreProcessor( | ||
| explore_query=query_info, | ||
| organization=organization, | ||
| output_mode=explore_output_mode, | ||
| ) | ||
| sort = query_info.get("sort", []) | ||
| orderby = [sort] if isinstance(sort, str) else sort | ||
|
|
||
| explore_processor.validate_export_query( | ||
| rpc_dataset_common.TableQuery( | ||
| query_string=query_info["query"], | ||
| selected_columns=query_info["field"], | ||
| orderby=orderby, | ||
| offset=0, | ||
| limit=1, | ||
| referrer=Referrer.DATA_EXPORT_TASKS_EXPLORE, | ||
manessaraj marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| sampling_mode=explore_processor.sampling_mode, | ||
| resolver=explore_processor.search_resolver, | ||
| equations=query_info.get("equations", []), | ||
| ) | ||
| except InvalidSearchQuery as err: | ||
| sentry_sdk.capture_exception(err) | ||
| raise serializers.ValidationError("Invalid table query.") | ||
| ) | ||
| except InvalidSearchQuery as err: | ||
| sentry_sdk.capture_exception(err) | ||
| raise serializers.ValidationError("Invalid table query.") | ||
| elif query_type == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR: | ||
| query_info = self._validate_query_info( | ||
| query_type, query_info, export_format=export_format | ||
| ) | ||
| query_info = self._validate_dataset(query_type, query_info) | ||
sentry[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| explore_output_mode = OutputMode.from_value(export_format) | ||
| if explore_output_mode != OutputMode.JSONL: | ||
| raise serializers.ValidationError("For full export, output mode must be JSONL.") | ||
|
|
||
| elif data["query_type"] == ExportQueryType.ISSUES_BY_TAG_STR: | ||
| issues_by_tag_validate(query_info) | ||
| data["query_info"] = query_info | ||
| return data | ||
|
|
||
|
|
||
|
|
@@ -287,7 +293,7 @@ def _parse_limit(self, data: dict[str, Any]) -> tuple[int | None, bool]: | |
| run_sync = ( | ||
| limit is not None | ||
| and limit <= MAX_SYNC_LIMIT | ||
| and data["query_type"] == ExportQueryType.EXPLORE_STR | ||
| and data["query_type"] == ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR | ||
manessaraj marked this conversation as resolved.
Show resolved
Hide resolved
cursor[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| and data["query_info"].get("dataset") == "logs" | ||
| ) | ||
| return limit, run_sync | ||
|
|
@@ -308,7 +314,10 @@ def post(self, request: Request, organization: Organization) -> Response: | |
| # The data export feature is only available alongside `discover-query` (except for explore). | ||
| # So to export issue tags, they must have have `discover-query` | ||
| if not features.has("organizations:discover-query", organization): | ||
| if request.data.get("query_type") != ExportQueryType.EXPLORE_STR: | ||
| if request.data.get("query_type") not in { | ||
| ExportQueryType.EXPLORE_STR, | ||
| ExportQueryType.TRACE_ITEM_FULL_EXPORT_STR, | ||
| }: | ||
| return Response(status=404) | ||
|
|
||
| # Get environment_id and limit if available | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.