-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathfql.py
More file actions
278 lines (200 loc) · 7.87 KB
/
fql.py
File metadata and controls
278 lines (200 loc) · 7.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
"""Fiddler Query Language (FQL) parsing and manipulation utilities.
This module provides utilities for working with FQL expressions used in
segments, custom metrics, and other Fiddler assets.
FQL Syntax Rules:
- Column names: Always in double quotes (e.g., "column_name")
- String values: Always in single quotes (e.g., 'value')
- Numeric values: No quotes (e.g., 42, 3.14)
"""
import re
from typing import Set, Dict, Optional, List
import logging
from .exceptions import FQLError
logger = logging.getLogger(__name__)
def extract_columns(expression: str) -> Set[str]:
    """Extract column references from an FQL expression.

    Column references are the double-quoted identifiers in the expression.
    Single-quoted string values are consumed as whole tokens, so a double
    quote that appears inside a string literal is not mistaken for a column
    reference. Empty references (``""``) are ignored.

    Args:
        expression: FQL expression string

    Returns:
        Set of column names referenced in the expression (empty set for an
        empty or ``None`` expression)

    Example:
        >>> sorted(extract_columns('"age" > 30 and "geography" == \'California\''))
        ['age', 'geography']
        >>> extract_columns('sum(if(fp(), 1, 0) * "transaction_value")')
        {'transaction_value'}
        >>> extract_columns('"name" == \'say "hi"\'')
        {'name'}
    """
    if not expression:
        return set()

    # Scan left to right: a single-quoted string value is matched (and
    # discarded) as a unit; a double-quoted identifier is captured.
    token_pattern = r"'[^']*'" r'|"([^"]*)"'

    # findall yields '' for string-literal matches and for empty "" tokens;
    # neither is a column name, so both are filtered out.
    columns = {name for name in re.findall(token_pattern, expression) if name}

    logger.debug('Extracted %d columns from expression: %s', len(columns), columns)
    return columns
def replace_column_names(expression: str, column_mapping: Dict[str, str]) -> str:
    """Replace column names in an FQL expression based on a mapping.

    This is useful when copying assets between models with different
    column names.

    All renames are applied in a single pass over the expression, so the
    result of one rename is never rewritten by another entry of the mapping
    (a swap such as ``{'a': 'b', 'b': 'a'}`` works as expected). Only
    double-quoted column references are touched; text inside single-quoted
    string values is left unchanged. New names are inserted literally, so
    backslashes in a new name have no special meaning.

    Args:
        expression: FQL expression string
        column_mapping: Dict mapping old column names to new column names

    Returns:
        Expression with column names replaced. The input is returned
        unchanged when the expression or the mapping is empty.

    Example:
        >>> expr = '"old_col" > 30 and "status" == \'active\''
        >>> mapping = {'old_col': 'new_col'}
        >>> replace_column_names(expr, mapping)
        '"new_col" > 30 and "status" == \'active\''
    """
    if not expression or not column_mapping:
        return expression

    # A single-quoted string value is matched as a unit and passed through;
    # a double-quoted identifier is captured for lookup in the mapping.
    token_pattern = r"'[^']*'" r'|"([^"]*)"'

    def _rename(match: 're.Match[str]') -> str:
        name = match.group(1)
        if name is None or name not in column_mapping:
            # String literal, or a column that is not being renamed.
            return match.group(0)
        return f'"{column_mapping[name]}"'

    # A callable replacement avoids re.sub's template syntax, which would
    # otherwise interpret backslashes and group references in new names.
    result = re.sub(token_pattern, _rename, expression)

    logger.debug('Applied %d column replacements', len(column_mapping))
    return result
def validate_fql_syntax(expression: str) -> tuple[bool, Optional[str]]:
    """Perform basic FQL syntax validation.

    This does not guarantee the expression will work in Fiddler,
    but catches common syntax errors: unterminated column names or string
    values, unbalanced or misordered parentheses, and empty column
    references. Quote characters and parentheses that appear *inside* a
    quoted token are treated as part of that token and are not counted.

    Args:
        expression: FQL expression to validate

    Returns:
        Tuple of (is_valid, error_message)
        If valid, error_message is None. An empty expression is valid.

    Example:
        >>> validate_fql_syntax('"age" > 30')
        (True, None)
        >>> validate_fql_syntax('"unclosed > 30')
        (False, 'Unbalanced double quotes (column names)')
    """
    if not expression:
        return True, None

    open_quote = None  # quote character of the token being scanned, if any
    quote_start = 0
    open_count = 0
    close_count = 0
    depth = 0
    closes_before_open = False
    has_empty_column = False
    has_adjacent_text = False

    for index, char in enumerate(expression):
        if open_quote is not None:
            if char != open_quote:
                continue  # still inside the quoted token
            if open_quote == '"':
                if index == quote_start + 1:
                    has_empty_column = True
                # Text glued directly to a closing column quote (e.g.
                # "my "col" name") usually means quotes were nested by mistake.
                following = expression[index + 1:index + 2]
                if following in ('"', '_') or following.isalnum():
                    has_adjacent_text = True
            open_quote = None
        elif char in ('"', "'"):
            open_quote = char
            quote_start = index
        elif char == '(':
            open_count += 1
            depth += 1
        elif char == ')':
            close_count += 1
            depth -= 1
            if depth < 0:
                closes_before_open = True

    # Errors are reported in a fixed priority: quotes, parentheses, columns.
    if open_quote == '"':
        return False, 'Unbalanced double quotes (column names)'
    if open_quote == "'":
        return False, 'Unbalanced single quotes (string values)'

    if open_count != close_count:
        return (
            False,
            f"Unbalanced parentheses (found {open_count} '(' and {close_count} ')')",
        )
    if closes_before_open:
        return False, "Unbalanced parentheses (')' found before its matching '(')"

    if has_empty_column:
        return False, 'Empty column reference found'

    # Advisory only: the expression is still reported as valid.
    if has_adjacent_text:
        logger.warning('Potential nested double quotes detected')

    return True, None
def normalize_expression(expression: str) -> str:
    """Normalize an FQL expression for comparison.

    This standardizes whitespace and formatting to make it easier
    to compare two expressions for equality. Only the text *outside* quoted
    tokens is rewritten: double-quoted column names and single-quoted string
    values are copied through verbatim, so expressions that differ only in
    the content of a literal never normalize to the same text.

    Args:
        expression: FQL expression to normalize

    Returns:
        Normalized expression (the input is returned as-is when empty)

    Example:
        >>> normalize_expression('"age">30')
        '"age" > 30'
        >>> normalize_expression('"name"==\'a=b\'')
        '"name" == \'a=b\''
    """
    if not expression:
        return expression

    def _normalize_unquoted(text: str) -> str:
        # Single spaces around comparison operators.
        text = re.sub(r'\s*([<>=!]+)\s*', r' \1 ', text)
        # One space before '(' and one after ')'.
        text = re.sub(r'\s*\(\s*', ' (', text)
        text = re.sub(r'\s*\)\s*', ') ', text)
        # Commas: no space before, one after.
        text = re.sub(r'\s*,\s*', ', ', text)
        # Collapse any remaining whitespace runs.
        return re.sub(r'\s+', ' ', text)

    # The capturing group makes re.split keep the quoted tokens: they land
    # at odd indices, with the unquoted text between them at even indices.
    pieces = re.split(r'("[^"]*"' r"|'[^']*')", expression)
    normalized = [
        piece if index % 2 else _normalize_unquoted(piece)
        for index, piece in enumerate(pieces)
    ]
    return ''.join(normalized).strip()
def get_fql_functions(expression: str) -> Set[str]:
    """Extract FQL function names used in an expression.

    A function name is a run of word characters followed (optionally after
    whitespace) by an opening parenthesis. Text inside double-quoted column
    names and single-quoted string values is ignored, and the logical
    keywords ``and``, ``or`` and ``not`` are not reported even when they
    are followed by a parenthesized sub-expression.

    Args:
        expression: FQL expression

    Returns:
        Set of function names found (e.g., {'sum', 'if', 'fp'}); an empty
        set for an empty expression

    Example:
        >>> sorted(get_fql_functions('sum(if(fp(), 1, 0))'))
        ['fp', 'if', 'sum']
        >>> get_fql_functions('"a" > 1 and ("b" < 2)')
        set()
    """
    if not expression:
        return set()

    # Quoted tokens are matched first (and yield an empty capture) so that
    # call-like text inside them, e.g. 'sum(x)', is skipped.
    token_pattern = r"'[^']*'" r'|"[^"]*"' r'|(\w+)\s*\('
    logical_keywords = {'and', 'or', 'not'}

    functions = {
        name
        for name in re.findall(token_pattern, expression)
        if name and name.lower() not in logical_keywords
    }

    logger.debug('Found FQL functions: %s', functions)
    return functions
def is_simple_filter(expression: str) -> bool:
    """Report whether an expression is a plain filter without aggregations.

    Plain filters can be used in segments, whereas expressions that
    aggregate are typically used in custom metrics.

    Args:
        expression: FQL expression

    Returns:
        True when none of the functions used by the expression is one of
        the known aggregation functions

    Example:
        >>> is_simple_filter('"age" > 30 and "status" == \'active\'')
        True
        >>> is_simple_filter('sum(if(fp(), 1, 0))')
        False
    """
    # Names treated as aggregation functions
    aggregations = {'sum', 'avg', 'count', 'min', 'max', 'mean', 'std'}

    # Simple means the expression shares no function name with that set
    return aggregations.isdisjoint(get_fql_functions(expression))
def split_fql_and_condition(expression: str) -> List[str]:
    """Split an FQL expression on 'and' operators at the top level.

    Useful for breaking down complex segment definitions. An ``and`` is a
    split point only when it is surrounded by whitespace, matched
    case-insensitively, and sits outside every parenthesis, double-quoted
    column name and single-quoted string value.

    Args:
        expression: FQL expression

    Returns:
        List of sub-expressions, stripped of surrounding whitespace with
        empty pieces dropped. When there is no top-level ``and`` the
        expression is returned unchanged as a single-element list; an empty
        expression yields an empty list.

    Example:
        >>> split_fql_and_condition('"age" > 30 and "status" == \'active\'')
        ['"age" > 30', '"status" == \'active\'']
        >>> split_fql_and_condition('("a" > 1 and "b" < 2) and "c" == 3')
        ['("a" > 1 and "b" < 2)', '"c" == 3']

    Note:
        This is a lightweight scanner, not a full parser. If a quote is
        left unterminated, the text after it is treated as unquoted.
    """
    if not expression:
        return []

    # Tokens that matter for splitting: quoted literals (skipped whole),
    # parentheses (nesting depth) and whitespace-delimited 'and'.
    token_pattern = re.compile(
        r"'[^']*'" r'|"[^"]*"' r'|[()]' r'|\s+and\s+',
        re.IGNORECASE,
    )

    pieces = []
    depth = 0
    start = 0
    for match in token_pattern.finditer(expression):
        token = match.group(0)
        if token[0] in ('"', "'"):
            continue
        if token == '(':
            depth += 1
        elif token == ')':
            # Clamp so a stray ')' cannot hide later top-level operators.
            depth = max(depth - 1, 0)
        elif depth == 0:
            pieces.append(expression[start:match.start()])
            start = match.end()

    if not pieces:
        # No top-level 'and' found: hand the expression back untouched.
        return [expression]

    pieces.append(expression[start:])
    stripped = (piece.strip() for piece in pieces)
    return [piece for piece in stripped if piece]
def validate_column_references(
    expression: str, valid_columns: Set[str]
) -> tuple[bool, List[str]]:
    """Validate that all column references in expression exist in valid_columns.

    Column references are obtained with ``extract_columns``. Missing names
    are returned in sorted order so the result (and the warning that is
    logged) is the same from run to run.

    Args:
        expression: FQL expression
        valid_columns: Set of valid column names

    Returns:
        Tuple of (all_valid, missing_columns); ``missing_columns`` is an
        empty list when every referenced column is valid

    Example:
        >>> expr = '"age" > 30 and "unknown_col" == 1'
        >>> validate_column_references(expr, {'age', 'status'})
        (False, ['unknown_col'])
    """
    referenced_columns = extract_columns(expression)

    # Sets iterate in arbitrary order; sort to keep the output deterministic.
    missing_columns = sorted(
        col for col in referenced_columns if col not in valid_columns
    )
    is_valid = not missing_columns

    if not is_valid:
        logger.warning(
            'Found %d missing column references: %s',
            len(missing_columns),
            missing_columns,
        )

    return is_valid, missing_columns