@@ -143,40 +143,136 @@ def get_data_col_names_from_root(data_root):
143143 ][- 1 ]
144144
145145
146- def parse_request_to_col_names (
146+ def find_cols_with_tide_tags (
147+ data_columns : pd .Index | list [str ], request : str
148+ ) -> list [str ]:
149+ request_parts = request .split ("__" )
150+
151+ if not (1 <= len (request_parts ) <= 4 ):
152+ raise ValueError (
153+ f"Request '{ request } ' is malformed. "
154+ f"Use 'name__unit__bloc__sub_bloc' format or a "
155+ f"combination of these tags."
156+ )
157+
158+ full_tag_col_map = {
159+ col_name_tag_enrichment (col , get_tags_max_level (data_columns )): col
160+ for col in data_columns
161+ }
162+
163+ def find_exact_match (search_str , target ):
164+ pattern = rf"(?:^|__)(?:{ re .escape (search_str )} )(?:$|__)"
165+ match = re .search (pattern , target )
166+ return match is not None
167+
168+ return [
169+ full_tag_col_map [augmented_col ]
170+ for augmented_col in full_tag_col_map .keys ()
171+ if all (find_exact_match (part , augmented_col ) for part in request_parts )
172+ ]
173+
174+
def find_cols_multiple_tag_groups(
    data_columns: pd.Index | list[str], request: str
) -> list[str]:
    """
    Collect the columns matching any ``|``-separated tag group of a request.

    The request is split on ``|`` and every resulting group is resolved with
    ``find_cols_with_tide_tags``. The per-group results are concatenated in
    group order; a column matched by several groups appears once per group
    (no de-duplication is done here).

    Parameters
    ----------
    data_columns : pandas.Index or list of str
        Column names to search.
    request : str
        One or more tag groups separated by ``|`` (e.g. ``"kWh|°C"``).

    Returns
    -------
    list of str
        Matching column names, grouped by the order of the tag groups.
    """
    return [
        matched_col
        for tag_group in request.split("|")
        for matched_col in find_cols_with_tide_tags(data_columns, tag_group)
    ]
183+
184+
185+ def tide_request (
147186 data_columns : pd .Index | list [str ], request : str | pd .Index | list [str ] = None
148187) -> list [str ]:
188+ """
189+ Select columns by matching structured TIDE-style tags.
190+
191+ Filters column names based on a TIDE-style structured tag syntax. Columns are
192+ expected to use a naming convention with double underscores (`__`) separating
193+ tags.
194+
195+ A column name can include up to four hierarchical parts:
196+ 'name__unit__bloc__sub_bloc' where each part is optional, but must be separated
197+ with double underscores.
198+
199+ The `request` argument allows searching for columns matching one or more
200+ of these parts using full or partial tag patterns. Multiple tag patterns
201+ can be combined using the pipe `|` character to form OR conditions.
202+
203+ Parameters
204+ ----------
205+ data_columns : pandas.Index or list of str
206+ A collection of column names to filter. Each column name should follow
207+ the TIDE format (e.g., "sensor__°C__bloc1").
208+
209+ request : str or list of str or pandas.Index, optional
210+ Tag(s) to match against the column names. Each tag string may be:
211+
212+ - A full structured tag (e.g., "name__°C__bloc2")
213+ - A partial tag (e.g., "°C", "bloc1")
214+ - A group of tags separated by "|" (e.g., "kWh|°C")
215+
216+ If None, all columns from `data_columns` are returned.
217+
218+ Returns
219+ -------
220+ list of str
221+ The list of column names that match any of the provided tag queries.
222+
223+ Notes
224+ -----
225+ - Matching is done per tag part, not substrings. For instance, the query
226+ "bloc1" will match "name__°C__bloc1" but not "bloc11".
227+ - If multiple requests are given, columns are returned if they match
228+ at least one of them (logical OR).
229+ - Tags can include between 1 and 4 parts, split by `__`.
230+
231+ Examples
232+ --------
233+ >>> DF_COLUMNS = [
234+ ... "name_1__°C__bloc1",
235+ ... "name_1__°C__bloc2",
236+ ... "name_2",
237+ ... "name_2__DIMENSIONLESS__bloc2",
238+ ... "name_3__kWh/m²",
239+ ... "name_5__kWh",
240+ ... "name4__DIMENSIONLESS__bloc4",
241+ ... ]
242+
243+ >>> tide_request(DF_COLUMNS)
244+ ['name_1__°C__bloc1', 'name_1__°C__bloc2', 'name_2',
245+ 'name_2__DIMENSIONLESS__bloc2', 'name_3__kWh/m²',
246+ 'name_5__kWh', 'name4__DIMENSIONLESS__bloc4']
247+
248+ >>> tide_request(DF_COLUMNS, "°C")
249+ ['name_1__°C__bloc1', 'name_1__°C__bloc2']
250+
251+ >>> tide_request(DF_COLUMNS, "kWh|°C")
252+ ['name_5__kWh', 'name_1__°C__bloc1', 'name_1__°C__bloc2']
253+
254+ >>> # Columns are not selected twice
255+ >>> tide_request(DF_COLUMNS, ["kWh|°C", "name_5__kWh"])
256+ ['name_5__kWh', 'name_1__°C__bloc1', 'name_1__°C__bloc2']
257+ """
258+
149259 if request is None :
150260 return list (data_columns )
151261
152- elif isinstance (request , pd . Index ) or isinstance ( request , list ):
153- return [ col for col in request if col in data_columns ]
262+ elif isinstance (request , str ):
263+ request = [ request ]
154264
155- else :
156- request_parts = request .split ("__" )
157-
158- if not (1 <= len (request_parts ) <= 4 ):
159- raise ValueError (
160- f"Request '{ request } ' is malformed. "
161- f"Use 'name__unit__bloc__sub_bloc' format or a "
162- f"combination of these tags."
163- )
164-
165- full_tag_col_map = {
166- col_name_tag_enrichment (col , get_tags_max_level (data_columns )): col
167- for col in data_columns
168- }
169-
170- def find_exact_match (search_str , target ):
171- pattern = rf"(?:^|__)(?:{ re .escape (search_str )} )(?:$|__)"
172- match = re .search (pattern , target )
173- return match is not None
174-
175- return [
176- full_tag_col_map [augmented_col ]
177- for augmented_col in full_tag_col_map .keys ()
178- if all (find_exact_match (part , augmented_col ) for part in request_parts )
179- ]
265+ if not (isinstance (request , pd .Index ) or isinstance (request , list )):
266+ raise ValueError (
267+ "Invalid request. Was expected an instance of str, pd.Index or List[str]"
268+ f"got { type (request )} instead"
269+ )
270+
271+ list_to_return = []
272+ for req in request :
273+ list_to_return .extend (find_cols_multiple_tag_groups (data_columns , req ))
274+
275+ return list (dict .fromkeys (list_to_return ))
180276
181277
182278def data_columns_to_tree (columns : pd .Index | list [str ]) -> T :
0 commit comments