Skip to content

Commit 227ea32

Browse files
committed
fix: parse indented rule/timestamp lines in group/pipe job blocks
Snakemake indents log output for jobs within group/pipe blocks by 4 spaces. The parser relied on RULE_START_PATTERN.match(), which is anchored at position 0, and on line.startswith("[") checks; both fail on indented lines, so group jobs were either invisible or assigned the wrong rule names. Fix this by using RULE_START_PATTERN.match(line.lstrip()) for rule detection and TIMESTAMP_PATTERN.search() for timestamp detection across all parser functions. Add _parse_indented_or_group_line() to LogLineParser to provide the same handling in the streaming path. Closes #42
1 parent 684359e commit 227ea32

3 files changed

Lines changed: 383 additions & 16 deletions

File tree

snakesee/parser/core.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def parse_rules_from_log(log_path: Path) -> dict[str, int]:
263263
try:
264264
for line in log_path.read_text().splitlines():
265265
# Track current rule being executed
266-
if match := RULE_START_PATTERN.match(line):
266+
if match := RULE_START_PATTERN.match(line.lstrip()):
267267
current_rule = match.group(1)
268268
# Count "Finished job" as rule completion
269269
elif "Finished job" in line and current_rule is not None:
@@ -320,7 +320,7 @@ def record_pending_error() -> None:
320320
lines = _cached_lines if _cached_lines is not None else log_path.read_text().splitlines()
321321
for line_num, line in enumerate(lines):
322322
# Track current rule being executed
323-
if match := RULE_START_PATTERN.match(line):
323+
if match := RULE_START_PATTERN.match(line.lstrip()):
324324
record_pending_error()
325325
current_rule = match.group(1)
326326
current_jobid = None # Reset jobid for new rule block
@@ -329,7 +329,7 @@ def record_pending_error() -> None:
329329
current_log_path = None
330330

331331
# Timestamp lines end error blocks
332-
elif line.startswith("[") and TIMESTAMP_PATTERN.match(line):
332+
elif TIMESTAMP_PATTERN.search(line):
333333
record_pending_error()
334334

335335
# Capture wildcards within rule block
@@ -484,7 +484,7 @@ def emit_pending_error() -> None:
484484
lines = _cached_lines if _cached_lines is not None else log_path.read_text().splitlines()
485485
for line in lines:
486486
# Track current rule - this also ends any pending error block
487-
if match := RULE_START_PATTERN.match(line):
487+
if match := RULE_START_PATTERN.match(line.lstrip()):
488488
emit_pending_error()
489489
current_rule = match.group(1)
490490
current_jobid = None
@@ -493,7 +493,7 @@ def emit_pending_error() -> None:
493493
current_log_path = None
494494

495495
# Timestamp lines end error blocks
496-
elif line.startswith("[") and TIMESTAMP_PATTERN.match(line):
496+
elif TIMESTAMP_PATTERN.search(line):
497497
emit_pending_error()
498498

499499
# Capture wildcards - applies to both rule blocks and error blocks
@@ -633,12 +633,12 @@ def _get_first_log_timestamp(
633633
try:
634634
if _cached_lines is not None:
635635
for line in _cached_lines:
636-
if match := TIMESTAMP_PATTERN.match(line):
636+
if match := TIMESTAMP_PATTERN.search(line):
637637
return _parse_timestamp(match.group(1))
638638
else:
639639
with log_path.open() as f:
640640
for line in f:
641-
if match := TIMESTAMP_PATTERN.match(line):
641+
if match := TIMESTAMP_PATTERN.search(line):
642642
return _parse_timestamp(match.group(1))
643643
except OSError as e:
644644
logger.info("Could not read log file %s: %s", log_path, e)
@@ -681,11 +681,11 @@ def parse_completed_jobs_from_log(
681681
lines = _cached_lines if _cached_lines is not None else log_path.read_text().splitlines()
682682
for line in lines:
683683
# Check for timestamp
684-
if match := TIMESTAMP_PATTERN.match(line):
684+
if match := TIMESTAMP_PATTERN.search(line):
685685
current_timestamp = _parse_timestamp(match.group(1))
686686

687687
# Track current rule being executed
688-
elif match := RULE_START_PATTERN.match(line):
688+
elif match := RULE_START_PATTERN.match(line.lstrip()):
689689
current_rule = match.group(1)
690690
current_wildcards = None
691691
current_threads = None
@@ -769,7 +769,7 @@ def parse_threads_from_log(log_path: Path) -> dict[str, int]:
769769
try:
770770
for line in log_path.read_text().splitlines():
771771
# Track current rule (resets context)
772-
if RULE_START_PATTERN.match(line):
772+
if RULE_START_PATTERN.match(line.lstrip()):
773773
current_jobid = None
774774
current_threads = None
775775

@@ -826,7 +826,7 @@ def parse_all_jobs_from_log(
826826
lines = _cached_lines if _cached_lines is not None else log_path.read_text().splitlines()
827827
for line in lines:
828828
# Track current rule being scheduled
829-
if match := RULE_START_PATTERN.match(line):
829+
if match := RULE_START_PATTERN.match(line.lstrip()):
830830
# Save previous job if complete
831831
if current_rule is not None and current_jobid is not None:
832832
if current_jobid not in seen_jobids:

snakesee/parser/line_parser.py

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -178,12 +178,10 @@ def parse_line(self, line: str) -> list[ParseEvent]:
178178
events.append(ParseEvent(ParseEventType.TIMESTAMP, {"timestamp": timestamp}))
179179
return events
180180

181-
# Indented lines (properties) start with space/tab
181+
# Indented lines start with space/tab. In group/pipe job blocks,
182+
# rule starts and timestamps are indented by 4 spaces.
182183
if first_char in (" ", "\t"):
183-
event = self._parse_indented_line(line)
184-
if event:
185-
events.append(event)
186-
return events
184+
return self._parse_indented_or_group_line(line, events)
187185

188186
# Rule start: "rule X:" or "localrule X:" - this ends error blocks
189187
if first_char == "r" and line.startswith("rule "):
@@ -250,6 +248,55 @@ def flush_pending_error(self) -> ParseEvent | None:
250248
"""
251249
return self.context.get_pending_error()
252250

251+
def _parse_indented_or_group_line(
252+
self, line: str, events: list[ParseEvent]
253+
) -> list[ParseEvent]:
254+
"""Parse indented lines: group-block elements or property lines.
255+
256+
In group/pipe job blocks, rule starts and timestamps are indented by
257+
4 spaces. Property lines are indented by 4 (normal) or 8 (group) spaces.
258+
259+
Args:
260+
line: Indented log line starting with space/tab.
261+
events: Mutable list to append events to.
262+
263+
Returns:
264+
The events list (same object passed in).
265+
"""
266+
stripped = line.lstrip()
267+
if not stripped:
268+
return events
269+
270+
first_stripped = stripped[0]
271+
272+
# Indented timestamp: " [Mon Jan 6 10:00:00 2026]"
273+
if first_stripped == "[":
274+
if match := TIMESTAMP_PATTERN.match(stripped):
275+
if pending := self.context.get_pending_error():
276+
events.append(pending)
277+
timestamp = _parse_timestamp(match.group(1))
278+
self.context.timestamp = timestamp
279+
events.append(ParseEvent(ParseEventType.TIMESTAMP, {"timestamp": timestamp}))
280+
return events
281+
282+
# Indented rule start: " rule X:" or " localrule X:"
283+
if (first_stripped == "r" and stripped.startswith("rule ")) or (
284+
first_stripped == "l" and stripped.startswith("localrule ")
285+
):
286+
if match := RULE_START_PATTERN.match(stripped):
287+
if pending := self.context.get_pending_error():
288+
events.append(pending)
289+
rule = match.group(1)
290+
self.context.reset_for_new_rule(rule)
291+
events.append(ParseEvent(ParseEventType.RULE_START, {"rule": rule}))
292+
return events
293+
294+
# Property lines (wildcards, threads, log, jobid)
295+
event = self._parse_indented_line(line)
296+
if event:
297+
events.append(event)
298+
return events
299+
253300
def _parse_indented_line(self, line: str) -> ParseEvent | None:
254301
"""Parse indented property lines (wildcards, threads, log, jobid).
255302

0 commit comments

Comments
 (0)