diff --git a/files/log8.jsonl b/files/log8.jsonl new file mode 100644 index 0000000..f873ecd --- /dev/null +++ b/files/log8.jsonl @@ -0,0 +1,12 @@ +{"timestamp": "2023-07-14 08:00:01", "level": "WARN", "message":"Connection lost due to timeout"} +{"timestamp": "2023-07-14 08:00:04", "level": "CRITICAL", "message": "Request processed unsuccessfully", "stacktrace": "Something went wrong\nTraceback (last line is latest):\n sample.py: line 32\n divide(100, 0)\n sample.py: line 8\n return a / b\nZeroDivisionError: division by zero"} +{"timestamp": "2023-07-14 08:00:06", "level": "INFO", "message": "User authentication failed"} +{"timestamp": "2023-07-14 08:00:08", "level": "DEBUG", "message": "Starting data synchronization"} +{"timestamp": "2023-07-14 08:00:11", "level": "INFO", "message": "Processing incoming request"} +{"timestamp": "2023-07-14 08:00:11", "level": "INFO", "message": "Processing incoming request (a little more...)"} +{"timestamp": "2023-07-14 08:00:14", "level": "DEBUG", "message": "Performing database backup"} +{"timestamp": "2023-07-14 08:00:16", "level": "WARN", "message": "Invalid input received: missing required field"} +{"timestamp": "2023-07-14 08:00:19", "level": "ERROR", "message": "Failed to connect to remote server"} +{"timestamp": "2023-07-14 08:00:22", "level": "INFO", "message": "Sending email notification"} +{"timestamp": "2023-07-14 08:00:25", "level": "WARN", "message": "Slow response time detected"} +{"timestamp": "2023-07-14 08:00:27", "level": "INFO", "message": "Data synchronization completed"} \ No newline at end of file diff --git a/files/log9.jsonl b/files/log9.jsonl new file mode 100644 index 0000000..0e06d81 --- /dev/null +++ b/files/log9.jsonl @@ -0,0 +1,12 @@ +{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:01", "message":"Connection lost due to timeout"} +{"level": "CRITICAL", "LOG_TIME": "2023-07-14 08:00:04", "message": "Request processed unsuccessfully", "stacktrace": "Something went wrong\nTraceback (last line is 
class JsonLFileReader(FileReader):
    """Reader for JSON Lines (``.jsonl``) log files.

    Each non-blank line of the file is parsed as one JSON object. The value
    that looks like a timestamp is moved to the front of the emitted text and
    the remaining fields follow as ``key: value`` pairs, one per line.
    """

    @classmethod
    def _can_read(cls, fname: str) -> bool:
        """Return True when *fname* has the ``.jsonl`` extension."""
        return fname.endswith(".jsonl")

    def __init__(self, fname: str, encoding: str):
        """Open *fname* with *encoding* and prepare the row generator.

        The file handle stays open for the lifetime of the reader and is
        released by ``_close_reader``.
        """
        super().__init__(fname, encoding)
        self._close_obj = open(fname, encoding=encoding, newline='')

        # NOTE(review): presumably FileReader consumes ``self._iter`` -
        # confirm against the base class.
        self._iter = self.iter_file()

    @staticmethod
    def _find_dt_col(d: dict[str, Any], previous_key: str | None) -> tuple[str, Any]:
        """Locate the timestamp field of a parsed row.

        Args:
            d: One parsed JSON object.
            previous_key: Key found for an earlier row, or None when no key
                has been determined yet.

        Returns:
            ``(key, value)`` for the timestamp field. When *previous_key* is
            given it is reused without re-detection, and the value is ``""``
            if the row does not contain that key.

        Raises:
            ValueError: If no key is known yet and no value in *d* is
                accepted as a timestamp.
        """
        if previous_key is not None:
            return previous_key, d.get(previous_key, "")

        for key, val in d.items():
            try:
                # A value counts as a timestamp when a transformer can be
                # built from it without raising ValueError.
                # NOTE(review): the trailing space presumably stands in for
                # the separator that follows a timestamp in a plain log
                # line - confirm.
                TimestampedLineTransformer.make_transformer_from_sample_line(str(val) + " ")
            except ValueError:
                continue
            return key, val
        raise ValueError("Could not find timestamp in the line")

    def iter_file(self):
        """Yield one text entry per JSON object in the file.

        Each entry has the form ``"<timestamp> <key>: <value>\\n<key>: ..."``
        with the timestamp field itself left out of the key/value list. The
        timestamp key is detected on the first row and reused afterwards.

        Raises:
            ValueError: If a line is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError``), is not a JSON object, or no timestamp
                can be found in the first row.
        """
        import json

        time_key = None
        for row in self._close_obj:
            # Blank or whitespace-only lines carry no record; json.loads
            # would raise on them and abort the whole read.
            if not row.strip():
                continue

            d = json.loads(row)
            if not isinstance(d, dict):
                raise ValueError(
                    f"Expected a JSON object per line, got {type(d).__name__}"
                )

            time_key, timestamp_entry = self._find_dt_col(d, time_key)
            s = "\n".join(
                f"{key}: {value}" for key, value in d.items() if key != time_key
            )

            yield f"{timestamp_entry} {s}"

    def _close_reader(self):
        """Close the underlying file handle."""
        self._close_obj.close()