From ee9b1fa4824942a81b3e00d2aeab7365354f88d4 Mon Sep 17 00:00:00 2001 From: David Date: Sun, 9 Feb 2025 00:26:47 +0100 Subject: [PATCH 1/4] Add file reader for JsonL-Format. --- files/log8.jsonl | 12 +++++++++++ logmerger/file_reading.py | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 files/log8.jsonl diff --git a/files/log8.jsonl b/files/log8.jsonl new file mode 100644 index 0000000..f873ecd --- /dev/null +++ b/files/log8.jsonl @@ -0,0 +1,12 @@ +{"timestamp": "2023-07-14 08:00:01", "level": "WARN", "message":"Connection lost due to timeout"} +{"timestamp": "2023-07-14 08:00:04", "level": "CRITICAL", "message": "Request processed unsuccessfully", "stacktrace": "Something went wrong\nTraceback (last line is latest):\n sample.py: line 32\n divide(100, 0)\n sample.py: line 8\n return a / b\nZeroDivisionError: division by zero"} +{"timestamp": "2023-07-14 08:00:06", "level": "INFO", "message": "User authentication failed"} +{"timestamp": "2023-07-14 08:00:08", "level": "DEBUG", "message": "Starting data synchronization"} +{"timestamp": "2023-07-14 08:00:11", "level": "INFO", "message": "Processing incoming request"} +{"timestamp": "2023-07-14 08:00:11", "level": "INFO", "message": "Processing incoming request (a little more...)"} +{"timestamp": "2023-07-14 08:00:14", "level": "DEBUG", "message": "Performing database backup"} +{"timestamp": "2023-07-14 08:00:16", "level": "WARN", "message": "Invalid input received: missing required field"} +{"timestamp": "2023-07-14 08:00:19", "level": "ERROR", "message": "Failed to connect to remote server"} +{"timestamp": "2023-07-14 08:00:22", "level": "INFO", "message": "Sending email notification"} +{"timestamp": "2023-07-14 08:00:25", "level": "WARN", "message": "Slow response time detected"} +{"timestamp": "2023-07-14 08:00:27", "level": "INFO", "message": "Data synchronization completed"} \ No newline at end of file diff --git a/logmerger/file_reading.py b/logmerger/file_reading.py index 6ad03fe..8f77a8c 100644 --- a/logmerger/file_reading.py +++ b/logmerger/file_reading.py @@ -3,6 +3,9 @@ import abc import operator import types +from typing import Any + +from logmerger.timestamp_wrapper import TimestampedLineTransformer class FileReader(abc.ABC): @@ -253,3 +256,44 @@ def reader_guard(rdr): def _close_reader(self): self._close_obj.close() + + +class JsonLFileReader(FileReader): + @classmethod + def _can_read(cls, fname: str) -> bool: + return fname.endswith(".jsonl") + + def __init__(self, fname: str, encoding: str): + super().__init__(fname, encoding) + self._close_obj = open(fname, encoding=encoding, newline='') + + self._iter = self.iter_file() + + @staticmethod + def _find_dt_col(d: dict[str, Any], previous_key: str | None): + if previous_key is not None: + value = d.get(previous_key) + if value is not None: + return previous_key, value + for key, val in d.items(): + try: + tt = TimestampedLineTransformer.make_transformer_from_sample_line(val + " ") + print(tt) + except ValueError: + continue + return key, val + raise ValueError("Could not find timestamp in the line") + + + def iter_file(self): + import json + time_key = None + for row in self._close_obj: + d: dict = json.loads(row) + time_key, timestamp_entry = self._find_dt_col(d, time_key) + s = "\n".join([f"{key}: {value}" for key, value in d.items() if key != time_key]) + + yield f"{timestamp_entry} {s}" + + def _close_reader(self): + self._close_obj.close() \ No newline at end of file From 2316d74f2bfbe56b97310f6e15ae3b13e37514a0 Mon Sep 17 00:00:00 2001 From: David Date: Mon, 10 Feb 2025 20:02:45 +0100 Subject: [PATCH 2/4] Add new test file, add typing and remove print --- files/log9.jsonl | 12 ++++++++++++ logmerger/file_reading.py | 5 ++--- 2 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 files/log9.jsonl diff --git a/files/log9.jsonl b/files/log9.jsonl new file mode 100644 index 0000000..0e06d81 --- /dev/null +++ b/files/log9.jsonl @@ -0,0 +1,12 @@ +{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:01", "message":"Connection lost due to timeout"} +{"level": "CRITICAL", "LOG_TIME": "2023-07-14 08:00:04", "message": "Request processed unsuccessfully", "stacktrace": "Something went wrong\nTraceback (last line is latest):\n sample.py: line 32\n divide(100, 0)\n sample.py: line 8\n return a / b\nZeroDivisionError: division by zero"} +{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:06", "message": "User authentication failed"} +{"level": "DEBUG", "LOG_TIME": "2023-07-14 08:00:08", "message": "Starting data synchronization"} +{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:11", "message": "Processing incoming request"} +{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:11", "message": "Processing incoming request (a little more...)"} +{"level": "DEBUG", "LOG_TIME": "2023-07-14 08:00:14", "message": "Performing database backup"} +{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:16", "message": "Invalid input received: missing required field"} +{"level": "ERROR", "LOG_TIME": "2023-07-14 08:00:19", "message": "Failed to connect to remote server"} +{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:22", "message": "Sending email notification"} +{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:25", "message": "Slow response time detected"} +{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:27", "message": "Data synchronization completed"} \ No newline at end of file diff --git a/logmerger/file_reading.py b/logmerger/file_reading.py index 8f77a8c..49e9e6d 100644 --- a/logmerger/file_reading.py +++ b/logmerger/file_reading.py @@ -270,15 +270,14 @@ def __init__(self, fname: str, encoding: str): self._iter = self.iter_file() @staticmethod - def _find_dt_col(d: dict[str, Any], previous_key: str | None): + def _find_dt_col(d: dict[str, Any], previous_key: str | None)-> tuple[str, Any]: if previous_key is not None: value = d.get(previous_key) if value is not None: return previous_key, value for key, val in d.items(): try: - tt = TimestampedLineTransformer.make_transformer_from_sample_line(val + " ") - print(tt) + tt = TimestampedLineTransformer.make_transformer_from_sample_line(str(val) + " ") except ValueError: continue return key, val From 50265b1395bcea3c217355ffa419cc214e5606a7 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 18 Feb 2025 21:02:22 +0100 Subject: [PATCH 3/4] Remove dynamic search for timestamp col --- logmerger/file_reading.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/logmerger/file_reading.py b/logmerger/file_reading.py index 49e9e6d..56704c0 100644 --- a/logmerger/file_reading.py +++ b/logmerger/file_reading.py @@ -273,8 +273,7 @@ def __init__(self, fname: str, encoding: str): def _find_dt_col(d: dict[str, Any], previous_key: str | None)-> tuple[str, Any]: if previous_key is not None: value = d.get(previous_key) - if value is not None: - return previous_key, value + return previous_key, value for key, val in d.items(): try: tt = TimestampedLineTransformer.make_transformer_from_sample_line(str(val) + " ") From 05672e9d4490da9dbf323a966db6d161997bd405 Mon Sep 17 00:00:00 2001 From: David Date: Tue, 18 Feb 2025 21:24:09 +0100 Subject: [PATCH 4/4] Make an empty string the default return --- logmerger/file_reading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logmerger/file_reading.py b/logmerger/file_reading.py index 56704c0..d021080 100644 --- a/logmerger/file_reading.py +++ b/logmerger/file_reading.py @@ -272,7 +272,7 @@ def __init__(self, fname: str, encoding: str): @staticmethod def _find_dt_col(d: dict[str, Any], previous_key: str | None)-> tuple[str, Any]: if previous_key is not None: - value = d.get(previous_key) + value = d.get(previous_key, "") return previous_key, value for key, val in d.items(): try: