Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions files/log8.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{"timestamp": "2023-07-14 08:00:01", "level": "WARN", "message":"Connection lost due to timeout"}
{"timestamp": "2023-07-14 08:00:04", "level": "CRITICAL", "message": "Request processed unsuccessfully", "stacktrace": "Something went wrong\nTraceback (last line is latest):\n sample.py: line 32\n divide(100, 0)\n sample.py: line 8\n return a / b\nZeroDivisionError: division by zero"}
{"timestamp": "2023-07-14 08:00:06", "level": "INFO", "message": "User authentication failed"}
{"timestamp": "2023-07-14 08:00:08", "level": "DEBUG", "message": "Starting data synchronization"}
{"timestamp": "2023-07-14 08:00:11", "level": "INFO", "message": "Processing incoming request"}
{"timestamp": "2023-07-14 08:00:11", "level": "INFO", "message": "Processing incoming request (a little more...)"}
{"timestamp": "2023-07-14 08:00:14", "level": "DEBUG", "message": "Performing database backup"}
{"timestamp": "2023-07-14 08:00:16", "level": "WARN", "message": "Invalid input received: missing required field"}
{"timestamp": "2023-07-14 08:00:19", "level": "ERROR", "message": "Failed to connect to remote server"}
{"timestamp": "2023-07-14 08:00:22", "level": "INFO", "message": "Sending email notification"}
{"timestamp": "2023-07-14 08:00:25", "level": "WARN", "message": "Slow response time detected"}
{"timestamp": "2023-07-14 08:00:27", "level": "INFO", "message": "Data synchronization completed"}
12 changes: 12 additions & 0 deletions files/log9.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:01", "message":"Connection lost due to timeout"}
{"level": "CRITICAL", "LOG_TIME": "2023-07-14 08:00:04", "message": "Request processed unsuccessfully", "stacktrace": "Something went wrong\nTraceback (last line is latest):\n sample.py: line 32\n divide(100, 0)\n sample.py: line 8\n return a / b\nZeroDivisionError: division by zero"}
{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:06", "message": "User authentication failed"}
{"level": "DEBUG", "LOG_TIME": "2023-07-14 08:00:08", "message": "Starting data synchronization"}
{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:11", "message": "Processing incoming request"}
{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:11", "message": "Processing incoming request (a little more...)"}
{"level": "DEBUG", "LOG_TIME": "2023-07-14 08:00:14", "message": "Performing database backup"}
{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:16", "message": "Invalid input received: missing required field"}
{"level": "ERROR", "LOG_TIME": "2023-07-14 08:00:19", "message": "Failed to connect to remote server"}
{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:22", "message": "Sending email notification"}
{"level": "WARN", "LOG_TIME": "2023-07-14 08:00:25", "message": "Slow response time detected"}
{"level": "INFO", "LOG_TIME": "2023-07-14 08:00:27", "message": "Data synchronization completed"}
42 changes: 42 additions & 0 deletions logmerger/file_reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import abc
import operator
import types
from typing import Any

from logmerger.timestamp_wrapper import TimestampedLineTransformer


class FileReader(abc.ABC):
Expand Down Expand Up @@ -253,3 +256,42 @@ def reader_guard(rdr):

def _close_reader(self):
    """Close the object stored in ``self._close_obj``."""
    closeable = self._close_obj
    closeable.close()


class JsonLFileReader(FileReader):
    """Reader for JSON Lines (``.jsonl``) log files.

    Every non-blank line of the file is parsed as one JSON object. The
    field holding the timestamp is detected on the first record and that
    same key is reused for all following records. Each record is yielded
    as the timestamp value, a space, and then the remaining fields
    rendered as ``key: value`` entries joined by newlines.
    """

    @classmethod
    def _can_read(cls, fname: str) -> bool:
        """Return True when *fname* ends with the ``.jsonl`` extension."""
        return fname.endswith(".jsonl")

    def __init__(self, fname: str, encoding: str):
        """Open *fname* with *encoding* and prepare the record iterator."""
        super().__init__(fname, encoding)
        # newline='' leaves line endings untranslated; json.loads ignores
        # the surrounding whitespace, so the terminators are harmless.
        self._close_obj = open(fname, encoding=encoding, newline='')

        self._iter = self.iter_file()

    @staticmethod
    def _find_dt_col(d: dict[str, Any], previous_key: str | None) -> tuple[str, Any]:
        """Locate the timestamp field of record *d*.

        If *previous_key* is not None it is trusted as-is and its value is
        looked up in *d* (an empty string is used when the key is absent).
        Otherwise the fields are probed in order, and the first one whose
        value ``TimestampedLineTransformer.make_transformer_from_sample_line``
        accepts without raising ``ValueError`` is chosen.

        Returns:
            A ``(key, value)`` tuple for the timestamp field.

        Raises:
            ValueError: if no field of *d* is accepted as a timestamp.
        """
        if previous_key is not None:
            return previous_key, d.get(previous_key, "")

        for key, val in d.items():
            try:
                # Only success/failure of the probe matters; the returned
                # transformer is discarded.
                # NOTE(review): the trailing space presumably stands in for
                # the text that normally follows a timestamp - confirm.
                TimestampedLineTransformer.make_transformer_from_sample_line(str(val) + " ")
            except ValueError:
                continue
            return key, val
        raise ValueError("Could not find timestamp in the line")

    def iter_file(self):
        """Yield one formatted string per JSON record in the file.

        Blank or whitespace-only lines are skipped so that stray empty
        lines (for example an extra newline at the end of the file) do not
        abort reading with a JSON decoding error.
        """
        import json

        time_key = None
        for row in self._close_obj:
            if not row.strip():
                # Nothing to decode on this line.
                continue
            d: dict = json.loads(row)
            time_key, timestamp_entry = self._find_dt_col(d, time_key)
            s = "\n".join(f"{key}: {value}" for key, value in d.items() if key != time_key)

            yield f"{timestamp_entry} {s}"

    def _close_reader(self):
        """Close the file opened in ``__init__``."""
        self._close_obj.close()