diff --git a/src/toolong/log_lines.py b/src/toolong/log_lines.py index 7354ddc..80d3060 100644 --- a/src/toolong/log_lines.py +++ b/src/toolong/log_lines.py @@ -369,8 +369,9 @@ def merge_log_files(self) -> None: break_position = 0 for line_no, break_position, timestamp in timestamps: - append_meta((timestamp, line_no, log_file)) - append(break_position) + if self.is_valid_timestamp(timestamp): + append_meta((timestamp, line_no, log_file)) + append(break_position) append(log_file.size) self.post_message( @@ -552,8 +553,9 @@ def get_timestamp(self, line_index: int) -> datetime | None: log_file, start, end = self.index_to_span(line_index) line = log_file.get_line(start, end) timestamp = log_file.timestamp_scanner.scan(line) - return timestamp - + if self.is_valid_timestamp(timestamp): + return timestamp + return None def on_unmount(self) -> None: self._line_reader.stop() self.log_file.close() diff --git a/src/toolong/timestamps.py b/src/toolong/timestamps.py index c1faff1..842a57f 100644 --- a/src/toolong/timestamps.py +++ b/src/toolong/timestamps.py @@ -21,77 +21,14 @@ def parse(timestamp: str) -> datetime | None: # Info taken from logmerger project https://github.com/ptmcg/logmerger/blob/main/logmerger/timestamp_wrapper.py +# Refined regular expressions to be more specific and less likely to match non-timestamp numbers TIMESTAMP_FORMATS = [ - TimestampFormat( - r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}\s?(?:Z|[+-]\d{4})", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}\s?(?:Z|[+-]\d{4})", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\s?(?:Z|[+-]\d{4})", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2},\d{3}\s?(?:Z|[+-]\d{4})", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2},\d{3}", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}\s?(?:Z|[+-]\d{4}Z?)", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\s?(?:Z|[+-]\d{4})", - datetime.fromisoformat, - ), - TimestampFormat( - r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", - datetime.fromisoformat, - ), - TimestampFormat( - r"[JFMASOND][a-z]{2}\s(\s|\d)\d \d{2}:\d{2}:\d{2}", - parse_timestamp("%b %d %H:%M:%S"), - ), - TimestampFormat( - r"\d{2}\/\w+\/\d{4} \d{2}:\d{2}:\d{2}", - parse_timestamp( - "%d/%b/%Y %H:%M:%S", - ), - ), - TimestampFormat( - r"\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4}", - parse_timestamp("%d/%b/%Y:%H:%M:%S %z"), - ), - TimestampFormat( - r"\d{10}\.\d+", - lambda s: datetime.fromtimestamp(float(s)), - ), - TimestampFormat( - r"\d{13}", - lambda s: datetime.fromtimestamp(int(s)), - ), + (r'\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z\b', lambda x: datetime.fromisoformat(x.replace('Z', '+00:00'))), + (r'\b\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}\b', lambda x: datetime.strptime(x, '%Y/%m/%d %H:%M:%S')), + (r'\b\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}\b', lambda x: datetime.strptime(x, '%d-%m-%Y %H:%M:%S')), + (r'\b\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}\b', lambda x: datetime.strptime(x, '%d/%m/%Y %H:%M:%S')), + (r'\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\b', lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S')), + (r'\b\d{2}:\d{2}:\d{2}\b', lambda x: datetime.strptime(x, '%H:%M:%S').time()), ] @@ -131,6 +68,16 @@ def scan(self, line: str) -> datetime | None: try: if (timestamp := parse_callable(match.group(0))) is None: continue + # Validate the timestamp to ensure it falls within a reasonable range + now = datetime.now() + if isinstance(timestamp, datetime): + if timestamp.year < 1970 or timestamp > now: + continue + elif isinstance(timestamp, time): + # If it's a time object, we can't validate the date part + pass + else: + continue except Exception: continue if index: