Add alternate timestamp functionality (#170)
* Add property to pull position_ms (presentation time) from input stream.

* Add use-pts option to directly use timestamps for event times.

The previous default behavior of calculating time from frame numbers should
be preserved.

The switch threads alternate timekeeping through the scanner: position_ms is fetched from the
video source and used to build the MotionEvent timestamps, so the rest of the code should
behave the same.
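
Roughly, the time source is now selected like this (a minimal standalone sketch of the pattern
used in _decode_thread, assuming scenedetect's FrameTimecode as used elsewhere in dvr-scan):

    from scenedetect.frame_timecode import FrameTimecode

    def presentation_time(use_pts: bool, frame_num: int, position_ms: float,
                          framerate: float) -> FrameTimecode:
        # Sketch: derive a frame's presentation time from either source.
        if not use_pts:
            # Default path: the first frame has frame_num 1, so subtract 1 to get
            # the time at which the frame is presented.
            return FrameTimecode(timecode=frame_num - 1, fps=framerate)
        # --use-pts path: trust the decoder-reported position in milliseconds.
        return FrameTimecode(position_ms / 1000.0, framerate)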

Might need more testing; was unsure about some of the frame_skip bits.

Fixes #168

* Fix end-of-stream problem when using opencv timestamps.

position_ms seems to return 0 after reading past the end of a stream.

* Revert "Fix end-of-stream problem when using opencv timestamps."

This reverts commit 99cf723.

* Move (most) position_ms calls to _decode_thread.

* Cleanup code formatting and extraneous print

* Use exact PTS for events at end-of-file, comparable to frame_num behavior

* Add test for use_pts.

PTS uses a slightly different time base than frame_num (shifted by one frame),
causing a slight shift in the expected event times.
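
For illustration only (framerate and shift direction assumed, not measured from the test
video), at 10 fps a one-frame shift amounts to 100 ms:

    from scenedetect.frame_timecode import FrameTimecode

    fps = 10.0  # assumed framerate, purely for illustration
    # Frame-number path: first frame presented at (1 - 1) / fps = 0.0 s.
    frame_num_time = FrameTimecode(timecode=1 - 1, fps=fps)
    # PTS path: the decoder reports a position roughly one frame later (~100 ms).
    pts_time = FrameTimecode(100.0 / 1000.0, fps)
    assert pts_time.frame_num - frame_num_time.frame_num == 1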

---------

Co-authored-by: goatzilla <[email protected]>
goatzillax authored Jul 22, 2024
1 parent 386208c commit 78a24ff
Showing 6 changed files with 83 additions and 16 deletions.
7 changes: 7 additions & 0 deletions dvr_scan/cli/__init__.py
@@ -656,6 +656,13 @@ def get_cli_parser(user_config: ConfigRegistry):
(', '.join(CHOICE_MAP['verbosity']), user_config.get_help_string('verbosity'))),
)

parser.add_argument(
'--use-pts',
action='store_true',
default=False,
help=('Use OpenCV provided presentation timestamp instead of calculated version.'),
)

parser.add_argument(
'--debug',
action='store_true',
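
For reference, a minimal standalone sketch of how the flag surfaces through argparse (plain
argparse behaviour, not dvr-scan's actual parser wiring):

    import argparse

    parser = argparse.ArgumentParser(prog='dvr-scan')
    parser.add_argument(
        '--use-pts',
        action='store_true',
        default=False,
        help='Use OpenCV provided presentation timestamp instead of calculated version.')

    # argparse maps '--use-pts' to the attribute 'use_pts'.
    args = parser.parse_args(['--use-pts'])
    assert args.use_pts is True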
1 change: 1 addition & 0 deletions dvr_scan/cli/config.py
@@ -315,6 +315,7 @@ def from_config(config_value: str, default: 'RGBValue') -> 'RGBValue':
'min-event-length': TimecodeValue('0.1s'),
'time-before-event': TimecodeValue('1.5s'),
'time-post-event': TimecodeValue('2.0s'),
'use-pts': False,

# Detection Parameters
'bg-subtractor': 'MOG2',
1 change: 1 addition & 0 deletions dvr_scan/cli/controller.py
@@ -282,6 +282,7 @@ def run_dvr_scan(settings: ProgramSettings) -> ty.List[ty.Tuple[FrameTimecode, F
min_event_len=settings.get('min-event-length'),
time_pre_event=settings.get('time-before-event'),
time_post_event=settings.get('time-post-event'),
use_pts=settings.get('use-pts'),
)

scanner.set_thumbnail_params(thumbnails=settings.get('thumbnails'),)
73 changes: 57 additions & 16 deletions dvr_scan/scanner.py
@@ -245,6 +245,7 @@ def __init__(self,
self._min_event_len = None # -l/--min-event-length
self._pre_event_len = None # -tb/--time-before-event
self._post_event_len = None # -tp/--time-post-event
self._use_pts = None # --use-pts

# Region Parameters (set_region)
self._region_editor = False # -w/--region-window
@@ -432,12 +433,14 @@ def set_regions(self,
def set_event_params(self,
min_event_len: Union[int, float, str] = '0.1s',
time_pre_event: Union[int, float, str] = '1.5s',
time_post_event: Union[int, float, str] = '2s'):
time_post_event: Union[int, float, str] = '2s',
use_pts: bool = False):
"""Set motion event parameters."""
assert self._input.framerate is not None
self._min_event_len = FrameTimecode(min_event_len, self._input.framerate)
self._pre_event_len = FrameTimecode(time_pre_event, self._input.framerate)
self._post_event_len = FrameTimecode(time_post_event, self._input.framerate)
self._use_pts = use_pts

def set_thumbnail_params(self, thumbnails: str = None):
self._thumbnails = thumbnails
@@ -568,7 +571,11 @@ def scan(self) -> Optional[DetectionResult]:
# Seek to starting position if required.
if self._start_time is not None:
self._input.seek(self._start_time)
start_frame = self._input.position.frame_num

if not self._use_pts:
start_frame = self._input.position.frame_num
else:
start_frame_ms = self._input.position_ms

# Show ROI selection window if required.
if not self._handle_regions():
@@ -616,13 +623,20 @@ def scan(self) -> Optional[DetectionResult]:
# shifting the event start time. Instead of using `-l`/`--min-event-len` directly, we
# need to compensate for rounding errors when we corrected it for frame skip. This is
# important as this affects the number of frames we consider for the actual motion event.
start_event_shift: int = (
self._pre_event_len.frame_num + min_event_len * (self._frame_skip + 1))
if not self._use_pts:
start_event_shift: int = (
self._pre_event_len.frame_num + min_event_len * (self._frame_skip + 1))
else:
start_event_shift_ms: float = (
(self._pre_event_len.get_seconds() + self._min_event_len.get_seconds()) * 1000)

# Length of buffer we require in memory to keep track of all frames required for -l and -tb.
buff_len = pre_event_len + min_event_len
event_end = self._input.position
last_frame_above_threshold = 0
if not self._use_pts:
last_frame_above_threshold = 0
else:
last_frame_above_threshold_ms = 0

if self._bounding_box:
self._bounding_box.set_corrections(
@@ -660,6 +674,7 @@ def scan(self) -> Optional[DetectionResult]:
if frame is None:
break
assert frame.frame_bgr is not None
pts = frame.timecode.get_seconds() * 1000
frame_size = (frame.frame_bgr.shape[1], frame.frame_bgr.shape[0])
if frame_size != self._input.resolution:
time = frame.timecode
@@ -710,7 +725,10 @@ def scan(self) -> Optional[DetectionResult]:
# If this frame still has motion, reset the post-event window.
if above_threshold:
num_frames_post_event = 0
last_frame_above_threshold = frame.timecode.frame_num
if not self._use_pts:
last_frame_above_threshold = frame.timecode.frame_num
else:
last_frame_above_threshold_ms = pts
# Otherwise, we wait until the post-event window has passed before ending
# this motion event and start looking for a new one.
#
@@ -742,10 +760,16 @@ def scan(self) -> Optional[DetectionResult]:
# the post event length time. We also need to compensate for the number
# of frames that we skipped that could have had motion.
# We also add 1 to include the presentation duration of the last frame.
event_end = FrameTimecode(
1 + last_frame_above_threshold + self._post_event_len.frame_num +
self._frame_skip, self._input.framerate)
assert event_end.frame_num >= event_start.frame_num
if not self._use_pts:
event_end = FrameTimecode(
1 + last_frame_above_threshold + self._post_event_len.frame_num +
self._frame_skip, self._input.framerate)
assert event_end.frame_num >= event_start.frame_num
else:
event_end = FrameTimecode((last_frame_above_threshold_ms / 1000) +
self._post_event_len.get_seconds(),
self._input.framerate)
assert event_end.get_seconds() >= event_start.get_seconds()
event_list.append(MotionEvent(start=event_start, end=event_end))
if self._output_mode != OutputMode.SCAN_ONLY:
encode_queue.put(MotionEvent(start=event_start, end=event_end))
@@ -781,9 +805,19 @@ def scan(self) -> Optional[DetectionResult]:
num_frames_post_event = 0
frames_since_last_event = frame.timecode.frame_num - event_end.frame_num
last_frame_above_threshold = frame.timecode.frame_num
shift_amount = min(frames_since_last_event, start_event_shift)
shifted_start = max(start_frame, frame.timecode.frame_num + 1 - shift_amount)
event_start = FrameTimecode(shifted_start, self._input.framerate)

if not self._use_pts:
shift_amount = min(frames_since_last_event, start_event_shift)
shifted_start = max(start_frame,
frame.timecode.frame_num + 1 - shift_amount)
event_start = FrameTimecode(shifted_start, self._input.framerate)
else:
ms_since_last_event = pts - (event_end.get_seconds() * 1000)
last_frame_above_threshold_ms = pts
# TODO: not sure all of this is actually necessary?
shift_amount_ms = min(ms_since_last_event, start_event_shift_ms)
shifted_start_ms = max(start_frame_ms, pts - shift_amount_ms)
event_start = FrameTimecode(shifted_start_ms / 1000, self._input.framerate)
# Send buffered frames to encode thread.
for encode_frame in buffered_frames:
# We have to be careful here. Since we're putting multiple items
@@ -812,7 +846,10 @@ def scan(self) -> Optional[DetectionResult]:
# compute the duration and ending timecode and add it to the event list.
if in_motion_event and not self._stop.is_set():
# curr_pos already includes the presentation duration of the frame.
event_end = FrameTimecode(self._input.position.frame_num, self._input.framerate)
if not self._use_pts:
event_end = FrameTimecode(self._input.position.frame_num, self._input.framerate)
else:
event_end = FrameTimecode((pts / 1000), self._input.framerate)
event_list.append(MotionEvent(start=event_start, end=event_end))

logger.debug("event %d high score %f" % (1 + self._num_events, self._highscore))
@@ -868,8 +905,12 @@ def _decode_thread(self, decode_queue: queue.Queue):
# self._input.position points to the time at the end of the current frame (i.e. the
# first frame has a frame_num of 1), so we correct that for presentation time.
assert self._input.position.frame_num > 0
presentation_time = FrameTimecode(
timecode=self._input.position.frame_num - 1, fps=self._input.framerate)
if not self._use_pts:
presentation_time = FrameTimecode(
timecode=self._input.position.frame_num - 1, fps=self._input.framerate)
else:
presentation_time = FrameTimecode(self._input.position_ms / 1000,
self._input.framerate)
if not self._stop.is_set():
decode_queue.put(DecodeEvent(frame_bgr, presentation_time))

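
The frame-based and PTS-based branches above track the same pre-event window in different
units. A quick sanity check at an assumed 10 fps with frame_skip = 0 and the defaults
-tb 1.5s / -l 0.1s (values chosen only for illustration; the real code also compensates for
frame-skip rounding):

    fps = 10.0           # assumed framerate
    pre_event_s = 1.5    # -tb/--time-before-event
    min_event_s = 0.1    # -l/--min-event-length
    frame_skip = 0

    # Frame-based shift: pre-event frames plus min-event frames, scaled for frame skip.
    start_event_shift = round(pre_event_s * fps) + round(min_event_s * fps) * (frame_skip + 1)

    # PTS-based shift: the same window expressed in milliseconds.
    start_event_shift_ms = (pre_event_s + min_event_s) * 1000

    assert start_event_shift == 16                      # frames
    assert abs(start_event_shift_ms - 1600.0) < 1e-6    # ms; 16 frames at 10 fps == 1600 ms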
4 changes: 4 additions & 0 deletions dvr_scan/video_joiner.py
@@ -84,6 +84,10 @@ def position(self) -> FrameTimecode:
"""Current position of the video including presentation time of the current frame."""
return self._position + 1

@property
def position_ms(self) -> float:
return self._cap.position_ms

def read(self, decode: bool = True) -> Optional[numpy.ndarray]:
"""Read/decode the next frame."""
next = self._cap.read(decode=decode)
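
For context, a rough standalone equivalent of what this property surfaces when the input is
read through OpenCV (an assumption based on the commit message's "OpenCV provided presentation
timestamp"; the file path is a placeholder):

    import cv2

    cap = cv2.VideoCapture('input.mp4')  # placeholder path
    ok, frame = cap.read()
    if ok:
        # CAP_PROP_POS_MSEC: the decoder's current position in milliseconds,
        # which is what position_ms ultimately exposes to the scanner.
        print(cap.get(cv2.CAP_PROP_POS_MSEC))
    cap.release()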
13 changes: 13 additions & 0 deletions tests/test_scan_context.py
@@ -32,6 +32,8 @@
# Similar to ARM, the CUDA version gives slightly different results.
CUDA_EVENT_TOLERANCE = 1

PTS_EVENT_TOLERANCE = 1

# ROI within the frame used for the test case (see traffic_camera.txt for details).
TRAFFIC_CAMERA_ROI = [
Point(631, 532),
@@ -102,6 +104,17 @@ def test_scan_context(traffic_camera_video):
compare_event_lists(event_list, TRAFFIC_CAMERA_EVENTS, EVENT_FRAME_TOLERANCE)


def test_scan_context_use_pts(traffic_camera_video):
"""Test scanner 'use_pts' option to change how timekeeping is done."""
scanner = MotionScanner([traffic_camera_video])
scanner.set_detection_params()
scanner.set_regions(regions=[TRAFFIC_CAMERA_ROI])
scanner.set_event_params(min_event_len=4, time_pre_event=0, use_pts=True)
event_list = scanner.scan().event_list
event_list = [(event.start.frame_num, event.end.frame_num) for event in event_list]
compare_event_lists(event_list, TRAFFIC_CAMERA_EVENTS, PTS_EVENT_TOLERANCE)


@pytest.mark.skipif(not SubtractorCudaMOG2.is_available(), reason="CUDA module not available.")
def test_scan_context_cuda(traffic_camera_video):
""" Test functionality of MotionScanner with the DetectorType.MOG2_CUDA. """
