Skip to content

Commit

Permalink
Performance optimisations in calculate_data and redact_data() (#361)
Browse files Browse the repository at this point in the history
* Optimize redact_data()
* feat: purging of redaction data after time
* feat: Redaction improvements
  * Catch raw iBeacon uuid in advert hex strings
  * Ensure redaction list is refreshed before
    processing data (first_run on recursive func)
  * perf: add lru_cache to rssi calcs and clean_charbuf
  * Log redact timing in dump_devices
* Cleanups: BermudaDeviceScanner scanner
* Remove redundant scanner data
* Tidy check for scanner_sends_stamps
* Removed doubled-up fields from merge conflict chaos.
* linting

---------

Co-authored-by: Ashley Gittins <[email protected]>
  • Loading branch information
Lash-L and agittins authored Dec 1, 2024
1 parent d8d6272 commit f275baf
Show file tree
Hide file tree
Showing 9 changed files with 186 additions and 126 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ jobs:

- name: Install Python modules
run: |
pip install --constraint=.github/workflows/constraints.txt pre-commit black
# flake8 isort
pip install --constraint=.github/workflows/constraints.txt pre-commit
- name: "Install requirements"
run: python3 -m pip install -r requirements.txt
Expand Down
15 changes: 12 additions & 3 deletions custom_components/bermuda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,23 @@ async def async_setup_entry(hass: HomeAssistant, entry: BermudaConfigEntry):
coordinator = BermudaDataUpdateCoordinator(hass, entry)
entry.runtime_data = BermudaData(coordinator)

await coordinator.async_refresh()

if not coordinator.last_update_success:
async def on_failure():
    """Abort entry setup cleanly: stop background purging, then tell HA to retry.

    Raises:
        ConfigEntryNotReady: always — signals Home Assistant to retry setup later.
    """
    # Fix: "rasing" -> "raising" in the log message.
    _LOGGER.debug("Coordinator last update failed, raising ConfigEntryNotReady")
    # Presumably cancels the periodic redaction-purge task so a failed
    # entry leaves no background work running — TODO confirm in coordinator.
    await coordinator.stop_purging()
    raise ConfigEntryNotReady

try:
await coordinator.async_refresh()
except Exception as ex: # noqa: BLE001
_LOGGER.exception(ex)
await on_failure()
if not coordinator.last_update_success:
await on_failure()

await hass.config_entries.async_forward_entry_setups(entry, PLATFORMS)

entry.async_on_unload(entry.add_update_listener(async_reload_entry))

return True


Expand Down Expand Up @@ -87,6 +95,7 @@ async def async_unload_entry(hass: HomeAssistant, entry: BermudaConfigEntry) ->
"""Handle removal of an entry."""
if unload_result := await hass.config_entries.async_unload_platforms(entry, PLATFORMS):
_LOGGER.debug("Unloaded platforms.")
await entry.runtime_data.coordinator.stop_purging()
return unload_result


Expand Down
4 changes: 4 additions & 0 deletions custom_components/bermuda/bermuda_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,7 @@ def to_dict(self):
val = scanout # noqa
out[var] = val
return out

def __repr__(self) -> str:
    """Return the device's preferred name, for at-a-glance debugging."""
    # The default object repr is useless in device dumps; the preferred
    # name makes log output and debugger views readable.
    preferred: str = self.prefname
    return preferred
171 changes: 81 additions & 90 deletions custom_components/bermuda/bermuda_device_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@

from typing import TYPE_CHECKING, cast

from homeassistant.components.bluetooth import MONOTONIC_TIME, BluetoothScannerDevice
from homeassistant.components.bluetooth import (
MONOTONIC_TIME,
BaseHaRemoteScanner,
BluetoothScannerDevice,
)

from .const import (
_LOGGER,
Expand Down Expand Up @@ -59,19 +63,19 @@ def __init__(
) -> None:
# I am declaring these just to control their order in the dump,
# which is a bit silly, I suspect.
self.name: str = scandata.scanner.name
self.scanner_device_name = scanner_device.name
self.name: str = scanner_device.name or scandata.scanner.name
self.scanner_device = scanner_device # links to the source device
self.adapter: str = scandata.scanner.adapter
self.address = scanner_device.address
self.source: str = scandata.scanner.source
self.area_id: str | None = scanner_device.area_id
self.area_name: str | None = scanner_device.area_name
self.parent_device = parent_device
self.parent_device_address = parent_device.address
self.scanner_device = scanner_device # links to the source device
self.options = options
self.stamp: float | None = 0
self.scanner_sends_stamps: bool = False
# Only remote scanners log timestamps, local usb adaptors do not.
self.scanner_sends_stamps = isinstance(scanner_device, BaseHaRemoteScanner)
self.new_stamp: float | None = None # Set when a new advert is loaded from update
self.rssi: float | None = None
self.tx_power: float | None = None
Expand All @@ -85,6 +89,11 @@ def __init__(
self.hist_distance_by_interval = [] # updated per-interval
self.hist_interval = [] # WARNING: This is actually "age of ad when we polled"
self.hist_velocity = [] # Effective velocity versus previous stamped reading
self.conf_rssi_offset = self.options.get(CONF_RSSI_OFFSETS, {}).get(self.address, 0)
self.conf_ref_power = self.options.get(CONF_REF_POWER)
self.conf_attenuation = self.options.get(CONF_ATTENUATION)
self.conf_max_velocity = self.options.get(CONF_MAX_VELOCITY)
self.conf_smoothing_samples = self.options.get(CONF_SMOOTHING_SAMPLES)
self.adverts: dict[str, list] = {
"manufacturer_data": [],
"service_data": [],
Expand All @@ -107,19 +116,18 @@ def update_advertisement(self, scandata: BluetoothScannerDevice):
# In case the scanner has changed it's details since startup:
# FIXME: This should probably be a separate function that the refresh_scanners
# calls if necessary, rather than re-doing it every cycle.
self.name = scandata.scanner.name
scanner = scandata.scanner
self.name = scanner.name
self.area_id = self.scanner_device.area_id
self.area_name = self.scanner_device.area_name
new_stamp = None
new_stamp: float | None = None

# Only remote scanners log timestamps here (local usb adaptors do not),
if hasattr(scandata.scanner, "_discovered_device_timestamps"):
if self.scanner_sends_stamps:
# Found a remote scanner which has timestamp history...
self.scanner_sends_stamps = True
# There's no API for this, so we somewhat sneakily are accessing
# what is intended to be a protected dict.
# pylint: disable-next=protected-access
stamps = scandata.scanner._discovered_device_timestamps # type: ignore #noqa
stamps = scanner._discovered_device_timestamps # type: ignore #noqa

# In this dict all MAC address keys are upper-cased
uppermac = self.parent_device_address.upper()
Expand All @@ -135,7 +143,7 @@ def update_advertisement(self, scandata: BluetoothScannerDevice):
# of this scanner if it hadn't seen this device.
_LOGGER.error(
"Scanner %s has no stamp for %s - very odd.",
scandata.scanner.source,
scanner.source,
self.parent_device_address,
)
new_stamp = None
Expand Down Expand Up @@ -184,24 +192,19 @@ def update_advertisement(self, scandata: BluetoothScannerDevice):
self.stamp = new_stamp
self.hist_stamp.insert(0, self.stamp)

# Safe to update these values regardless of stamps...

self.adapter: str = scandata.scanner.adapter
self.source: str = scandata.scanner.source
if self.tx_power is not None and scandata.advertisement.tx_power != self.tx_power:
# Not really an erorr, we just don't account for this happening -
# I want to know if it does.
# AJG 2024-01-11: This does happen. Looks like maybe apple devices?
# Changing from warning to debug to quiet users' logs.
# Also happens with esphome set with long beacon interval tx, as it alternates
# between sending some generic advert and the iBeacon advert. ie, it's bogus for that
# case.
# _LOGGER.debug(
# "Device changed TX-POWER! That was unexpected: %s %sdB",
# self.parent_device_address,
# scandata.advertisement.tx_power,
# )
pass
# if self.tx_power is not None and scandata.advertisement.tx_power != self.tx_power:
# # Not really an erorr, we just don't account for this happening -
# # I want to know if it does.
# # AJG 2024-01-11: This does happen. Looks like maybe apple devices?
# # Changing from warning to debug to quiet users' logs.
# # Also happens with esphome set with long beacon interval tx, as it alternates
# # between sending some generic advert and the iBeacon advert. ie, it's bogus for that
# # case.
# _LOGGER.debug(
# "Device changed TX-POWER! That was unexpected: %s %sdB",
# self.parent_device_address,
# scandata.advertisement.tx_power,
# )
self.tx_power = scandata.advertisement.tx_power

# Track each advertisement element as or if they change.
Expand All @@ -227,16 +230,13 @@ def _update_raw_distance(self, reading_is_new=True) -> float:
setting change (such as altering a device's ref_power setting).
"""
# Check if we should use a device-based ref_power
if self.ref_power == 0:
ref_power = self.options.get(CONF_REF_POWER)
if self.ref_power == 0: # No user-supplied per-device value
# use global default
ref_power = self.conf_ref_power
else:
ref_power = self.ref_power

distance = rssi_to_metres(
self.rssi + self.options.get(CONF_RSSI_OFFSETS, {}).get(self.address, 0),
ref_power,
self.options.get(CONF_ATTENUATION),
)
distance = rssi_to_metres(self.rssi + self.conf_rssi_offset, ref_power, self.conf_attenuation)
self.rssi_distance_raw = distance
if reading_is_new:
# Add a new historical reading
Expand Down Expand Up @@ -324,14 +324,14 @@ def calculate_data(self):
new_stamp = self.new_stamp # should have been set by update()
self.new_stamp = None # Clear so we know if an update is missed next cycle

if new_stamp is not None and self.rssi_distance is None:
if self.rssi_distance is None and new_stamp is not None:
# DEVICE HAS ARRIVED!
# We have just newly come into range (or we're starting up)
# accept the new reading as-is.
self.rssi_distance = self.rssi_distance_raw
# And ensure the smoothing history gets a fresh start
self.hist_distance_by_interval.insert(0, self.rssi_distance_raw)
del self.hist_distance_by_interval[1:]

self.hist_distance_by_interval = [self.rssi_distance_raw]

elif new_stamp is None and (self.stamp is None or self.stamp < MONOTONIC_TIME() - DISTANCE_TIMEOUT):
# DEVICE IS AWAY!
Expand All @@ -356,34 +356,29 @@ def calculate_data(self):
peak_velocity = 0
# walk through the history of distances/stamps, and find
# the peak
for i, old_distance in enumerate(self.hist_distance):
if i == 0:
# (skip the first entry since it's what we're comparing with)
continue

if self.hist_stamp[i] is None:
continue # Skip this iteration if hist_stamp[i] is None

delta_t = velo_newstamp - self.hist_stamp[i]
delta_d = velo_newdistance - old_distance
if delta_t <= 0:
# Additionally, skip if delta_t is zero or negative
# to avoid division by zero
continue

velocity = delta_d / delta_t

# Approach velocities are only interesting vs the previous
# reading, while retreats need to be sensible over time
if i == 1:
# on first round we want approach or retreat velocity
peak_velocity = velocity
if velocity < 0:
# if our new reading is an approach, we are done here
# (not so for == 0 since it might still be an invalid retreat)
break

peak_velocity = max(velocity, peak_velocity)
delta_t = velo_newstamp - self.hist_stamp[1]
delta_d = velo_newdistance - self.hist_distance[1]
if delta_t > 0:
peak_velocity = delta_d / delta_t
# if our initial reading is an approach, we are done here
if peak_velocity >= 0:
for old_distance, old_stamp in zip(self.hist_distance[2:], self.hist_stamp[2:], strict=False):
if old_stamp is None:
continue # Skip this iteration if hist_stamp[i] is None

delta_t = velo_newstamp - old_stamp
if delta_t <= 0:
# Additionally, skip if delta_t is zero or negative
# to avoid division by zero
continue
delta_d = velo_newdistance - old_distance

velocity = delta_d / delta_t

# Don't use max() as it's slower.
if velocity > peak_velocity:
# but on subsequent comparisons we only care if they're faster retreats
peak_velocity = velocity
# we've been through the history and have peak velo retreat, or the most recent
# approach velo.
velocity = peak_velocity
Expand All @@ -393,7 +388,7 @@ def calculate_data(self):

self.hist_velocity.insert(0, velocity)

if velocity > self.options.get(CONF_MAX_VELOCITY):
if velocity > self.conf_max_velocity:
if self.parent_device_address.upper() in self.options.get(CONF_DEVICES, []):
_LOGGER.debug(
"This sparrow %s flies too fast (%2fm/s), ignoring",
Expand All @@ -406,14 +401,14 @@ def calculate_data(self):
else:
self.hist_distance_by_interval.insert(0, self.hist_distance_by_interval[0])
else:
# Looks valid enough, add the current reading to the interval log
self.hist_distance_by_interval.insert(0, self.rssi_distance_raw)
self.hist_distance_by_interval.insert(0, self.hist_distance_by_interval[0])

# trim the log to length
del self.hist_distance_by_interval[self.options.get(CONF_SMOOTHING_SAMPLES) :]
if len(self.hist_distance_by_interval) > self.conf_smoothing_samples:
del self.hist_distance_by_interval[self.conf_smoothing_samples :]

# Calculate a moving-window average, that only includes
# historical values if their "closer" (ie more reliable).
# historical values if they're "closer" (ie more reliable).
#
# This might be improved by weighting the values by age, but
# already does a fairly reasonable job of hugging the bottom
Expand All @@ -422,36 +417,28 @@ def calculate_data(self):
# helpful, but probably dependent on use-case.
#
dist_total: float = 0
dist_count: int = 0
local_min: float = self.rssi_distance_raw or DISTANCE_INFINITE
for distance in self.hist_distance_by_interval:
if distance <= local_min:
dist_total += distance
if distance is not None and distance <= local_min:
local_min = distance
else:
dist_total += local_min
dist_count += 1
dist_total += local_min

if dist_count > 0:
movavg = dist_total / dist_count
if (_hist_dist_len := len(self.hist_distance_by_interval)) > 0:
movavg = dist_total / _hist_dist_len
else:
movavg = local_min

# The average is only helpful if it's lower than the actual reading.
if self.rssi_distance_raw is None or movavg < self.rssi_distance_raw:
self.rssi_distance = movavg
else:
self.rssi_distance = self.rssi_distance_raw

# Trim our history lists
for histlist in (
self.hist_distance,
self.hist_interval,
self.hist_rssi,
self.hist_stamp,
self.hist_velocity,
):
del histlist[HIST_KEEP_COUNT:]
del self.hist_distance[HIST_KEEP_COUNT:]
del self.hist_interval[HIST_KEEP_COUNT:]
del self.hist_rssi[HIST_KEEP_COUNT:]
del self.hist_stamp[HIST_KEEP_COUNT:]
del self.hist_velocity[HIST_KEEP_COUNT:]

def to_dict(self):
"""Convert class to serialisable dict for dump_devices."""
Expand All @@ -475,3 +462,7 @@ def to_dict(self):
continue
out[var] = val
return out

def __repr__(self) -> str:
    """Return the scanner's MAC address instead of an opaque default repr."""
    # Identifying the scanner by address keeps dump_devices output and
    # debugger views readable (otherwise this shows as an empty dict-like).
    scanner_address: str = self.address
    return scanner_address
Loading

0 comments on commit f275baf

Please sign in to comment.