From d08a81cf611e07fb0a630a5dc0c205a0d942066b Mon Sep 17 00:00:00 2001 From: Christian Date: Sun, 6 Oct 2024 21:05:08 +0200 Subject: [PATCH] #159 trim past events --- CHANGES.md | 187 ++++++++++-------- .../core/monitoring/event/EventManager.kt | 39 +++- 2 files changed, 142 insertions(+), 84 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index aeecd7ae..7685757c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,8 +1,11 @@ ### Unreleased +- Past events are now trimmed to a maximum of 100 entries and purged according to the configured history retention time + ### 0.32.0 + - Distribution: Project now packaged as a .deb file for easier installation on Debian based systems - - .deb file depends on openjdk-21-jre-headless and daemon apt packages. Any java 21+ installation will do. + - .deb file depends on openjdk-21-jre-headless and daemon apt packages. Any java 21+ installation will do. - Distribution: Windows installer with bundled jre for easier installation on Windows - Systemd service definition is included in the .deb file - Run sys-API as a Windows service (using [winsw](https://github.com/winsw/winsw)) @@ -10,106 +13,128 @@ - Fix right-click "Run as Administrator" in Windows ### 0.31.3 + - Fix CPU load not updating properly - Fix native build ### 0.31.2 + - Properly opt-in to OSHIs load average handling on Windows - More robust handling of data directory and fix error in JarLocation ### 0.31.1 + - Fixes for running on windows - Fixed issue with serialization when using docker-java client ### 0.31.0 + - Support webserver checks - - Calls a webserver endpoint using GET and checks if response is 200 / OK - - Calculates uptime based on non-200 responses - - Added monitor type `WEBSERVER_UP` - - API: Check out the `WebserverCheck` types in **monitoring.graphqls** 
### 0.30.0 + - Migrated to [spring](https://spring.io) framework instead of Dropwizard. -- [Graal Native Image](https://www.graalvm.org/latest/reference-manual/native-image/) Docker image option. Significant reduction in RAM usage. -- Memory monitor now operates based on "used bytes goes above threshold" compared to the old "available bytes goes below threshold" as this is more intuitive. +- [Graal Native Image](https://www.graalvm.org/latest/reference-manual/native-image/) Docker image option. Significant + reduction in RAM usage. +- Memory monitor now operates based on "used bytes goes above threshold" compared to the old "available bytes goes below + threshold" as this is more intuitive. - Removed deprecated Disks (Drives still remain) - Fixed issue with container statistics history - Removed REST-API - This release require version 21 of Java #### Spring + - Introduces an additional config file: _application.properties_. - - Only required if you want to change ports. Sample config is available in /config in the repository. + - Only required if you want to change ports. Sample config is available in /config in the repository. - The user _configuration.yml_ from Dropwizard is still compatible. Look in /config for an up-to-date version. #### Graal Native Image + - RAM usage reduced to around **120-200 MB** compared to **600-800 MB** running the standard way - Native images are distributed under the _krillsson/sys-api:native_ tag on Docker Hub -- Consider this new variant experimental and sys-API may fail to start with obscure errors. If you encounter this, open an issue. -- No Raspberry PI support: only builds for amd64 can be provided at this time, as [GitHub does not support building for arm64 yet](https://github.com/actions/runner-images/issues/5631) +- Consider this new variant experimental and sys-API may fail to start with obscure errors. If you encounter this, open + an issue. 
+- No Raspberry PI support: only builds for amd64 can be provided at this time, + as [GitHub does not support building for arm64 yet](https://github.com/actions/runner-images/issues/5631) ### 0.20.0 + - added `deletePastEventsForMonitor`, `closeOngoingEventForMonitor` to the GraphQL-API - added `Monitor.maxValue` to the GraphQL-API. Useful when displaying monitored value in a graph. - added start value to past events - Container metrics support - - metricsForContainer(id) for near realtime metrics - - containerMetricsHistoryBetweenTimestamps(id, from, to) for history - - added monitor types for container cpu load and container memory usage + - metricsForContainer(id) for near realtime metrics + - containerMetricsHistoryBetweenTimestamps(id, from, to) for history + - added monitor types for container cpu load and container memory usage - Performance updates that should result in lower CPU usage - Tweaked JVM parameters for performance (update your docker-compose.yml) ### 0.19.3 + - fix NPE when querying ContainerNetworkSettings while using podman in rootless mode ### 0.19.2 + - Disabled admin interface in configuration.yml - Support specifying custom docker host (such as podman). See docker section in configuration.yml - Fixed: "System has not been booted with systemd as init system"... ### 0.19.1 + - Fixed: historyBetweenDates query throwing error - Fixed: db locking issue due to SQLite only allowing one simultaneous connection ### 0.19.0 #### Linux + - List and manage system daemon services (start, stop, reload etc.) - - From docker: requires new volume mounts. See docker-compose.yml - - From docker: only works on host systems with systemd + - From docker: requires new volume mounts. See docker-compose.yml + - From docker: only works on host systems with systemd - Read system daemon journal logs - - Same notices as above + - Same notices as above #### Windows + - List and manage services (start, stop, pause etc.) 
- - Not supported from within Docker + - Not supported from within Docker - Read event logs - - Same notice as above + - Same notice as above - Updates to OpenHardwareMonitor integration to fix CPU metrics #### Other features + - Read log files from a directory (see sample in configuration.yml) - Add one, five and fifteen LoadAverages to GraphQL-API. - Add monitors for load averages - Add support for automatic port forwarding using UPnP-IGD. - Generic events concept - - Update available on GitHub - - Monitored item disappeared + - Update available on GitHub + - Monitored item disappeared #### Under the hood + - Query networkInterface and fileSystem by ID -- Query container, system daemon service and windows service by name +- Query container, system daemon service and windows service by name - More fine-grained control over periodic tasks ### 0.18.3 + - Fixed: querying network interfaces on Windows takes too long - Fixed: Docker client timeout being unreasonably long (3m) ### 0.18.2 + - Fix id field being empty for some Filesystems - - Stability and uniqueness cannot be guaranteed. Duplicates will be discarded. + - Stability and uniqueness cannot be guaranteed. Duplicates will be discarded. ### 0.18.1 + - Fixed: CPU load and CPU core load freezing after a while for real this time. - Resolved issue where periodic tasks stopped executing - Provide ID's for FileSystems @@ -117,13 +142,13 @@ ### 0.18.0 -- History is now stored in a SQLite file. - - Enabling storage of significantly more history and circumventing storing it in memory +- History is now stored in a SQLite file. +- Enabling storage of significantly more history and circumventing storing it in memory - Docker image for arm64 architecture - Improved handling of build-date and version in the API - Add support for mDNS on local network. Making it easier for client discover the server. - Fixed: not all disks and filesystems show up. This deprecates Drives and introduces separate Disks and FileSystems. 
- - Changes to sample _docker-compose.yml_ on how to expose hdd's for monitoring + - Changes to sample _docker-compose.yml_ on how to expose hdd's for monitoring - Fixed: CPU load and CPU core load freezing after a while ### 0.17.2 @@ -135,9 +160,10 @@ ### 0.17.1 - - Fix issue with adding numerical monitors - - java.lang.ClassCastException: java.lang.Integer incompatible with java.lang.Long at com.krillsson.sysapi.graphql.scalars.LongCoercing.serialize - - Fix ongoing events not stopping properly +- Fix issue with adding numerical monitors + - java.lang.ClassCastException: java.lang.Integer incompatible with java.lang.Long at + com.krillsson.sysapi.graphql.scalars.LongCoercing.serialize +- Fix ongoing events not stopping properly ### 0.17.0 @@ -145,35 +171,38 @@ *The monitors API have been rebuilt for better type-safety.* - - Adds connectivity check and external IP functionality - - Several new monitors: - - Connectivity (opt-out in *configuration.yml*) - - Drive read/write rate - - Network upload/download rate - - External IP changed - - Process CPU usage - - Process memory usage - - Process died (pid disappeared) - - Monitors now have three subgroups - - Numerical: positive integer values such as Bytes, Temperature, etc - - Fractional: percentage values such as CPU utilization - - Conditional: either or values such as network up/down or connected/disconnected - - Monitors now have `currentValue` and `history` fields - - Read logs from a container - - To prepare for dockerization of sys-API: - - *configuration.yml* now lives in *config/* sub-directory - - json database files as well as keystore files in *data/* sub-directory - -*if you are migrating from v0.16.0 or earlier, simply move **history.json**, **monitors.json**, **events.json** and **keystorewww.jks** to data/ directory* - -*it is recommended to re-apply your configuration changes anew in the new **configuration.yml** rather than re-using your old one* +- Adds connectivity check and external IP 
functionality +- Several new monitors: + - Connectivity (opt-out in *configuration.yml*) + - Drive read/write rate + - Network upload/download rate + - External IP changed + - Process CPU usage + - Process memory usage + - Process died (pid disappeared) +- Monitors now have three subgroups + - Numerical: positive integer values such as Bytes, Temperature, etc + - Fractional: percentage values such as CPU utilization + - Conditional: either or values such as network up/down or connected/disconnected +- Monitors now have `currentValue` and `history` fields +- Read logs from a container +- To prepare for dockerization of sys-API: + - *configuration.yml* now lives in *config/* sub-directory + - json database files as well as keystore files in *data/* sub-directory + +*if you are migrating from v0.16.0 or earlier, simply move **history.json**, **monitors.json**, **events.json** +and **keystorewww.jks** to data/ directory* + +*it is recommended to re-apply your configuration changes anew in the new **configuration.yml** rather than re-using +your old one* ### 0.16.0 - Support for generating self-signed certificate for increased privacy - - Certificate names are pre-populated with external and internal IP's by default - - See `selfSignedCertificates` in `configuration.yml` - - Please note that this feature is not a substitution for properly signed certificates. It is only there to lower the barrier of entry to https. + - Certificate names are pre-populated with external and internal IP's by default + - See `selfSignedCertificates` in `configuration.yml` + - Please note that this feature is not a substitution for properly signed certificates. It is only there to lower + the barrier of entry to https. 
### 0.15.2 @@ -193,7 +222,7 @@ _Sorry for the confusion_ - Persist history to save memory - History no longer include running processes (it was taking up too much space) - Added monitors for individual process memory and cpu loads -- Now shipped as a standalone runtime (embedded jre) +- Now shipped as a standalone runtime (embedded jre) - Update OSHI dependency - Fixed a few serialization errors in GraphQL layer @@ -201,49 +230,50 @@ _Sorry for the confusion_ **REST API Breaking changes** - - Optimize GraphQL layer - - Convert more classes to Kotlin - - Fix paths for OHMJNIWrapper - - More robust way of calculating processor utilization +- Optimize GraphQL layer +- Convert more classes to Kotlin +- Fix paths for OHMJNIWrapper +- More robust way of calculating processor utilization ### 0.14 **REST API Breaking changes** - - Migrated project to Gradle - - Add PhysicalMemory to MemoryInfo - - Remove as much nullability from GraphQL schema as possible - - Events persistence - - Update dependencies - +- Migrated project to Gradle +- Add PhysicalMemory to MemoryInfo +- Remove as much nullability from GraphQL schema as possible +- Events persistence +- Update dependencies + ### 0.13 - - - Migration to Kotlin - - GraphQL support - - Events persistence - + +- Migration to Kotlin +- GraphQL support +- Events persistence + ### 0.12 - - Dates are now serialized as: `2019-02-04T22:08:42.048+01:00` - - Latest dropwizard - - Added `GET /monitors/{id}/events` endpoint (get events for a monitor) - +- Dates are now serialized as: `2019-02-04T22:08:42.048+01:00` +- Latest dropwizard +- Added `GET /monitors/{id}/events` endpoint (get events for a monitor) ### 0.11 **REST API Breaking changes** -- OSX: Fixes related to drives migrated to APFS +- OSX: Fixes related to drives migrated to APFS - Network speed is now included in NetworkInterface object (/system/ & /nics/) - Drive object now has a sizeBytes property (/system/ & /drives/) - /system/load now includes top ten memory consuming 
processes by default. Configurable via query parameter. -- Every /history/ endpoint now has optional query parameters to limit the output. E.g: `v2/system/load/history?fromDate=2018-09-23T15:11:55.661&toDate=2018-09-23T15:21:25.659` +- Every /history/ endpoint now has optional query parameters to limit the output. + E.g: `v2/system/load/history?fromDate=2018-09-23T15:11:55.661&toDate=2018-09-23T15:21:25.659` ### 0.10 Lot's of new features! **And unfortunately an REST API breaking release.** -Changelog: +Changelog: + - Monitoring - Split static information from system load information - Load history @@ -256,7 +286,8 @@ Changelog: ### 0.9 - Network tx/rx and Disk r/w are now fetched from OpenHardwareMonitor on Windows -- Add support for hot reloading SSL certs (i.e Let's Encrypt) [Guide](https://github.com/Krillsson/sys-API/wiki/Let's-Encrypt) +- Add support for hot reloading SSL certs (i.e Let's + Encrypt) [Guide](https://github.com/Krillsson/sys-API/wiki/Let's-Encrypt) *Please keep in mind that the API is still in it's early stages and is subject to change* @@ -307,7 +338,7 @@ Changelog: *Please keep in mind that the API is still in it's early stages and is subject to change* -### 0.2 +### 0.2 - This is the first release of System Api. Keep in mind that this is a very early version. 
diff --git a/src/main/kotlin/com/krillsson/sysapi/core/monitoring/event/EventManager.kt b/src/main/kotlin/com/krillsson/sysapi/core/monitoring/event/EventManager.kt index 69c0b724..606975d5 100644 --- a/src/main/kotlin/com/krillsson/sysapi/core/monitoring/event/EventManager.kt +++ b/src/main/kotlin/com/krillsson/sysapi/core/monitoring/event/EventManager.kt @@ -1,27 +1,33 @@ package com.krillsson.sysapi.core.monitoring.event +import com.krillsson.sysapi.config.YAMLConfigFile import com.krillsson.sysapi.core.domain.event.Event import com.krillsson.sysapi.core.domain.event.OngoingEvent import com.krillsson.sysapi.core.domain.event.PastEvent +import com.krillsson.sysapi.util.logger import jakarta.annotation.PostConstruct import jakarta.annotation.PreDestroy -import org.slf4j.LoggerFactory import org.springframework.stereotype.Service import java.time.Clock +import java.time.temporal.ChronoUnit import java.util.* @Service -class EventManager(private val repository: EventRepository, private val clock: Clock) { +class EventManager( + private val repository: EventRepository, + private val clock: Clock, + private val config: YAMLConfigFile +) { private lateinit var events: MutableList - companion object { - private val LOGGER = LoggerFactory.getLogger(EventManager::class.java) - } + val logger by logger() /* NOTE(review): not declared private, unlike the LOGGER it replaces - confirm the wider visibility is intended */ + @PostConstruct fun start() { restore() + cleanupPastEvents() } @PreDestroy @@ -33,9 +39,15 @@ class EventManager(private val repository: EventRepository, private val clock: C fun add(event: Event) { removeOngoingEventForMonitor(event.monitorId) events.add(event) + cleanupPastEvents() persist() } + /** Applies both retention rules to past events: first purges those older than the age configured under metricsConfig.history.purging, then caps what remains at 100. Called from start() and from every add(). */ private fun cleanupPastEvents() { + purgePastEventsOlderThan(config.metricsConfig.history.purging.olderThan, config.metricsConfig.history.purging.unit) + trimPastEventsSize(100) + } + fun getAll(): List = events fun remove(id: UUID): Boolean = events @@ -90,11 +102,26 @@ class EventManager(private val repository: EventRepository, private val clock: C } } + /** Removes every PastEvent whose endTime is before the clock's current instant minus [olderThan] [unit]. Events that are not PastEvent are left untouched. */ private fun 
purgePastEventsOlderThan(olderThan: Long, unit: ChronoUnit) { + val maxAge = clock.instant().minus(olderThan, unit) /* NOTE(review): java.time.Instant only supports units up to DAYS; WEEKS, MONTHS or larger throw UnsupportedTemporalTypeException, and this runs from start() - confirm the configured unit can never be one of those */ + logger.info("Purging history older than {}", maxAge) /* NOTE(review): reached on every add(), so this INFO line is emitted per added event; the message says history although only past events are removed here */ + events.removeAll { event -> event is PastEvent && event.endTime.isBefore(maxAge) } + } + + /** Caps the number of past events at [maxSize]: when there are more, only the last [maxSize] of them (in list order) are kept and the surplus is removed from events. Entries that are not PastEvent are never removed. */ private fun trimPastEventsSize(maxSize: Int) { + val pastEvents = events.filterIsInstance() + if (pastEvents.size > maxSize) { + logger.info("Trimming ${pastEvents.size - maxSize} past events") + val newPastEvents = pastEvents.takeLast(maxSize) /* the survivors: the entries positioned last in the list - presumably the most recent ones, assuming events are appended chronologically; verify */ + events.removeIf { it is PastEvent && !newPastEvents.contains(it) } /* membership is decided by contains(), i.e. by equals() on the event */ + } + } + private fun endOngoingEvents() { val newEvents: MutableList = mutableListOf() events.forEach { event -> if (event is OngoingEvent) { - LOGGER.debug("Shutting down - Ending ongoing event ${event.monitorType} (${event.id})") + logger.info("Shutting down - Ending ongoing event ${event.monitorType} (${event.id})") newEvents += PastEvent( event.id, event.monitorId,