Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable stats v2 #1708

Merged
merged 5 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion collector/lib/CollectorConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ void CollectorConfig::HandleSinspEnvVars() {
if ((envvar = std::getenv("ROX_COLLECTOR_SINSP_TOTAL_BUFFER_SIZE")) != NULL) {
try {
sinsp_total_buffer_size_ = std::stoll(envvar);
CLOG(INFO) << "Sinsp total buffer size: " << sinsp_buffer_size_;
CLOG(INFO) << "Sinsp total buffer size: " << sinsp_total_buffer_size_;
} catch (...) {
CLOG(ERROR) << "Invalid total buffer size value: '" << envvar << "'";
}
Expand Down
4 changes: 4 additions & 0 deletions collector/lib/CollectorStatsExporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ void CollectorStatsExporter::run() {

auto& kernel = collectorEventCounters.Add({{"type", "kernel"}});
auto& drops = collectorEventCounters.Add({{"type", "drops"}});
auto& threadCacheDrops = collectorEventCounters.Add({{"type", "threadCacheDrops"}});
auto& ringbufferDrops = collectorEventCounters.Add({{"type", "ringbufferDrops"}});
auto& preemptions = collectorEventCounters.Add({{"type", "preemptions"}});
auto& grpcSendFailures = collectorEventCounters.Add({{"type", "grpcSendFailures"}});
auto& threadTableSize = collectorEventCounters.Add({{"type", "threadCacheSize"}});
Expand Down Expand Up @@ -223,6 +225,8 @@ void CollectorStatsExporter::run() {

kernel.Set(stats.nEvents);
drops.Set(stats.nDrops);
threadCacheDrops.Set(stats.nDropsThreadCache);
ringbufferDrops.Set(stats.nDropsBuffer);
preemptions.Set(stats.nPreemptions);
threadTableSize.Set(stats.nThreadCacheSize);

Expand Down
7 changes: 6 additions & 1 deletion collector/lib/GetStatus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,16 @@ bool GetStatus::handleGet(CivetServer* server, struct mg_connection* conn) {
bool ready = system_inspector_->GetStats(&stats);

if (ready) {
Json::Value drops = Json::Value(Json::objectValue);
drops["total"] = Json::UInt64(stats.nDrops);
drops["ringbuffer"] = Json::UInt64(stats.nDropsBuffer);
drops["threadcache"] = Json::UInt64(stats.nDropsThreadCache);

status["status"] = "ok";
status["collector"] = Json::Value(Json::objectValue);
status["collector"]["node"] = node_name_;
status["collector"]["events"] = Json::UInt64(stats.nEvents);
status["collector"]["drops"] = Json::UInt64(stats.nDrops);
status["collector"]["drops"] = drops;
status["collector"]["preemptions"] = Json::UInt64(stats.nPreemptions);

mg_printf(conn, "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nConnection: close\r\n\r\n");
Expand Down
11 changes: 11 additions & 0 deletions collector/lib/system-inspector/Service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ bool Service::InitKernel(const CollectorConfig& config, const DriverCandidate& c
inspector_->set_auto_threads_purging_interval_s(60);
inspector_->m_thread_manager->set_max_thread_table_size(config.GetSinspThreadCacheSize());

// enable stats v2
inspector_->set_sinsp_stats_v2_enabled();

// Connection status tracking is used in NetworkSignalHandler,
// but only when trying to handle asynchronous connections
// as a special case.
Expand Down Expand Up @@ -355,13 +358,21 @@ bool Service::GetStats(system_inspector::Stats* stats) const {
if (!running_ || !inspector_) return false;

scap_stats kernel_stats;
std::shared_ptr<const sinsp_stats_v2> userspace_stats;

inspector_->get_capture_stats(&kernel_stats);
userspace_stats = inspector_->get_sinsp_stats_v2();

*stats = userspace_stats_;
stats->nEvents = kernel_stats.n_evts;
stats->nDrops = kernel_stats.n_drops;
stats->nDropsBuffer = kernel_stats.n_drops_buffer;
stats->nPreemptions = kernel_stats.n_preemptions;
stats->nThreadCacheSize = inspector_->m_thread_manager->get_thread_count();

if (userspace_stats != nullptr)
stats->nDropsThreadCache = userspace_stats->m_n_drops_full_threadtable;

return true;
}

Expand Down
2 changes: 2 additions & 0 deletions collector/lib/system-inspector/SystemInspector.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ struct Stats {
// stats gathered in kernel space
volatile uint64_t nEvents = 0; // the number of kernel events
volatile uint64_t nDrops = 0; // the number of drops
volatile uint64_t nDropsBuffer = 0; // the number of drops due to full ringbuf
volatile uint64_t nPreemptions = 0; // the number of preemptions

// stats gathered in user space
volatile uint64_t nFilteredEvents[PPM_EVENT_MAX] = {0}; // events post filtering
volatile uint64_t nUserspaceEvents[PPM_EVENT_MAX] = {0}; // events processed by userspace
volatile uint64_t nGRPCSendFailures = 0; // number of signals that were not sent on GRPC
volatile uint64_t nThreadCacheSize = 0; // number of thread-info entries stored in the cache
volatile uint64_t nDropsThreadCache = 0; // the number of drops due to full thread cache

// process related metrics
volatile uint64_t nProcessSent = 0; // number of process signals sent
Expand Down
2 changes: 2 additions & 0 deletions docs/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ Units: occurence
|----------------------------------------|-----------------------------------------------------------------------------------------------------|
| kernel | number of received kernel events (by the probe) |
| drops | number of dropped kernel events |
| threadCacheDrops | number of kernel events dropped because the thread cache was full |
| ringbufferDrops | number of kernel events dropped because the ring buffer was full |
| preemptions | Number of preemptions (?) |
| userspace[syscall] | Number of this kind of event |
| grpcSendFailures | (not used?) |
Expand Down
Loading