Skip to content

Commit

Permalink
StatsHouse metrics improvements (#911)
Browse files Browse the repository at this point in the history
- New host tag toggle function that dynamically enables the host tag in all
 StatsHouse metrics of all worker and master processes; it can be called from any worker
- New `kphp_by_host_*` metrics: time, cpu, heap, errors
- A lot of refactoring and other small improvements
  • Loading branch information
DrDet authored Oct 6, 2023
1 parent 9c53778 commit c8dc706
Show file tree
Hide file tree
Showing 15 changed files with 443 additions and 299 deletions.
11 changes: 1 addition & 10 deletions builtin-functions/_functions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ if (0) {
}

require_once __DIR__ . '/kphp_internal.txt';
require_once __DIR__ . '/kphp_toggles.txt';
require_once __DIR__ . '/kphp_tracing.txt';
require_once __DIR__ . '/uberh3.txt';
require_once __DIR__ . '/spl.txt';
Expand Down Expand Up @@ -1428,16 +1429,6 @@ function zstd_uncompress(string $data) ::: string | false;
function zstd_compress_dict(string $data, string $dict) ::: string | false;
function zstd_uncompress_dict(string $data, string $dict) ::: string | false;

function set_migration_php8_warning ($mask ::: int) ::: void;

function set_detect_incorrect_encoding_names_warning(bool $show) ::: void;

function set_json_log_on_timeout_mode(bool $enabled) ::: void;

function set_json_log_demangle_stacktrace(bool $enable) ::: void;

function set_use_updated_gmmktime(bool $enable) ::: void;

// re-initialize given ArrayIterator with another array;
// in KPHP it returns the same ArrayIterator that is ready to be used
// in PHP (via polyfills) it returns a newly allocated object
Expand Down
15 changes: 15 additions & 0 deletions builtin-functions/kphp_toggles.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?php

// Functions declared in this file are actually toggles to enable or disable some functionality in KPHP runtime.

function set_migration_php8_warning ($mask ::: int) ::: void;

function set_detect_incorrect_encoding_names_warning(bool $show) ::: void;

function set_json_log_on_timeout_mode(bool $enabled) ::: void;

function set_json_log_demangle_stacktrace(bool $enable) ::: void;

function set_use_updated_gmmktime(bool $enable) ::: void;

// Turns on the host tag in the inner StatsHouse metrics.
// Takes no arguments and returns nothing; the return type is spelled out explicitly
// to stay consistent with the other toggles declared in this file.
function kphp_turn_on_host_tag_in_inner_statshouse_metrics_toggle() ::: void;
5 changes: 5 additions & 0 deletions runtime/interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "runtime/kphp_core.h"
#include "runtime/optional.h"
#include "server/php-query-data.h"
#include "server/statshouse/statshouse-manager.h"
#include "server/workers-control.h"

extern string_buffer *coub;//TODO static
Expand Down Expand Up @@ -238,6 +239,10 @@ inline void f$set_json_log_demangle_stacktrace(bool enable) {
is_demangled_stacktrace_logs_enabled = enable;
}

// Runtime side of the kphp_turn_on_host_tag_in_inner_statshouse_metrics_toggle() builtin.
// Takes no arguments and returns nothing: it only forwards the request to
// StatsHouseManager::get().turn_on_host_tag_toggle().
// NOTE(review): how the toggle reaches other workers and the master process is decided
// inside StatsHouseManager and is not visible from this header — confirm there.
inline void f$kphp_turn_on_host_tag_in_inner_statshouse_metrics_toggle() {
StatsHouseManager::get().turn_on_host_tag_toggle();
}

int64_t f$numa_get_bound_node();

bool f$extension_loaded(const string &extension);
20 changes: 12 additions & 8 deletions server/php-engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <cassert>
#include <cerrno>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <fstream>
Expand Down Expand Up @@ -61,7 +62,6 @@
#include "runtime/profiler.h"
#include "runtime/rpc.h"
#include "runtime/thread-pool.h"
#include "server/server-config.h"
#include "server/confdata-binlog-replay.h"
#include "server/database-drivers/adaptor.h"
#include "server/database-drivers/connector.h"
Expand Down Expand Up @@ -89,7 +89,7 @@
#include "server/server-stats.h"
#include "server/shared-data-worker-cache.h"
#include "server/signal-handlers.h"
#include "server/statshouse/statshouse-client.h"
#include "server/statshouse/statshouse-manager.h"
#include "server/workers-control.h"

using job_workers::JobWorkersContext;
Expand Down Expand Up @@ -1416,12 +1416,15 @@ static void sigusr1_handler(const int sig) {
pending_signals = pending_signals | (1ll << sig);
}

void cron() {
void worker_cron() {
if (master_flag == -1 && getppid() == 1) {
turn_sigterm_on();
}
vk::singleton<SharedDataWorkerCache>::get().on_worker_cron();
vk::singleton<ServerStats>::get().update_this_worker_stats();
auto virtual_memory_stat = get_self_mem_stats();
StatsHouseManager::get().add_worker_memory_stats(virtual_memory_stat);
StatsHouseManager::get().generic_cron();
}

void reopen_json_log() {
Expand All @@ -1445,7 +1448,7 @@ void generic_event_loop(WorkerType worker_type, bool init_and_listen_rpc_port) n
}

int http_port, http_sfd = -1;
int prev_time = 0;
double last_cron_time = 0;
double next_create_outbound = 0;

switch (worker_type) {
Expand Down Expand Up @@ -1568,9 +1571,9 @@ void generic_event_loop(WorkerType worker_type, bool init_and_listen_rpc_port) n
reopen_json_log();
}

if (now != prev_time) {
prev_time = now;
cron();
if (precise_now - last_cron_time >= 1.0) {
last_cron_time = precise_now;
worker_cron();
}

if (worker_type == WorkerType::general_worker) {
Expand Down Expand Up @@ -1664,6 +1667,7 @@ void init_all() {
}
log_server_warning(deprecation_warning);
}
StatsHouseManager::get().set_common_tags();

global_init_runtime_libs();
global_init_php_scripts();
Expand Down Expand Up @@ -2089,7 +2093,7 @@ int main_args_handler(int i, const char *long_option) {
host = "127.0.0.1";
}

StatsHouseClient::init(host, port);
StatsHouseManager::init(host, port);
return 0;
}
case 2027: {
Expand Down
15 changes: 7 additions & 8 deletions server/php-master.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
#include "server/server-stats.h"
#include "server/shared-data-worker-cache.h"
#include "server/shared-data.h"
#include "server/statshouse/statshouse-client.h"
#include "server/statshouse/statshouse-manager.h"
#include "server/workers-control.h"

#include "server/php-master-restart.h"
Expand Down Expand Up @@ -1394,18 +1394,17 @@ void check_and_instance_cache_try_swap_memory() {
}
}

static void cron() {
static void master_cron() {
if (!other->is_alive || in_old_master_on_restart()) {
// write stats at the beginning to avoid spikes in graphs
send_data_to_statsd_with_prefix(vk::singleton<ServerConfig>::get().get_statsd_prefix(), stats_tag_kphp_server);
if (StatsHouseClient::has()) {
const auto cpu_stats = server_stats.cpu[1].get_stat();
StatsHouseClient::get().send_common_master_stats(workers_stats, instance_cache_get_memory_stats(), cpu_stats.cpu_s_usage, cpu_stats.cpu_u_usage,
instance_cache_memory_swaps_ok, instance_cache_memory_swaps_fail);
}
const auto cpu_stats = server_stats.cpu[1].get_stat();
StatsHouseManager::get().add_common_master_stats(workers_stats, instance_cache_get_memory_stats(), cpu_stats.cpu_s_usage, cpu_stats.cpu_u_usage,
instance_cache_memory_swaps_ok, instance_cache_memory_swaps_fail);
}
create_all_outbound_connections();
vk::singleton<ServerStats>::get().aggregate_stats();
StatsHouseManager::get().generic_cron();

unsigned long long cpu_total = 0;
unsigned long long utime = 0;
Expand Down Expand Up @@ -1620,7 +1619,7 @@ WorkerType run_master() {
const auto new_tp = get_steady_tp_ms_now();
if (new_tp - prev_cron_start_tp >= 1s) {
prev_cron_start_tp = new_tp;
cron();
master_cron();
}
}
}
24 changes: 4 additions & 20 deletions server/server-stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include "server/json-logger.h"
#include "server/server-stats.h"
#include "server/statshouse/statshouse-client.h"
#include "server/statshouse/statshouse-manager.h"

namespace {

Expand Down Expand Up @@ -604,7 +604,6 @@ void ServerStats::after_fork(pid_t worker_pid, uint64_t active_connections, uint
gen_->seed(worker_pid);
shared_stats_->workers.reset_worker_stats(worker_pid, active_connections, max_connections, worker_process_id_);
last_update_aggr_stats = std::chrono::steady_clock::now();
last_update_statshouse = std::chrono::steady_clock::now();
}

void ServerStats::add_request_stats(double script_time_sec, double net_time_sec, int64_t script_queries, int64_t long_script_queries, int64_t memory_used,
Expand All @@ -617,30 +616,21 @@ void ServerStats::add_request_stats(double script_time_sec, double net_time_sec,
stats.add_request_stats(queries_stat, error, memory_used, real_memory_used, curl_total_allocated);
shared_stats_->workers.add_worker_stats(queries_stat, worker_process_id_);

using namespace statshouse;
if (StatsHouseClient::has()) {
StatsHouseClient::get().send_request_stats(worker_type_, script_time.count(), net_time.count(), memory_used, real_memory_used, script_queries,
long_script_queries);
}
StatsHouseManager::get().add_request_stats(script_time.count(), net_time.count(), error, memory_used, real_memory_used, script_queries, long_script_queries);
}

void ServerStats::add_job_stats(double job_wait_time_sec, int64_t request_memory_used, int64_t request_real_memory_used, int64_t response_memory_used,
int64_t response_real_memory_used) noexcept {
const auto job_wait_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(job_wait_time_sec));
shared_stats_->job_workers.add_job_stats(job_wait_time.count(), request_memory_used, request_real_memory_used, response_memory_used, response_real_memory_used);

if (StatsHouseClient::has()) {
StatsHouseClient::get().send_job_stats(job_wait_time.count(), request_memory_used, request_real_memory_used, response_memory_used,
response_real_memory_used);
}
StatsHouseManager::get().add_job_stats(job_wait_time.count(), request_memory_used, request_real_memory_used, response_memory_used, response_real_memory_used);
}

void ServerStats::add_job_common_memory_stats(int64_t common_request_memory_used, int64_t common_request_real_memory_used) noexcept {
shared_stats_->job_workers.add_job_common_memory_stats(common_request_memory_used, common_request_real_memory_used);

if (StatsHouseClient::has()) {
StatsHouseClient::get().send_job_common_memory_stats(common_request_memory_used, common_request_real_memory_used);
}
StatsHouseManager::get().add_job_common_memory_stats(common_request_memory_used, common_request_real_memory_used);
}

void ServerStats::update_this_worker_stats() noexcept {
Expand All @@ -649,12 +639,6 @@ void ServerStats::update_this_worker_stats() noexcept {
shared_stats_->workers.update_worker_stats(worker_process_id_);
last_update_aggr_stats = now_tp;
}

if (StatsHouseClient::has() && (now_tp - last_update_statshouse >= std::chrono::seconds{1})) {
auto virtual_memory_stat = get_self_mem_stats();
StatsHouseClient::get().send_worker_memory_stats(worker_type_, virtual_memory_stat);
last_update_statshouse = now_tp;
}
}

void ServerStats::update_active_connections(uint64_t active_connections, uint64_t max_connections) noexcept {
Expand Down
1 change: 0 additions & 1 deletion server/server-stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ class ServerStats : vk::not_copyable {
WorkerType worker_type_{WorkerType::general_worker};
uint16_t worker_process_id_{0};
std::chrono::steady_clock::time_point last_update_aggr_stats;
std::chrono::steady_clock::time_point last_update_statshouse;

std::mt19937 *gen_{nullptr};

Expand Down
8 changes: 6 additions & 2 deletions server/server.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ prepend(KPHP_SERVER_SOURCES ${BASE_DIR}/server/
slot-ids-factory.cpp
workers-control.cpp
shared-data-worker-cache.cpp
signal-handlers.cpp
statshouse/statshouse-client.cpp)
signal-handlers.cpp)

prepend(KPHP_JOB_WORKERS_SOURCES ${BASE_DIR}/server/job-workers/
job-stats.cpp
Expand All @@ -40,6 +39,10 @@ prepend(KPHP_JOB_WORKERS_SOURCES ${BASE_DIR}/server/job-workers/
pipe-io.cpp
shared-memory-manager.cpp)

prepend(KPHP_STATSHOUSE_SOURCES ${BASE_DIR}/server/statshouse/
statshouse-client.cpp
statshouse-manager.cpp)

prepend(KPHP_DATABASE_DRIVERS_SOURCES ${BASE_DIR}/server/database-drivers/
adaptor.cpp
connector.cpp)
Expand All @@ -65,6 +68,7 @@ endif()
set(KPHP_SERVER_ALL_SOURCES
${KPHP_SERVER_SOURCES}
${KPHP_JOB_WORKERS_SOURCES}
${KPHP_STATSHOUSE_SOURCES}
${KPHP_DATABASE_DRIVERS_SOURCES}
${KPHP_DATABASE_DRIVERS_MYSQL_SOURCES}
${KPHP_DATABASE_DRIVERS_PGSQL_SOURCES})
Expand Down
6 changes: 1 addition & 5 deletions server/shared-data-worker-cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@ void SharedDataWorkerCache::init_defaults() noexcept {
}

void SharedDataWorkerCache::on_worker_cron() noexcept {
const auto now = std::chrono::steady_clock::now();
if (now - last_update_ >= std::chrono::seconds{1}) {
cached_worker_stats = vk::singleton<SharedData>::get().load_worker_stats();
last_update_ = now;
}
cached_worker_stats = vk::singleton<SharedData>::get().load_worker_stats();
}

const WorkersStats &SharedDataWorkerCache::get_cached_worker_stats() const noexcept {
Expand Down
1 change: 0 additions & 1 deletion server/shared-data-worker-cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ class SharedDataWorkerCache : vk::not_copyable {
const WorkersStats &get_cached_worker_stats() const noexcept;

private:
std::chrono::steady_clock::time_point last_update_;
WorkersStats cached_worker_stats;

SharedDataWorkerCache() = default;
Expand Down
14 changes: 14 additions & 0 deletions server/shared-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include <atomic>
#include <chrono>
#include <tuple>

#include "common/mixin/not_copyable.h"
Expand All @@ -28,9 +29,13 @@ struct WorkersStats {
class SharedData : vk::not_copyable {
struct Storage {
std::atomic<WorkersStats::PackerRepr> workers_stats;
std::atomic<uint64_t> start_use_host_in_statshouse_metrics_timestamp{0};
};

public:
using clock = std::chrono::steady_clock;
using time_point = clock::time_point;

void init();

void store_worker_stats(const WorkersStats &workers_stats) noexcept {
Expand All @@ -42,6 +47,15 @@ class SharedData : vk::not_copyable {
workers_stats.unpack(storage->workers_stats.load(std::memory_order_relaxed));
return workers_stats;
};

// Saves `tp` into Storage::start_use_host_in_statshouse_metrics_timestamp.
// The time point is stored as its raw tick count (tp.time_since_epoch().count()), because the
// field is a std::atomic<uint64_t>; the signed tick count is implicitly converted to uint64_t.
// The store uses release ordering, matching the acquire load in
// load_start_use_host_in_statshouse_metrics_timestamp().
// NOTE(review): presumably `storage` points to memory shared between processes — confirm in init().
void store_start_use_host_in_statshouse_metrics_timestamp(const time_point &tp) noexcept {
storage->start_use_host_in_statshouse_metrics_timestamp.store(tp.time_since_epoch().count(), std::memory_order_release);
}

// Reads Storage::start_use_host_in_statshouse_metrics_timestamp with acquire ordering and
// rebuilds a steady_clock time_point from the stored tick count.
// The field's default member initializer is 0, which maps to a time_point at the clock's epoch.
// NOTE(review): whether that initializer is actually applied depends on how init() constructs
// the Storage object — confirm there.
time_point load_start_use_host_in_statshouse_metrics_timestamp() noexcept {
auto t = storage->start_use_host_in_statshouse_metrics_timestamp.load(std::memory_order_acquire);
// Convert the raw tick count back into the clock's duration type before wrapping it in a time_point.
return time_point{clock::duration{t}};
}
private:
Storage *storage;

Expand Down
Loading

0 comments on commit c8dc706

Please sign in to comment.