Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add extra script time stats #891

Merged
merged 15 commits into from
Oct 16, 2023
13 changes: 13 additions & 0 deletions common/dl-utils-lite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <string.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <time.h>
#include <unistd.h>
#include <dirent.h>
Expand Down Expand Up @@ -360,3 +361,15 @@ int get_cpu_total (unsigned long long *cpu_total) {
close (fd);
return 1;
}

process_rusage_t get_rusage_info() {
rusage usage;
memset(&usage, 0, sizeof(rusage));
if (getrusage(RUSAGE_SELF, &usage) != 0) {
fprintf(stderr, "error while colling getrusage %s\n", strerror(errno));
}
double user_time = usage.ru_utime.tv_sec + (usage.ru_utime.tv_usec / 1E6);
double system_time = usage.ru_stime.tv_sec + (usage.ru_stime.tv_usec / 1E6);

return {user_time, system_time, usage.ru_nvcsw, usage.ru_nivcsw};
}
9 changes: 9 additions & 0 deletions common/dl-utils-lite.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,17 @@ struct mem_info_t {
uint32_t rss_shmem;
};

struct process_rusage_t {
double user_time;
double system_time;
long voluntary_context_switches;
long involuntary_context_switches;
};

mem_info_t get_self_mem_stats();
int get_self_threads_count();
int get_pid_info (pid_t pid, pid_info_t *info);
unsigned long long get_pid_start_time (pid_t pid);
int get_cpu_total (unsigned long long *cpu_total);
process_rusage_t get_rusage_info();

9 changes: 4 additions & 5 deletions net/net-connections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ int outbound_connections, active_outbound_connections, ready_outbound_connection
long long outbound_connections_created, inbound_connections_accepted;
int ready_targets;
int conn_generation;

static void(*on_active_special_connections_update_callback)() = []{};
static void(*on_active_special_connections_update_callback)(bool on_accept) = [](bool){};

const char *unix_socket_directory = "/var/run/engine";
SAVE_STRING_OPTION_PARSER(OPT_NETWORK, "unix-socket-directory", unix_socket_directory, "path to directory with UNIX sockets");
Expand All @@ -81,7 +80,7 @@ static void connections_constructor() {
bucket_salt = lrand48();
}

void set_on_active_special_connections_update_callback(void (*callback)()) noexcept {
void set_on_active_special_connections_update_callback(void (*callback)(bool)) noexcept {
assert(callback);
on_active_special_connections_update_callback = callback;
}
Expand Down Expand Up @@ -642,7 +641,7 @@ void close_special_connection(struct connection *c) {
tvkprintf(net_connections, 3, "close special conn %d\n", c->fd);
if (c->basic_type != ct_listen) {
--active_special_connections;
on_active_special_connections_update_callback();
on_active_special_connections_update_callback(false);
if (active_special_connections < max_special_connections && Connections[c->listening].basic_type == ct_listen &&
Connections[c->listening].generation == c->listening_generation) {
epoll_insert(c->listening, EVT_READ | EVT_LEVEL);
Expand Down Expand Up @@ -1170,7 +1169,7 @@ int accept_new_connections(struct connection *cc) {
max_special_connections);
}
++active_special_connections;
on_active_special_connections_update_callback();
on_active_special_connections_update_callback(true);
if (active_special_connections >= max_special_connections) {
return EVA_REMOVE;
}
Expand Down
2 changes: 1 addition & 1 deletion net/net-connections.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ extern int ready_targets;
extern long long total_failed_connections, total_connect_failures, unused_connections_closed;
extern const char *unix_socket_directory;

void set_on_active_special_connections_update_callback(void (*callback)()) noexcept;
void set_on_active_special_connections_update_callback(void (*callback)(bool on_accept)) noexcept;

int init_listening_connection_mode(int fd, conn_type_t *type, void *extra, int mode);

Expand Down
5 changes: 4 additions & 1 deletion server/php-engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2376,7 +2376,10 @@ int run_main(int argc, char **argv, php_mode mode) {
vk::singleton<SharedData>::get().init();

max_special_connections = 1;
set_on_active_special_connections_update_callback([] {
set_on_active_special_connections_update_callback([] (bool on_accept) {
if (on_accept) {
PhpScript::script_time_stats.http_conn_accept_time = get_utime_monotonic();
}
vk::singleton<ServerStats>::get().update_active_connections(active_special_connections, max_special_connections);
});
static_assert(offsetof(tcp_rpc_client_functions, rpc_ready) == offsetof(tcp_rpc_server_functions, rpc_ready), "");
Expand Down
26 changes: 23 additions & 3 deletions server/php-runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,14 @@ void PhpScript::init(script_t *script, php_query_data *data_to_set) noexcept {

error_message = "??? error";

script_time_stats.script_start_time = get_utime_monotonic();
script_time = 0;
net_time = 0;
cur_timestamp = dl_time();
script_init_rusage = get_rusage_info();

queries_cnt = 0;
long_queries_cnt = 0;
cur_timestamp = dl_time();

query_stats_id++;
memset(&query_stats, 0, sizeof(query_stats));
Expand Down Expand Up @@ -307,8 +310,16 @@ void PhpScript::finish() noexcept {
const auto &script_mem_stats = dl::get_script_memory_stats();
state = run_state_t::uncleared;
update_net_time();
vk::singleton<ServerStats>::get().add_request_stats(script_time, net_time, queries_cnt, long_queries_cnt, script_mem_stats.max_memory_used,
script_mem_stats.max_real_memory_used, vk::singleton<CurlMemoryUsage>::get().total_allocated, error_type);
double script_init_time_sec = script_time_stats.script_start_time - script_time_stats.worker_init_time;
double connection_process_time_sec = 0;
if (process_type == ProcessType::http_worker) {
connection_process_time_sec = script_time_stats.worker_init_time - script_time_stats.http_conn_accept_time;
}
process_rusage_t script_rusage = get_script_rusage();

vk::singleton<ServerStats>::get().add_request_stats(script_time, net_time, script_init_time_sec, connection_process_time_sec,
queries_cnt, long_queries_cnt, script_mem_stats.max_memory_used,
script_mem_stats.max_real_memory_used, vk::singleton<CurlMemoryUsage>::get().total_allocated, script_rusage, error_type);
if (save_state == run_state_t::error) {
assert (error_message != nullptr);
kprintf("Critical error during script execution: %s\n", error_message);
Expand Down Expand Up @@ -476,6 +487,14 @@ double PhpScript::get_script_time() noexcept {
return script_time;
}

process_rusage_t PhpScript::get_script_rusage() noexcept {
process_rusage_t current_rusage = get_rusage_info();
return {current_rusage.user_time - script_init_rusage.user_time,
current_rusage.system_time - script_init_rusage.system_time,
current_rusage.voluntary_context_switches - script_init_rusage.voluntary_context_switches,
current_rusage.involuntary_context_switches - script_init_rusage.involuntary_context_switches};
}

int PhpScript::get_net_queries_count() const noexcept {
return queries_cnt;
}
Expand All @@ -485,6 +504,7 @@ ucontext_t_portable PhpScript::exit_context;
volatile bool PhpScript::in_script_context = false;
volatile bool PhpScript::time_limit_exceeded = false;
volatile bool PhpScript::memory_limit_exceeded = false;
PhpScript::script_time_stats_t PhpScript::script_time_stats;

static __inline__ void *get_sp() {
return __builtin_frame_address(0);
Expand Down
17 changes: 13 additions & 4 deletions server/php-runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

#pragma once

#include <csetjmp>

#include "common/dl-utils-lite.h"
#include "common/kprintf.h"
#include "common/mixin/not_copyable.h"
Expand Down Expand Up @@ -84,11 +82,19 @@ class PhpScriptStack : vk::not_copyable {
* It stores state of the script: current execution point, pointers to allocated script memory, stack for script context, etc.
*/
class PhpScript {
double cur_timestamp{0}, net_time{0}, script_time{0};
double cur_timestamp{0};
double net_time{0};
double script_time{0};
double last_net_time_delta{0};
int queries_cnt{0};
int long_queries_cnt{0};

struct script_time_stats_t {
double http_conn_accept_time{0};
double worker_init_time{0};
double script_start_time{0};
};

private:
int swapcontext_helper(ucontext_t_portable *oucp, const ucontext_t_portable *ucp);

Expand All @@ -101,6 +107,9 @@ class PhpScript {
volatile static bool time_limit_exceeded;
volatile static bool memory_limit_exceeded;

static script_time_stats_t script_time_stats;
process_rusage_t script_init_rusage;

run_state_t state{run_state_t::empty};
const char *error_message{nullptr};
script_error_t error_type{script_error_t::no_error};
Expand All @@ -111,7 +120,6 @@ class PhpScript {
PhpScriptStack script_stack;

ucontext_t_portable run_context{};
sigjmp_buf timeout_handler{};

script_t *run_main{nullptr};
php_query_data *data{nullptr};
Expand Down Expand Up @@ -150,6 +158,7 @@ class PhpScript {
void reset_script_timeout() noexcept;
double get_net_time() const noexcept;
double get_script_time() noexcept;
process_rusage_t get_script_rusage() noexcept;
int get_net_queries_count() const noexcept;
long long memory_get_total_usage() const noexcept;

Expand Down
1 change: 1 addition & 0 deletions server/php-worker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ PhpWorker::PhpWorker(php_worker_mode_t mode_, connection *c, http_query_data *ht
, mode(mode_)
, req_id(req_id_)
{
PhpScript::script_time_stats.worker_init_time = init_time;
assert(c != nullptr);
if (conn->target) {
target_fd = static_cast<int>(conn->target - Targets);
Expand Down
62 changes: 57 additions & 5 deletions server/server-stats.cpp
DrDet marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "server/json-logger.h"
#include "server/server-stats.h"
#include "server/php-worker.h"
#include "server/statshouse/statshouse-manager.h"

namespace {
Expand All @@ -38,6 +39,12 @@ struct ScriptSamples : WithStatType<uint64_t> {
working_time,
net_time,
script_time,
script_init_time,
http_connection_process_time,
user_time,
system_time,
voluntary_context_switches,
involuntary_context_switches,
types_count
};
};
Expand All @@ -49,6 +56,12 @@ struct QueriesStat : WithStatType<uint64_t> {
outgoing_long_queries,
script_time,
net_time,
script_init_time,
http_connection_process_time,
user_time,
system_time,
voluntary_context_switches,
involuntary_context_switches,
types_count
};
};
Expand Down Expand Up @@ -215,13 +228,22 @@ EnumTable<IdleStat> get_idle_stat() noexcept {
return result;
}

EnumTable<QueriesStat> make_queries_stat(uint64_t script_queries, uint64_t long_script_queries, uint64_t script_time_ns, uint64_t net_time_ns) noexcept {
EnumTable<QueriesStat> make_queries_stat(uint64_t script_queries, uint64_t long_script_queries,
uint64_t script_time_ns, uint64_t net_time_ns, uint64_t script_init_time_ns, uint64_t connection_process_time_ns,
uint64_t user_time, uint64_t system_time,
uint64_t voluntary_context_switches, uint64_t involuntary_context_switches) noexcept {
EnumTable<QueriesStat> result;
result[QueriesStat::Key::incoming_queries] = 1;
result[QueriesStat::Key::outgoing_queries] = script_queries;
result[QueriesStat::Key::outgoing_long_queries] = long_script_queries;
result[QueriesStat::Key::script_time] = script_time_ns;
result[QueriesStat::Key::net_time] = net_time_ns;
result[QueriesStat::Key::script_init_time] = script_init_time_ns;
result[QueriesStat::Key::http_connection_process_time] = connection_process_time_ns;
result[QueriesStat::Key::user_time] = user_time;
result[QueriesStat::Key::system_time] = system_time;
result[QueriesStat::Key::voluntary_context_switches] = voluntary_context_switches;
result[QueriesStat::Key::involuntary_context_switches] = involuntary_context_switches;
return result;
}

Expand Down Expand Up @@ -296,6 +318,12 @@ struct WorkerSharedStats : private vk::not_copyable {
sample[ScriptSamples::Key::working_time] = queries[QueriesStat::Key::net_time] + queries[QueriesStat::Key::script_time];
sample[ScriptSamples::Key::net_time] = queries[QueriesStat::Key::net_time];
sample[ScriptSamples::Key::script_time] = queries[QueriesStat::Key::script_time];
sample[ScriptSamples::Key::http_connection_process_time] = queries[QueriesStat::Key::http_connection_process_time];
sample[ScriptSamples::Key::script_init_time] = queries[QueriesStat::Key::script_init_time];
sample[ScriptSamples::Key::user_time] = queries[QueriesStat::Key::user_time];
sample[ScriptSamples::Key::system_time] = queries[QueriesStat::Key::system_time];
sample[ScriptSamples::Key::voluntary_context_switches] = queries[QueriesStat::Key::voluntary_context_switches];
sample[ScriptSamples::Key::involuntary_context_switches] = queries[QueriesStat::Key::involuntary_context_switches];
script_samples.add_sample(sample);
}

Expand Down Expand Up @@ -606,17 +634,29 @@ void ServerStats::after_fork(pid_t worker_pid, uint64_t active_connections, uint
last_update_aggr_stats = std::chrono::steady_clock::now();
}

void ServerStats::add_request_stats(double script_time_sec, double net_time_sec, int64_t script_queries, int64_t long_script_queries, int64_t memory_used,
int64_t real_memory_used, int64_t curl_total_allocated, script_error_t error) noexcept {
void ServerStats::add_request_stats(double script_time_sec, double net_time_sec, double script_init_time_sec, double connection_process_time_sec,
int64_t script_queries, int64_t long_script_queries, int64_t memory_used,
int64_t real_memory_used, int64_t curl_total_allocated, process_rusage_t script_rusage, script_error_t error) noexcept {
auto &stats = worker_type_ == WorkerType::job_worker ? shared_stats_->job_workers : shared_stats_->general_workers;
const auto script_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(script_time_sec));
const auto net_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(net_time_sec));
const auto queries_stat = make_queries_stat(script_queries, long_script_queries, script_time.count(), net_time.count());
const auto script_init_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(script_init_time_sec));
const auto http_connection_process_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(connection_process_time_sec));
const auto script_user_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(script_rusage.user_time));
const auto script_system_time = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::duration<double>(script_rusage.system_time));
const auto queries_stat = make_queries_stat(script_queries, long_script_queries, script_time.count(),
net_time.count(), script_init_time.count(), http_connection_process_time.count(),
script_user_time.count(), script_system_time.count(),
script_rusage.voluntary_context_switches, script_rusage.involuntary_context_switches);


stats.add_request_stats(queries_stat, error, memory_used, real_memory_used, curl_total_allocated);
shared_stats_->workers.add_worker_stats(queries_stat, worker_process_id_);

StatsHouseManager::get().add_request_stats(script_time.count(), net_time.count(), error, memory_used, real_memory_used, script_queries, long_script_queries);
StatsHouseManager::get().add_request_stats(script_time.count(), net_time.count(), error, memory_used, real_memory_used, script_queries, long_script_queries,
script_user_time.count(), script_system_time.count(),
script_init_time.count(), http_connection_process_time.count(),
script_rusage.voluntary_context_switches, script_rusage.involuntary_context_switches);
}

void ServerStats::add_job_stats(double job_wait_time_sec, int64_t request_memory_used, int64_t request_real_memory_used, int64_t response_memory_used,
Expand Down Expand Up @@ -726,14 +766,26 @@ void write_to(stats_t *stats, const char *prefix, const WorkerAggregatedStats &a

stats->add_gauge_stat(ns2double(shared.total_queries_stat[QueriesStat::Key::script_time]), prefix, ".requests.script_time.total");
stats->add_gauge_stat(ns2double(shared.total_queries_stat[QueriesStat::Key::net_time]), prefix, ".requests.net_time.total");
stats->add_gauge_stat(ns2double(shared.total_queries_stat[QueriesStat::Key::script_init_time]), prefix, ".requests.script_init_time.total");
stats->add_gauge_stat(ns2double(shared.total_queries_stat[QueriesStat::Key::http_connection_process_time]), prefix, ".requests.http_connection_process_time.total");
stats->add_gauge_stat(shared.total_queries_stat[QueriesStat::Key::incoming_queries], prefix, ".requests.total_incoming_queries");
stats->add_gauge_stat(shared.total_queries_stat[QueriesStat::Key::outgoing_queries], prefix, ".requests.total_outgoing_queries");
stats->add_gauge_stat(shared.total_queries_stat[QueriesStat::Key::outgoing_long_queries], prefix, ".requests.total_outgoing_long_queries");
stats->add_gauge_stat(ns2double(shared.total_queries_stat[QueriesStat::Key::user_time]), prefix, ".requests.script_user_time.total");
stats->add_gauge_stat(ns2double(shared.total_queries_stat[QueriesStat::Key::system_time]), prefix, ".requests.script_system_time.total");
stats->add_gauge_stat(shared.total_queries_stat[QueriesStat::Key::voluntary_context_switches], prefix, ".requests.script_voluntary_context_switches.total");
stats->add_gauge_stat(shared.total_queries_stat[QueriesStat::Key::involuntary_context_switches], prefix, ".requests.script_involuntary_context_switches.total");

write_to(stats, prefix, ".requests.outgoing_queries", agg.script_samples[ScriptSamples::Key::outgoing_queries]);
write_to(stats, prefix, ".requests.outgoing_long_queries", agg.script_samples[ScriptSamples::Key::outgoing_long_queries]);
write_to(stats, prefix, ".requests.script_time", agg.script_samples[ScriptSamples::Key::script_time], ns2double);
write_to(stats, prefix, ".requests.net_time", agg.script_samples[ScriptSamples::Key::net_time], ns2double);
write_to(stats, prefix, ".requests.script_init_time", agg.script_samples[ScriptSamples::Key::script_init_time], ns2double);
write_to(stats, prefix, ".requests.http_connection_process_time", agg.script_samples[ScriptSamples::Key::http_connection_process_time], ns2double);
write_to(stats, prefix, ".requests.script_user_time", agg.script_samples[ScriptSamples::Key::user_time], ns2double);
write_to(stats, prefix, ".requests.script_system_time", agg.script_samples[ScriptSamples::Key::system_time], ns2double);
write_to(stats, prefix, ".requests.script_voluntary_context_switches", agg.script_samples[ScriptSamples::Key::voluntary_context_switches]);
write_to(stats, prefix, ".requests.script_involuntary_context_switches", agg.script_samples[ScriptSamples::Key::involuntary_context_switches]);
write_to(stats, prefix, ".requests.working_time", agg.script_samples[ScriptSamples::Key::working_time], ns2double);
write_to(stats, prefix, ".memory.script_usage", agg.script_samples[ScriptSamples::Key::memory_used]);
write_to(stats, prefix, ".memory.script_real_usage", agg.script_samples[ScriptSamples::Key::real_memory_used]);
Expand Down
6 changes: 4 additions & 2 deletions server/server-stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ class ServerStats : vk::not_copyable {
public:
void init() noexcept;

void add_request_stats(double script_time_sec, double net_time_sec, int64_t script_queries, int64_t long_script_queries, int64_t memory_used,
int64_t real_memory_used, int64_t curl_total_allocated, script_error_t error) noexcept;
void add_request_stats(double script_time_sec, double net_time_sec, double script_init_time_sec, double connection_process_time_sec,
int64_t script_queries, int64_t long_script_queries, int64_t memory_used,
int64_t real_memory_used, int64_t curl_total_allocated,
process_rusage_t script_rusage, script_error_t error) noexcept;
void add_job_stats(double job_wait_time_sec, int64_t request_memory_used, int64_t request_real_memory_used, int64_t response_memory_used,
int64_t response_real_memory_used) noexcept;
void add_job_common_memory_stats(int64_t common_request_memory_used, int64_t common_request_real_memory_used) noexcept;
Expand Down
Loading