Skip to content

Commit

Permalink
ccl/sqlproxyccl: add proxy.sql.routing_method_count metric
Browse files Browse the repository at this point in the history
Previously, it was not possible to determine the distribution of users
connecting to the proxy via SNI, database, or cluster option (via the options
parameter). This commit introduces a `proxy.sql.routing_method_count`
metric to record the number of occurrences of each proxy routing method. Only
successful parsing will be recorded; if a user provides invalid input, it will
not be tracked. This enhancement allows us to monitor and understand which
method was used by the proxy to retrieve the cluster identifier.

Epic: CC-6926

Release note: None
  • Loading branch information
jaylim-crl committed Sep 17, 2024
1 parent 0c2c3ce commit 56fc243
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 10 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/sqlproxyccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ go_library(
"//pkg/util/httputil",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/metric/aggmetric",
"//pkg/util/netutil/addr",
"//pkg/util/randutil",
"//pkg/util/retry",
Expand Down
25 changes: 21 additions & 4 deletions pkg/ccl/sqlproxyccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package sqlproxyccl
import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
)

// metrics contains pointers to the metrics for monitoring proxy operations.
Expand Down Expand Up @@ -45,6 +46,11 @@ type metrics struct {
QueryCancelSuccessful *metric.Counter

AccessControlFileErrorCount *metric.Gauge

RoutingMethodCount *aggmetric.AggCounter
SNIRoutingMethodCount *aggmetric.Counter
DatabaseRoutingMethodCount *aggmetric.Counter
ClusterOptionRoutingMethodCount *aggmetric.Counter
}

// MetricStruct implements the metrics.Struct interface.
Expand Down Expand Up @@ -212,18 +218,23 @@ var (
Measurement: "Query Cancel Requests",
Unit: metric.Unit_COUNT,
}
accessControlFileErrorCount = metric.Metadata{
metaAccessControlFileErrorCount = metric.Metadata{
Name: "proxy.access_control.errors",
Help: "Numbers of access control list files that are currently having errors",
Measurement: "Access Control File Errors",
Unit: metric.Unit_COUNT,
}
metaRoutingMethodCount = metric.Metadata{
Name: "proxy.sql.routing_method_count",
Help: "Number of occurrences of each proxy routing method",
Measurement: "Number of occurrences",
Unit: metric.Unit_COUNT,
}
)

// makeProxyMetrics instantiates the metrics holder for proxy monitoring.
func makeProxyMetrics() metrics {

return metrics{
m := &metrics{
BackendDisconnectCount: metric.NewCounter(metaBackendDisconnectCount),
IdleDisconnectCount: metric.NewCounter(metaIdleDisconnectCount),
BackendDownCount: metric.NewCounter(metaBackendDownCount),
Expand Down Expand Up @@ -273,8 +284,14 @@ func makeProxyMetrics() metrics {
QueryCancelForwarded: metric.NewCounter(metaQueryCancelForwarded),
QueryCancelSuccessful: metric.NewCounter(metaQueryCancelSuccessful),

AccessControlFileErrorCount: metric.NewGauge(accessControlFileErrorCount),
AccessControlFileErrorCount: metric.NewGauge(metaAccessControlFileErrorCount),

RoutingMethodCount: aggmetric.NewCounter(metaRoutingMethodCount, "method"),
}
m.SNIRoutingMethodCount = m.RoutingMethodCount.AddChild("sni")
m.DatabaseRoutingMethodCount = m.RoutingMethodCount.AddChild("database")
m.ClusterOptionRoutingMethodCount = m.RoutingMethodCount.AddChild("cluster_option")
return *m
}

// updateForError updates the metrics relevant for the type of the error
Expand Down
16 changes: 13 additions & 3 deletions pkg/ccl/sqlproxyccl/proxy_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func (handler *proxyHandler) handle(

// NOTE: Errors returned from this function are user-facing errors so we
// should be careful with the details that we want to expose.
backendStartupMsg, clusterName, tenID, err := clusterNameAndTenantFromParams(ctx, fe)
backendStartupMsg, clusterName, tenID, err := clusterNameAndTenantFromParams(ctx, fe, handler.metrics)
if err != nil {
clientErr := withCode(err, codeParamsRoutingFailed)
log.Errorf(ctx, "unable to extract cluster name and tenant id: %s", err.Error())
Expand Down Expand Up @@ -715,7 +715,7 @@ func (handler *proxyHandler) setupIncomingCert(ctx context.Context) error {
// through its command-line options, i.e. "-c NAME=VALUE", "-cNAME=VALUE", and
// "--NAME=VALUE".
func clusterNameAndTenantFromParams(
ctx context.Context, fe *FrontendAdmitInfo,
ctx context.Context, fe *FrontendAdmitInfo, metrics *metrics,
) (*pgproto3.StartupMessage, string, roachpb.TenantID, error) {
clusterIdentifierDB, databaseName, err := parseDatabaseParam(fe.Msg.Parameters["database"])
if err != nil {
Expand Down Expand Up @@ -748,6 +748,7 @@ func clusterNameAndTenantFromParams(
// are blank. If SNI doesn't parse as a valid cluster id, then we assume
// that the end user didn't intend to pass cluster info through SNI and
// show missing cluster identifier instead of invalid cluster identifier.
metrics.SNIRoutingMethodCount.Inc(1)
return fe.Msg, clusterName, tenID, nil
}
}
Expand Down Expand Up @@ -790,7 +791,16 @@ func clusterNameAndTenantFromParams(
paramsOut[key] = value
}
}

// If both are provided, they must be the same, and we will track both.
if clusterIdentifierDB != "" {
metrics.DatabaseRoutingMethodCount.Inc(1)
}
// This metric is specific to the --cluster option. If we introduce a new
// --routing-id in the future, the existing metrics will still work, and
// we should introduce a new RoutingIDOptionReads metric.
if clusterIdentifierOpt != "" {
metrics.ClusterOptionRoutingMethodCount.Inc(1)
}
outMsg := &pgproto3.StartupMessage{
ProtocolVersion: fe.Msg.ProtocolVersion,
Parameters: paramsOut,
Expand Down
77 changes: 74 additions & 3 deletions pkg/ccl/sqlproxyccl/proxy_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2517,15 +2517,17 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {

testCases := []struct {
name string
sniServerName string
params map[string]string
expectedClusterName string
expectedTenantID uint64
expectedParams map[string]string
expectedError string
expectedHint string
expectedMetrics func(t *testing.T, m *metrics)
}{
{
name: "empty params",
name: "empty params and server name",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
Expand Down Expand Up @@ -2640,6 +2642,32 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedError: "invalid cluster identifier 'happy-koala-0'",
expectedHint: "Tenant ID 0 is invalid.",
},
{
name: "invalid sni format",
sniServerName: "foo-bar-baz",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
},
{
name: "invalid sni value",
sniServerName: "happy2koala-.abc.aws-ap-south-1.cockroachlabs.cloud",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
},
{
name: "valid sni value",
sniServerName: "happy-seal-10.abc.gcp-us-central1.cockroachlabs.cloud",
params: map[string]string{},
expectedClusterName: "happy-seal",
expectedTenantID: 10,
expectedParams: map[string]string{},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.SNIRoutingMethodCount.Value())
},
},
{
name: "multiple similar cluster identifiers",
params: map[string]string{
Expand All @@ -2649,6 +2677,11 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(2), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "cluster identifier in database param",
Expand All @@ -2659,6 +2692,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: strings.Repeat("a", 100),
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb", "foo": "bar"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
},
},
{
name: "valid cluster identifier with invalid arrangements",
Expand All @@ -2672,6 +2709,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "-c -c -c -c",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "short option: cluster identifier in options param",
Expand All @@ -2682,6 +2723,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "short option with spaces: cluster identifier in options param",
Expand All @@ -2692,6 +2737,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "long option: cluster identifier in options param",
Expand All @@ -2705,6 +2754,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "--foo=test",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "long option: cluster identifier in options param with other options",
Expand All @@ -2718,26 +2771,35 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "-csearch_path=public \t--foo=test",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "leading 0s are ok",
params: map[string]string{"database": "happy-koala-0-07.defaultdb"},
expectedClusterName: "happy-koala-0",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
msg := &pgproto3.StartupMessage{Parameters: tc.params}
m := makeProxyMetrics()

originalParams := make(map[string]string)
for k, v := range msg.Parameters {
originalParams[k] = v
}

fe := &FrontendAdmitInfo{Msg: msg}
outMsg, clusterName, tenantID, err := clusterNameAndTenantFromParams(ctx, fe)
fe := &FrontendAdmitInfo{Msg: msg, SniServerName: tc.sniServerName}
outMsg, clusterName, tenantID, err := clusterNameAndTenantFromParams(ctx, fe, &m)
if tc.expectedError == "" {
require.NoErrorf(t, err, "failed test case\n%+v", tc)

Expand All @@ -2755,6 +2817,15 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {

// Check that the original parameters were not modified.
require.Equal(t, originalParams, msg.Parameters)

if tc.expectedMetrics != nil {
tc.expectedMetrics(t, &m)
} else {
require.Zero(t, m.RoutingMethodCount.Count())
require.Zero(t, m.SNIRoutingMethodCount.Value())
require.Zero(t, m.DatabaseRoutingMethodCount.Value())
require.Zero(t, m.ClusterOptionRoutingMethodCount.Value())
}
})
}
}
Expand Down

0 comments on commit 56fc243

Please sign in to comment.