Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
130892: ccl/sqlproxyccl: add proxy.sql.routing_method_count metric r=jaylim-crl a=jaylim-crl

Previously, it was not possible to determine the distribution of users
connecting to the proxy via SNI, database, or cluster option (via the options
parameter). This commit introduces a `proxy.sql.routing_method_count`
metric to record the number of occurrences of each proxy routing method. Only
successful parsing will be recorded; if a user provides invalid input, it will
not be tracked. This enhancement allows us to monitor and understand which
method was used by the proxy to retrieve the cluster identifier.

Epic: [CC-6926](https://cockroachlabs.atlassian.net/browse/CC-6926)

Release note: None

Co-authored-by: Jay <[email protected]>
  • Loading branch information
craig[bot] and jaylim-crl committed Sep 18, 2024
2 parents 9316bff + 56fc243 commit 197c6ee
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 10 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/sqlproxyccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ go_library(
"//pkg/util/httputil",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/metric/aggmetric",
"//pkg/util/netutil/addr",
"//pkg/util/randutil",
"//pkg/util/retry",
Expand Down
25 changes: 21 additions & 4 deletions pkg/ccl/sqlproxyccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package sqlproxyccl
import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
)

// metrics contains pointers to the metrics for monitoring proxy operations.
Expand Down Expand Up @@ -45,6 +46,11 @@ type metrics struct {
QueryCancelSuccessful *metric.Counter

AccessControlFileErrorCount *metric.Gauge

RoutingMethodCount *aggmetric.AggCounter
SNIRoutingMethodCount *aggmetric.Counter
DatabaseRoutingMethodCount *aggmetric.Counter
ClusterOptionRoutingMethodCount *aggmetric.Counter
}

// MetricStruct implements the metrics.Struct interface.
Expand Down Expand Up @@ -212,18 +218,23 @@ var (
Measurement: "Query Cancel Requests",
Unit: metric.Unit_COUNT,
}
accessControlFileErrorCount = metric.Metadata{
metaAccessControlFileErrorCount = metric.Metadata{
Name: "proxy.access_control.errors",
Help: "Numbers of access control list files that are currently having errors",
Measurement: "Access Control File Errors",
Unit: metric.Unit_COUNT,
}
metaRoutingMethodCount = metric.Metadata{
Name: "proxy.sql.routing_method_count",
Help: "Number of occurrences of each proxy routing method",
Measurement: "Number of occurrences",
Unit: metric.Unit_COUNT,
}
)

// makeProxyMetrics instantiates the metrics holder for proxy monitoring.
func makeProxyMetrics() metrics {

return metrics{
m := &metrics{
BackendDisconnectCount: metric.NewCounter(metaBackendDisconnectCount),
IdleDisconnectCount: metric.NewCounter(metaIdleDisconnectCount),
BackendDownCount: metric.NewCounter(metaBackendDownCount),
Expand Down Expand Up @@ -273,8 +284,14 @@ func makeProxyMetrics() metrics {
QueryCancelForwarded: metric.NewCounter(metaQueryCancelForwarded),
QueryCancelSuccessful: metric.NewCounter(metaQueryCancelSuccessful),

AccessControlFileErrorCount: metric.NewGauge(accessControlFileErrorCount),
AccessControlFileErrorCount: metric.NewGauge(metaAccessControlFileErrorCount),

RoutingMethodCount: aggmetric.NewCounter(metaRoutingMethodCount, "method"),
}
m.SNIRoutingMethodCount = m.RoutingMethodCount.AddChild("sni")
m.DatabaseRoutingMethodCount = m.RoutingMethodCount.AddChild("database")
m.ClusterOptionRoutingMethodCount = m.RoutingMethodCount.AddChild("cluster_option")
return *m
}

// updateForError updates the metrics relevant for the type of the error
Expand Down
16 changes: 13 additions & 3 deletions pkg/ccl/sqlproxyccl/proxy_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func (handler *proxyHandler) handle(

// NOTE: Errors returned from this function are user-facing errors so we
// should be careful with the details that we want to expose.
backendStartupMsg, clusterName, tenID, err := clusterNameAndTenantFromParams(ctx, fe)
backendStartupMsg, clusterName, tenID, err := clusterNameAndTenantFromParams(ctx, fe, handler.metrics)
if err != nil {
clientErr := withCode(err, codeParamsRoutingFailed)
log.Errorf(ctx, "unable to extract cluster name and tenant id: %s", err.Error())
Expand Down Expand Up @@ -715,7 +715,7 @@ func (handler *proxyHandler) setupIncomingCert(ctx context.Context) error {
// through its command-line options, i.e. "-c NAME=VALUE", "-cNAME=VALUE", and
// "--NAME=VALUE".
func clusterNameAndTenantFromParams(
ctx context.Context, fe *FrontendAdmitInfo,
ctx context.Context, fe *FrontendAdmitInfo, metrics *metrics,
) (*pgproto3.StartupMessage, string, roachpb.TenantID, error) {
clusterIdentifierDB, databaseName, err := parseDatabaseParam(fe.Msg.Parameters["database"])
if err != nil {
Expand Down Expand Up @@ -748,6 +748,7 @@ func clusterNameAndTenantFromParams(
// are blank. If SNI doesn't parse as a valid cluster id, then we assume
// that the end user didn't intend to pass cluster info through SNI and
// show missing cluster identifier instead of invalid cluster identifier.
metrics.SNIRoutingMethodCount.Inc(1)
return fe.Msg, clusterName, tenID, nil
}
}
Expand Down Expand Up @@ -790,7 +791,16 @@ func clusterNameAndTenantFromParams(
paramsOut[key] = value
}
}

// If both are provided, they must be the same, and we will track both.
if clusterIdentifierDB != "" {
metrics.DatabaseRoutingMethodCount.Inc(1)
}
// This metric is specific to the --cluster option. If we introduce a new
// --routing-id in the future, the existing metrics will still work, and
// we should introduce a new RoutingIDOptionReads metric.
if clusterIdentifierOpt != "" {
metrics.ClusterOptionRoutingMethodCount.Inc(1)
}
outMsg := &pgproto3.StartupMessage{
ProtocolVersion: fe.Msg.ProtocolVersion,
Parameters: paramsOut,
Expand Down
77 changes: 74 additions & 3 deletions pkg/ccl/sqlproxyccl/proxy_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2517,15 +2517,17 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {

testCases := []struct {
name string
sniServerName string
params map[string]string
expectedClusterName string
expectedTenantID uint64
expectedParams map[string]string
expectedError string
expectedHint string
expectedMetrics func(t *testing.T, m *metrics)
}{
{
name: "empty params",
name: "empty params and server name",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
Expand Down Expand Up @@ -2640,6 +2642,32 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedError: "invalid cluster identifier 'happy-koala-0'",
expectedHint: "Tenant ID 0 is invalid.",
},
{
name: "invalid sni format",
sniServerName: "foo-bar-baz",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
},
{
name: "invalid sni value",
sniServerName: "happy2koala-.abc.aws-ap-south-1.cockroachlabs.cloud",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
},
{
name: "valid sni value",
sniServerName: "happy-seal-10.abc.gcp-us-central1.cockroachlabs.cloud",
params: map[string]string{},
expectedClusterName: "happy-seal",
expectedTenantID: 10,
expectedParams: map[string]string{},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.SNIRoutingMethodCount.Value())
},
},
{
name: "multiple similar cluster identifiers",
params: map[string]string{
Expand All @@ -2649,6 +2677,11 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(2), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "cluster identifier in database param",
Expand All @@ -2659,6 +2692,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: strings.Repeat("a", 100),
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb", "foo": "bar"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
},
},
{
name: "valid cluster identifier with invalid arrangements",
Expand All @@ -2672,6 +2709,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "-c -c -c -c",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "short option: cluster identifier in options param",
Expand All @@ -2682,6 +2723,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "short option with spaces: cluster identifier in options param",
Expand All @@ -2692,6 +2737,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "long option: cluster identifier in options param",
Expand All @@ -2705,6 +2754,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "--foo=test",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "long option: cluster identifier in options param with other options",
Expand All @@ -2718,26 +2771,35 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "-csearch_path=public \t--foo=test",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "leading 0s are ok",
params: map[string]string{"database": "happy-koala-0-07.defaultdb"},
expectedClusterName: "happy-koala-0",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
msg := &pgproto3.StartupMessage{Parameters: tc.params}
m := makeProxyMetrics()

originalParams := make(map[string]string)
for k, v := range msg.Parameters {
originalParams[k] = v
}

fe := &FrontendAdmitInfo{Msg: msg}
outMsg, clusterName, tenantID, err := clusterNameAndTenantFromParams(ctx, fe)
fe := &FrontendAdmitInfo{Msg: msg, SniServerName: tc.sniServerName}
outMsg, clusterName, tenantID, err := clusterNameAndTenantFromParams(ctx, fe, &m)
if tc.expectedError == "" {
require.NoErrorf(t, err, "failed test case\n%+v", tc)

Expand All @@ -2755,6 +2817,15 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {

// Check that the original parameters were not modified.
require.Equal(t, originalParams, msg.Parameters)

if tc.expectedMetrics != nil {
tc.expectedMetrics(t, &m)
} else {
require.Zero(t, m.RoutingMethodCount.Count())
require.Zero(t, m.SNIRoutingMethodCount.Value())
require.Zero(t, m.DatabaseRoutingMethodCount.Value())
require.Zero(t, m.ClusterOptionRoutingMethodCount.Value())
}
})
}
}
Expand Down

0 comments on commit 197c6ee

Please sign in to comment.