diff --git a/.github/workflows/lint-go.yaml b/.github/workflows/lint-go.yaml index 408e01ae5..79d4d87f3 100644 --- a/.github/workflows/lint-go.yaml +++ b/.github/workflows/lint-go.yaml @@ -1,10 +1,8 @@ name: Lint on: - push: - branches: - - master - pull_request: + # Should only be used by other workflows + workflow_call: permissions: contents: read diff --git a/.github/workflows/on-master-commit.yaml b/.github/workflows/on-master-commit.yaml new file mode 100644 index 000000000..2770b6350 --- /dev/null +++ b/.github/workflows/on-master-commit.yaml @@ -0,0 +1,48 @@ +name: Master branch CI + +on: + push: + branches: + - 'master' + +jobs: + run-unit-tests: + name: Run Unit Tests + uses: ./.github/workflows/test.yaml + + lint: + name: Lint + uses: ./.github/workflows/lint-go.yaml + + generate-tags: + name: Generate Docker Tags + runs-on: ubuntu-latest + outputs: + tag_date: ${{ steps.tag_date.outputs.tag_date }} + short_sha: ${{ steps.short_sha.outputs.short_sha }} + steps: + - name: Generate Tag Date + id: tag_date + run: echo "tag_date=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT" + - name: Generate Short SHA + id: short_sha + run: echo "short_sha=$(echo $GITHUB_SHA | cut -c1-7)" >> "$GITHUB_OUTPUT" + + publish-docker-image: + name: Publish Docker Image + uses: ./.github/workflows/publish-docker-images.yaml + secrets: inherit + needs: + - run-unit-tests + - generate-tags + - lint + permissions: + contents: read + packages: write + with: + images: | + ghcr.io/${{ github.repository }} + # eg: master-20240321-7d8e9f2 + tags: | + type=raw,value=master-${{ needs.generate-tags.outputs.tag_date }}-${{ needs.generate-tags.outputs.short_sha }} + type=raw,value=master-latest diff --git a/.github/workflows/on-pre-release.yaml b/.github/workflows/on-pre-release.yaml new file mode 100644 index 000000000..94988ee03 --- /dev/null +++ b/.github/workflows/on-pre-release.yaml @@ -0,0 +1,43 @@ +name: Pre-Release CI + +on: + push: + tags: + - 'v*.*.*-rc.*' + +jobs: + + validate: + name: Validate Release + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Validate Tag + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # This step validates that the tag is a pre-release + run: | + prerelease=$(gh release view ${{ github.ref_name }} --json isPrerelease | jq -r '.isPrerelease') + if [ "$prerelease" != "true" ]; then + echo "Tag is not a pre-release" + exit 1 + fi + + publish-docker-image: + name: Publish Pre-Release Docker Image + uses: ./.github/workflows/publish-docker-images.yaml + secrets: inherit + needs: + - validate + permissions: + contents: read + packages: write + with: + environment: docker-publish + images: | + ${{ github.repository }} + ghcr.io/${{ github.repository }} + tags: | + type=raw,value=${{ github.ref_name }} diff --git a/.github/workflows/on-pull-request.yaml b/.github/workflows/on-pull-request.yaml new file mode 100644 index 000000000..143d08bdf --- /dev/null +++ b/.github/workflows/on-pull-request.yaml @@ -0,0 +1,29 @@ +name: Pull Request CI + +on: + pull_request: + branches: + - master + +jobs: + run-unit-tests: + name: Run Unit Tests + uses: ./.github/workflows/test.yaml + + lint: + name: Lint + uses: ./.github/workflows/lint-go.yaml + + # This doesn't publish the image, it just tests the publishing workflow (build the image / tags / labels) + test-docker-publish: + name: Test Docker Publish + uses: ./.github/workflows/publish-docker-images.yaml + secrets: inherit + permissions: + contents: read + packages: write + with: + images: | + ghcr.io/${{ github.repository }} + tags: | + type=raw,value=${{ github.ref }}-${{ github.sha }} diff --git a/.github/workflows/on-release.yaml b/.github/workflows/on-release.yaml new file mode 100644 index 000000000..2b4007dd4 --- /dev/null +++ b/.github/workflows/on-release.yaml @@ -0,0 +1,60 @@ +name: Release CI + +on: + push: + tags: + - 'v*.*.*' + - '!v*.*.*-rc*' + +jobs: + validate: + runs-on: ubuntu-latest + name: Validate Release + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v3 + with: + node-version: '20' + + - name: Validate Tag + run: | + node -e "if (!/^v\d+\.\d+\.\d+$/.test('${{ github.ref_name }}')) { console.error('Invalid version provided');process.exit(1);}" + + - name: Validate VERSION + run: | + if [ "$(cat cmd/thor/VERSION)" != "${{ github.ref_name }}" ]; then + echo "VERSION file does not match tag" + exit 1 + fi + + - name: Validate Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # This step validates that the tag is an official release + run: | + prerelease=$(gh release view ${{ github.ref_name }} --json isPrerelease | jq -r '.isPrerelease') + if [ "$prerelease" != "false" ]; then + echo "Tag is not an official release" + exit 1 + fi + + publish-docker-image: + name: Publish Docker Image + uses: ./.github/workflows/publish-docker-images.yaml + secrets: inherit + needs: + - validate + permissions: + contents: read + packages: write + with: + environment: docker-publish + images: | + ${{ github.repository }} + ghcr.io/${{ github.repository }} + tags: | + type=raw,value=${{ github.event.release.tag_name }} + type=raw,value=latest diff --git a/.github/workflows/publish-docker-images.yaml b/.github/workflows/publish-docker-images.yaml new file mode 100644 index 000000000..7362928dd --- /dev/null +++ b/.github/workflows/publish-docker-images.yaml @@ -0,0 +1,73 @@ +name: Publish Docker Image + +on: + workflow_call: + inputs: + environment: + type: string + required: false + description: 'The environment to publish the Docker image to.' + tags: + type: string + required: true + description: 'The tags to apply to the Docker image.' + images: + type: string + required: true + description: 'The images to publish' + workflow_dispatch: + +jobs: + build-and-push-image: + name: Build and Push Docker Image + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + environment: ${{ inputs.environment }} + steps: + + - name: Checkout Repo + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + # Only log in to Docker Hub if the event is a release + if: ${{ inputs.environment == 'docker-publish' }} + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + # default to ghcr.io for workflow_dispatch + images: ${{ inputs.images || format('ghcr.io/{0}', github.repository) }} + # use the branch + sha if workflow_dispatch + tags: ${{ inputs.tags || format('type=raw,value={0}-{1}', github.ref_name, github.sha) }} + + - name: Push to Registry(s) + uses: docker/build-push-action@v5 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + provenance: false + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/release-docker.yaml b/.github/workflows/release-docker.yaml deleted file mode 100644 index 713e54e46..000000000 --- a/.github/workflows/release-docker.yaml +++ /dev/null @@ -1,57 +0,0 @@ -name: Publish docker image - -on: - push: - tags: - - 'v*' - -jobs: - push_to_registries: - runs-on: ubuntu-latest - permissions: - packages: write - contents: read - - environment: docker-publish - - steps: - - uses: actions/checkout@v3 - - # Add support for more platforms with QEMU (optional) - # https://github.com/docker/setup-qemu-action - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Log in to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Log in to the Container registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4 - with: - images: | - vechain/thor - ghcr.io/${{ github.repository }} - - - name: Build and push - uses: docker/build-push-action@v4 - with: - context: . - platforms: linux/amd64,linux/arm64 - push: true - provenance: false - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/test-docker-build.yaml b/.github/workflows/test-docker-build.yaml deleted file mode 100644 index 01f925a50..000000000 --- a/.github/workflows/test-docker-build.yaml +++ /dev/null @@ -1,31 +0,0 @@ -name: Test Docker Buld - -on: - push: - branches: - - master - pull_request: - branches: - - master - -jobs: - docker_build: - runs-on: ubuntu-latest - name: Test Build - - steps: - - uses: actions/checkout@v3 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Build and push - uses: docker/build-push-action@v4 - with: - context: . - platforms: linux/amd64,linux/arm64 - push: false - provenance: false diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ec9c475f6..495be30f0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,14 +1,8 @@ name: Unit Tests -on: - push: - branches: - - 'master' - - 'master-*' - - pull_request: - branches: - - master +on: + # Should only be used by other workflows + workflow_call: jobs: unit_tests: @@ -18,9 +12,9 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] include: - go-version: 1.19.x - os: ubuntu-latest + os: ubuntu-latest - go-version: 1.20.x - os: ubuntu-latest + os: ubuntu-latest runs-on: ${{ matrix.os }} steps: - name: Checkout code @@ -33,11 +27,11 @@ jobs: - name: Make all run: make all - + - name: Make Test id: unit-test run: make test - + - name: Post To Slack if: always() && github.ref == 'refs/heads/master' && (steps.unit-test.outcome == 'failure') uses: slackapi/slack-github-action@v1.24.0 @@ -52,7 +46,7 @@ jobs: env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} test_coverage: - runs-on: ubuntu-latest + runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v3 diff --git a/api/utils/http.go b/api/utils/http.go index baa6f4679..c62b96afd 100644 --- a/api/utils/http.go +++ b/api/utils/http.go @@ -58,7 +58,9 @@ type HandlerFunc func(http.ResponseWriter, *http.Request) error func MetricsWrapHandlerFunc(pathPrefix, endpoint string, f HandlerFunc) http.HandlerFunc { fixedPath := strings.ReplaceAll(pathPrefix, "/", "_") // ensure no unexpected slashes httpReqCounter := telemetry.CounterVec(fixedPath+"_request_count", []string{"path", "code", "method"}) - httpReqDuration := telemetry.HistogramVecWithHTTPBuckets(fixedPath+"_duration_ms", []string{"path", "code", "method"}) + httpReqDuration := telemetry.HistogramVec( + fixedPath+"_duration_ms", []string{"path", "code", "method"}, telemetry.BucketHTTPReqs, + ) return func(w http.ResponseWriter, r *http.Request) { now := time.Now() diff --git a/bft/engine.go b/bft/engine.go index 54718f67e..ac8a1af44 100644 --- a/bft/engine.go +++ b/bft/engine.go @@ -8,7 +8,6 @@ import ( "sort" "sync/atomic" - lru "github.com/hashicorp/golang-lru" "github.com/pkg/errors" "github.com/vechain/thor/v2/block" "github.com/vechain/thor/v2/builtin" @@ -17,13 +16,20 @@ import ( "github.com/vechain/thor/v2/kv" "github.com/vechain/thor/v2/muxdb" "github.com/vechain/thor/v2/state" + "github.com/vechain/thor/v2/telemetry" "github.com/vechain/thor/v2/thor" + + lru "github.com/hashicorp/golang-lru" ) const dataStoreName = "bft.engine" var finalizedKey = []byte("finalized") +var ( + metricsBlocksCommitted = telemetry.LazyLoadCounterVec("block_bft_committed_count", []string{"status"}) +) + // BFTEngine tracks all votes of blocks, computes the finalized checkpoint. // Not thread-safe! type BFTEngine struct { @@ -127,6 +133,7 @@ func (engine *BFTEngine) CommitBlock(header *block.Header, isPacking bool) error return err } engine.finalized.Store(id) + metricsBlocksCommitted().AddWithLabel(1, map[string]string{"status": "finalized"}) } } @@ -142,6 +149,7 @@ func (engine *BFTEngine) CommitBlock(header *block.Header, isPacking bool) error return err } engine.casts.Mark(checkpoint, state.Quality) + metricsBlocksCommitted().AddWithLabel(1, map[string]string{"status": "proposed"}) } return nil diff --git a/cmd/thor/node/metrics.go b/cmd/thor/node/metrics.go new file mode 100644 index 000000000..cb9e60d93 --- /dev/null +++ b/cmd/thor/node/metrics.go @@ -0,0 +1,65 @@ +package node + +import ( + "time" + + "github.com/vechain/thor/v2/telemetry" +) + +var ( + metricBlockProposedCount = telemetry.LazyLoadCounterVec("block_proposed_count", []string{"status"}) + metricBlockProposedTxs = telemetry.LazyLoadCounterVec("block_proposed_tx_count", []string{"status"}) + metricBlockProposedDuration = telemetry.LazyLoadHistogramVec( + "block_proposed_duration_ms", []string{"status"}, telemetry.Bucket10s, + ) + + metricBlockReceivedCount = telemetry.LazyLoadCounterVec("block_received_count", []string{"status"}) + metricBlockReceivedProcessedTxs = telemetry.LazyLoadCounterVec("block_received_processed_tx_count", []string{"status"}) + metricBlockReceivedDuration = telemetry.LazyLoadHistogramVec( + "block_received_duration_ms", []string{"status"}, telemetry.Bucket10s, + ) + + metricChainForkCount = telemetry.LazyLoadCounter("chain_fork_count") + metricChainForkSize = telemetry.LazyLoadGauge("chain_fork_size") +) + +func evalBlockReceivedMetrics(f func() error) error { + startTime := time.Now() + + if err := f(); err != nil { + status := map[string]string{ + "status": "failed", + } + metricBlockReceivedCount().AddWithLabel(1, status) + metricBlockReceivedDuration().ObserveWithLabels(time.Since(startTime).Milliseconds(), status) + return err + } + + status := map[string]string{ + "status": "received", + } + metricBlockReceivedCount().AddWithLabel(1, status) + metricBlockReceivedDuration().ObserveWithLabels(time.Since(startTime).Milliseconds(), status) + return nil +} + +// evalBlockProposeMetrics captures block proposing metrics +func evalBlockProposeMetrics(f func() error) error { + startTime := time.Now() + + if err := f(); err != nil { + status := map[string]string{ + "status": "failed", + } + metricBlockProposedCount().AddWithLabel(1, status) + metricBlockProposedDuration().ObserveWithLabels(time.Since(startTime).Milliseconds(), status) + return err + } + + status := map[string]string{ + "status": "proposed", + } + metricBlockProposedCount().AddWithLabel(1, status) + metricBlockProposedDuration().ObserveWithLabels(time.Since(startTime).Milliseconds(), status) + return nil +} diff --git a/cmd/thor/node/node.go b/cmd/thor/node/node.go index 72f584d57..6b74a82d2 100644 --- a/cmd/thor/node/node.go +++ b/cmd/thor/node/node.go @@ -273,30 +273,32 @@ func (n *Node) txStashLoop(ctx context.Context) { } // guardBlockProcessing adds lock on block processing and maintains block conflicts. -func (n *Node) guardBlockProcessing(blockNum uint32, process func(conflicts uint32) error) error { - n.processLock.Lock() - defer n.processLock.Unlock() - - if blockNum > n.maxBlockNum { - if blockNum > n.maxBlockNum+1 { - // the block is surely unprocessable now - return errBlockTemporaryUnprocessable +func (n *Node) guardBlockProcessing(blockNum uint32, process func(conflicts uint32) error) func() error { + return func() error { + n.processLock.Lock() + defer n.processLock.Unlock() + + if blockNum > n.maxBlockNum { + if blockNum > n.maxBlockNum+1 { + // the block is surely unprocessable now + return errBlockTemporaryUnprocessable + } + n.maxBlockNum = blockNum + return process(0) } - n.maxBlockNum = blockNum - return process(0) - } - conflicts, err := n.repo.ScanConflicts(blockNum) - if err != nil { - return err + conflicts, err := n.repo.ScanConflicts(blockNum) + if err != nil { + return err + } + return process(conflicts) } - return process(conflicts) } func (n *Node) processBlock(newBlock *block.Block, stats *blockStats) (bool, error) { var isTrunk *bool - if err := n.guardBlockProcessing(newBlock.Header().Number(), func(conflicts uint32) error { + if err := evalBlockReceivedMetrics(n.guardBlockProcessing(newBlock.Header().Number(), func(conflicts uint32) error { // Check whether the block was already there. // It can be skipped if no conflicts. if conflicts > 0 { @@ -395,9 +397,10 @@ func (n *Node) processBlock(newBlock *block.Block, stats *blockStats) (bool, err log.Debug("bandwidth updated", "gps", v) } + metricBlockReceivedProcessedTxs().AddWithLabel(int64(len(receipts)), map[string]string{"status": "receivedBlock"}) stats.UpdateProcessed(1, len(receipts), execElapsed, commitElapsed, realElapsed, newBlock.Header().GasUsed()) return nil - }); err != nil { + })); err != nil { switch { case err == errKnownBlock || err == errBFTRejected: stats.UpdateIgnored(1) @@ -486,6 +489,8 @@ func (n *Node) processFork(newBlock *block.Block, oldBestBlockID thor.Bytes32) { } if n := len(sideIds); n >= 2 { + metricChainForkCount().Add(1) + metricChainForkSize().Gauge(int64(len(sideIds))) log.Warn(fmt.Sprintf( `⑂⑂⑂⑂⑂⑂⑂⑂ FORK HAPPENED ⑂⑂⑂⑂⑂⑂⑂⑂ side-chain: %v %v`, diff --git a/cmd/thor/node/packer_loop.go b/cmd/thor/node/packer_loop.go index 60730fe35..9ef8ab1ee 100644 --- a/cmd/thor/node/packer_loop.go +++ b/cmd/thor/node/packer_loop.go @@ -115,7 +115,7 @@ func (n *Node) pack(flow *packer.Flow) error { } }() - return n.guardBlockProcessing(flow.Number(), func(conflicts uint32) error { + return evalBlockProposeMetrics(n.guardBlockProcessing(flow.Number(), func(conflicts uint32) error { var ( startTime = mclock.Now() logEnabled = !n.skipLogs && !n.logDBFailed @@ -191,6 +191,7 @@ func (n *Node) pack(flow *packer.Flow) error { n.processFork(newBlock, oldBest.Header.ID()) commitElapsed := mclock.Now() - startTime - execElapsed + metricBlockProposedTxs().AddWithLabel(int64(len(receipts)), map[string]string{"status": "proposedBlock"}) n.comm.BroadcastBlock(newBlock) log.Info("📦 new block packed", "txs", len(receipts), @@ -203,5 +204,5 @@ func (n *Node) pack(flow *packer.Flow) error { log.Debug("bandwidth updated", "gps", v) } return nil - }) + })) } diff --git a/p2psrv/metrics.go b/p2psrv/metrics.go new file mode 100644 index 000000000..a989b4e12 --- /dev/null +++ b/p2psrv/metrics.go @@ -0,0 +1,9 @@ +package p2psrv + +import "github.com/vechain/thor/v2/telemetry" + +var ( + metricConnectedPeers = telemetry.LazyLoadGauge("p2p_connected_peers_count") + metricDiscoveredNodes = telemetry.LazyLoadGauge("p2p_discovered_node_count") + metricDialNewNode = telemetry.LazyLoadCounter("p2p_dial_new_node_count") +) diff --git a/p2psrv/nodes.go b/p2psrv/nodes.go index 234d30254..76921b286 100644 --- a/p2psrv/nodes.go +++ b/p2psrv/nodes.go @@ -51,6 +51,7 @@ func (nm *nodeMap) Add(node *discover.Node) { nm.lock.Lock() defer nm.lock.Unlock() nm.m[node.ID] = node + metricConnectedPeers().Gauge(1) } func (nm *nodeMap) Remove(id discover.NodeID) *discover.Node { @@ -58,6 +59,7 @@ func (nm *nodeMap) Remove(id discover.NodeID) *discover.Node { defer nm.lock.Unlock() if node, ok := nm.m[id]; ok { delete(nm.m, id) + metricConnectedPeers().Gauge(-1) return node } return nil diff --git a/p2psrv/server.go b/p2psrv/server.go index 609516b41..879c79982 100644 --- a/p2psrv/server.go +++ b/p2psrv/server.go @@ -247,6 +247,7 @@ func (s *Server) discoverLoop(topic discv5.Topic) { case v5node := <-discNodes: node := discover.NewNode(discover.NodeID(v5node.ID), v5node.IP, v5node.UDP, v5node.TCP) if _, found := s.discoveredNodes.Get(node.ID); !found { + metricDiscoveredNodes().Gauge(1) s.discoveredNodes.Set(node.ID, node) log.Debug("discovered node", "node", node) } @@ -300,6 +301,7 @@ func (s *Server) dialLoop() { s.dialingNodes.Add(node) // don't use goes.Go, since the dial process can't be interrupted go func() { + metricDialNewNode().Add(1) if err := s.tryDial(node); err != nil { s.dialingNodes.Remove(node.ID) log.Debug("failed to dial node", "err", err) diff --git a/telemetry/noop.go b/telemetry/noop.go index b8e056c9b..1852513d2 100644 --- a/telemetry/noop.go +++ b/telemetry/noop.go @@ -5,20 +5,21 @@ import "net/http" // noopTelemetry implements a no operations telemetry service type noopTelemetry struct{} -func (n *noopTelemetry) GetOrCreateHistogramVecMeter(string, []string, []int64) HistogramVecMeter { - return &noopTelemetry{} -} - func defaultNoopTelemetry() Telemetry { return &noopTelemetry{} } func (n *noopTelemetry) GetOrCreateHistogramMeter(string, []int64) HistogramMeter { return &noopMetric } - +func (n *noopTelemetry) GetOrCreateHistogramVecMeter(string, []string, []int64) HistogramVecMeter { + return &noopMetric +} func (n *noopTelemetry) GetOrCreateCountMeter(string) CountMeter { return &noopMetric } func (n *noopTelemetry) GetOrCreateCountVecMeter(string, []string) CountVecMeter { return &noopMetric } +func (n *noopTelemetry) GetOrCreateGaugeMeter(string) GaugeMeter { + return &noopMetric +} func (n *noopTelemetry) GetOrCreateGaugeVecMeter(string, []string) GaugeVecMeter { return &noopMetric } @@ -29,6 +30,10 @@ var noopMetric = noopMeters{} type noopMeters struct{} +func (n noopMeters) ObserveWithLabels(i int64, m map[string]string) {} + +func (n noopMeters) Gauge(int64) {} + func (n noopMeters) GaugeWithLabel(int64, map[string]string) {} func (n noopMeters) AddWithLabel(int64, map[string]string) {} diff --git a/telemetry/noop_test.go b/telemetry/noop_test.go index bc0a5b8fd..fed188355 100644 --- a/telemetry/noop_test.go +++ b/telemetry/noop_test.go @@ -26,8 +26,8 @@ func TestNoopTelemetry(t *testing.T) { Counter("count2").Add(1) } - hist := Histogram("hist1") - histVect := HistogramVec("hist2", []string{"zeroOrOne"}) + hist := Histogram("hist1", nil) + histVect := HistogramVec("hist2", []string{"zeroOrOne"}, nil) for i := 0; i < rand.Intn(100)+1; i++ { hist.Observe(int64(i)) histVect.ObserveWithLabels(int64(i), map[string]string{"thisIsNonsense": "butDoesntBreak"}) diff --git a/telemetry/prometheus.go b/telemetry/prometheus.go index aab4ab528..7b94751d0 100644 --- a/telemetry/prometheus.go +++ b/telemetry/prometheus.go @@ -5,9 +5,8 @@ import ( "sync" "github.com/ethereum/go-ethereum/log" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" ) const namespace = "node_telemetry" @@ -27,6 +26,7 @@ type prometheusTelemetry struct { histograms sync.Map histogramVecs sync.Map gaugeVecs sync.Map + gauges sync.Map } func newPrometheusTelemetry() Telemetry { @@ -36,6 +36,7 @@ func newPrometheusTelemetry() Telemetry { histograms: sync.Map{}, histogramVecs: sync.Map{}, gaugeVecs: sync.Map{}, + gauges: sync.Map{}, } } @@ -91,6 +92,18 @@ func (o *prometheusTelemetry) GetOrCreateHistogramVecMeter(name string, labels [ return meter } +func (o *prometheusTelemetry) GetOrCreateGaugeMeter(name string) GaugeMeter { + var meter GaugeMeter + mapItem, ok := o.gauges.Load(name) + if !ok { + meter = o.newGaugeMeter(name) + o.gauges.Store(name, meter) + } else { + meter = mapItem.(GaugeMeter) + } + return meter +} + func (o *prometheusTelemetry) GetOrCreateGaugeVecMeter(name string, labels []string) GaugeVecMeter { var meter GaugeVecMeter mapItem, ok := o.gaugeVecs.Load(name) @@ -203,6 +216,23 @@ func (o *prometheusTelemetry) newCountVecMeter(name string, labels []string) Cou } } +func (o *prometheusTelemetry) newGaugeMeter(name string) GaugeMeter { + meter := prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespace, + Name: name, + }, + ) + + err := prometheus.Register(meter) + if err != nil { + log.Warn("unable to register metric", "err", err) + } + return &promGaugeMeter{ + gauge: meter, + } +} + func (o *prometheusTelemetry) newGaugeVecMeter(name string, labels []string) GaugeVecMeter { meter := prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -237,6 +267,14 @@ func (c *promCountVecMeter) AddWithLabel(i int64, labels map[string]string) { c.counter.With(labels).Add(float64(i)) } +type promGaugeMeter struct { + gauge prometheus.Gauge +} + +func (c *promGaugeMeter) Gauge(i int64) { + c.gauge.Add(float64(i)) +} + type promGaugeVecMeter struct { gauge *prometheus.GaugeVec } diff --git a/telemetry/prometheus_test.go b/telemetry/prometheus_test.go index aa1a5021c..b6125b7ae 100644 --- a/telemetry/prometheus_test.go +++ b/telemetry/prometheus_test.go @@ -12,6 +12,8 @@ import ( ) func TestOtelPromTelemetry(t *testing.T) { + noopGauge := Gauge("noopGauge") + lazyLoadGauge := LazyLoadGauge("lazyGauge") InitializePrometheusTelemetry() server := httptest.NewServer(Handler()) @@ -19,14 +21,24 @@ func TestOtelPromTelemetry(t *testing.T) { server.Close() }) + if _, ok := noopGauge.(*noopMeters); !ok { + t.Error("noopGauge is not nooptelemetry") + } + + if _, ok := lazyLoadGauge().(*promGaugeMeter); !ok { + t.Error("noopGauge is not promGaugeMeter") + } + // 2 ways of accessing it - useful to avoid lookups count1 := Counter("count1") Counter("count2") + countVect := CounterVec("countVec1", []string{"zeroOrOne"}) - hist := HistogramWithHTTPBuckets("hist1") - HistogramVec("hist2", []string{"zeroOrOne"}) + hist := Histogram("hist1", nil) + HistogramVec("hist2", []string{"zeroOrOne"}, nil) - countVect := CounterVec("countVec1", []string{"zeroOrOne"}) + gauge1 := Gauge("gauge1") + gaugeVec := GaugeVec("gaugeVec1", []string{"zeroOrOne"}) count1.Add(1) randCount2 := rand.Intn(100) + 1 @@ -38,7 +50,7 @@ func TestOtelPromTelemetry(t *testing.T) { for i := 0; i < rand.Intn(100)+1; i++ { zeroOrOne := i % 2 hist.Observe(int64(i)) - HistogramVec("hist2", []string{"zeroOrOne"}). + HistogramVec("hist2", []string{"zeroOrOne"}, nil). ObserveWithLabels(int64(i), map[string]string{"zeroOrOne": strconv.Itoa(zeroOrOne)}) histTotal += i } @@ -51,12 +63,12 @@ func TestOtelPromTelemetry(t *testing.T) { totalCountVec += i } - gaugeVec := GaugeVec("gaugeVec1", []string{"zeroOrOne"}) totalGaugeVec := 0 randGaugeVec := rand.Intn(100) + 1 for i := 0; i < randGaugeVec; i++ { zeroOrOne := i % 2 gaugeVec.GaugeWithLabel(int64(i), map[string]string{"zeroOrOne": strconv.Itoa(zeroOrOne)}) + gauge1.Gauge(int64(i)) totalGaugeVec += i } @@ -84,7 +96,33 @@ func TestOtelPromTelemetry(t *testing.T) { metrics["node_telemetry_countVec1"].GetMetric()[1].GetCounter().GetValue() require.Equal(t, sumCountVec, float64(totalCountVec)) + require.Equal(t, metrics["node_telemetry_gauge1"].GetMetric()[0].GetGauge().GetValue(), float64(totalGaugeVec)) sumGaugeVec := metrics["node_telemetry_gaugeVec1"].GetMetric()[0].GetGauge().GetValue() + metrics["node_telemetry_gaugeVec1"].GetMetric()[1].GetGauge().GetValue() require.Equal(t, sumGaugeVec, float64(totalGaugeVec)) } + +func TestLazyLoading(t *testing.T) { + telemetry = defaultNoopTelemetry() // make sure it starts in the default state + + for _, a := range []any{ + Gauge("noopGauge"), + GaugeVec("noopGauge", nil), + Counter("noopCounter"), + CounterVec("noopCounter", nil), + Histogram("noopHist", nil), + HistogramVec("noopHist", nil, nil), + } { + require.IsType(t, &noopMeters{}, a) + } + + // after initialization, newly created metrics become of the prometheus type + InitializePrometheusTelemetry() + + require.IsType(t, &promGaugeMeter{}, LazyLoadGauge("lazyGauge")()) + require.IsType(t, &promGaugeVecMeter{}, LazyLoadGaugeVec("lazyGaugeVec", nil)()) + require.IsType(t, &promCountMeter{}, LazyLoadCounter("lazyCounter")()) + require.IsType(t, &promCountVecMeter{}, LazyLoadCounterVec("lazyCounterVec", nil)()) + require.IsType(t, &promHistogramMeter{}, LazyLoadHistogram("lazyHistogram", nil)()) + require.IsType(t, &promHistogramVecMeter{}, LazyLoadHistogramVec("lazyHistogramVec", nil, nil)()) +} diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index c1e16c207..9525c9e7d 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -10,6 +10,7 @@ var telemetry = defaultNoopTelemetry() // defaults to a Noop implementation of t type Telemetry interface { GetOrCreateCountMeter(name string) CountMeter GetOrCreateCountVecMeter(name string, labels []string) CountVecMeter + GetOrCreateGaugeMeter(name string) GaugeMeter GetOrCreateGaugeVecMeter(name string, labels []string) GaugeVecMeter GetOrCreateHistogramMeter(name string, buckets []int64) HistogramMeter GetOrCreateHistogramVecMeter(name string, labels []string, buckets []int64) HistogramVecMeter @@ -21,33 +22,31 @@ func Handler() http.Handler { return telemetry.GetOrCreateHandler() } +// Define standard buckets for histograms +var ( + Bucket10s = []int64{0, 500, 1000, 2000, 3000, 4000, 5000, 7500, 10_000} + BucketHTTPReqs = []int64{0, 150, 300, 450, 600, 900, 1200, 1500, 3000} +) + // HistogramMeter represents the type of metric that is calculated by aggregating // as a Histogram of all reported measurements over a time interval. type HistogramMeter interface { Observe(int64) } -func Histogram(name string) HistogramMeter { - return telemetry.GetOrCreateHistogramMeter(name, nil) -} -func HistogramWithHTTPBuckets(name string) HistogramMeter { - return telemetry.GetOrCreateHistogramMeter(name, defaultHTTPBuckets) +func Histogram(name string, buckets []int64) HistogramMeter { + return telemetry.GetOrCreateHistogramMeter(name, buckets) } -// HistogramVecMeter //todo +// HistogramVecMeter same as the Histogram but with labels type HistogramVecMeter interface { ObserveWithLabels(int64, map[string]string) } -func HistogramVec(name string, labels []string) HistogramVecMeter { - return telemetry.GetOrCreateHistogramVecMeter(name, labels, nil) -} -func HistogramVecWithHTTPBuckets(name string, labels []string) HistogramVecMeter { - return telemetry.GetOrCreateHistogramVecMeter(name, labels, defaultHTTPBuckets) +func HistogramVec(name string, labels []string, buckets []int64) HistogramVecMeter { + return telemetry.GetOrCreateHistogramVecMeter(name, labels, buckets) } -var defaultHTTPBuckets = []int64{0, 150, 300, 450, 600, 900, 1200, 1500, 3000} - // CountMeter is a cumulative metric that represents a single monotonically increasing counter // whose value can only increase or be reset to zero on restart. type CountMeter interface { @@ -66,6 +65,15 @@ func CounterVec(name string, labels []string) CountVecMeter { return telemetry.GetOrCreateCountVecMeter(name, labels) } +// GaugeMeter ... +type GaugeMeter interface { + Gauge(int64) +} + +func Gauge(name string) GaugeMeter { + return telemetry.GetOrCreateGaugeMeter(name) +} + // GaugeVecMeter ... type GaugeVecMeter interface { GaugeWithLabel(int64, map[string]string) @@ -74,3 +82,53 @@ type GaugeVecMeter interface { func GaugeVec(name string, labels []string) GaugeVecMeter { return telemetry.GetOrCreateGaugeVecMeter(name, labels) } + +// LazyLoad allows to defer the instantiation of the metric while allowing its definition. More clearly: +// - it allow metrics to be defined and used package wide (using var) +// - it avoid metrics definition to determine the singleton to use (noop vs prometheus) +func LazyLoad[T any](f func() T) func() T { + var result T + var loaded bool + return func() T { + if !loaded { + result = f() + loaded = true + } + return result + } +} + +func LazyLoadHistogram(name string, buckets []int64) func() HistogramMeter { + return LazyLoad(func() HistogramMeter { + return Histogram(name, buckets) + }) +} +func LazyLoadHistogramVec(name string, labels []string, buckets []int64) func() HistogramVecMeter { + return LazyLoad(func() HistogramVecMeter { + return HistogramVec(name, labels, buckets) + }) +} + +func LazyLoadCounter(name string) func() CountMeter { + return LazyLoad(func() CountMeter { + return Counter(name) + }) +} + +func LazyLoadCounterVec(name string, labels []string) func() CountVecMeter { + return LazyLoad(func() CountVecMeter { + return CounterVec(name, labels) + }) +} + +func LazyLoadGaugeVec(name string, labels []string) func() GaugeVecMeter { + return LazyLoad(func() GaugeVecMeter { + return GaugeVec(name, labels) + }) +} + +func LazyLoadGauge(name string) func() GaugeMeter { + return LazyLoad(func() GaugeMeter { + return Gauge(name) + }) +} diff --git a/txpool/metrics.go b/txpool/metrics.go new file mode 100644 index 000000000..61fe739f1 --- /dev/null +++ b/txpool/metrics.go @@ -0,0 +1,7 @@ +package txpool + +import ( + "github.com/vechain/thor/v2/telemetry" +) + +var metricTxPoolGauge = telemetry.LazyLoadGaugeVec("txpool_current_tx_count", []string{"source", "total"}) diff --git a/txpool/tx_pool.go b/txpool/tx_pool.go index e4f8ce5c7..542c4717a 100644 --- a/txpool/tx_pool.go +++ b/txpool/tx_pool.go @@ -280,14 +280,16 @@ func (p *TxPool) add(newTx *tx.Transaction, rejectNonExecutable bool, localSubmi return nil } -// Add add new tx into pool. +// Add adds a new tx into pool. // It's not assumed as an error if the tx to be added is already in the pool, func (p *TxPool) Add(newTx *tx.Transaction) error { + metricTxPoolGauge().GaugeWithLabel(1, map[string]string{"source": "remote", "total": "true"}) return p.add(newTx, false, false) } // AddLocal adds new locally submitted tx into pool. func (p *TxPool) AddLocal(newTx *tx.Transaction) error { + metricTxPoolGauge().GaugeWithLabel(1, map[string]string{"source": "local", "total": "true"}) return p.add(newTx, false, true) } @@ -307,6 +309,7 @@ func (p *TxPool) StrictlyAdd(newTx *tx.Transaction) error { // Remove removes tx from pool by its Hash. func (p *TxPool) Remove(txHash thor.Bytes32, txID thor.Bytes32) bool { if p.all.RemoveByHash(txHash) { + metricTxPoolGauge().GaugeWithLabel(-1, map[string]string{"source": "n/a", "total": "true"}) log.Debug("tx removed", "id", txID) return true }