Skip to content

Commit

Permalink
feat: naive fixes and optimizations for CreatedTimestamp function (p…
Browse files Browse the repository at this point in the history
…rometheus#14965)

* enhance: wip ct parse optimizations

Signed-off-by: Manik Rana <[email protected]>

* feat: further work on optimization

Signed-off-by: Manik Rana <[email protected]>

* feat: further improvements and remove unused code

Signed-off-by: Manik Rana <[email protected]>

* feat: improve optimizations and fix some CT parse errors

Signed-off-by: Manik Rana <[email protected]>

* fix: check for LsetHash along with name

Signed-off-by: Manik Rana <[email protected]>

* chore: cleanup and documentation

Signed-off-by: Manik Rana <[email protected]>

* enhance: improve comments and add cleaner functions

Signed-off-by: Manik Rana <[email protected]>

* feat: improve comments and add cleaner functions

Signed-off-by: Manik Rana <[email protected]>

* chore: rename to resetCTParseValues

Signed-off-by: Manik Rana <[email protected]>

* fix: post-merge fixes

Signed-off-by: Manik Rana <[email protected]>

* fix: add all possible reserved suffixes

Signed-off-by: Manik Rana <[email protected]>

* test: separate CT values for each metric

Signed-off-by: Manik Rana <[email protected]>

---------

Signed-off-by: Manik Rana <[email protected]>
Signed-off-by: Manik Rana <[email protected]>
  • Loading branch information
Maniktherana authored Oct 4, 2024
1 parent 023146e commit 47aeca9
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 181 deletions.
106 changes: 72 additions & 34 deletions model/textparse/openmetricsparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ type OpenMetricsParser struct {
exemplarTs int64
hasExemplarTs bool

// Created timestamp parsing state.
ct int64
ctHashSet uint64
// visitedName is the metric name of the last visited metric when peeking ahead
// for _created series during the execution of the CreatedTimestamp method.
visitedName string
skipCTSeries bool
}

Expand Down Expand Up @@ -254,6 +260,9 @@ func (p *OpenMetricsParser) Exemplar(e *exemplar.Exemplar) bool {
func (p *OpenMetricsParser) CreatedTimestamp() *int64 {
if !typeRequiresCT(p.mtype) {
// Not a CT supported metric type, fast path.
p.ct = 0
p.visitedName = ""
p.ctHashSet = 0
return nil
}

Expand All @@ -264,27 +273,44 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 {
)
p.Metric(&currLset)
currFamilyLsetHash, buf := currLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile")
// Search for the _created line for the currFamilyLsetHash using ephemeral parser until
// we see EOF or new metric family. We have to do it as we don't know where (and if)
// that CT line is.
// TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this.
peek := deepCopy(p)
currName := currLset.Get(model.MetricNameLabel)
currName = findBaseMetricName(currName)

// make sure we're on a new metric before returning
if currName == p.visitedName && currFamilyLsetHash == p.ctHashSet && p.visitedName != "" && p.ctHashSet > 0 && p.ct > 0 {
// CT is already known, fast path.
return &p.ct
}

// Create a new lexer to reset the parser once this function is done executing.
resetLexer := &openMetricsLexer{
b: p.l.b,
i: p.l.i,
start: p.l.start,
err: p.l.err,
state: p.l.state,
}

p.skipCTSeries = false

for {
eType, err := peek.Next()
eType, err := p.Next()
if err != nil {
// This means peek will give error too later on, so def no CT line found.
// This means p.Next() will give error too later on, so def no CT line found.
// This might result in partial scrape with wrong/missing CT, but only
// spec improvement would help.
// TODO(bwplotka): Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this.
// TODO: Make sure OM 1.1/2.0 pass CT via metadata or exemplar-like to avoid this.
p.resetCTParseValues(resetLexer)
return nil
}
if eType != EntrySeries {
// Assume we hit different family, no CT line found.
p.resetCTParseValues(resetLexer)
return nil
}

var peekedLset labels.Labels
peek.Metric(&peekedLset)
p.Metric(&peekedLset)
peekedName := peekedLset.Get(model.MetricNameLabel)
if !strings.HasSuffix(peekedName, "_created") {
// Not a CT line, search more.
Expand All @@ -294,17 +320,52 @@ func (p *OpenMetricsParser) CreatedTimestamp() *int64 {
// We got a CT line here, but let's search if CT line is actually for our series, edge case.
peekWithoutNameLsetHash, _ = peekedLset.HashWithoutLabels(buf, labels.MetricName, "le", "quantile")
if peekWithoutNameLsetHash != currFamilyLsetHash {
// CT line for a different series, for our series no CT.
// Found CT line for a different series, for our series no CT.
p.resetCTParseValues(resetLexer)
return nil
}

// All timestamps in OpenMetrics are Unix Epoch in seconds. Convert to milliseconds.
// https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#timestamps
ct := int64(peek.val * 1000.0)
ct := int64(p.val * 1000.0)
p.setCTParseValues(ct, currFamilyLsetHash, currName, true, resetLexer)
return &ct
}
}

// setCTParseValues records a successfully parsed created timestamp on the parser and
// installs resetLexer as the active lexer. The stored ct, ctHashSet and visitedName are
// the values CreatedTimestamp compares against to return early instead of parsing the
// same series again.
func (p *OpenMetricsParser) setCTParseValues(ct int64, ctHashSet uint64, visitedName string, skipCTSeries bool, resetLexer *openMetricsLexer) {
	// Swap the lexer first, then cache the results of the CT lookup.
	p.l = resetLexer
	p.skipCTSeries = skipCTSeries
	p.visitedName, p.ctHashSet, p.ct = visitedName, ctHashSet, ct
}

// resetCTParseValues installs resetLexer as the parser's lexer and clears the cached
// created-timestamp state (ct, ctHashSet, visitedName), leaving skipCTSeries set to true.
// CreatedTimestamp calls it on every path where no CT line was found for the current series.
func (p *OpenMetricsParser) resetCTParseValues(resetLexer *openMetricsLexer) {
	p.ct, p.ctHashSet, p.visitedName = 0, 0, ""
	p.skipCTSeries = true
	p.l = resetLexer
}

// findBaseMetricName strips a single reserved OpenMetrics suffix ("_created", "_sum",
// "_total", ...) from name and returns what is left. The suffixes are tried in a fixed
// order and only the first one that matches is removed. A name that carries none of
// them is returned unchanged. The suffix list follows the OpenMetrics specification:
// https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md.
func findBaseMetricName(name string) string {
	reserved := []string{"_created", "_count", "_sum", "_bucket", "_total", "_gcount", "_gsum", "_info"}
	for i := range reserved {
		// TrimSuffix only shortens the string when the suffix is actually present.
		if base := strings.TrimSuffix(name, reserved[i]); len(base) != len(name) {
			return base
		}
	}
	return name
}

// typeRequiresCT returns true if the metric type requires a _created timestamp.
func typeRequiresCT(t model.MetricType) bool {
switch t {
Expand All @@ -315,29 +376,6 @@ func typeRequiresCT(t model.MetricType) bool {
}
}

// deepCopy builds a new parser whose lexer reads from a freshly allocated copy of the
// source parser's input bytes, so the copy does not share that slice's memory with p.
// The lexer position and state (i, start, err, state) plus the parser's builder, mtype
// and val are carried over; skipCTSeries is always false on the copy.
func deepCopy(p *OpenMetricsParser) OpenMetricsParser {
	src := p.l

	// Duplicate the input so the two lexers never alias the same backing array.
	input := make([]byte, len(src.b))
	copy(input, src.b)

	return OpenMetricsParser{
		l: &openMetricsLexer{
			b:     input,
			i:     src.i,
			start: src.start,
			err:   src.err,
			state: src.state,
		},
		builder:      p.builder,
		mtype:        p.mtype,
		val:          p.val,
		skipCTSeries: false,
	}
}

// nextToken returns the next token from the openMetricsLexer.
func (p *OpenMetricsParser) nextToken() token {
tok := p.l.Lex()
Expand Down
Loading

0 comments on commit 47aeca9

Please sign in to comment.