diff --git a/docs/blocks-storage/compactor.md b/docs/blocks-storage/compactor.md
index 030035f2ac..f5539511ca 100644
--- a/docs/blocks-storage/compactor.md
+++ b/docs/blocks-storage/compactor.md
@@ -286,18 +286,18 @@ compactor:
   [wait_active_instance_timeout: <duration> | default = 10m]
 
   # The compaction strategy to use. Supported values are: default, partitioning.
-  # CLI flag: -compactor.compaction-mode
-  [compaction_mode: <string> | default = "default"]
+  # CLI flag: -compactor.compaction-strategy
+  [compaction_strategy: <string> | default = "default"]
 
-  # How long block visit marker file should be considered as expired and able to
-  # be picked up by compactor again.
-  # CLI flag: -compactor.block-visit-marker-timeout
-  [block_visit_marker_timeout: <duration> | default = 5m]
+  # How long compaction visit marker file should be considered as expired and
+  # able to be picked up by compactor again.
+  # CLI flag: -compactor.compaction-visit-marker-timeout
+  [compaction_visit_marker_timeout: <duration> | default = 10m]
 
-  # How frequently block visit marker file should be updated duration
+  # How frequently compaction visit marker file should be updated during
   # compaction.
-  # CLI flag: -compactor.block-visit-marker-file-update-interval
-  [block_visit_marker_file_update_interval: <duration> | default = 1m]
+  # CLI flag: -compactor.compaction-visit-marker-file-update-interval
+  [compaction_visit_marker_file_update_interval: <duration> | default = 1m]
 
   # How long cleaner visit marker file should be considered as expired and able
   # to be picked up by cleaner again. The value should be smaller than
diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
index 5479bb4d00..03d0bcf594 100644
--- a/docs/configuration/config-file-reference.md
+++ b/docs/configuration/config-file-reference.md
@@ -2331,17 +2331,18 @@ sharding_ring:
   [wait_active_instance_timeout: <duration> | default = 10m]
 
 # The compaction strategy to use. Supported values are: default, partitioning.
-# CLI flag: -compactor.compaction-mode
-[compaction_mode: <string> | default = "default"]
+# CLI flag: -compactor.compaction-strategy
+[compaction_strategy: <string> | default = "default"]
 
-# How long block visit marker file should be considered as expired and able to
-# be picked up by compactor again.
-# CLI flag: -compactor.block-visit-marker-timeout
-[block_visit_marker_timeout: <duration> | default = 5m]
+# How long compaction visit marker file should be considered as expired and able
+# to be picked up by compactor again.
+# CLI flag: -compactor.compaction-visit-marker-timeout
+[compaction_visit_marker_timeout: <duration> | default = 10m]
 
-# How frequently block visit marker file should be updated duration compaction.
-# CLI flag: -compactor.block-visit-marker-file-update-interval
-[block_visit_marker_file_update_interval: <duration> | default = 1m]
+# How frequently compaction visit marker file should be updated during
+# compaction.
+# CLI flag: -compactor.compaction-visit-marker-file-update-interval
+[compaction_visit_marker_file_update_interval: <duration> | default = 1m]
 
 # How long cleaner visit marker file should be considered as expired and able to
 # be picked up by cleaner again. The value should be smaller than
@@ -3592,6 +3593,14 @@ query_rejection:
 # CLI flag: -compactor.tenant-shard-size
 [compactor_tenant_shard_size: <int> | default = 0]
 
+# Index size limit in bytes for each compaction partition. 0 means no limit
+# CLI flag: -compactor.partition-index-size-bytes
+[compactor_partition_index_size_bytes: <int> | default = 68719476736]
+
+# Time series count limit for each compaction partition.
0 means no limit +# CLI flag: -compactor.partition-series-count +[compactor_partition_series_count: | default = 0] + # S3 server-side encryption type. Required to enable server-side encryption # overrides for a specific tenant. If not set, the default S3 client settings # are used. diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index a0a7e65e68..c50a98ca48 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -60,7 +60,7 @@ var ( errInvalidCompactionStrategy = errors.New("invalid compaction strategy") errInvalidCompactionStrategyPartitioning = errors.New("compaction strategy partitioning can only be enabled when shuffle sharding is enabled") - DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter) compact.Grouper { + DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter, _ int) compact.Grouper { return compact.NewDefaultGrouperWithMetrics( logger, bkt, @@ -79,9 +79,31 @@ var ( cfg.BlocksFetchConcurrency) } - ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Grouper { + ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ingestionReplicationFactor int) compact.Grouper { if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { - return NewPartitionCompactionGrouper(ctx, logger, bkt) + return NewPartitionCompactionGrouper( + ctx, + logger, + bkt, + cfg.AcceptMalformedIndex, + true, // Enable vertical compaction + blocksMarkedForNoCompaction, + syncerMetrics, + compactorMetrics, + metadata.NoneFunc, + cfg, + ring, + ringLifecycle.Addr, + ringLifecycle.ID, + limits, + userID, + cfg.BlockFilesConcurrency, + cfg.BlocksFetchConcurrency, + cfg.CompactionConcurrency, + true, + cfg.CompactionVisitMarkerTimeout, + noCompactionMarkFilter.NoCompactMarkedBlocks, + ingestionReplicationFactor) } else { return NewShuffleShardingGrouper( ctx, @@ -102,7 +124,7 @@ var ( cfg.BlockFilesConcurrency, cfg.BlocksFetchConcurrency, cfg.CompactionConcurrency, - cfg.BlockVisitMarkerTimeout, + cfg.CompactionVisitMarkerTimeout, blockVisitMarkerReadFailed, 
blockVisitMarkerWriteFailed, noCompactionMarkFilter.NoCompactMarkedBlocks) @@ -133,7 +155,7 @@ var ( if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { return NewPartitionCompactionPlanner(ctx, bkt, logger) } else { - return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.BlockVisitMarkerTimeout, cfg.BlockVisitMarkerFileUpdateInterval, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) + return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.CompactionVisitMarkerTimeout, cfg.CompactionVisitMarkerFileUpdateInterval, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) } } return compactor, plannerFactory, nil @@ -156,6 +178,7 @@ type BlocksGrouperFactory func( limit Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, + ingestionReplicationFactor int, ) compact.Grouper // BlocksCompactorFactory builds and returns the compactor and planner to use to compact a tenant's blocks. @@ -182,6 +205,8 @@ type PlannerFactory func( // Limits defines limits used by the Compactor. type Limits interface { CompactorTenantShardSize(userID string) int + CompactorPartitionIndexSizeBytes(userID string) int64 + CompactorPartitionSeriesCount(userID string) int64 } // Config holds the Compactor config. @@ -213,8 +238,8 @@ type Config struct { ShardingStrategy string `yaml:"sharding_strategy"` ShardingRing RingConfig `yaml:"sharding_ring"` - // Compaction mode. - CompactionStrategy string `yaml:"compaction_mode"` + // Compaction strategy. + CompactionStrategy string `yaml:"compaction_strategy"` // No need to add options to customize the retry backoff, // given the defaults should be fine, but allow to override @@ -226,9 +251,9 @@ type Config struct { BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` - // Block visit marker file config - BlockVisitMarkerTimeout time.Duration `yaml:"block_visit_marker_timeout"` - BlockVisitMarkerFileUpdateInterval time.Duration `yaml:"block_visit_marker_file_update_interval"` + // Compaction visit marker file config + CompactionVisitMarkerTimeout time.Duration `yaml:"compaction_visit_marker_timeout"` + CompactionVisitMarkerFileUpdateInterval time.Duration `yaml:"compaction_visit_marker_file_update_interval"` // Cleaner visit marker file config CleanerVisitMarkerTimeout time.Duration `yaml:"cleaner_visit_marker_timeout"` @@ -258,7 +283,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.") f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across multiple compactor instances. Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.") f.StringVar(&cfg.ShardingStrategy, "compactor.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) - f.StringVar(&cfg.CompactionStrategy, "compactor.compaction-mode", util.CompactionStrategyDefault, fmt.Sprintf("The compaction strategy to use. 
Supported values are: %s.", strings.Join(supportedCompactionStrategies, ", "))) + f.StringVar(&cfg.CompactionStrategy, "compactor.compaction-strategy", util.CompactionStrategyDefault, fmt.Sprintf("The compaction strategy to use. Supported values are: %s.", strings.Join(supportedCompactionStrategies, ", "))) f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+ "If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+ "If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.") @@ -271,8 +296,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") - f.DurationVar(&cfg.BlockVisitMarkerTimeout, "compactor.block-visit-marker-timeout", 5*time.Minute, "How long block visit marker file should be considered as expired and able to be picked up by compactor again.") - f.DurationVar(&cfg.BlockVisitMarkerFileUpdateInterval, "compactor.block-visit-marker-file-update-interval", 1*time.Minute, "How frequently block visit marker file should be updated duration compaction.") + f.DurationVar(&cfg.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 10*time.Minute, "How long compaction visit marker file should be considered as expired and able to be picked up by compactor again.") + f.DurationVar(&cfg.CompactionVisitMarkerFileUpdateInterval, "compactor.compaction-visit-marker-file-update-interval", 1*time.Minute, "How frequently compaction visit marker file should be updated duration compaction.") f.DurationVar(&cfg.CleanerVisitMarkerTimeout, "compactor.cleaner-visit-marker-timeout", 10*time.Minute, "How long cleaner visit marker file should be considered as expired and able to be picked up by cleaner again. The value should be smaller than -compactor.cleanup-interval") f.DurationVar(&cfg.CleanerVisitMarkerFileUpdateInterval, "compactor.cleaner-visit-marker-file-update-interval", 5*time.Minute, "How frequently cleaner visit marker file should be updated when cleaning user.") @@ -305,7 +330,7 @@ func (cfg *Config) Validate(limits validation.Limits) error { } } - // Make sure a valid compaction mode is being used + // Make sure a valid compaction strategy is being used if !util.StringsContain(supportedCompactionStrategies, cfg.CompactionStrategy) { return errInvalidCompactionStrategy } @@ -379,10 +404,13 @@ type Compactor struct { // Thanos compactor metrics per user compactorMetrics *compactorMetrics + + // Replication factor of ingester ring + ingestionReplicationFactor int } // NewCompactor makes a new Compactor. 
-func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides) (*Compactor, error) { +func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides, ingestionReplicationFactor int) (*Compactor, error) { bucketClientFactory := func(ctx context.Context) (objstore.InstrumentedBucket, error) { return bucket.NewClient(ctx, storageCfg.Bucket, nil, "compactor", logger, registerer) } @@ -405,7 +433,11 @@ func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfi } } - cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits) + if ingestionReplicationFactor <= 0 { + ingestionReplicationFactor = 1 + } + + cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits, ingestionReplicationFactor) if err != nil { return nil, errors.Wrap(err, "failed to create Cortex blocks compactor") } @@ -422,6 +454,7 @@ func newCompactor( blocksGrouperFactory BlocksGrouperFactory, blocksCompactorFactory BlocksCompactorFactory, limits *validation.Overrides, + ingestionReplicationFactor int, ) (*Compactor, error) { var compactorMetrics *compactorMetrics if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { @@ -496,8 +529,9 @@ func newCompactor( Name: "cortex_compactor_block_visit_marker_write_failed", Help: "Number of block visit marker file failed to be written.", }), - limits: limits, - compactorMetrics: compactorMetrics, + limits: limits, + compactorMetrics: compactorMetrics, + ingestionReplicationFactor: ingestionReplicationFactor, } if len(compactorCfg.EnabledTenants) > 0 { @@ -761,6 +795,7 @@ func (c *Compactor) compactUsers(ctx context.Context) { continue } else if markedForDeletion { c.CompactionRunSkippedTenants.Inc() + c.compactorMetrics.deleteMetricsForDeletedTenant(userID) level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "user", userID) continue } @@ -929,7 +964,7 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { compactor, err := compact.NewBucketCompactor( ulogger, syncer, - c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter), + c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter, c.ingestionReplicationFactor), c.blocksPlannerFactory(currentCtx, bucket, ulogger, c.compactorCfg, noCompactMarkerFilter, c.ringLifecycler, userID, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, c.compactorMetrics), c.blocksCompactor, c.compactDirForUser(userID), diff --git a/pkg/compactor/compactor_metrics.go b/pkg/compactor/compactor_metrics.go index bdd3fefef0..e14fb9a0dc 100644 --- a/pkg/compactor/compactor_metrics.go +++ b/pkg/compactor/compactor_metrics.go @@ -38,6 +38,7 @@ type compactorMetrics struct { verticalCompactions *prometheus.CounterVec remainingPlannedCompactions *prometheus.GaugeVec 
compactionErrorsCount *prometheus.CounterVec + partitionCount *prometheus.GaugeVec } const ( @@ -169,6 +170,10 @@ func newCompactorMetricsWithLabels(reg prometheus.Registerer, commonLabels []str Name: "cortex_compactor_compaction_error_total", Help: "Total number of errors from compactions.", }, append(commonLabels, compactionErrorTypesLabelName)) + m.partitionCount = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compactor_group_partition_count", + Help: "Number of partitions for each compaction group.", + }, compactionLabels) return &m } @@ -207,3 +212,28 @@ func (m *compactorMetrics) getCommonLabelValues(userID string) []string { } return labelValues } + +func (m *compactorMetrics) initMetricWithCompactionLabelValues(labelValue ...string) { + if len(m.compactionLabels) != len(commonLabels)+len(compactionLabels) { + return + } + + m.compactions.WithLabelValues(labelValue...) + m.compactionPlanned.WithLabelValues(labelValue...) + m.compactionRunsStarted.WithLabelValues(labelValue...) + m.compactionRunsCompleted.WithLabelValues(labelValue...) + m.compactionFailures.WithLabelValues(labelValue...) + m.verticalCompactions.WithLabelValues(labelValue...) + m.partitionCount.WithLabelValues(labelValue...) +} + +func (m *compactorMetrics) deleteMetricsForDeletedTenant(userID string) { + m.syncerBlocksMarkedForDeletion.DeleteLabelValues(userID) + m.compactions.DeleteLabelValues(userID) + m.compactionPlanned.DeleteLabelValues(userID) + m.compactionRunsStarted.DeleteLabelValues(userID) + m.compactionRunsCompleted.DeleteLabelValues(userID) + m.compactionFailures.DeleteLabelValues(userID) + m.verticalCompactions.DeleteLabelValues(userID) + m.partitionCount.DeleteLabelValues(userID) +} diff --git a/pkg/compactor/compactor_metrics_test.go b/pkg/compactor/compactor_metrics_test.go index da4bb82025..f2a13276cd 100644 --- a/pkg/compactor/compactor_metrics_test.go +++ b/pkg/compactor/compactor_metrics_test.go @@ -130,6 +130,11 @@ func TestSyncerMetrics(t *testing.T) { cortex_compactor_compaction_error_total{type="unauthorized",user="aaa"} 477730 cortex_compactor_compaction_error_total{type="unauthorized",user="bbb"} 488840 cortex_compactor_compaction_error_total{type="unauthorized",user="ccc"} 499950 + # HELP cortex_compactor_group_partition_count Number of partitions for each compaction group. 
+ # TYPE cortex_compactor_group_partition_count gauge + cortex_compactor_group_partition_count{user="aaa"} 511060 + cortex_compactor_group_partition_count{user="bbb"} 522170 + cortex_compactor_group_partition_count{user="ccc"} 533280 `)) require.NoError(t, err) @@ -183,4 +188,7 @@ func generateTestData(cm *compactorMetrics, base float64) { cm.compactionErrorsCount.WithLabelValues("aaa", unauthorizedError).Add(43 * base) cm.compactionErrorsCount.WithLabelValues("bbb", unauthorizedError).Add(44 * base) cm.compactionErrorsCount.WithLabelValues("ccc", unauthorizedError).Add(45 * base) + cm.partitionCount.WithLabelValues("aaa").Add(46 * base) + cm.partitionCount.WithLabelValues("bbb").Add(47 * base) + cm.partitionCount.WithLabelValues("ccc").Add(48 * base) } diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index 09a3929d64..fd67e6b650 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -1578,7 +1578,7 @@ func prepare(t *testing.T, compactorCfg Config, bucketClient objstore.Instrument blocksGrouperFactory = DefaultBlocksGrouperFactory } - c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides) + c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides, 1) require.NoError(t, err) return c, tsdbCompactor, tsdbPlanner, logs, registry diff --git a/pkg/compactor/partition_compaction_grouper.go b/pkg/compactor/partition_compaction_grouper.go index c3687f7e6a..1340093ab2 100644 --- a/pkg/compactor/partition_compaction_grouper.go +++ b/pkg/compactor/partition_compaction_grouper.go @@ -2,37 +2,922 @@ package compactor import ( "context" + "fmt" + "math" + "math/rand" + "sort" + "strings" + "time" "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/model/labels" "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/compact" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +var ( + DUMMY_BLOCK_ID = ulid.ULID{} ) type PartitionCompactionGrouper struct { - ctx context.Context - logger log.Logger - bkt objstore.InstrumentedBucket + ctx context.Context + logger log.Logger + bkt objstore.InstrumentedBucket + acceptMalformedIndex bool + enableVerticalCompaction bool + blocksMarkedForNoCompact prometheus.Counter + hashFunc metadata.HashFunc + syncerMetrics *compact.SyncerMetrics + compactorMetrics *compactorMetrics + compactorCfg Config + limits Limits + userID string + blockFilesConcurrency int + blocksFetchConcurrency int + compactionConcurrency int + + doRandomPick bool + + ring ring.ReadRing + ringLifecyclerAddr string + ringLifecyclerID string + + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark + partitionVisitMarkerTimeout time.Duration + + ingestionReplicationFactor int } func NewPartitionCompactionGrouper( ctx context.Context, logger log.Logger, bkt objstore.InstrumentedBucket, + acceptMalformedIndex bool, + enableVerticalCompaction bool, + blocksMarkedForNoCompact prometheus.Counter, + syncerMetrics *compact.SyncerMetrics, + compactorMetrics *compactorMetrics, + hashFunc metadata.HashFunc, + compactorCfg Config, + ring ring.ReadRing, + 
ringLifecyclerAddr string, + ringLifecyclerID string, + limits Limits, + userID string, + blockFilesConcurrency int, + blocksFetchConcurrency int, + compactionConcurrency int, + doRandomPick bool, + partitionVisitMarkerTimeout time.Duration, + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark, + ingestionReplicationFactor int, ) *PartitionCompactionGrouper { if logger == nil { logger = log.NewNopLogger() } return &PartitionCompactionGrouper{ - ctx: ctx, - logger: logger, - bkt: bkt, + ctx: ctx, + logger: logger, + bkt: bkt, + acceptMalformedIndex: acceptMalformedIndex, + enableVerticalCompaction: enableVerticalCompaction, + blocksMarkedForNoCompact: blocksMarkedForNoCompact, + hashFunc: hashFunc, + syncerMetrics: syncerMetrics, + compactorMetrics: compactorMetrics, + compactorCfg: compactorCfg, + ring: ring, + ringLifecyclerAddr: ringLifecyclerAddr, + ringLifecyclerID: ringLifecyclerID, + limits: limits, + userID: userID, + blockFilesConcurrency: blockFilesConcurrency, + blocksFetchConcurrency: blocksFetchConcurrency, + compactionConcurrency: compactionConcurrency, + doRandomPick: doRandomPick, + partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, + noCompBlocksFunc: noCompBlocksFunc, + ingestionReplicationFactor: ingestionReplicationFactor, } } // Groups function modified from https://github.com/cortexproject/cortex/pull/2616 func (g *PartitionCompactionGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (res []*compact.Group, err error) { - panic("PartitionCompactionGrouper not implemented") + // Check if this compactor is on the subring. + // If the compactor is not on the subring when using the userID as a identifier + // no plans generated below will be owned by the compactor so we can just return an empty array + // as there will be no planned groups + onSubring, err := g.checkSubringForCompactor() + if err != nil { + return nil, errors.Wrap(err, "unable to check sub-ring for compactor ownership") + } + if !onSubring { + level.Debug(g.logger).Log("msg", "compactor is not on the current sub-ring skipping user", "user", g.userID) + return nil, nil + } + + // Filter out no compact blocks + noCompactMarked := g.noCompBlocksFunc() + for id, b := range blocks { + if _, excluded := noCompactMarked[b.ULID]; excluded { + delete(blocks, id) + } + } + + partitionCompactionJobs, err := g.generateCompactionJobs(blocks) + if err != nil { + return nil, errors.Wrap(err, "unable to generate compaction jobs") + } + + pickedPartitionCompactionJobs := g.pickPartitionCompactionJob(partitionCompactionJobs) + + return pickedPartitionCompactionJobs, nil +} + +// Check whether this compactor exists on the subring based on user ID +func (g *PartitionCompactionGrouper) checkSubringForCompactor() (bool, error) { + subRing := g.ring.ShuffleShard(g.userID, g.limits.CompactorTenantShardSize(g.userID)) + + rs, err := subRing.GetAllHealthy(RingOp) + if err != nil { + return false, err + } + + return rs.Includes(g.ringLifecyclerAddr), nil +} + +func (g *PartitionCompactionGrouper) generateCompactionJobs(blocks map[ulid.ULID]*metadata.Meta) ([]*blocksGroupWithPartition, error) { + timeRanges := g.compactorCfg.BlockRanges.ToMilliseconds() + + groups := g.groupBlocks(blocks, timeRanges) + + existingPartitionedGroups, err := g.loadExistingPartitionedGroups() + if err != nil { + return nil, err + } + + var blockIDs []string + for _, p := range existingPartitionedGroups { + blockIDs = p.getAllBlockIDs() + level.Info(g.logger).Log("msg", "existing partitioned group", "partitioned_group_id", 
p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + allPartitionedGroup, err := g.generatePartitionedGroups(blocks, groups, existingPartitionedGroups, timeRanges) + if err != nil { + return nil, err + } + g.sortPartitionedGroups(allPartitionedGroup) + for _, p := range allPartitionedGroup { + blockIDs = p.getAllBlockIDs() + level.Info(g.logger).Log("msg", "partitioned group ready for compaction", "partitioned_group_id", p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + partitionCompactionJobs := g.generatePartitionCompactionJobs(blocks, allPartitionedGroup, g.doRandomPick) + for _, p := range partitionCompactionJobs { + blockIDs = p.getBlockIDs() + level.Info(g.logger).Log("msg", "partitioned compaction job", "partitioned_group_id", p.partitionedGroupInfo.PartitionedGroupID, "partition_id", p.partition.PartitionID, "partition_count", p.partitionedGroupInfo.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + return partitionCompactionJobs, nil +} + +func (g *PartitionCompactionGrouper) loadExistingPartitionedGroups() (map[uint32]*PartitionedGroupInfo, error) { + partitionedGroups := make(map[uint32]*PartitionedGroupInfo) + err := g.bkt.Iter(g.ctx, PartitionedGroupDirectory, func(file string) error { + if !strings.Contains(file, PartitionVisitMarkerDirectory) { + partitionedGroup, err := ReadPartitionedGroupInfoFile(g.ctx, g.bkt, g.logger, file) + if err != nil { + return err + } + partitionedGroups[partitionedGroup.PartitionedGroupID] = partitionedGroup + } + return nil + }) + if err != nil { + return nil, errors.Wrap(err, "unable to load existing partitioned groups") + } + return partitionedGroups, nil +} + +func (g *PartitionCompactionGrouper) groupBlocks(blocks map[ulid.ULID]*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition { + // First of all we have to group blocks using the Thanos default + // grouping (based on downsample resolution + external labels). + mainGroups := map[string][]*metadata.Meta{} + for _, b := range blocks { + key := b.Thanos.GroupKey() + mainGroups[key] = append(mainGroups[key], b) + } + + var groups []blocksGroupWithPartition + for _, mainBlocks := range mainGroups { + groups = append(groups, g.groupBlocksByCompactableRanges(mainBlocks, timeRanges)...) + } + + g.sortBlockGroups(groups) + + return groups +} + +func (g *PartitionCompactionGrouper) groupBlocksByCompactableRanges(blocks []*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition { + if len(blocks) == 0 { + return nil + } + + // Sort blocks by min time. + sortMetasByMinTime(blocks) + + var groups []blocksGroupWithPartition + + for _, tr := range timeRanges { + groups = append(groups, g.groupBlocksByRange(blocks, tr)...) + } + + return groups +} + +func (g *PartitionCompactionGrouper) groupBlocksByRange(blocks []*metadata.Meta, tr int64) []blocksGroupWithPartition { + var ret []blocksGroupWithPartition + + for i := 0; i < len(blocks); { + var ( + group blocksGroupWithPartition + m = blocks[i] + ) + + group.rangeStart = getRangeStart(m, tr) + group.rangeEnd = group.rangeStart + tr + + // Skip blocks that don't fall into the range. This can happen via mis-alignment or + // by being the multiple of the intended range. 
+ if m.MaxTime > group.rangeEnd { + i++ + continue + } + + // Add all blocks to the current group that are within [t0, t0+tr]. + for ; i < len(blocks); i++ { + // If the block does not start within this group, then we should break the iteration + // and move it to the next group. + if blocks[i].MinTime >= group.rangeEnd { + break + } + + // If the block doesn't fall into this group, but it started within this group then it + // means it spans across multiple ranges and we should skip it. + if blocks[i].MaxTime > group.rangeEnd { + continue + } + + group.blocks = append(group.blocks, blocks[i]) + } + + if len(group.blocks) > 1 { + ret = append(ret, group) + } + } + + return ret +} + +func (g *PartitionCompactionGrouper) sortBlockGroups(groups []blocksGroupWithPartition) { + // Ensure groups are sorted by smallest range, oldest min time first. The rationale + // is that we wanna favor smaller ranges first (ie. to deduplicate samples sooner + // than later) and older ones are more likely to be "complete" (no missing block still + // to be uploaded). + sort.SliceStable(groups, func(i, j int) bool { + iGroup := groups[i] + jGroup := groups[j] + iRangeStart := iGroup.rangeStart + iRangeEnd := iGroup.rangeEnd + jRangeStart := jGroup.rangeStart + jRangeEnd := jGroup.rangeEnd + iLength := iRangeEnd - iRangeStart + jLength := jRangeEnd - jRangeStart + + if iLength != jLength { + return iLength < jLength + } + if iRangeStart != jRangeStart { + return iRangeStart < jRangeStart + } + + iGroupHash := hashGroup(g.userID, iRangeStart, iRangeEnd) + iGroupKey := createGroupKeyWithPartition(iGroupHash, iGroup) + jGroupHash := hashGroup(g.userID, jRangeStart, jRangeEnd) + jGroupKey := createGroupKeyWithPartition(jGroupHash, jGroup) + // Guarantee stable sort for tests. 
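+		// Group keys embed the group hash (derived from the user ID and time
+		// range) plus the Thanos group key of the first block.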
+ return iGroupKey < jGroupKey + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionedGroups(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRanges []int64) ([]*PartitionedGroupInfo, error) { + var allPartitionedGroup []*PartitionedGroupInfo + for _, partitionedGroup := range existingPartitionedGroups { + status := partitionedGroup.getPartitionedGroupStatus(g.ctx, g.bkt, g.partitionVisitMarkerTimeout, g.logger) + if !status.IsCompleted { + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + } + + timeRangeChecker := NewCompletenessChecker(blocks, groups, timeRanges) + for _, startTimeMap := range timeRangeChecker.TimeRangesStatus { + for _, status := range startTimeMap { + if !status.canTakeCompaction { + level.Info(g.logger).Log("msg", "incomplete time range", "rangeStart", status.rangeStartTime().String(), "rangeEnd", status.rangeEndTime().String(), + "timeRange", status.timeRangeDuration().String(), "previousTimeRange", status.previousTimeRangeDuration().String()) + } + } + } + + var blockIDs []string + for _, group := range groups { + groupHash := hashGroup(g.userID, group.rangeStart, group.rangeEnd) + logger := log.With(g.logger, "partitioned_group_id", groupHash, "rangeStart", group.rangeStartTime().String(), "rangeEnd", group.rangeEndTime().String()) + + blockIDs = group.getBlockIDs() + level.Info(logger).Log("msg", "block group", "blocks", strings.Join(blockIDs, ",")) + + level.Info(logger).Log("msg", "start generating partitioned group") + if g.shouldSkipGroup(logger, group, groupHash, existingPartitionedGroups, timeRangeChecker) { + level.Info(logger).Log("msg", "skip generating partitioned group") + continue + } + partitionedGroup, err := g.generatePartitionBlockGroup(group, groupHash) + if err != nil { + return nil, errors.Wrapf(err, "unable to generate partitioned group: %d", groupHash) + } + level.Info(logger).Log("msg", "generated partitioned group") + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + return allPartitionedGroup, nil +} + +func (g *PartitionCompactionGrouper) shouldSkipGroup(logger log.Logger, group blocksGroupWithPartition, partitionedGroupID uint32, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRangeChecker TimeRangeChecker) bool { + if _, ok := existingPartitionedGroups[partitionedGroupID]; ok { + level.Info(logger).Log("msg", "skip group", "reason", "partitioned group already exists") + return true + } + tr := group.rangeEnd - group.rangeStart + if status, ok := timeRangeChecker.TimeRangesStatus[tr][group.rangeStart]; !ok { + level.Info(logger).Log("msg", "skip group", "reason", "unable to get time range status") + return true + } else if !status.canTakeCompaction { + level.Info(logger).Log("msg", "skip group", "reason", "time range cannot take compaction job") + return true + } + + // Check if all blocks in group having same partitioned group id as destination partitionedGroupID + for _, b := range group.blocks { + partitionInfo, err := tsdb.GetPartitionInfo(*b) + if err != nil || partitionInfo == nil || partitionInfo.PartitionedGroupID != partitionedGroupID { + return false + } + } + level.Info(logger).Log("msg", "skip group", "reason", "all blocks in the group have partitioned group id equals to new group partitioned_group_id") + return true +} + +func (g *PartitionCompactionGrouper) generatePartitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) 
{ + partitionedGroupInfo, err := g.partitionBlockGroup(group, groupHash) + if err != nil { + return nil, err + } + updatedPartitionedGroupInfo, err := UpdatePartitionedGroupInfo(g.ctx, g.bkt, g.logger, *partitionedGroupInfo) + if err != nil { + return nil, err + } + return updatedPartitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) partitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) { + partitionCount := g.calculatePartitionCount(group, groupHash) + blocksByMinTime := g.groupBlocksByMinTime(group) + partitionedGroups, err := g.partitionBlocksGroup(partitionCount, blocksByMinTime, group.rangeStart, group.rangeEnd) + if err != nil { + return nil, err + } + + partitions := make([]Partition, partitionCount) + for partitionID := 0; partitionID < partitionCount; partitionID++ { + partitionedGroup := partitionedGroups[partitionID] + blockIDs := make([]ulid.ULID, len(partitionedGroup.blocks)) + for i, m := range partitionedGroup.blocks { + blockIDs[i] = m.ULID + } + partitions[partitionID] = Partition{ + PartitionID: partitionID, + Blocks: blockIDs, + } + } + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: groupHash, + PartitionCount: partitionCount, + Partitions: partitions, + RangeStart: group.rangeStart, + RangeEnd: group.rangeEnd, + Version: PartitionedGroupInfoVersion1, + } + return &partitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) calculatePartitionCount(group blocksGroupWithPartition, groupHash uint32) int { + indexSizeLimit := g.limits.CompactorPartitionIndexSizeBytes(g.userID) + seriesCountLimit := g.limits.CompactorPartitionSeriesCount(g.userID) + smallestRange := g.compactorCfg.BlockRanges.ToMilliseconds()[0] + groupRange := group.rangeLength() + if smallestRange >= groupRange { + level.Info(g.logger).Log("msg", "calculate level 1 block limits", "partitioned_group_id", groupHash, "smallest_range", smallestRange, "group_range", groupRange, "ingestion_replication_factor", g.ingestionReplicationFactor) + indexSizeLimit = indexSizeLimit * int64(g.ingestionReplicationFactor) + seriesCountLimit = seriesCountLimit * int64(g.ingestionReplicationFactor) + } + + totalIndexSizeInBytes := int64(0) + totalSeriesCount := int64(0) + for _, block := range group.blocks { + blockFiles := block.Thanos.Files + totalSeriesCount += int64(block.Stats.NumSeries) + var indexFile *metadata.File + for _, file := range blockFiles { + if file.RelPath == thanosblock.IndexFilename { + indexFile = &file + } + } + if indexFile == nil { + level.Debug(g.logger).Log("msg", "unable to find index file in metadata", "block", block.ULID) + break + } + indexSize := indexFile.SizeBytes + totalIndexSizeInBytes += indexSize + } + partitionNumberBasedOnIndex := 1 + if indexSizeLimit > 0 && totalIndexSizeInBytes > indexSizeLimit { + partitionNumberBasedOnIndex = g.findNearestPartitionNumber(float64(totalIndexSizeInBytes), float64(indexSizeLimit)) + } + partitionNumberBasedOnSeries := 1 + if seriesCountLimit > 0 && totalSeriesCount > seriesCountLimit { + partitionNumberBasedOnSeries = g.findNearestPartitionNumber(float64(totalSeriesCount), float64(seriesCountLimit)) + } + partitionNumber := partitionNumberBasedOnIndex + if partitionNumberBasedOnSeries > partitionNumberBasedOnIndex { + partitionNumber = partitionNumberBasedOnSeries + } + level.Info(g.logger).Log("msg", "calculated partition number for group", "partitioned_group_id", groupHash, "partition_number", partitionNumber, "total_index_size", totalIndexSizeInBytes, 
"index_size_limit", indexSizeLimit, "total_series_count", totalSeriesCount, "series_count_limit", seriesCountLimit, "group", group.String()) + return partitionNumber +} + +func (g *PartitionCompactionGrouper) findNearestPartitionNumber(size float64, limit float64) int { + return int(math.Pow(2, math.Ceil(math.Log2(size/limit)))) +} + +func (g *PartitionCompactionGrouper) groupBlocksByMinTime(group blocksGroupWithPartition) map[int64][]*metadata.Meta { + blocksByMinTime := make(map[int64][]*metadata.Meta) + for _, block := range group.blocks { + blockRange := block.MaxTime - block.MinTime + minTime := block.MinTime + for _, tr := range g.compactorCfg.BlockRanges.ToMilliseconds() { + if blockRange <= tr { + minTime = tr * (block.MinTime / tr) + break + } + } + blocksByMinTime[minTime] = append(blocksByMinTime[minTime], block) + } + return blocksByMinTime +} + +func (g *PartitionCompactionGrouper) partitionBlocksGroup(partitionCount int, blocksByMinTime map[int64][]*metadata.Meta, rangeStart int64, rangeEnd int64) (map[int]blocksGroupWithPartition, error) { + partitionedGroups := make(map[int]blocksGroupWithPartition) + addToPartitionedGroups := func(blocks []*metadata.Meta, partitionID int) { + if _, ok := partitionedGroups[partitionID]; !ok { + partitionedGroups[partitionID] = blocksGroupWithPartition{ + rangeStart: rangeStart, + rangeEnd: rangeEnd, + blocks: []*metadata.Meta{}, + } + } + partitionedGroup := partitionedGroups[partitionID] + partitionedGroup.blocks = append(partitionedGroup.blocks, blocks...) + partitionedGroups[partitionID] = partitionedGroup + } + + for _, blocksInSameTimeInterval := range blocksByMinTime { + for _, block := range blocksInSameTimeInterval { + partitionInfo, err := tsdb.GetPartitionInfo(*block) + if err != nil { + return nil, err + } + if partitionInfo == nil || partitionInfo.PartitionCount < 1 { + // For legacy blocks with level > 1, treat PartitionID is always 0. + // So it can be included in every partition. + defaultPartitionInfo := tsdb.DefaultPartitionInfo + partitionInfo = &defaultPartitionInfo + } + if partitionInfo.PartitionCount < partitionCount { + for partitionID := partitionInfo.PartitionID; partitionID < partitionCount; partitionID += partitionInfo.PartitionCount { + addToPartitionedGroups([]*metadata.Meta{block}, partitionID) + } + } else if partitionInfo.PartitionCount == partitionCount { + addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID) + } else { + addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID%partitionCount) + } + } + } + return partitionedGroups, nil +} + +func (g *PartitionCompactionGrouper) sortPartitionedGroups(partitionedGroups []*PartitionedGroupInfo) { + // Ensure groups are sorted by smallest range, oldest min time first. The rationale + // is that we wanna favor smaller ranges first (ie. to deduplicate samples sooner + // than later) and older ones are more likely to be "complete" (no missing block still + // to be uploaded). + sort.SliceStable(partitionedGroups, func(i, j int) bool { + iGroup := partitionedGroups[i] + jGroup := partitionedGroups[j] + iRangeStart := iGroup.RangeStart + iRangeEnd := iGroup.RangeEnd + jRangeStart := jGroup.RangeStart + jRangeEnd := jGroup.RangeEnd + iLength := iRangeEnd - iRangeStart + jLength := jRangeEnd - jRangeStart + + if iLength != jLength { + return iLength < jLength + } + if iRangeStart != jRangeStart { + return iRangeStart < jRangeStart + } + // Guarantee stable sort for tests. 
+ return iGroup.PartitionedGroupID < jGroup.PartitionedGroupID + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionCompactionJobs(blocks map[ulid.ULID]*metadata.Meta, partitionedGroups []*PartitionedGroupInfo, doRandomPick bool) []*blocksGroupWithPartition { + var partitionedBlockGroups []*blocksGroupWithPartition + for _, partitionedGroupInfo := range partitionedGroups { + partitionedGroupID := partitionedGroupInfo.PartitionedGroupID + partitionAdded := 0 + var partitionIDs []int + if doRandomPick { + // Randomly pick partitions from partitioned group to avoid all compactors + // trying to get same partition at same time. + r := rand.New(rand.NewSource(time.Now().UnixMicro() + int64(hashString(g.ringLifecyclerID)))) + partitionIDs = r.Perm(len(partitionedGroupInfo.Partitions)) + } else { + for i := 0; i < partitionedGroupInfo.PartitionCount; i++ { + partitionIDs = append(partitionIDs, i) + } + } + for _, i := range partitionIDs { + partition := partitionedGroupInfo.Partitions[i] + if len(partition.Blocks) == 1 { + partition.Blocks = append(partition.Blocks, DUMMY_BLOCK_ID) + level.Info(g.logger).Log("msg", "handled single block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + } else if len(partition.Blocks) < 1 { + if err := g.handleEmptyPartition(partitionedGroupInfo, partition); err != nil { + level.Warn(g.logger).Log("msg", "failed to handle empty partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID, "err", err) + } + continue + } + partitionedGroup, err := createBlocksGroup(blocks, partition.Blocks, partitionedGroupInfo.RangeStart, partitionedGroupInfo.RangeEnd) + if err != nil { + continue + } + partitionedGroup.groupHash = partitionedGroupID + partitionedGroup.partitionedGroupInfo = partitionedGroupInfo + partitionedGroup.partition = partition + partitionedBlockGroups = append(partitionedBlockGroups, partitionedGroup) + partitionAdded++ + } + } + return partitionedBlockGroups +} + +// handleEmptyPartition uploads a completed partition visit marker for any partition that does have any blocks assigned +func (g *PartitionCompactionGrouper) handleEmptyPartition(partitionedGroupInfo *PartitionedGroupInfo, partition Partition) error { + if len(partition.Blocks) > 0 { + return nil + } + + level.Info(g.logger).Log("msg", "handling empty block partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + visitMarker := &partitionVisitMarker{ + PartitionedGroupID: partitionedGroupInfo.PartitionedGroupID, + PartitionID: partition.PartitionID, + Version: PartitionVisitMarkerVersion1, + } + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, visitMarker) + visitMarkerManager.MarkWithStatus(g.ctx, Completed) + + level.Info(g.logger).Log("msg", "handled empty block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + return nil +} + +func (g *PartitionCompactionGrouper) pickPartitionCompactionJob(partitionCompactionJobs []*blocksGroupWithPartition) []*compact.Group { + var outGroups []*compact.Group + for _, partitionedGroup := range partitionCompactionJobs { + groupHash := 
partitionedGroup.groupHash + partitionedGroupID := partitionedGroup.partitionedGroupInfo.PartitionedGroupID + partitionCount := partitionedGroup.partitionedGroupInfo.PartitionCount + partitionID := partitionedGroup.partition.PartitionID + partitionedGroupLogger := log.With(g.logger, "rangeStart", partitionedGroup.rangeStartTime().String(), "rangeEnd", partitionedGroup.rangeEndTime().String(), "rangeDuration", partitionedGroup.rangeDuration().String(), "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "partition_count", partitionCount, "group_hash", groupHash) + visitMarker := newPartitionVisitMarker(g.ringLifecyclerID, partitionedGroupID, partitionID) + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, visitMarker) + if isVisited, err := g.isGroupVisited(partitionID, visitMarkerManager); err != nil { + level.Warn(partitionedGroupLogger).Log("msg", "unable to check if partition is visited", "err", err, "group", partitionedGroup.String()) + continue + } else if isVisited { + level.Info(partitionedGroupLogger).Log("msg", "skipping group because partition is visited") + continue + } + partitionedGroupKey := createGroupKeyWithPartitionID(groupHash, partitionID, *partitionedGroup) + + level.Info(partitionedGroupLogger).Log("msg", "found compactable group for user", "group", partitionedGroup.String()) + begin := time.Now() + + visitMarkerManager.MarkWithStatus(g.ctx, Pending) + level.Info(partitionedGroupLogger).Log("msg", "marked partition visited in group", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds(), "group", partitionedGroup.String()) + + resolution := partitionedGroup.blocks[0].Thanos.Downsample.Resolution + externalLabels := labels.FromMap(partitionedGroup.blocks[0].Thanos.Labels) + timeRange := partitionedGroup.rangeEnd - partitionedGroup.rangeStart + metricLabelValues := []string{ + g.userID, + fmt.Sprintf("%d", timeRange), + } + g.compactorMetrics.initMetricWithCompactionLabelValues(metricLabelValues...) + g.compactorMetrics.partitionCount.WithLabelValues(metricLabelValues...).Set(float64(partitionCount)) + thanosGroup, err := compact.NewGroup( + log.With(partitionedGroupLogger, "groupKey", partitionedGroupKey, "externalLabels", externalLabels, "downsampleResolution", resolution), + g.bkt, + partitionedGroupKey, + externalLabels, + resolution, + g.acceptMalformedIndex, + true, // Enable vertical compaction. 
+ g.compactorMetrics.compactions.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsStarted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsCompleted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionFailures.WithLabelValues(metricLabelValues...), + g.compactorMetrics.verticalCompactions.WithLabelValues(metricLabelValues...), + g.syncerMetrics.GarbageCollectedBlocks, + g.syncerMetrics.BlocksMarkedForDeletion, + g.blocksMarkedForNoCompact, + g.hashFunc, + g.blockFilesConcurrency, + g.blocksFetchConcurrency, + ) + if err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to create partitioned group", "blocks", partitionedGroup.partition.Blocks) + } + + for _, m := range partitionedGroup.blocks { + if err := thanosGroup.AppendMeta(m); err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to add block to partitioned group", "block", m.ULID, "err", err) + } + } + thanosGroup.SetExtensions(&tsdb.CortexMetaExtensions{ + PartitionInfo: &tsdb.PartitionInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: partitionCount, + PartitionID: partitionID, + PartitionedGroupCreationTime: partitionedGroup.partitionedGroupInfo.CreationTime, + }, + TimeRange: timeRange, + }) + + outGroups = append(outGroups, thanosGroup) + level.Debug(partitionedGroupLogger).Log("msg", "added partition to compaction groups") + if len(outGroups) >= g.compactionConcurrency { + break + } + } + + level.Info(g.logger).Log("msg", fmt.Sprintf("total groups for compaction: %d", len(outGroups))) + + for _, p := range outGroups { + partitionInfo, err := tsdb.ConvertToPartitionInfo(p.Extensions()) + if err == nil && partitionInfo != nil { + level.Info(g.logger).Log("msg", "picked compaction job", "partitioned_group_id", partitionInfo.PartitionedGroupID, "partition_count", partitionInfo.PartitionCount) + } + } + return outGroups +} + +func (g *PartitionCompactionGrouper) isGroupVisited(partitionID int, visitMarkerManager *VisitMarkerManager) (bool, error) { + visitMarker := &partitionVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(g.ctx, visitMarker) + if err != nil { + if errors.Is(err, errorVisitMarkerNotFound) { + level.Warn(g.logger).Log("msg", "no visit marker file for partition", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath()) + return false, nil + } + level.Error(g.logger).Log("msg", "unable to read partition visit marker file", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath(), "err", err) + return true, err + } + if visitMarker.GetStatus() == Completed { + level.Info(g.logger).Log("msg", "partition visit marker with partition ID is completed", "partition_visit_marker", visitMarker.String()) + return true, nil + } + if visitMarker.IsVisited(g.partitionVisitMarkerTimeout, partitionID) { + level.Info(g.logger).Log("msg", "visited partition with partition ID", "partition_visit_marker", visitMarker.String()) + return true, nil + } + return false, nil +} + +type TimeRangeChecker struct { + // This is a map of timeRange to a map of rangeStart to timeRangeStatus + TimeRangesStatus map[int64]map[int64]*timeRangeStatus +} + +func NewCompletenessChecker(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, timeRanges []int64) TimeRangeChecker { + timeRangeToBlockMap := make(map[int64][]*metadata.Meta) + for _, b := range blocks { + timeRange := int64(0) + if b.Compaction.Level > 1 { + ext, err := 
tsdb.GetCortexMetaExtensionsFromMeta(*b) + if err == nil && ext != nil && ext.TimeRange > 0 { + timeRange = ext.TimeRange + } else { + // fallback logic to guess block time range based + // on MaxTime and MinTime + blockRange := b.MaxTime - b.MinTime + for _, tr := range timeRanges { + rangeStart := getRangeStart(b, tr) + rangeEnd := rangeStart + tr + if tr >= blockRange && rangeEnd >= b.MaxTime { + timeRange = tr + break + } + } + } + } + timeRangeToBlockMap[timeRange] = append(timeRangeToBlockMap[timeRange], b) + } + timeRangesStatus := make(map[int64]map[int64]*timeRangeStatus) + for _, g := range groups { + tr := g.rangeEnd - g.rangeStart + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + timeRangesStatus[tr][g.rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: g.rangeStart, + rangeEnd: g.rangeEnd, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + for tr, blks := range timeRangeToBlockMap { + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + for _, b := range blks { + actualTr := tr + if tr == 0 { + actualTr = timeRanges[0] + } + rangeStart := getRangeStart(b, actualTr) + if _, ok := timeRangesStatus[tr][rangeStart]; !ok { + timeRangesStatus[tr][rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: rangeStart, + rangeEnd: rangeStart + actualTr, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + timeRangesStatus[tr][rangeStart].addBlock(1) + } + } + previousTimeRanges := []int64{0} + for _, tr := range timeRanges { + timeRangeLoop: + for rangeStart, status := range timeRangesStatus[tr] { + previousTrBlocks := 0 + for _, previousTr := range previousTimeRanges { + allPreviousTimeRanges := getAllPreviousTimeRanges(tr, rangeStart, previousTr, timeRanges[0]) + for _, previousRangeStart := range allPreviousTimeRanges { + if previousTrStatus, ok := timeRangesStatus[previousTr][previousRangeStart]; ok { + if previousTrStatus.canTakeCompaction { + status.canTakeCompaction = false + continue timeRangeLoop + } + previousTrBlocks += previousTrStatus.numActiveBlocks + } + } + } + status.canTakeCompaction = !(previousTrBlocks == 0 || (previousTrBlocks == 1 && status.numActiveBlocks == 0)) + } + previousTimeRanges = append(previousTimeRanges, tr) + } + return TimeRangeChecker{TimeRangesStatus: timeRangesStatus} +} + +// getAllPreviousTimeRanges returns a list of rangeStart time for previous time range that +// falls within current time range and start time +func getAllPreviousTimeRanges(currentTr int64, rangeStart int64, previousTr int64, smallestTr int64) []int64 { + var result []int64 + if previousTr == 0 { + previousTr = smallestTr + } + previousRangeStart := rangeStart + for ; previousRangeStart+previousTr <= rangeStart+currentTr; previousRangeStart += previousTr { + result = append(result, previousRangeStart) + } + return result +} + +type timeRangeStatus struct { + timeRange int64 + rangeStart int64 + rangeEnd int64 + numActiveBlocks int + canTakeCompaction bool + previousTimeRange int64 +} + +func (t *timeRangeStatus) addBlock(num int) { + t.numActiveBlocks += num +} + +func (t *timeRangeStatus) rangeStartTime() time.Time { + return time.Unix(0, t.rangeStart*int64(time.Millisecond)).UTC() +} + +func (t *timeRangeStatus) rangeEndTime() time.Time { + return time.Unix(0, t.rangeEnd*int64(time.Millisecond)).UTC() +} + +func (t *timeRangeStatus) timeRangeDuration() time.Duration { + return time.Duration(t.timeRange) * time.Millisecond +} + +func (t 
*timeRangeStatus) previousTimeRangeDuration() time.Duration { + return time.Duration(t.previousTimeRange) * time.Millisecond +} + +type blocksGroupWithPartition struct { + blocksGroup + rangeStart int64 // Included. + rangeEnd int64 // Excluded. + blocks []*metadata.Meta + groupHash uint32 + partitionedGroupInfo *PartitionedGroupInfo + partition Partition +} + +func (g blocksGroupWithPartition) rangeDuration() time.Duration { + return g.rangeEndTime().Sub(g.rangeStartTime()) +} + +func (g blocksGroupWithPartition) getBlockIDs() []string { + blockIDs := make([]string, len(g.blocks)) + for i, block := range g.blocks { + blockIDs[i] = block.ULID.String() + } + return blockIDs +} + +func createGroupKeyWithPartition(groupHash uint32, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%s", groupHash, group.blocks[0].Thanos.GroupKey()) +} + +func createGroupKeyWithPartitionID(groupHash uint32, partitionID int, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%d%s", groupHash, partitionID, group.blocks[0].Thanos.GroupKey()) +} + +func createBlocksGroup(blocks map[ulid.ULID]*metadata.Meta, blockIDs []ulid.ULID, rangeStart int64, rangeEnd int64) (*blocksGroupWithPartition, error) { + var group blocksGroupWithPartition + group.rangeStart = rangeStart + group.rangeEnd = rangeEnd + var nonDummyBlock *metadata.Meta + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + continue + } + m, ok := blocks[blockID] + if !ok { + return nil, fmt.Errorf("block not found: %s", blockID) + } + nonDummyBlock = m + group.blocks = append(group.blocks, m) + } + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + dummyMeta := *nonDummyBlock + dummyMeta.ULID = DUMMY_BLOCK_ID + group.blocks = append(group.blocks, &dummyMeta) + } + } + return &group, nil } diff --git a/pkg/compactor/partition_compaction_grouper_test.go b/pkg/compactor/partition_compaction_grouper_test.go new file mode 100644 index 0000000000..2167a219ae --- /dev/null +++ b/pkg/compactor/partition_compaction_grouper_test.go @@ -0,0 +1,2139 @@ +package compactor + +import ( + "context" + "encoding/json" + "fmt" + "path" + "testing" + "time" + + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortextsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/util/validation" +) + +var ( + M = time.Minute.Milliseconds() + H = time.Hour.Milliseconds() +) + +func TestPartitionCompactionGrouper_GenerateCompactionJobs(t *testing.T) { + block1 := ulid.MustNew(1, nil) + block2 := ulid.MustNew(2, nil) + block3 := ulid.MustNew(3, nil) + block4 := ulid.MustNew(4, nil) + block5 := ulid.MustNew(5, nil) + block6 := ulid.MustNew(6, nil) + block7 := ulid.MustNew(7, nil) + + testCompactorID := "test-compactor" + //otherCompactorID := "other-compactor" + + userID := "test-user" + partitionedGroupID_0_2 := hashGroup(userID, 0*H, 2*H) + partitionedGroupID_0_12 := hashGroup(userID, 0*H, 12*H) + partitionedGroupID_0_24 := hashGroup(userID, 0*H, 24*H) + + tests := map[string]generateCompactionJobsTestCase{ + "only level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * 
time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there are existing partitioned group files for all blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + {rangeStart: 2 * H, rangeEnd: 4 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, 
MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks from same time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * 
H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks, there is partitioned group file for level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + 
{rangeStart: 10 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: 
metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3}}, + }}, + {rangeStart: 12 * H, rangeEnd: 14 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks along with level 3 blocks from some of partitions, level 1 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 22 * H, 
MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 4, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 1, Blocks: []ulid.ULID{block2, block3}}, + {PartitionID: 2, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 3, Blocks: []ulid.ULID{block2, block3}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 2: {partitionID: 2, compactorID: testCompactorID, isExpired: false, status: Completed}, + }}, + {rangeStart: 22 * H, rangeEnd: 24 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block6, block7}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 22 * H, rangeEnd: 24 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 2, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 3, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks in first 12h are all complete, level 2 blocks in second 12h have not started compaction, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 6 * H, MaxTime: 8 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks are all complete, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + 
hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 14 * H, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 24 * H}, + }, + }, + "level 2 blocks are complete only in second half of 12h, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + 
timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + {PartitionID: 1, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 3 blocks are complete, there are some level 2 blocks not deleted, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + // nothing should be grouped. cleaner should mark all level 2 blocks for deletion + // and delete partitioned group file since level 2 to level 3 compaction is complete + }, + }, + "recompact one level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks in same and different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + 
hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks all in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks and level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, 
Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with one level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 1 block in same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block in different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + 
timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with one level 4 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 1 blocks in different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 2 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 2 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, 
partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + 
}, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 3 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 3 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, 
+ }, + }, + "recompact two level 3 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "blocks with partition info should be assigned to correct partition": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: 
&metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 3}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block3, block5, block7}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block4, block6, block7}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "one of the partitions got only one block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}, Stats: 
tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, DUMMY_BLOCK_ID}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "not all level 2 blocks are in bucket index": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "not all level 2 blocks are in bucket index and there are late level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, 
isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 3 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks not all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + 
Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_2, PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with data only in part of time range across smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10*H + 49*M, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{TimeRange: 24 * H, PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with no time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, 
rangeEnd: 12 * H}, + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + compactorCfg := &Config{ + BlockRanges: testCase.ranges, + } + + limits := &validation.Limits{ + CompactorPartitionSeriesCount: 4, + } + overrides, err := validation.NewOverrides(*limits, nil) + require.NoError(t, err) + + // Setup mocking of the ring so that the grouper will own all the shards + rs := ring.ReplicationSet{ + Instances: []ring.InstanceDesc{ + {Addr: "test-addr"}, + }, + } + subring := &RingMock{} + subring.On("GetAllHealthy", mock.Anything).Return(rs, nil) + subring.On("Get", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(rs, nil) + + ring := &RingMock{} + ring.On("ShuffleShard", mock.Anything, mock.Anything).Return(subring, nil) + + registerer := prometheus.NewPedanticRegistry() + + metrics := newCompactorMetrics(registerer) + + noCompactFilter := testCase.getNoCompactFilter() + + bkt := &bucket.ClientMock{} + visitMarkerTimeout := 5 * time.Minute + testCase.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + bkt.MockUpload(mock.Anything, nil) + bkt.MockGet(mock.Anything, "", nil) + bkt.MockIter(mock.Anything, nil, nil) + + for _, b := range testCase.blocks { + b.fixPartitionInfo(t, userID) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + g := NewPartitionCompactionGrouper( + ctx, + nil, + objstore.WithNoopInstr(bkt), + false, // Do not accept malformed indexes + true, // Enable vertical compaction + nil, + metrics.getSyncerMetrics(userID), + metrics, + metadata.NoneFunc, + *compactorCfg, + ring, + "test-addr", + testCompactorID, + overrides, + userID, + 10, + 3, + 1, + false, + visitMarkerTimeout, + noCompactFilter, + 1, + ) + actual, err := g.generateCompactionJobs(testCase.getBlocks()) + require.NoError(t, err) + require.Len(t, actual, len(testCase.expected)) + + for idx, expectedGroup := range testCase.expected { + actualGroup := actual[idx] + actualBlocks := actualGroup.blocks + require.Equal(t, expectedGroup.rangeStart, actualGroup.partitionedGroupInfo.RangeStart) + require.Equal(t, expectedGroup.rangeEnd, actualGroup.partitionedGroupInfo.RangeEnd) + require.Equal(t, expectedGroup.partitionCount, actualGroup.partitionedGroupInfo.PartitionCount) + require.Equal(t, expectedGroup.partitionID, actualGroup.partition.PartitionID) + require.Len(t, actualBlocks, len(expectedGroup.blocks)) + for _, b := range actualBlocks { + require.Contains(t, expectedGroup.blocks, b.ULID) + } + } + }) + } +} + +type generateCompactionJobsTestCase struct { + ranges []time.Duration + blocks map[ulid.ULID]mockBlock + existingPartitionedGroups []mockExistingPartitionedGroup + expected []expectedCompactionJob +} + +func (g *generateCompactionJobsTestCase) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) { + var existingPartitionedGroupFiles []string + for _, existingPartitionedGroup := range g.existingPartitionedGroups { + partitionedGroupFilePath := existingPartitionedGroup.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + existingPartitionedGroupFiles = append(existingPartitionedGroupFiles, partitionedGroupFilePath) + } + bkt.MockIter(PartitionedGroupDirectory, existingPartitionedGroupFiles, nil) +} + +func (g *generateCompactionJobsTestCase) getNoCompactFilter() func() map[ulid.ULID]*metadata.NoCompactMark { + noCompactBlocks := make(map[ulid.ULID]*metadata.NoCompactMark) + for id, b := range g.blocks { + if b.hasNoCompactMark { + 
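+ // Record a no-compact mark for this block so the filter returned below reports it to the grouper.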
noCompactBlocks[id] = &metadata.NoCompactMark{ + ID: id, + NoCompactTime: time.Now().Add(-1 * time.Hour).Unix(), + } + } + } + return func() map[ulid.ULID]*metadata.NoCompactMark { + return noCompactBlocks + } +} + +func (g *generateCompactionJobsTestCase) getBlocks() map[ulid.ULID]*metadata.Meta { + blocks := make(map[ulid.ULID]*metadata.Meta) + for id, b := range g.blocks { + blocks[id] = b.meta + } + return blocks +} + +type mockExistingPartitionedGroup struct { + partitionedGroupID uint32 + rangeStart int64 + rangeEnd int64 + partitionCount int + partitions []Partition + partitionVisitMarkers map[int]mockPartitionVisitMarker +} + +func (p *mockExistingPartitionedGroup) updatePartitionedGroupID(userID string) { + p.partitionedGroupID = hashGroup(userID, p.rangeStart, p.rangeEnd) +} + +func (p *mockExistingPartitionedGroup) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) string { + p.updatePartitionedGroupID(userID) + partitionedGroupFilePath := path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", p.partitionedGroupID)) + for _, partition := range p.partitions { + partitionID := partition.PartitionID + if _, ok := p.partitionVisitMarkers[partitionID]; !ok { + continue + } + visitMarker := p.partitionVisitMarkers[partitionID] + partitionVisitMarkerFilePath := path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, + fmt.Sprintf("%d/%s%d-%s", p.partitionedGroupID, PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) + visitTime := time.Now() + if visitMarker.isExpired { + visitTime = time.Now().Add(-2 * visitMarkerTimeout) + } + actualVisitMarker := partitionVisitMarker{ + CompactorID: visitMarker.compactorID, + Status: visitMarker.status, + PartitionedGroupID: p.partitionedGroupID, + PartitionID: partitionID, + VisitTime: visitTime.UnixMilli(), + Version: PartitionVisitMarkerVersion1, + } + partitionVisitMarkerContent, err := json.Marshal(actualVisitMarker) + require.NoError(t, err) + bkt.MockGet(partitionVisitMarkerFilePath, string(partitionVisitMarkerContent), nil) + } + partitionedGroup := PartitionedGroupInfo{ + PartitionedGroupID: p.partitionedGroupID, + PartitionCount: p.partitionCount, + Partitions: p.partitions, + RangeStart: p.rangeStart, + RangeEnd: p.rangeEnd, + CreationTime: time.Now().Add(-1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + } + partitionedGroupContent, err := json.Marshal(partitionedGroup) + require.NoError(t, err) + bkt.MockGet(partitionedGroupFilePath, string(partitionedGroupContent), nil) + return partitionedGroupFilePath +} + +type mockBlock struct { + meta *metadata.Meta + timeRange time.Duration + hasNoCompactMark bool +} + +func (b *mockBlock) fixPartitionInfo(t *testing.T, userID string) { + extensions, err := cortextsdb.GetCortexMetaExtensionsFromMeta(*b.meta) + require.NoError(t, err) + if extensions != nil { + rangeStart := getRangeStart(b.meta, b.timeRange.Milliseconds()) + rangeEnd := rangeStart + b.timeRange.Milliseconds() + if extensions.PartitionInfo.PartitionedGroupID == 0 { + extensions.PartitionInfo.PartitionedGroupID = hashGroup(userID, rangeStart, rangeEnd) + } + b.meta.Thanos.Extensions = extensions + } +} + +type mockPartitionVisitMarker struct { + partitionID int + compactorID string + isExpired bool + status VisitStatus +} + +type expectedCompactionJob struct { + blocks []ulid.ULID + partitionCount int + partitionID int + rangeStart int64 + rangeEnd int64 +} diff --git a/pkg/compactor/partition_visit_marker.go 
b/pkg/compactor/partition_visit_marker.go new file mode 100644 index 0000000000..4a5d8fdc4a --- /dev/null +++ b/pkg/compactor/partition_visit_marker.go @@ -0,0 +1,96 @@ +package compactor + +import ( + "fmt" + "path" + "strings" + "time" + + "github.com/pkg/errors" +) + +const ( + // PartitionVisitMarkerDirectory is the name of directory where all visit markers are saved. + PartitionVisitMarkerDirectory = "visit-marks" + // PartitionVisitMarkerFileSuffix is the known suffix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFileSuffix = "visit-mark.json" + // PartitionVisitMarkerFilePrefix is the known prefix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFilePrefix = "partition-" + // PartitionVisitMarkerVersion1 is the current supported version of visit-mark file. + PartitionVisitMarkerVersion1 = 1 +) + +var ( + errorNotPartitionVisitMarker = errors.New("file is not partition visit marker") +) + +type partitionVisitMarker struct { + CompactorID string `json:"compactorID"` + Status VisitStatus `json:"status"` + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionID int `json:"partitionID"` + // VisitTime is a unix timestamp of when the partition was visited (mark updated). + VisitTime int64 `json:"visitTime"` + // Version of the file. + Version int `json:"version"` +} + +func newPartitionVisitMarker(compactorID string, partitionedGroupID uint32, partitionID int) *partitionVisitMarker { + return &partitionVisitMarker{ + CompactorID: compactorID, + PartitionedGroupID: partitionedGroupID, + PartitionID: partitionID, + } +} + +func (b *partitionVisitMarker) IsExpired(partitionVisitMarkerTimeout time.Duration) bool { + return !time.Now().Before(time.Unix(b.VisitTime, 0).Add(partitionVisitMarkerTimeout)) +} + +func (b *partitionVisitMarker) IsVisited(partitionVisitMarkerTimeout time.Duration, partitionID int) bool { + return b.GetStatus() == Completed || (partitionID == b.PartitionID && !b.IsExpired(partitionVisitMarkerTimeout)) +} + +func (b *partitionVisitMarker) IsPendingByCompactor(partitionVisitMarkerTimeout time.Duration, partitionID int, compactorID string) bool { + return b.CompactorID == compactorID && partitionID == b.PartitionID && b.GetStatus() == Pending && !b.IsExpired(partitionVisitMarkerTimeout) +} + +func (b *partitionVisitMarker) GetStatus() VisitStatus { + return b.Status +} + +func (b *partitionVisitMarker) GetVisitMarkerFilePath() string { + return GetPartitionVisitMarkerFilePath(b.PartitionedGroupID, b.PartitionID) +} + +func (b *partitionVisitMarker) UpdateStatus(ownerIdentifier string, status VisitStatus) { + b.CompactorID = ownerIdentifier + b.Status = status + b.VisitTime = time.Now().Unix() +} + +func (b *partitionVisitMarker) String() string { + return fmt.Sprintf("visit_marker_partitioned_group_id=%d visit_marker_partition_id=%d visit_marker_compactor_id=%s visit_marker_status=%s visit_marker_visit_time=%s", + b.PartitionedGroupID, + b.PartitionID, + b.CompactorID, + b.Status, + time.Unix(b.VisitTime, 0).String(), + ) +} + +func GetPartitionVisitMarkerFilePath(partitionedGroupID uint32, partitionID int) string { + return path.Join(GetPartitionVisitMarkerDirectoryPath(partitionedGroupID), fmt.Sprintf("%s%d-%s", PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) +} + +func GetPartitionVisitMarkerDirectoryPath(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, fmt.Sprintf("%d", 
partitionedGroupID)) +} + +func IsPartitionVisitMarker(path string) bool { + return strings.HasSuffix(path, PartitionVisitMarkerFileSuffix) +} + +func IsNotPartitionVisitMarkerError(err error) bool { + return errors.Is(err, errorNotPartitionVisitMarker) +} diff --git a/pkg/compactor/partitioned_group_info.go b/pkg/compactor/partitioned_group_info.go new file mode 100644 index 0000000000..71d4c61639 --- /dev/null +++ b/pkg/compactor/partitioned_group_info.go @@ -0,0 +1,303 @@ +package compactor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "path" + "strings" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/util/runutil" +) + +const ( + PartitionedGroupDirectory = "partitioned-groups" + PartitionedGroupInfoVersion1 = 1 +) + +var ( + ErrorPartitionedGroupInfoNotFound = errors.New("partitioned group info not found") + ErrorUnmarshalPartitionedGroupInfo = errors.New("unmarshal partitioned group info JSON") +) + +type Partition struct { + PartitionID int `json:"partitionID"` + Blocks []ulid.ULID `json:"blocks"` +} + +type PartitionedGroupStatus struct { + PartitionedGroupID uint32 + CanDelete bool + IsCompleted bool + DeleteVisitMarker bool + PendingPartitions int + InProgressPartitions int + PendingOrFailedPartitions []Partition +} + +func (s PartitionedGroupStatus) String() string { + var partitions []string + for _, p := range s.PendingOrFailedPartitions { + partitions = append(partitions, fmt.Sprintf("%d", p.PartitionID)) + } + return fmt.Sprintf(`{"partitioned_group_id": %d, "can_delete": %t, "is_complete": %t, "delete_visit_marker": %t, "pending_partitions": %d, "in_progress_partitions": %d, "pending_or_failed_partitions": [%s]}`, + s.PartitionedGroupID, s.CanDelete, s.IsCompleted, s.DeleteVisitMarker, s.PendingPartitions, s.InProgressPartitions, strings.Join(partitions, ",")) +} + +type PartitionedGroupInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionCount int `json:"partitionCount"` + Partitions []Partition `json:"partitions"` + RangeStart int64 `json:"rangeStart"` + RangeEnd int64 `json:"rangeEnd"` + CreationTime int64 `json:"creationTime"` + // Version of the file. 
+ Version int `json:"version"` +} + +func (p *PartitionedGroupInfo) rangeStartTime() time.Time { + return time.Unix(0, p.RangeStart*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) rangeEndTime() time.Time { + return time.Unix(0, p.RangeEnd*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) getPartitionIDsByBlock(blockID ulid.ULID) []int { + var partitionIDs []int +partitionLoop: + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + if block == blockID { + partitionIDs = append(partitionIDs, partition.PartitionID) + continue partitionLoop + } + } + } + return partitionIDs +} + +func (p *PartitionedGroupInfo) getAllBlocks() []ulid.ULID { + uniqueBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + uniqueBlocks[block] = struct{}{} + } + } + blocks := make([]ulid.ULID, len(uniqueBlocks)) + i := 0 + for block := range uniqueBlocks { + blocks[i] = block + i++ + } + return blocks +} + +func (p *PartitionedGroupInfo) getAllBlockIDs() []string { + blocks := p.getAllBlocks() + blockIDs := make([]string, len(blocks)) + for i, block := range blocks { + blockIDs[i] = block.String() + } + return blockIDs +} + +func (p *PartitionedGroupInfo) getPartitionedGroupStatus( + ctx context.Context, + userBucket objstore.InstrumentedBucket, + partitionVisitMarkerTimeout time.Duration, + userLogger log.Logger, +) PartitionedGroupStatus { + status := PartitionedGroupStatus{ + PartitionedGroupID: p.PartitionedGroupID, + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingPartitions: 0, + InProgressPartitions: 0, + PendingOrFailedPartitions: []Partition{}, + } + allPartitionCompleted := true + hasInProgressPartitions := false + for _, partition := range p.Partitions { + visitMarker := &partitionVisitMarker{ + PartitionedGroupID: p.PartitionedGroupID, + PartitionID: partition.PartitionID, + } + visitMarkerManager := NewVisitMarkerManager(userBucket, userLogger, "PartitionedGroupInfo.getPartitionedGroupStatus", visitMarker) + partitionVisitMarkerExists := true + if err := visitMarkerManager.ReadVisitMarker(ctx, visitMarker); err != nil { + if errors.Is(err, errorVisitMarkerNotFound) { + partitionVisitMarkerExists = false + } else { + level.Warn(userLogger).Log("msg", "unable to read partition visit marker", "path", visitMarker.GetVisitMarkerFilePath(), "err", err) + return status + } + } + + if !partitionVisitMarkerExists { + status.PendingPartitions++ + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + } else if visitMarker.VisitTime < p.CreationTime { + status.DeleteVisitMarker = true + allPartitionCompleted = false + } else if (visitMarker.GetStatus() == Pending || visitMarker.GetStatus() == InProgress) && !visitMarker.IsExpired(partitionVisitMarkerTimeout) { + status.InProgressPartitions++ + hasInProgressPartitions = true + allPartitionCompleted = false + } else if visitMarker.GetStatus() != Completed { + status.PendingPartitions++ + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + } + } + + if hasInProgressPartitions { + return status + } + + status.IsCompleted = allPartitionCompleted + + if allPartitionCompleted { + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + + checkedBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range status.PendingOrFailedPartitions { + 
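// For each block in a pending or failed partition, check whether the block is physically gone, marked for deletion, or marked no-compact; any of these means the group can never complete as planned, so it is safe to delete. +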
for _, blockID := range partition.Blocks { + if _, ok := checkedBlocks[blockID]; ok { + continue + } + if !p.doesBlockExist(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is physically deleted", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockDeleted(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for deletion", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockNoCompact(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for no compact", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + checkedBlocks[blockID] = struct{}{} + } + } + return status +} + +func (p *PartitionedGroupInfo) doesBlockExist(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + metaExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.MetaFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of meta.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return true + } + return metaExists +} + +func (p *PartitionedGroupInfo) isBlockDeleted(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + deletionMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.DeletionMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of deletion-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return deletionMarkerExists +} + +func (p *PartitionedGroupInfo) isBlockNoCompact(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + noCompactMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.NoCompactMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of no-compact-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return noCompactMarkerExists +} + +func (p *PartitionedGroupInfo) String() string { + var partitions []string + for _, partition := range p.Partitions { + partitions = append(partitions, fmt.Sprintf("(PartitionID: %d, Blocks: %s)", partition.PartitionID, partition.Blocks)) + } + return fmt.Sprintf("{PartitionedGroupID: %d, PartitionCount: %d, Partitions: %s}", p.PartitionedGroupID, p.PartitionCount, strings.Join(partitions, ", ")) +} + +func GetPartitionedGroupFile(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", partitionedGroupID)) +} + +func ReadPartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupID uint32) (*PartitionedGroupInfo, error) { + return ReadPartitionedGroupInfoFile(ctx, bkt, logger, GetPartitionedGroupFile(partitionedGroupID)) +} + +func ReadPartitionedGroupInfoFile(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupFile string) (*PartitionedGroupInfo, error) { + partitionedGroupReader, err := 
bkt.ReaderWithExpectedErrs(bkt.IsObjNotFoundErr).Get(ctx, partitionedGroupFile) + if err != nil { + if bkt.IsObjNotFoundErr(err) { + return nil, errors.Wrapf(ErrorPartitionedGroupInfoNotFound, "partitioned group file: %s", partitionedGroupFile) + } + return nil, errors.Wrapf(err, "get partitioned group file: %s", partitionedGroupFile) + } + defer runutil.CloseWithLogOnErr(logger, partitionedGroupReader, "close partitioned group reader") + p, err := io.ReadAll(partitionedGroupReader) + if err != nil { + return nil, errors.Wrapf(err, "read partitioned group file: %s", partitionedGroupFile) + } + partitionedGroupInfo := PartitionedGroupInfo{} + if err = json.Unmarshal(p, &partitionedGroupInfo); err != nil { + return nil, errors.Wrapf(ErrorUnmarshalPartitionedGroupInfo, "partitioned group file: %s, error: %v", partitionedGroupFile, err.Error()) + } + if partitionedGroupInfo.Version != PartitionedGroupInfoVersion1 { + return nil, errors.Errorf("unexpected partitioned group file version %d, expected %d", partitionedGroupInfo.Version, PartitionedGroupInfoVersion1) + } + if partitionedGroupInfo.CreationTime <= 0 { + objAttr, err := bkt.Attributes(ctx, partitionedGroupFile) + if err != nil { + return nil, errors.Errorf("unable to get partitioned group file attributes: %s, error: %v", partitionedGroupFile, err.Error()) + } + partitionedGroupInfo.CreationTime = objAttr.LastModified.Unix() + } + return &partitionedGroupInfo, nil +} + +func UpdatePartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, partitionedGroupInfo PartitionedGroupInfo) (*PartitionedGroupInfo, error) { + // Ignore the read error so the partitioned group info can always be updated. There is no harm in writing the latest version of + // the partitioned group info, which is supposed to be the correct grouping based on the latest bucket store. 
+ existingPartitionedGroup, _ := ReadPartitionedGroupInfo(ctx, bkt, logger, partitionedGroupInfo.PartitionedGroupID) + if existingPartitionedGroup != nil { + level.Warn(logger).Log("msg", "partitioned group info already exists", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return existingPartitionedGroup, nil + } + if partitionedGroupInfo.CreationTime <= 0 { + partitionedGroupInfo.CreationTime = time.Now().Unix() + } + partitionedGroupFile := GetPartitionedGroupFile(partitionedGroupInfo.PartitionedGroupID) + partitionedGroupInfoContent, err := json.Marshal(partitionedGroupInfo) + if err != nil { + return nil, err + } + reader := bytes.NewReader(partitionedGroupInfoContent) + if err := bkt.Upload(ctx, partitionedGroupFile, reader); err != nil { + return nil, err + } + level.Info(logger).Log("msg", "created new partitioned group info", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return &partitionedGroupInfo, nil +} diff --git a/pkg/compactor/partitioned_group_info_test.go b/pkg/compactor/partitioned_group_info_test.go new file mode 100644 index 0000000000..aa4f27253b --- /dev/null +++ b/pkg/compactor/partitioned_group_info_test.go @@ -0,0 +1,882 @@ +package compactor + +import ( + "context" + "encoding/json" + "path" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" +) + +func TestPartitionedGroupInfo(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + rangeStart := (1 * time.Hour).Milliseconds() + rangeEnd := (2 * time.Hour).Milliseconds() + partitionedGroupID := uint32(12345) + for _, tcase := range []struct { + name string + partitionedGroupInfo PartitionedGroupInfo + }{ + { + name: "write partitioned group info 1", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + { + name: "write partitioned group info 2", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid1, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + ctx := context.Background() + testBkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + bkt := objstore.WithNoopInstr(testBkt) + logger := log.NewNopLogger() + writeRes, err := UpdatePartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo) + tcase.partitionedGroupInfo.CreationTime = writeRes.CreationTime + require.NoError(t, err) + require.Equal(t, tcase.partitionedGroupInfo, *writeRes) + readRes, err := ReadPartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo.PartitionedGroupID) + require.NoError(t, err) + require.Equal(t, 
tcase.partitionedGroupInfo, *readRes) + }) + } +} + +func TestGetPartitionIDsByBlock(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + ulid3 := ulid.MustNew(3, nil) + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: uint32(12345), + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + ulid2, + ulid3, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + Version: PartitionedGroupInfoVersion1, + } + + res0 := partitionedGroupInfo.getPartitionIDsByBlock(ulid0) + require.Equal(t, 3, len(res0)) + require.Contains(t, res0, 0) + require.Contains(t, res0, 1) + require.Contains(t, res0, 2) + + res1 := partitionedGroupInfo.getPartitionIDsByBlock(ulid1) + require.Equal(t, 2, len(res1)) + require.Contains(t, res1, 0) + require.Contains(t, res1, 2) + + res2 := partitionedGroupInfo.getPartitionIDsByBlock(ulid2) + require.Equal(t, 2, len(res2)) + require.Contains(t, res2, 1) + require.Contains(t, res2, 2) + + res3 := partitionedGroupInfo.getPartitionIDsByBlock(ulid3) + require.Equal(t, 1, len(res3)) + require.Contains(t, res3, 2) +} + +func TestGetPartitionedGroupStatus(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + partitionedGroupID := uint32(1234) + for _, tcase := range []struct { + name string + expectedResult PartitionedGroupStatus + partitionedGroupInfo PartitionedGroupInfo + partitionVisitMarkers []partitionVisitMarker + deletedBlock map[ulid.ULID]bool + noCompactBlock map[ulid.ULID]struct{} + }{ + { + name: "test one partition is not visited and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is pending and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * 
time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is completed and one partition is under visiting", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: false, + }, + }, + { + name: "test one partition is pending expired", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete with one block deleted and one partition is not visited with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + 
}, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid1: true, + }, + }, + { + name: "test one partition is complete and one partition is failed with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete and one partition is failed one block deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test all partitions are complete", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: true, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, 
+ }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test partitioned group created after visit marker", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is in progress not expired and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is not visited and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: 
partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + { + name: "test one partition is expired and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + bucketClient := &bucket.ClientMock{} + for _, partitionVisitMarker := range tcase.partitionVisitMarkers { + content, _ := json.Marshal(partitionVisitMarker) + bucketClient.MockGet(partitionVisitMarker.GetVisitMarkerFilePath(), string(content), nil) + } + + for _, partition := range tcase.partitionedGroupInfo.Partitions { + for _, blockID := range partition.Blocks { + metaPath := path.Join(blockID.String(), metadata.MetaFilename) + noCompactPath := path.Join(blockID.String(), metadata.NoCompactMarkFilename) + deletionMarkerPath := path.Join(blockID.String(), metadata.DeletionMarkFilename) + if hasDeletionMarker, ok := tcase.deletedBlock[blockID]; ok { + if hasDeletionMarker { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, true, nil) + } else { + bucketClient.MockExists(metaPath, false, nil) + } + } else { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, false, nil) + } + if _, ok := tcase.noCompactBlock[blockID]; ok { + bucketClient.MockExists(noCompactPath, true, nil) + } else { + bucketClient.MockExists(noCompactPath, false, nil) + } + } + } + bucketClient.MockGet(mock.Anything, "", nil) + + ctx := context.Background() + logger := log.NewNopLogger() + result := tcase.partitionedGroupInfo.getPartitionedGroupStatus(ctx, bucketClient, 60*time.Second, logger) + require.Equal(t, tcase.expectedResult.CanDelete, result.CanDelete) + require.Equal(t, tcase.expectedResult.IsCompleted, 
result.IsCompleted) + require.Equal(t, len(tcase.expectedResult.PendingOrFailedPartitions), len(result.PendingOrFailedPartitions)) + for _, partition := range result.PendingOrFailedPartitions { + require.Contains(t, tcase.expectedResult.PendingOrFailedPartitions, partition) + } + }) + } +} diff --git a/pkg/compactor/shuffle_sharding_grouper.go b/pkg/compactor/shuffle_sharding_grouper.go index a041f55b6b..f6328b8fb5 100644 --- a/pkg/compactor/shuffle_sharding_grouper.go +++ b/pkg/compactor/shuffle_sharding_grouper.go @@ -289,15 +289,20 @@ func (g *ShuffleShardingGrouper) checkSubringForCompactor() (bool, error) { return rs.Includes(g.ringLifecyclerAddr), nil } -// Get the hash of a group based on the UserID, and the starting and ending time of the group's range. +// hashGroup returns the hash of a group based on the UserID and the starting and ending time of the group's range. func hashGroup(userID string, rangeStart int64, rangeEnd int64) uint32 { groupString := fmt.Sprintf("%v%v%v", userID, rangeStart, rangeEnd) - groupHasher := fnv.New32a() + + return hashString(groupString) +} + +func hashString(s string) uint32 { + hasher := fnv.New32a() // Hasher never returns err. - _, _ = groupHasher.Write([]byte(groupString)) - groupHash := groupHasher.Sum32() + _, _ = hasher.Write([]byte(s)) + result := hasher.Sum32() - return groupHash + return result } func createGroupKey(groupHash uint32, group blocksGroup) string { diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index a771c22116..5b8a3640b0 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -684,8 +684,9 @@ func (t *Cortex) initAlertManager() (serv services.Service, err error) { func (t *Cortex) initCompactor() (serv services.Service, err error) { t.Cfg.Compactor.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort + ingestionReplicationFactor := t.Cfg.Ingester.LifecyclerConfig.RingConfig.ReplicationFactor - t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.Overrides) + t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.Overrides, ingestionReplicationFactor) if err != nil { return } diff --git a/pkg/storage/tsdb/meta_extensions.go b/pkg/storage/tsdb/meta_extensions.go new file mode 100644 index 0000000000..b6b8a7acf0 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions.go @@ -0,0 +1,71 @@ +package tsdb + +import ( + "fmt" + "strconv" + + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +type CortexMetaExtensions struct { + PartitionInfo *PartitionInfo `json:"partition_info,omitempty"` + TimeRange int64 `json:"time_range,omitempty"` +} + +type PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitioned_group_id"` + PartitionCount int `json:"partition_count"` + PartitionID int `json:"partition_id"` + PartitionedGroupCreationTime int64 `json:"partitioned_group_creation_time"` +} + +var ( + DefaultPartitionInfo = PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + PartitionedGroupCreationTime: 0, + } +) + +func (c *CortexMetaExtensions) TimeRangeStr() string { + return strconv.FormatInt(c.TimeRange, 10) +} + +func ConvertToCortexMetaExtensions(extensions any) (*CortexMetaExtensions, error) { + defaultPartitionInfo := DefaultPartitionInfo + cortexExtensions, err := metadata.ConvertExtensions(extensions, &CortexMetaExtensions{ + PartitionInfo: &defaultPartitionInfo, + }) + + if err != nil { + return nil, err + } + if 
cortexExtensions == nil { + return nil, nil + } + converted, ok := cortexExtensions.(*CortexMetaExtensions) + if !ok { + return nil, fmt.Errorf("unable to convert extensions to CortexMetaExtensions") + } + return converted, nil +} + +func ConvertToPartitionInfo(extensions any) (*PartitionInfo, error) { + cortexExtensions, err := ConvertToCortexMetaExtensions(extensions) + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return nil, nil + } + return cortexExtensions.PartitionInfo, nil +} + +func GetCortexMetaExtensionsFromMeta(meta metadata.Meta) (*CortexMetaExtensions, error) { + return ConvertToCortexMetaExtensions(meta.Thanos.Extensions) +} + +func GetPartitionInfo(meta metadata.Meta) (*PartitionInfo, error) { + return ConvertToPartitionInfo(meta.Thanos.Extensions) +} diff --git a/pkg/storage/tsdb/meta_extensions_test.go b/pkg/storage/tsdb/meta_extensions_test.go new file mode 100644 index 0000000000..6f296eb461 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions_test.go @@ -0,0 +1,182 @@ +package tsdb + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +func TestGetPartitionedInfo(t *testing.T) { + for _, tcase := range []struct { + name string + meta metadata.Meta + expected *PartitionInfo + }{ + { + name: "partition info with all information provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + { + name: "partition info with only PartitionedGroupID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionID: 5, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 5, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionCount provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionCount: 4, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 4, + }, + }, + { + name: "meta with empty partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{}, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "meta with nil partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: nil, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with non CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + dummy string + }{ + dummy: "test_dummy", + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta 
with invalid CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + } `json:"partition_info,omitempty"` + }{ + PartitionInfo: struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + }{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta does not have any extensions", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: nil, + }, + }, + expected: nil, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + result, err := GetPartitionInfo(tcase.meta) + assert.NoError(t, err) + if tcase.expected == nil { + assert.Nil(t, result) + } else { + assert.Equal(t, *tcase.expected, *result) + } + }) + } +} diff --git a/pkg/util/shard.go b/pkg/util/shard.go index 5d3de01cc4..364f39656f 100644 --- a/pkg/util/shard.go +++ b/pkg/util/shard.go @@ -11,7 +11,7 @@ const ( ShardingStrategyDefault = "default" ShardingStrategyShuffle = "shuffle-sharding" - // Compaction mode + // Compaction strategies CompactionStrategyDefault = "default" CompactionStrategyPartitioning = "partitioning" ) diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 38f31e8da0..7d2ab8518d 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -192,8 +192,10 @@ type Limits struct { MaxDownloadedBytesPerRequest int `yaml:"max_downloaded_bytes_per_request" json:"max_downloaded_bytes_per_request"` // Compactor. - CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` - CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` + CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorPartitionIndexSizeBytes int64 `yaml:"compactor_partition_index_size_bytes" json:"compactor_partition_index_size_bytes"` + CompactorPartitionSeriesCount int64 `yaml:"compactor_partition_series_count" json:"compactor_partition_series_count"` // This config doesn't have a CLI flag registered here because they're registered in // their own original config struct. @@ -282,6 +284,9 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.") f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") + // Default to 64GB because this is the hard limit of index size in Cortex + f.Int64Var(&l.CompactorPartitionIndexSizeBytes, "compactor.partition-index-size-bytes", 68719476736, "Index size limit in bytes for each compaction partition. 
0 means no limit") + f.Int64Var(&l.CompactorPartitionSeriesCount, "compactor.partition-series-count", 0, "Time series count limit for each compaction partition. 0 means no limit") // Store-gateway. f.Float64Var(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant. If the value is < 1 the shard size will be a percentage of the total store-gateways.") @@ -799,6 +804,16 @@ func (o *Overrides) CompactorTenantShardSize(userID string) int { return o.GetOverridesForUser(userID).CompactorTenantShardSize } +// CompactorPartitionIndexSizeBytes returns the per-tenant index size limit in bytes for each compaction partition. 0 means no limit. +func (o *Overrides) CompactorPartitionIndexSizeBytes(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionIndexSizeBytes +} + +// CompactorPartitionSeriesCount returns the per-tenant time series count limit for each compaction partition. 0 means no limit. +func (o *Overrides) CompactorPartitionSeriesCount(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionSeriesCount +} + // MetricRelabelConfigs returns the metric relabel configs for a given user. func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { return o.GetOverridesForUser(userID).MetricRelabelConfigs
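
A minimal sketch of how the pieces added in this change are expected to be consumed, outside the patch itself: it assumes a caller that already holds a block's metadata.Meta and a per-tenant series limit obtained via Overrides.CompactorPartitionSeriesCount(userID). The package name example, the helper decidePartitionCount, and its power-of-two growth rule are illustrative assumptions, not the partitioning grouper's actual algorithm.

package example // hypothetical consumer package, for illustration only

import (
	"fmt"

	"github.com/thanos-io/thanos/pkg/block/metadata"

	cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
)

// readPartitionInfo recovers the partition information stored in a compacted
// block's meta extensions. Per meta_extensions.go above and its tests: blocks
// without any extensions yield nil, and blocks whose extensions are not
// CortexMetaExtensions fall back to DefaultPartitionInfo (PartitionCount = 1).
func readPartitionInfo(meta metadata.Meta) {
	info, err := cortex_tsdb.GetPartitionInfo(meta)
	if err != nil {
		fmt.Println("failed to parse partition info:", err)
		return
	}
	if info == nil {
		fmt.Println("block carries no Cortex meta extensions")
		return
	}
	fmt.Printf("group=%d partition=%d of %d\n", info.PartitionedGroupID, info.PartitionID, info.PartitionCount)
}

// decidePartitionCount is a hypothetical helper showing how a per-tenant limit
// such as compactor.partition-series-count could bound the series per
// partition; the power-of-two growth is assumed for illustration and is not
// necessarily what the partitioning grouper does.
func decidePartitionCount(totalSeries, seriesLimit int64) int {
	if seriesLimit <= 0 || totalSeries <= seriesLimit {
		return 1 // 0 means no limit, so a single partition is enough
	}
	count := 1
	for int64(count)*seriesLimit < totalSeries {
		count *= 2
	}
	return count
}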