Skip to content

Optimize cleaner run time #6815

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions pkg/compactor/blocks_cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,23 @@ func (c *BlocksCleaner) loop(ctx context.Context) error {
continue
}
cleanJobTimestamp := time.Now().Unix()
usersChan <- &cleanerJob{

select {
case usersChan <- &cleanerJob{
users: activeUsers,
timestamp: cleanJobTimestamp,
}:
default:
level.Warn(c.logger).Log("msg", "unable to push cleaning job to usersChan")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it is worth adding a counter metric for failing to push an active-user clean job to the queue. Ideally it should work on the next retry, but if the current job is taking too long, it is possible to keep dropping those jobs for an extended period.

The symptom of this would be a bucket index update delay, so it might not be hard to figure out the root cause from that without the metric. The metric is still nice to have, though. I think we have a similar metric in the Query Frontend.

}
deleteChan <- &cleanerJob{

select {
case deleteChan <- &cleanerJob{
users: deletedUsers,
timestamp: cleanJobTimestamp,
}:
default:
level.Warn(c.logger).Log("msg", "unable to push deletion job to deleteChan")
}

case <-ctx.Done():
Expand Down Expand Up @@ -491,7 +501,7 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userLog
return err
}

if deleted, err := bucket.DeletePrefix(ctx, userBucket, bucketindex.MarkersPathname, userLogger); err != nil {
if deleted, err := bucket.DeletePrefix(ctx, userBucket, bucketindex.MarkersPathname, userLogger, defaultDeleteBlocksConcurrency); err != nil {
return errors.Wrap(err, "failed to delete marker files")
} else if deleted > 0 {
level.Info(userLogger).Log("msg", "deleted marker files for tenant marked for deletion", "count", deleted)
Expand All @@ -503,15 +513,15 @@ func (c *BlocksCleaner) deleteUserMarkedForDeletion(ctx context.Context, userLog
}

func (c *BlocksCleaner) deleteNonDataFiles(ctx context.Context, userLogger log.Logger, userBucket objstore.InstrumentedBucket) error {
if deleted, err := bucket.DeletePrefix(ctx, userBucket, block.DebugMetas, userLogger); err != nil {
if deleted, err := bucket.DeletePrefix(ctx, userBucket, block.DebugMetas, userLogger, defaultDeleteBlocksConcurrency); err != nil {
return errors.Wrap(err, "failed to delete "+block.DebugMetas)
} else if deleted > 0 {
level.Info(userLogger).Log("msg", "deleted files under "+block.DebugMetas+" for tenant marked for deletion", "count", deleted)
}

if c.cfg.CompactionStrategy == util.CompactionStrategyPartitioning {
// Clean up partitioned group info files
if deleted, err := bucket.DeletePrefix(ctx, userBucket, PartitionedGroupDirectory, userLogger); err != nil {
if deleted, err := bucket.DeletePrefix(ctx, userBucket, PartitionedGroupDirectory, userLogger, defaultDeleteBlocksConcurrency); err != nil {
return errors.Wrap(err, "failed to delete "+PartitionedGroupDirectory)
} else if deleted > 0 {
level.Info(userLogger).Log("msg", "deleted files under "+PartitionedGroupDirectory+" for tenant marked for deletion", "count", deleted)
Expand Down Expand Up @@ -771,7 +781,7 @@ func (c *BlocksCleaner) cleanPartitionedGroupInfo(ctx context.Context, userBucke

if extraInfo.status.CanDelete || extraInfo.status.DeleteVisitMarker {
// Remove partition visit markers
if _, err := bucket.DeletePrefix(ctx, userBucket, GetPartitionVisitMarkerDirectoryPath(partitionedGroupInfo.PartitionedGroupID), userLogger); err != nil {
if _, err := bucket.DeletePrefix(ctx, userBucket, GetPartitionVisitMarkerDirectoryPath(partitionedGroupInfo.PartitionedGroupID), userLogger, defaultDeleteBlocksConcurrency); err != nil {
level.Warn(userLogger).Log("msg", "failed to delete partition visit markers for partitioned group", "partitioned_group_info", partitionedGroupInfoFile, "err", err)
} else {
level.Info(userLogger).Log("msg", "deleted partition visit markers for partitioned group", "partitioned_group_info", partitionedGroupInfoFile)
Expand Down
38 changes: 28 additions & 10 deletions pkg/storage/bucket/bucket_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,45 @@ import (
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/thanos-io/objstore"
"go.uber.org/atomic"

"github.com/cortexproject/cortex/pkg/util/concurrency"
)

// DeletePrefix removes all objects with given prefix, recursively.
// It returns number of deleted objects.
// If deletion of any object fails, it returns error and stops.
func DeletePrefix(ctx context.Context, bkt objstore.Bucket, prefix string, logger log.Logger) (int, error) {
result := 0
err := bkt.Iter(ctx, prefix, func(name string) error {
if strings.HasSuffix(name, objstore.DirDelim) {
deleted, err := DeletePrefix(ctx, bkt, name, logger)
result += deleted
return err
}
func DeletePrefix(ctx context.Context, bkt objstore.Bucket, prefix string, logger log.Logger, maxConcurrency int) (int, error) {
keys, err := ListPrefixes(ctx, bkt, prefix, logger)
if err != nil {
return 0, err
}

result := atomic.NewInt32(0)
err = concurrency.ForEach(ctx, concurrency.CreateJobsFromStrings(keys), maxConcurrency, func(ctx context.Context, key interface{}) error {
name := key.(string)
if err := bkt.Delete(ctx, name); err != nil {
return err
}
result++
result.Inc()
level.Debug(logger).Log("msg", "deleted file", "file", name)
return nil
})

return result, err
return int(result.Load()), err
}

// ListPrefixes returns the names of all objects found under prefix,
// descending recursively into any sub-prefixes (directory entries).
func ListPrefixes(ctx context.Context, bkt objstore.Bucket, prefix string, logger log.Logger) ([]string, error) {
	var names []string
	iterErr := bkt.Iter(ctx, prefix, func(entry string) error {
		// Entries ending with the directory delimiter are sub-prefixes;
		// anything else is a plain object name.
		if !strings.HasSuffix(entry, objstore.DirDelim) {
			names = append(names, entry)
			return nil
		}

		nested, err := ListPrefixes(ctx, bkt, entry, logger)
		names = append(names, nested...)
		return err
	})
	return names, iterErr
}
23 changes: 22 additions & 1 deletion pkg/storage/bucket/bucket_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package bucket

import (
"context"
"fmt"
"strings"
"testing"

Expand All @@ -21,8 +22,28 @@ func TestDeletePrefix(t *testing.T) {
require.NoError(t, mem.Upload(context.Background(), "prefix/sub2/4", strings.NewReader("hello")))
require.NoError(t, mem.Upload(context.Background(), "outside/obj", strings.NewReader("hello")))

del, err := DeletePrefix(context.Background(), mem, "prefix", log.NewNopLogger())
del, err := DeletePrefix(context.Background(), mem, "prefix", log.NewNopLogger(), 1)
require.NoError(t, err)
assert.Equal(t, 4, del)
assert.Equal(t, 2, len(mem.Objects()))
}

// TestDeletePrefixConcurrent verifies that DeletePrefix, running with many
// workers, deletes every object under the prefix and nothing outside it.
func TestDeletePrefixConcurrent(t *testing.T) {
	bkt := objstore.NewInMemBucket()
	ctx := context.Background()

	// A few objects inside and outside the prefix, including sub-prefixes.
	require.NoError(t, bkt.Upload(ctx, "obj", strings.NewReader("hello")))
	require.NoError(t, bkt.Upload(ctx, "prefix/1", strings.NewReader("hello")))
	require.NoError(t, bkt.Upload(ctx, "prefix/2", strings.NewReader("hello")))
	require.NoError(t, bkt.Upload(ctx, "prefix/sub1/3", strings.NewReader("hello")))
	require.NoError(t, bkt.Upload(ctx, "prefix/sub2/4", strings.NewReader("hello")))
	require.NoError(t, bkt.Upload(ctx, "outside/obj", strings.NewReader("hello")))

	// A large batch under the prefix to actually exercise concurrent deletion.
	const extra = 10000
	for i := 0; i < extra; i++ {
		require.NoError(t, bkt.Upload(ctx, fmt.Sprintf("prefix/sub/%d", i), strings.NewReader(fmt.Sprintf("hello%d", i))))
	}

	deleted, err := DeletePrefix(ctx, bkt, "prefix", log.NewNopLogger(), 100)
	require.NoError(t, err)
	assert.Equal(t, 4+extra, deleted)
	// Only the two objects outside the prefix should remain.
	assert.Equal(t, 2, len(bkt.Objects()))
}
Loading