-
Notifications
You must be signed in to change notification settings - Fork 229
Batch DescribeLogGroups calls #1717
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fd94cc0
d3b0fe1
a8020a8
4ea1bb6
ebcb7bb
166c9d1
0792a3d
dfe6c5c
2be444c
6fc438c
c3019aa
be352fc
a9c8794
6806569
4a2885c
39199ac
c665bce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -16,8 +16,9 @@ import ( | |||
) | ||||
|
||||
const ( | ||||
cacheTTL = 5 * time.Second | ||||
retentionChannelSize = 100 | ||||
retentionChannelSize = 100 | ||||
cacheTTL = 5 * time.Second | ||||
logGroupIdentifierLimit = 50 | ||||
// max wait time with backoff and jittering: | ||||
// 0 + 2.4 + 4.8 + 9.6 + 10 ~= 26.8 sec | ||||
baseRetryDelay = 1 * time.Second | ||||
|
@@ -174,44 +175,56 @@ func (m *targetManager) createLogStream(t Target) error { | |||
} | ||||
|
||||
func (m *targetManager) processDescribeLogGroup() { | ||||
for target := range m.dlg { | ||||
for attempt := 0; attempt < numBackoffRetries; attempt++ { | ||||
currentRetention, err := m.getRetention(target) | ||||
if err != nil { | ||||
m.logger.Errorf("failed to describe log group retention for target %v: %v", target, err) | ||||
time.Sleep(m.calculateBackoff(attempt)) | ||||
continue | ||||
t := time.NewTicker(5 * time.Second) | ||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm sure it's fine, but what's the reasoning behind the ticker vs. the timer? Are we anticipating …
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is possible, but maybe this covers the scenario I'm thinking of: amazon-cloudwatch-agent/logs/logs.go, Line 174 in 47683ec.
Another scenario: my thinking is that it's not too safe to assume that it will only need to be called once with the timer. The system could be slow to initialize the targets, so having this on a timer could potentially miss those log groups. |
||||
defer t.Stop() | ||||
|
||||
batch := make(map[string]Target, logGroupIdentifierLimit) | ||||
|
||||
for { | ||||
select { | ||||
case target := <-m.dlg: | ||||
batch[target.Group] = target | ||||
if len(batch) == logGroupIdentifierLimit { | ||||
m.updateTargetBatch(batch) | ||||
// Reset batch | ||||
batch = make(map[string]Target, logGroupIdentifierLimit) | ||||
} | ||||
|
||||
if currentRetention != target.Retention && target.Retention > 0 { | ||||
m.logger.Debugf("queueing log group %v to update retention policy", target.Group) | ||||
m.prp <- target | ||||
case <-t.C: | ||||
if len(batch) > 0 { | ||||
m.updateTargetBatch(batch) | ||||
// Reset batch | ||||
batch = make(map[string]Target, logGroupIdentifierLimit) | ||||
} | ||||
break // no change in retention | ||||
} | ||||
} | ||||
} | ||||
|
||||
func (m *targetManager) getRetention(target Target) (int, error) { | ||||
input := &cloudwatchlogs.DescribeLogGroupsInput{ | ||||
LogGroupNamePrefix: aws.String(target.Group), | ||||
func (m *targetManager) updateTargetBatch(targets map[string]Target) { | ||||
identifiers := make([]*string, 0, len(targets)) | ||||
for logGroup := range targets { | ||||
identifiers = append(identifiers, aws.String(logGroup)) | ||||
} | ||||
|
||||
output, err := m.service.DescribeLogGroups(input) | ||||
if err != nil { | ||||
return 0, fmt.Errorf("describe log groups failed: %w", err) | ||||
describeLogGroupsInput := &cloudwatchlogs.DescribeLogGroupsInput{ | ||||
LogGroupIdentifiers: identifiers, | ||||
Limit: aws.Int64(50), | ||||
} | ||||
for attempt := 0; attempt < numBackoffRetries; attempt++ { | ||||
output, err := m.service.DescribeLogGroups(describeLogGroupsInput) | ||||
duhminick marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
if err != nil { | ||||
m.logger.Errorf("failed to describe log group retention for targets %v: %v", targets, err) | ||||
time.Sleep(m.calculateBackoff(attempt)) | ||||
continue | ||||
} | ||||
|
||||
for _, group := range output.LogGroups { | ||||
if *group.LogGroupName == target.Group { | ||||
if group.RetentionInDays == nil { | ||||
return 0, nil | ||||
for _, logGroups := range output.LogGroups { | ||||
target := targets[*logGroups.LogGroupName] | ||||
if (logGroups.RetentionInDays == nil || target.Retention != int(*logGroups.RetentionInDays)) && target.Retention > 0 { | ||||
duhminick marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
m.logger.Debugf("queueing log group %v to update retention policy", target.Group) | ||||
m.prp <- target | ||||
} | ||||
return int(*group.RetentionInDays), nil | ||||
} | ||||
break | ||||
} | ||||
|
||||
return 0, fmt.Errorf("log group %v not found", target.Group) | ||||
} | ||||
|
||||
func (m *targetManager) processPutRetentionPolicy() { | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why 50? I'm guessing this is the API limit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, it's the API limit :/