Skip to content

Do not resync blocks in running store gateways during rollout deployment and container restart #5363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* [BUGFIX] Storage: Bucket index updater should ignore meta not found for partial blocks. #5343
* [BUGFIX] Ring: Add JOINING state to read operation. #5346
* [BUGFIX] Compactor: Partial block with only visit marker should be deleted even there is no deletion marker. #5342
* [ENHANCEMENT] Do not resync blocks in running store gateways during rollout deployment and container restart. #5363

## 1.15.1 2023-04-26

Expand Down
20 changes: 16 additions & 4 deletions pkg/ring/replication_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,24 +127,36 @@ func (r ReplicationSet) GetAddressesWithout(exclude string) []string {
return addrs
}

// HasReplicationSetChanged returns true if two replications sets are the same (with possibly different timestamps),
// false if they differ in any way (number of instances, instance states, tokens, zones, ...).
// HasReplicationSetChanged returns false if two replications sets are the same (with possibly different timestamps),
// true if they differ in any way (number of instances, instance states, tokens, zones, ...).
func HasReplicationSetChanged(before, after ReplicationSet) bool {
return hasReplicationSetChangedExcluding(before, after, func(i *InstanceDesc) {
i.Timestamp = 0
})
}

// HasReplicationSetChangedWithoutState returns true if two replications sets
// HasReplicationSetChangedWithoutState returns false if two replications sets
// are the same (with possibly different timestamps and instance states),
// false if they differ in any other way (number of instances, tokens, zones, ...).
// true if they differ in any other way (number of instances, tokens, zones, ...).
func HasReplicationSetChangedWithoutState(before, after ReplicationSet) bool {
return hasReplicationSetChangedExcluding(before, after, func(i *InstanceDesc) {
i.Timestamp = 0
i.State = PENDING
})
}

// HasReplicationSetChangedWithoutStateAndAddress returns false if two replications sets
// are the same (with possibly different timestamps, instance states and address),
// true if they differ in any other way (number of instances, tokens, or zones).
func HasReplicationSetChangedWithoutStateAndAddress(before, after ReplicationSet) bool {
return hasReplicationSetChangedExcluding(before, after, func(i *InstanceDesc) {
i.Timestamp = 0
i.State = PENDING
i.Addr = ""
i.RegisteredTimestamp = 0
})
}

// Do comparison of replicasets, but apply a function first
// to be able to exclude (reset) some values
func hasReplicationSetChangedExcluding(before, after ReplicationSet, exclude func(*InstanceDesc)) bool {
Expand Down
42 changes: 39 additions & 3 deletions pkg/ring/replication_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,10 @@ var (
},
}
replicationSetChangesTestCases = map[string]struct {
nextState ReplicationSet
expectHasReplicationSetChanged bool
expectHasReplicationSetChangedWithoutState bool
nextState ReplicationSet
expectHasReplicationSetChanged bool
expectHasReplicationSetChangedWithoutState bool
expectHasReplicationSetChangedWithoutStateAndAddress bool
}{
"timestamp changed": {
ReplicationSet{
Expand All @@ -265,6 +266,7 @@ var (
},
false,
false,
false,
},
"state changed": {
ReplicationSet{
Expand All @@ -276,6 +278,7 @@ var (
},
true,
false,
false,
},
"more instances": {
ReplicationSet{
Expand All @@ -288,6 +291,30 @@ var (
},
true,
true,
true,
},
"less instances": {
ReplicationSet{
Instances: []InstanceDesc{
{Addr: "127.0.0.1"},
{Addr: "127.0.0.2"},
},
},
true,
true,
true,
},
"replaced instance": {
ReplicationSet{
Instances: []InstanceDesc{
{Addr: "127.0.0.1"},
{Addr: "127.0.0.2"},
{Addr: "127.0.0.5"},
},
},
true,
true,
false,
},
}
)
Expand All @@ -309,3 +336,12 @@ func TestHasReplicationSetChangedWithoutState_IgnoresTimeStampAndState(t *testin
})
}
}

func TestHasReplicationSetChangedWithoutStateAndAddress_IgnoresTimeStampAndStateAndAddress(t *testing.T) {
// Only testing difference to underlying Equal function
for testName, testData := range replicationSetChangesTestCases {
t.Run(testName, func(t *testing.T) {
assert.Equal(t, testData.expectHasReplicationSetChangedWithoutStateAndAddress, HasReplicationSetChangedWithoutStateAndAddress(replicationSetChangesInitialState, testData.nextState), "HasReplicationSetChangedWithoutStateAndAddress wrong result")
})
}
}
3 changes: 2 additions & 1 deletion pkg/storegateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,8 @@ func (g *StoreGateway) running(ctx context.Context) error {
// replication set which we use to compare with the previous state.
currRingState, _ := g.ring.GetAllHealthy(BlocksOwnerSync) // nolint:errcheck

if ring.HasReplicationSetChanged(ringLastState, currRingState) {
// Ignore address when comparing to avoid block re-sync if tokens are persisted with tokens_file_path
if ring.HasReplicationSetChangedWithoutStateAndAddress(ringLastState, currRingState) {
ringLastState = currRingState
g.syncStores(ctx, syncReasonRingChange)
}
Expand Down
18 changes: 15 additions & 3 deletions pkg/storegateway/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ func TestStoreGateway_BlocksSyncWithDefaultSharding_RingTopologyChangedAfterScal
shardingStrategy = util.ShardingStrategyDefault
replicationFactor = 3
numInitialGateways = 4
numScaleUpGateways = 6
numScaleUpGateways = 2
expectedBlocksLoaded = 3 * numBlocks // blocks are replicated 3 times
)

Expand Down Expand Up @@ -615,7 +615,7 @@ func TestStoreGateway_SyncOnRingTopologyChanged(t *testing.T) {
},
expectedSync: true,
},
"should sync when an instance changes state": {
"should NOT sync when an instance changes state": {
setupRing: func(desc *ring.Desc) {
desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.JOINING, registeredAt)
Expand All @@ -625,7 +625,19 @@ func TestStoreGateway_SyncOnRingTopologyChanged(t *testing.T) {
instance.State = ring.ACTIVE
desc.Ingesters["instance-2"] = instance
},
expectedSync: true,
expectedSync: false,
},
"should NOT sync when an instance address is replaced": {
setupRing: func(desc *ring.Desc) {
desc.AddIngester("instance-1", "127.0.0.1", "", ring.Tokens{1, 2, 3}, ring.ACTIVE, registeredAt)
desc.AddIngester("instance-2", "127.0.0.2", "", ring.Tokens{4, 5, 6}, ring.JOINING, registeredAt)
},
updateRing: func(desc *ring.Desc) {
instance := desc.Ingesters["instance-2"]
instance.Addr = "127.0.0.3"
desc.Ingesters["instance-2"] = instance
},
expectedSync: false,
},
"should sync when an healthy instance becomes unhealthy": {
setupRing: func(desc *ring.Desc) {
Expand Down