Skip to content

Commit 5268d53

Browse files
shiv0408dk2k
authored andcommitted
[Remote State] Upload incremental cluster state on master re-election (opensearch-project#15145)
* Upload incremental cluster state on master re-election Signed-off-by: Shivansh Arora <[email protected]>
1 parent 56f0a32 commit 5268d53

File tree

12 files changed

+732
-126
lines changed

12 files changed

+732
-126
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5757
- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471))
5858
- Static RemotePublication setting added, removed experimental feature flag ([#15478](https://github.com/opensearch-project/OpenSearch/pull/15478))
5959
- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637))
60+
- [Remote Publication] Upload incremental cluster state on master re-election ([#15145](https://github.com/opensearch-project/OpenSearch/pull/15145))
6061
- Making _cat/allocation API use indexLevelStats ([#15292](https://github.com/opensearch-project/OpenSearch/pull/15292))
6162

6263
### Dependencies

server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.opensearch.common.blobstore.BlobPath;
2121
import org.opensearch.common.settings.Settings;
2222
import org.opensearch.discovery.DiscoveryStats;
23+
import org.opensearch.gateway.GatewayMetaState;
2324
import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata;
2425
import org.opensearch.gateway.remote.model.RemoteClusterMetadataManifest;
2526
import org.opensearch.gateway.remote.model.RemoteRoutingTableBlobStore;
@@ -43,6 +44,7 @@
4344
import java.util.Map;
4445
import java.util.Objects;
4546
import java.util.Set;
47+
import java.util.concurrent.ExecutionException;
4648
import java.util.function.Function;
4749
import java.util.stream.Collectors;
4850

@@ -74,15 +76,15 @@ public class RemoteStatePublicationIT extends RemoteStoreBaseIntegTestCase {
7476
private static final String REMOTE_STATE_PREFIX = "!";
7577
private static final String REMOTE_ROUTING_PREFIX = "_";
7678
private boolean isRemoteStateEnabled = true;
77-
private String isRemotePublicationEnabled = "true";
79+
private boolean isRemotePublicationEnabled = true;
7880
private boolean hasRemoteStateCharPrefix;
7981
private boolean hasRemoteRoutingCharPrefix;
8082

8183
@Before
8284
public void setup() {
8385
asyncUploadMockFsRepo = false;
8486
isRemoteStateEnabled = true;
85-
isRemotePublicationEnabled = "true";
87+
isRemotePublicationEnabled = true;
8688
hasRemoteStateCharPrefix = randomBoolean();
8789
hasRemoteRoutingCharPrefix = randomBoolean();
8890
}
@@ -112,6 +114,7 @@ protected Settings nodeSettings(int nodeOrdinal) {
112114
RemoteClusterStateService.REMOTE_CLUSTER_STATE_CHECKSUM_VALIDATION_MODE_SETTING.getKey(),
113115
RemoteClusterStateService.RemoteClusterStateValidationMode.FAILURE
114116
)
117+
.put(REMOTE_PUBLICATION_SETTING_KEY, isRemotePublicationEnabled)
115118
.put(
116119
RemoteClusterStateService.CLUSTER_REMOTE_STORE_STATE_PATH_PREFIX.getKey(),
117120
hasRemoteStateCharPrefix ? REMOTE_STATE_PREFIX : ""
@@ -341,6 +344,59 @@ public void doAfterNodes(int n, Client client) {
341344
});
342345
}
343346

347+
public void testMasterReElectionUsesIncrementalUpload() throws IOException {
348+
prepareCluster(3, 2, INDEX_NAME, 1, 1);
349+
PersistedStateRegistry persistedStateRegistry = internalCluster().getClusterManagerNodeInstance(PersistedStateRegistry.class);
350+
GatewayMetaState.RemotePersistedState remotePersistedState = (GatewayMetaState.RemotePersistedState) persistedStateRegistry
351+
.getPersistedState(PersistedStateRegistry.PersistedStateType.REMOTE);
352+
ClusterMetadataManifest manifest = remotePersistedState.getLastAcceptedManifest();
353+
// force elected master to step down
354+
internalCluster().stopCurrentClusterManagerNode();
355+
ensureStableCluster(4);
356+
357+
persistedStateRegistry = internalCluster().getClusterManagerNodeInstance(PersistedStateRegistry.class);
358+
CoordinationState.PersistedState persistedStateAfterElection = persistedStateRegistry.getPersistedState(
359+
PersistedStateRegistry.PersistedStateType.REMOTE
360+
);
361+
ClusterMetadataManifest manifestAfterElection = persistedStateAfterElection.getLastAcceptedManifest();
362+
363+
// coordination metadata is updated, it will be unequal
364+
assertNotEquals(manifest.getCoordinationMetadata(), manifestAfterElection.getCoordinationMetadata());
365+
// all other attributes are not uploaded again and will be pointing to same files in manifest after new master is elected
366+
assertEquals(manifest.getClusterUUID(), manifestAfterElection.getClusterUUID());
367+
assertEquals(manifest.getIndices(), manifestAfterElection.getIndices());
368+
assertEquals(manifest.getSettingsMetadata(), manifestAfterElection.getSettingsMetadata());
369+
assertEquals(manifest.getTemplatesMetadata(), manifestAfterElection.getTemplatesMetadata());
370+
assertEquals(manifest.getCustomMetadataMap(), manifestAfterElection.getCustomMetadataMap());
371+
assertEquals(manifest.getRoutingTableVersion(), manifest.getRoutingTableVersion());
372+
assertEquals(manifest.getIndicesRouting(), manifestAfterElection.getIndicesRouting());
373+
}
374+
375+
public void testVotingConfigAreCommitted() throws ExecutionException, InterruptedException {
376+
prepareCluster(3, 2, INDEX_NAME, 1, 2);
377+
ensureStableCluster(5);
378+
ensureGreen(INDEX_NAME);
379+
// add two new nodes to the cluster, to update the voting config
380+
internalCluster().startClusterManagerOnlyNodes(2, Settings.EMPTY);
381+
ensureStableCluster(7);
382+
383+
internalCluster().getInstances(PersistedStateRegistry.class).forEach(persistedStateRegistry -> {
384+
CoordinationState.PersistedState localState = persistedStateRegistry.getPersistedState(
385+
PersistedStateRegistry.PersistedStateType.LOCAL
386+
);
387+
CoordinationState.PersistedState remoteState = persistedStateRegistry.getPersistedState(
388+
PersistedStateRegistry.PersistedStateType.REMOTE
389+
);
390+
if (remoteState != null) {
391+
assertEquals(
392+
localState.getLastAcceptedState().getLastCommittedConfiguration(),
393+
remoteState.getLastAcceptedState().getLastCommittedConfiguration()
394+
);
395+
assertEquals(5, remoteState.getLastAcceptedState().getLastCommittedConfiguration().getNodeIds().size());
396+
}
397+
});
398+
}
399+
344400
private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse) {
345401
// assert cluster state stats for data node
346402
DiscoveryStats dataNodeDiscoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats();

server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -76,29 +76,4 @@ protected void verifyRestoredData(Map<String, Long> indexStats, String indexName
7676
protected void verifyRestoredData(Map<String, Long> indexStats, String indexName) throws Exception {
7777
verifyRestoredData(indexStats, indexName, true);
7878
}
79-
80-
public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) {
81-
prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY);
82-
}
83-
84-
public void prepareCluster(
85-
int numClusterManagerNodes,
86-
int numDataOnlyNodes,
87-
String indices,
88-
int replicaCount,
89-
int shardCount,
90-
Settings settings
91-
) {
92-
prepareCluster(numClusterManagerNodes, numDataOnlyNodes, settings);
93-
for (String index : indices.split(",")) {
94-
createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
95-
ensureYellowAndNoInitializingShards(index);
96-
ensureGreen(index);
97-
}
98-
}
99-
100-
public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, Settings settings) {
101-
internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings);
102-
internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings);
103-
}
10479
}

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -351,13 +351,7 @@ protected void restore(boolean restoreAllShards, String... indices) {
351351
}
352352

353353
protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) {
354-
internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes);
355-
internalCluster().startDataOnlyNodes(numDataOnlyNodes);
356-
for (String index : indices.split(",")) {
357-
createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
358-
ensureYellowAndNoInitializingShards(index);
359-
ensureGreen(index);
360-
}
354+
prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY);
361355
}
362356

363357
protected void prepareCluster(
@@ -368,11 +362,16 @@ protected void prepareCluster(
368362
int shardCount,
369363
Settings settings
370364
) {
371-
internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings);
372-
internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings);
365+
prepareCluster(numClusterManagerNodes, numDataOnlyNodes, settings);
373366
for (String index : indices.split(",")) {
374367
createIndex(index, remoteStoreIndexSettings(replicaCount, shardCount));
368+
ensureYellowAndNoInitializingShards(index);
375369
ensureGreen(index);
376370
}
377371
}
372+
373+
protected void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, Settings settings) {
374+
internalCluster().startClusterManagerOnlyNodes(numClusterManagerNodes, settings);
375+
internalCluster().startDataOnlyNodes(numDataOnlyNodes, settings);
376+
}
378377
}

server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.opensearch.cluster.node.DiscoveryNode;
4141
import org.opensearch.common.settings.Settings;
4242
import org.opensearch.common.util.io.IOUtils;
43+
import org.opensearch.gateway.remote.ClusterMetadataManifest;
4344

4445
import java.io.Closeable;
4546
import java.io.IOException;
@@ -104,6 +105,7 @@ public CoordinationState(
104105
.getLastAcceptedConfiguration();
105106
this.publishVotes = new VoteCollection();
106107
this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings);
108+
// ToDo: revisit this check while making the setting dynamic
107109
this.isRemotePublicationEnabled = isRemoteStateEnabled
108110
&& REMOTE_PUBLICATION_SETTING.get(settings)
109111
&& localNode.isRemoteStatePublicationEnabled();
@@ -459,6 +461,9 @@ public PublishResponse handlePublishRequest(PublishRequest publishRequest) {
459461
clusterState.term()
460462
);
461463
persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).setLastAcceptedState(clusterState);
464+
if (shouldUpdateRemotePersistedState(publishRequest)) {
465+
updateRemotePersistedStateOnPublishRequest(publishRequest);
466+
}
462467
assert getLastAcceptedState() == clusterState;
463468

464469
return new PublishResponse(clusterState.term(), clusterState.version());
@@ -571,6 +576,9 @@ public void handleCommit(ApplyCommitRequest applyCommit) {
571576
);
572577

573578
persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL).markLastAcceptedStateAsCommitted();
579+
if (shouldCommitRemotePersistedState()) {
580+
persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).markLastAcceptedStateAsCommitted();
581+
}
574582
assert getLastCommittedConfiguration().equals(getLastAcceptedConfiguration());
575583
}
576584

@@ -616,6 +624,33 @@ public void close() throws IOException {
616624
IOUtils.close(persistedStateRegistry);
617625
}
618626

627+
private boolean shouldUpdateRemotePersistedState(PublishRequest publishRequest) {
628+
return persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) != null
629+
&& publishRequest.getAcceptedState().getNodes().isLocalNodeElectedClusterManager() == false;
630+
}
631+
632+
private void updateRemotePersistedStateOnPublishRequest(PublishRequest publishRequest) {
633+
if (publishRequest instanceof RemoteStatePublishRequest) {
634+
persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedState(publishRequest.getAcceptedState());
635+
persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)
636+
.setLastAcceptedManifest(((RemoteStatePublishRequest) publishRequest).getAcceptedManifest());
637+
} else {
638+
// We will end up here if PublishRequest was sent not using Remote Store even with remote persisted state on this node
639+
persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedState(null);
640+
persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).setLastAcceptedManifest(null);
641+
}
642+
}
643+
644+
private boolean shouldCommitRemotePersistedState() {
645+
return persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE) != null
646+
&& persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL)
647+
.getLastAcceptedState()
648+
.getNodes()
649+
.isLocalNodeElectedClusterManager() == false
650+
&& persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState() != null
651+
&& persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedManifest() != null;
652+
}
653+
619654
/**
620655
* Pluggable persistence layer for {@link CoordinationState}.
621656
*
@@ -653,6 +688,22 @@ public interface PersistedState extends Closeable {
653688
*/
654689
PersistedStateStats getStats();
655690

691+
/**
692+
* Returns the last accepted {@link ClusterMetadataManifest}.
693+
*
694+
* @return The last accepted {@link ClusterMetadataManifest}, or null if no manifest
695+
* has been accepted yet.
696+
*/
697+
default ClusterMetadataManifest getLastAcceptedManifest() {
698+
// return null by default, this method needs to be overridden wherever required
699+
return null;
700+
}
701+
702+
/**
703+
* Sets the last accepted {@link ClusterMetadataManifest}.
704+
*/
705+
default void setLastAcceptedManifest(ClusterMetadataManifest manifest) {}
706+
656707
/**
657708
* Marks the last accepted cluster state as committed.
658709
* After a successful call to this method, {@link #getLastAcceptedState()} should return the last cluster state that was set,
@@ -661,14 +712,7 @@ public interface PersistedState extends Closeable {
661712
*/
662713
default void markLastAcceptedStateAsCommitted() {
663714
final ClusterState lastAcceptedState = getLastAcceptedState();
664-
Metadata.Builder metadataBuilder = null;
665-
if (lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()) == false) {
666-
final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder(lastAcceptedState.coordinationMetadata())
667-
.lastCommittedConfiguration(lastAcceptedState.getLastAcceptedConfiguration())
668-
.build();
669-
metadataBuilder = Metadata.builder(lastAcceptedState.metadata());
670-
metadataBuilder.coordinationMetadata(coordinationMetadata);
671-
}
715+
Metadata.Builder metadataBuilder = commitVotingConfiguration(lastAcceptedState);
672716
// if we receive a commit from a Zen1 cluster-manager that has not recovered its state yet,
673717
// the cluster uuid might not been known yet.
674718
assert lastAcceptedState.metadata().clusterUUID().equals(Metadata.UNKNOWN_CLUSTER_UUID) == false
@@ -693,6 +737,18 @@ default void markLastAcceptedStateAsCommitted() {
693737
}
694738
}
695739

740+
default Metadata.Builder commitVotingConfiguration(ClusterState lastAcceptedState) {
741+
Metadata.Builder metadataBuilder = null;
742+
if (lastAcceptedState.getLastAcceptedConfiguration().equals(lastAcceptedState.getLastCommittedConfiguration()) == false) {
743+
final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder(lastAcceptedState.coordinationMetadata())
744+
.lastCommittedConfiguration(lastAcceptedState.getLastAcceptedConfiguration())
745+
.build();
746+
metadataBuilder = Metadata.builder(lastAcceptedState.metadata());
747+
metadataBuilder.coordinationMetadata(coordinationMetadata);
748+
}
749+
return metadataBuilder;
750+
}
751+
696752
default void close() throws IOException {}
697753
}
698754

0 commit comments

Comments
 (0)