Skip to content

Commit a8afdfa

Browse files
linuxpishiv0408
authored and committed
revive remote cluster state auto restore integ tests (opensearch-project#10503)
* revive remote cluster state auto restore integ tests Signed-off-by: bansvaru <[email protected]> Signed-off-by: Shivansh Arora <[email protected]>
1 parent d7a8384 commit a8afdfa

File tree

3 files changed

+82
-167
lines changed

3 files changed

+82
-167
lines changed

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,13 @@ public void assertRemoteStoreRepositoryOnAllNodes(String repositoryName) {
353353
// Validate that all the restricted settings are intact on all the nodes.
354354
repository.getRestrictedSystemRepositorySettings()
355355
.stream()
356-
.forEach(setting -> assertEquals(setting.get(actualRepository.settings()), setting.get(expectedRepository.settings())));
356+
.forEach(
357+
setting -> assertEquals(
358+
String.format(Locale.ROOT, "Restricted Settings mismatch [%s]", setting.getKey()),
359+
setting.get(actualRepository.settings()),
360+
setting.get(expectedRepository.settings())
361+
)
362+
);
357363
}
358364
}
359365

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java

Lines changed: 69 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,22 @@
88

99
package org.opensearch.remotestore;
1010

11-
import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse;
12-
import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
13-
import org.opensearch.action.support.PlainActionFuture;
11+
import org.opensearch.cluster.ClusterState;
12+
import org.opensearch.cluster.metadata.IndexMetadata;
13+
import org.opensearch.cluster.metadata.Metadata;
1414
import org.opensearch.common.settings.Settings;
15+
import org.opensearch.gateway.remote.ClusterMetadataManifest;
16+
import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata;
1517
import org.opensearch.gateway.remote.RemoteClusterStateService;
1618
import org.opensearch.test.OpenSearchIntegTestCase;
1719

1820
import java.io.IOException;
1921
import java.nio.file.Files;
20-
import java.util.Locale;
22+
import java.util.List;
2123
import java.util.Map;
2224
import java.util.Objects;
23-
import java.util.concurrent.ExecutionException;
2425

2526
import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING;
26-
import static org.opensearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE;
27-
import static org.opensearch.indices.ShardLimitValidator.SETTING_MAX_SHARDS_PER_CLUSTER_KEY;
2827

2928
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
3029
public class RemoteStoreClusterStateRestoreIT extends BaseRemoteStoreRestoreIT {
@@ -48,47 +47,10 @@ private Map<String, Long> initialTestSetup(int shardCount, int replicaCount, int
4847

4948
private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) {
5049
internalCluster().stopAllNodes();
51-
addNewNodes(dataNodeCount, clusterManagerNodeCount);
50+
internalCluster().startClusterManagerOnlyNodes(clusterManagerNodeCount);
51+
internalCluster().startDataOnlyNodes(dataNodeCount);
5252
}
5353

54-
private void restoreAndValidate(String clusterUUID, Map<String, Long> indexStats) throws Exception {
55-
restoreAndValidate(clusterUUID, indexStats, true);
56-
}
57-
58-
private void restoreAndValidate(String clusterUUID, Map<String, Long> indexStats, boolean validate) throws Exception {
59-
// TODO once auto restore is merged, the remote cluster state will be restored
60-
61-
if (validate) {
62-
// Step - 4 validation restore is successful.
63-
ensureGreen(INDEX_NAME);
64-
verifyRestoredData(indexStats, INDEX_NAME);
65-
}
66-
}
67-
68-
private void restoreAndValidateFails(
69-
String clusterUUID,
70-
PlainActionFuture<RestoreRemoteStoreResponse> actionListener,
71-
Class<? extends Throwable> clazz,
72-
String errorSubString
73-
) {
74-
75-
try {
76-
restoreAndValidate(clusterUUID, null, false);
77-
} catch (Exception e) {
78-
assertTrue(
79-
String.format(Locale.ROOT, "%s %s", clazz, e),
80-
clazz.isAssignableFrom(e.getClass())
81-
|| clazz.isAssignableFrom(e.getCause().getClass())
82-
|| (e.getCause().getCause() != null && clazz.isAssignableFrom(e.getCause().getCause().getClass()))
83-
);
84-
assertTrue(
85-
String.format(Locale.ROOT, "Error message mismatch. Expected: [%s]. Actual: [%s]", errorSubString, e.getMessage()),
86-
e.getMessage().contains(errorSubString)
87-
);
88-
}
89-
}
90-
91-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
9254
public void testFullClusterRestore() throws Exception {
9355
int shardCount = randomIntBetween(1, 2);
9456
int replicaCount = 1;
@@ -106,10 +68,10 @@ public void testFullClusterRestore() throws Exception {
10668
assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
10769

10870
// Step - 3 Trigger full cluster restore and validate
109-
restoreAndValidate(prevClusterUUID, indexStats);
71+
validateMetadata(List.of(INDEX_NAME));
72+
verifyRestoredData(indexStats, INDEX_NAME);
11073
}
11174

112-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
11375
public void testFullClusterRestoreMultipleIndices() throws Exception {
11476
int shardCount = randomIntBetween(1, 2);
11577
int replicaCount = 1;
@@ -134,155 +96,100 @@ public void testFullClusterRestoreMultipleIndices() throws Exception {
13496
assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
13597

13698
// Step - 3 Trigger full cluster restore
137-
restoreAndValidate(prevClusterUUID, indexStats);
138-
ensureGreen(secondIndexName);
139-
verifyRestoredData(indexStats2, secondIndexName);
99+
validateMetadata(List.of(INDEX_NAME, secondIndexName));
100+
verifyRestoredData(indexStats, INDEX_NAME);
140101
}
141102

142-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
143-
public void testFullClusterRestoreFailureValidationFailures() throws Exception {
103+
public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception {
144104
int shardCount = randomIntBetween(1, 2);
145105
int replicaCount = 1;
146106
int dataNodeCount = shardCount * (replicaCount + 1);
147107
int clusterManagerNodeCount = 1;
148108

149-
// index some data to generate files in remote directory
150-
Map<String, Long> indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
151-
String prevClusterUUID = clusterService().state().metadata().clusterUUID();
152-
153-
// Start of Test - 1
154-
// Test - 1 Trigger full cluster restore and validate it fails due to incorrect cluster UUID
155-
PlainActionFuture<RestoreRemoteStoreResponse> future = PlainActionFuture.newFuture();
156-
restoreAndValidateFails("randomUUID", future, IllegalStateException.class, "Remote Cluster State not found - randomUUID");
157-
// End of Test - 1
109+
// Step - 1 index some data to generate files in remote directory
110+
initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
158111

159-
// Start of Test - 3
160-
// Test - 2 Trigger full cluster restore and validate it fails due to cluster UUID same as current cluster UUID
161-
future = PlainActionFuture.newFuture();
162-
restoreAndValidateFails(
163-
clusterService().state().metadata().clusterUUID(),
164-
future,
165-
IllegalArgumentException.class,
166-
"clusterUUID to restore from should be different from current cluster UUID"
167-
);
168-
// End of Test - 2
112+
String prevClusterUUID = clusterService().state().metadata().clusterUUID();
113+
String clusterName = clusterService().state().getClusterName().value();
169114

170-
// Start of Test - 3
171115
// Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata
172-
// Restarting cluster with just 1 data node helps with applying cluster settings
173-
resetCluster(1, clusterManagerNodeCount);
174-
String newClusterUUID = clusterService().state().metadata().clusterUUID();
175-
assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
176-
177-
reduceShardLimits(1, 1);
178-
179-
// Step - 4 Trigger full cluster restore and validate it fails
180-
future = PlainActionFuture.newFuture();
181-
restoreAndValidateFails(
182-
prevClusterUUID,
183-
future,
184-
IllegalArgumentException.class,
185-
"this action would add [2] total shards, but this cluster currently has [0]/[1] maximum shards open"
186-
);
187-
resetShardLimits();
188-
// End of Test - 3
189-
190-
// Start of Test - 4
191-
// Test -4 Reset cluster and trigger full restore with same name index in the cluster
192-
// Test -4 Add required nodes for this test after last reset.
193-
addNewNodes(dataNodeCount - 1, 0);
194-
195-
newClusterUUID = clusterService().state().metadata().clusterUUID();
196-
assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
197-
198-
// Test -4 Step - 2 Create a new index with same name
199-
createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1));
200-
ensureYellowAndNoInitializingShards(INDEX_NAME);
201-
ensureGreen(INDEX_NAME);
202-
203-
future = PlainActionFuture.newFuture();
204-
205-
// Test -4 Step - 3 Trigger full cluster restore and validate fails
206-
restoreAndValidateFails(
207-
prevClusterUUID,
208-
future,
209-
IllegalStateException.class,
210-
"cannot restore index [remote-store-test-idx-1] because an open index with same name/uuid already exists in the cluster"
211-
);
116+
internalCluster().stopAllNodes();
117+
// Step - 3 Delete index metadata file in remote
118+
try {
119+
Files.move(
120+
segmentRepoPath.resolve(
121+
RemoteClusterStateService.encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index"
122+
),
123+
segmentRepoPath.resolve("cluster-state/")
124+
);
125+
} catch (IOException e) {
126+
throw new RuntimeException(e);
127+
}
128+
assertThrows(IllegalStateException.class, () -> addNewNodes(dataNodeCount, clusterManagerNodeCount));
129+
// Test is complete
212130

213-
// Test -4 Step - 4 validation restore is successful.
214-
ensureGreen(INDEX_NAME);
215-
// End of Test - 4
131+
// Starting a node without remote state to ensure test cleanup
132+
internalCluster().startNode(Settings.builder().put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false).build());
216133
}
217134

218-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
219-
public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception {
135+
public void testRemoteStateFullRestart() throws Exception {
220136
int shardCount = randomIntBetween(1, 2);
221137
int replicaCount = 1;
222138
int dataNodeCount = shardCount * (replicaCount + 1);
223-
int clusterManagerNodeCount = 1;
224-
225-
// Step - 1 index some data to generate files in remote directory
226-
initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
139+
int clusterManagerNodeCount = 3;
227140

141+
Map<String, Long> indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
228142
String prevClusterUUID = clusterService().state().metadata().clusterUUID();
229-
230-
// Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata
231-
resetCluster(dataNodeCount, clusterManagerNodeCount);
232-
233-
String newClusterUUID = clusterService().state().metadata().clusterUUID();
234-
assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
235-
236-
// Step - 4 Delete index metadata file in remote
143+
// Delete index metadata file in remote
237144
try {
238145
Files.move(
239146
segmentRepoPath.resolve(
240147
RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value())
241148
+ "/cluster-state/"
242149
+ prevClusterUUID
243-
+ "/index"
150+
+ "/manifest"
244151
),
245152
segmentRepoPath.resolve("cluster-state/")
246153
);
247154
} catch (IOException e) {
248155
throw new RuntimeException(e);
249156
}
250-
251-
// Step - 5 Trigger full cluster restore and validate fails
252-
PlainActionFuture<RestoreRemoteStoreResponse> future = PlainActionFuture.newFuture();
253-
restoreAndValidateFails(prevClusterUUID, future, IllegalStateException.class, "asdsa");
157+
internalCluster().fullRestart();
158+
ensureGreen(INDEX_NAME);
159+
String newClusterUUID = clusterService().state().metadata().clusterUUID();
160+
assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed";
161+
validateCurrentMetadata();
162+
verifyRestoredData(indexStats, INDEX_NAME);
254163
}
255164

256-
private void reduceShardLimits(int maxShardsPerNode, int maxShardsPerCluster) {
257-
// Step 3 - Reduce shard limits to hit shard limit with less no of shards
258-
try {
259-
client().admin()
260-
.cluster()
261-
.updateSettings(
262-
new ClusterUpdateSettingsRequest().transientSettings(
263-
Settings.builder()
264-
.put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode)
265-
.put(SETTING_MAX_SHARDS_PER_CLUSTER_KEY, maxShardsPerCluster)
266-
)
267-
)
268-
.get();
269-
} catch (InterruptedException | ExecutionException e) {
270-
throw new RuntimeException(e);
165+
private void validateMetadata(List<String> indexNames) {
166+
assertEquals(clusterService().state().metadata().indices().size(), indexNames.size());
167+
for (String indexName : indexNames) {
168+
assertTrue(clusterService().state().metadata().hasIndex(indexName));
271169
}
272170
}
273171

274-
private void resetShardLimits() {
275-
// Step - 5 Reset the cluster settings
276-
ClusterUpdateSettingsRequest resetRequest = new ClusterUpdateSettingsRequest();
277-
resetRequest.transientSettings(
278-
Settings.builder().putNull(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()).putNull(SETTING_MAX_SHARDS_PER_CLUSTER_KEY)
172+
private void validateCurrentMetadata() throws Exception {
173+
RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance(
174+
RemoteClusterStateService.class,
175+
internalCluster().getClusterManagerName()
279176
);
280-
281-
try {
282-
client().admin().cluster().updateSettings(resetRequest).get();
283-
} catch (InterruptedException | ExecutionException e) {
284-
throw new RuntimeException(e);
285-
}
177+
assertBusy(() -> {
178+
ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest(
179+
getClusterState().getClusterName().value(),
180+
getClusterState().metadata().clusterUUID()
181+
).get();
182+
ClusterState clusterState = getClusterState();
183+
Metadata currentMetadata = clusterState.metadata();
184+
assertEquals(currentMetadata.indices().size(), manifest.getIndices().size());
185+
assertEquals(currentMetadata.coordinationMetadata().term(), manifest.getClusterTerm());
186+
assertEquals(clusterState.version(), manifest.getStateVersion());
187+
assertEquals(clusterState.stateUUID(), manifest.getStateUUID());
188+
assertEquals(currentMetadata.clusterUUIDCommitted(), manifest.isClusterUUIDCommitted());
189+
for (UploadedIndexMetadata uploadedIndexMetadata : manifest.getIndices()) {
190+
IndexMetadata currentIndexMetadata = currentMetadata.index(uploadedIndexMetadata.getIndexName());
191+
assertEquals(currentIndexMetadata.getIndex().getUUID(), uploadedIndexMetadata.getIndexUUID());
192+
}
193+
});
286194
}
287-
288195
}

test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1853,10 +1853,12 @@ public synchronized void stopRandomNodeNotCurrentMaster() throws IOException {
18531853
*/
18541854
public void stopAllNodes() {
18551855
try {
1856-
int totalDataNodes = numDataNodes();
1857-
while (totalDataNodes > 0) {
1858-
stopRandomDataNode();
1859-
totalDataNodes -= 1;
1856+
if (numDataAndClusterManagerNodes() != numClusterManagerNodes()) {
1857+
int totalDataNodes = numDataNodes();
1858+
while (totalDataNodes > 0) {
1859+
stopRandomDataNode();
1860+
totalDataNodes -= 1;
1861+
}
18601862
}
18611863
int totalClusterManagerNodes = numClusterManagerNodes();
18621864
while (totalClusterManagerNodes > 1) {

0 commit comments

Comments
 (0)