 
 package org.opensearch.remotestore;
 
-import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse;
-import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
-import org.opensearch.action.support.PlainActionFuture;
+import org.opensearch.cluster.ClusterState;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.cluster.metadata.Metadata;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.gateway.remote.ClusterMetadataManifest;
+import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata;
 import org.opensearch.gateway.remote.RemoteClusterStateService;
 import org.opensearch.test.OpenSearchIntegTestCase;
 
 import java.io.IOException;
 import java.nio.file.Files;
-import java.util.Locale;
+import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.concurrent.ExecutionException;
 
 import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING;
-import static org.opensearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE;
-import static org.opensearch.indices.ShardLimitValidator.SETTING_MAX_SHARDS_PER_CLUSTER_KEY;
 
 @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
 public class RemoteStoreClusterStateRestoreIT extends BaseRemoteStoreRestoreIT {
@@ -48,47 +47,10 @@ private Map<String, Long> initialTestSetup(int shardCount, int replicaCount, int
 
     private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) {
         internalCluster().stopAllNodes();
-        addNewNodes(dataNodeCount, clusterManagerNodeCount);
+        internalCluster().startClusterManagerOnlyNodes(clusterManagerNodeCount);
+        internalCluster().startDataOnlyNodes(dataNodeCount);
     }
 
-    private void restoreAndValidate(String clusterUUID, Map<String, Long> indexStats) throws Exception {
-        restoreAndValidate(clusterUUID, indexStats, true);
-    }
-
-    private void restoreAndValidate(String clusterUUID, Map<String, Long> indexStats, boolean validate) throws Exception {
-        // TODO once auto restore is merged, the remote cluster state will be restored
-
-        if (validate) {
-            // Step - 4 validation restore is successful.
-            ensureGreen(INDEX_NAME);
-            verifyRestoredData(indexStats, INDEX_NAME);
-        }
-    }
-
-    private void restoreAndValidateFails(
-        String clusterUUID,
-        PlainActionFuture<RestoreRemoteStoreResponse> actionListener,
-        Class<? extends Throwable> clazz,
-        String errorSubString
-    ) {
-
-        try {
-            restoreAndValidate(clusterUUID, null, false);
-        } catch (Exception e) {
-            assertTrue(
-                String.format(Locale.ROOT, "%s %s", clazz, e),
-                clazz.isAssignableFrom(e.getClass())
-                    || clazz.isAssignableFrom(e.getCause().getClass())
-                    || (e.getCause().getCause() != null && clazz.isAssignableFrom(e.getCause().getCause().getClass()))
-            );
-            assertTrue(
-                String.format(Locale.ROOT, "Error message mismatch. Expected: [%s]. Actual: [%s]", errorSubString, e.getMessage()),
-                e.getMessage().contains(errorSubString)
-            );
-        }
-    }
-
-    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
     public void testFullClusterRestore() throws Exception {
         int shardCount = randomIntBetween(1, 2);
         int replicaCount = 1;
@@ -106,10 +68,10 @@ public void testFullClusterRestore() throws Exception {
         assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
 
         // Step - 3 Trigger full cluster restore and validate
-        restoreAndValidate(prevClusterUUID, indexStats);
+        validateMetadata(List.of(INDEX_NAME));
+        verifyRestoredData(indexStats, INDEX_NAME);
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
     public void testFullClusterRestoreMultipleIndices() throws Exception {
         int shardCount = randomIntBetween(1, 2);
         int replicaCount = 1;
@@ -134,155 +96,100 @@ public void testFullClusterRestoreMultipleIndices() throws Exception {
         assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
 
         // Step - 3 Trigger full cluster restore
-        restoreAndValidate(prevClusterUUID, indexStats);
-        ensureGreen(secondIndexName);
-        verifyRestoredData(indexStats2, secondIndexName);
+        validateMetadata(List.of(INDEX_NAME, secondIndexName));
+        verifyRestoredData(indexStats, INDEX_NAME);
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
-    public void testFullClusterRestoreFailureValidationFailures() throws Exception {
+    public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception {
         int shardCount = randomIntBetween(1, 2);
         int replicaCount = 1;
         int dataNodeCount = shardCount * (replicaCount + 1);
         int clusterManagerNodeCount = 1;
 
-        // index some data to generate files in remote directory
-        Map<String, Long> indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
-        String prevClusterUUID = clusterService().state().metadata().clusterUUID();
-
-        // Start of Test - 1
-        // Test - 1 Trigger full cluster restore and validate it fails due to incorrect cluster UUID
-        PlainActionFuture<RestoreRemoteStoreResponse> future = PlainActionFuture.newFuture();
-        restoreAndValidateFails("randomUUID", future, IllegalStateException.class, "Remote Cluster State not found - randomUUID");
-        // End of Test - 1
+        // Step - 1 index some data to generate files in remote directory
+        initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
 
-        // Start of Test - 3
-        // Test - 2 Trigger full cluster restore and validate it fails due to cluster UUID same as current cluster UUID
-        future = PlainActionFuture.newFuture();
-        restoreAndValidateFails(
-            clusterService().state().metadata().clusterUUID(),
-            future,
-            IllegalArgumentException.class,
-            "clusterUUID to restore from should be different from current cluster UUID"
-        );
-        // End of Test - 2
+        String prevClusterUUID = clusterService().state().metadata().clusterUUID();
+        String clusterName = clusterService().state().getClusterName().value();
 
-        // Start of Test - 3
         // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata
-        // Restarting cluster with just 1 data node helps with applying cluster settings
-        resetCluster(1, clusterManagerNodeCount);
-        String newClusterUUID = clusterService().state().metadata().clusterUUID();
-        assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
-
-        reduceShardLimits(1, 1);
-
-        // Step - 4 Trigger full cluster restore and validate it fails
-        future = PlainActionFuture.newFuture();
-        restoreAndValidateFails(
-            prevClusterUUID,
-            future,
-            IllegalArgumentException.class,
-            "this action would add [2] total shards, but this cluster currently has [0]/[1] maximum shards open"
-        );
-        resetShardLimits();
-        // End of Test - 3
-
-        // Start of Test - 4
-        // Test -4 Reset cluster and trigger full restore with same name index in the cluster
-        // Test -4 Add required nodes for this test after last reset.
-        addNewNodes(dataNodeCount - 1, 0);
-
-        newClusterUUID = clusterService().state().metadata().clusterUUID();
-        assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
-
-        // Test -4 Step - 2 Create a new index with same name
-        createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1));
-        ensureYellowAndNoInitializingShards(INDEX_NAME);
-        ensureGreen(INDEX_NAME);
-
-        future = PlainActionFuture.newFuture();
-
-        // Test -4 Step - 3 Trigger full cluster restore and validate fails
-        restoreAndValidateFails(
-            prevClusterUUID,
-            future,
-            IllegalStateException.class,
-            "cannot restore index [remote-store-test-idx-1] because an open index with same name/uuid already exists in the cluster"
-        );
+        internalCluster().stopAllNodes();
+        // Step - 3 Delete index metadata file in remote
+        try {
+            Files.move(
+                segmentRepoPath.resolve(
+                    RemoteClusterStateService.encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index"
+                ),
+                segmentRepoPath.resolve("cluster-state/")
+            );
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+        assertThrows(IllegalStateException.class, () -> addNewNodes(dataNodeCount, clusterManagerNodeCount));
+        // Test is complete
 
-        // Test -4 Step - 4 validation restore is successful.
-        ensureGreen(INDEX_NAME);
-        // End of Test - 4
+        // Starting a node without remote state to ensure test cleanup
+        internalCluster().startNode(Settings.builder().put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false).build());
     }
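
Note on the failure injection above: the test leans on the remote cluster-state layout that RemoteClusterStateService writes into the repository, with per-index metadata under <encodeString(clusterName)>/cluster-state/<clusterUUID>/index and manifests under a sibling manifest directory, so relocating the index directory makes bootstrap from remote state fail with the asserted IllegalStateException. Below is a minimal sketch of how such paths are assembled; the Base64 stand-in for encodeString and all literal values are illustrative assumptions, not the production implementation.

    import java.nio.charset.StandardCharsets;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.util.Base64;

    class RemoteStatePathSketch {
        // Assumed stand-in for RemoteClusterStateService.encodeString: a URL-safe,
        // unpadded Base64 encoding of the cluster name (upstream details may differ).
        static String encodeString(String content) {
            return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8));
        }

        public static void main(String[] args) {
            Path segmentRepoPath = Paths.get("/tmp/segment-repo"); // hypothetical repo root
            String clusterName = "integ-test-cluster";             // hypothetical values
            String clusterUUID = "9f2c1a7b";
            Path base = segmentRepoPath.resolve(encodeString(clusterName) + "/cluster-state/" + clusterUUID);
            // This test moves the "index" directory away; the sibling "manifest"
            // directory is the one relocated in testRemoteStateFullRestart below.
            System.out.println(base.resolve("index"));    // per-index metadata blobs
            System.out.println(base.resolve("manifest")); // cluster metadata manifests
        }
    }

Running the sketch only prints the two derived paths; in the test itself, segmentRepoPath plays the role of the repository root.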
 
-    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834")
-    public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception {
+    public void testRemoteStateFullRestart() throws Exception {
         int shardCount = randomIntBetween(1, 2);
         int replicaCount = 1;
         int dataNodeCount = shardCount * (replicaCount + 1);
-        int clusterManagerNodeCount = 1;
-
-        // Step - 1 index some data to generate files in remote directory
-        initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
+        int clusterManagerNodeCount = 3;
 
+        Map<String, Long> indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount);
         String prevClusterUUID = clusterService().state().metadata().clusterUUID();
-
-        // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata
-        resetCluster(dataNodeCount, clusterManagerNodeCount);
-
-        String newClusterUUID = clusterService().state().metadata().clusterUUID();
-        assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same";
-
-        // Step - 4 Delete index metadata file in remote
+        // Delete index metadata file in remote
         try {
             Files.move(
                 segmentRepoPath.resolve(
                     RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value())
                         + "/cluster-state/"
                         + prevClusterUUID
-                        + "/index"
+                        + "/manifest"
                 ),
                 segmentRepoPath.resolve("cluster-state/")
             );
         } catch (IOException e) {
             throw new RuntimeException(e);
         }
-
-        // Step - 5 Trigger full cluster restore and validate fails
-        PlainActionFuture<RestoreRemoteStoreResponse> future = PlainActionFuture.newFuture();
-        restoreAndValidateFails(prevClusterUUID, future, IllegalStateException.class, "asdsa");
+        internalCluster().fullRestart();
+        ensureGreen(INDEX_NAME);
+        String newClusterUUID = clusterService().state().metadata().clusterUUID();
+        assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed";
+        validateCurrentMetadata();
+        verifyRestoredData(indexStats, INDEX_NAME);
     }
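
The restart style is the point of this test: internalCluster().fullRestart() brings the same nodes back on their existing data paths, so the cluster recovers locally, keeps its UUID, and presumably re-publishes fresh manifests for validateCurrentMetadata() to find, even though the old manifest directory was moved away; only a bootstrap of entirely new nodes, as in the previous test, must read remote state. A toy model of the two UUID expectations follows; it is purely illustrative and not the OpenSearch implementation.

    import java.util.UUID;

    class ClusterUuidSketch {
        // Hypothetical model: the cluster UUID lives with the node data paths.
        private String clusterUUID = UUID.randomUUID().toString();

        String clusterUUID() {
            return clusterUUID;
        }

        // Full restart: data paths are reused, so the UUID is unchanged.
        void fullRestart() {}

        // Replacing every node discards the old data paths and mints a new cluster.
        void replaceAllNodes() {
            clusterUUID = UUID.randomUUID().toString();
        }

        public static void main(String[] args) {
            ClusterUuidSketch cluster = new ClusterUuidSketch();
            String before = cluster.clusterUUID();
            cluster.fullRestart();
            System.out.println(before.equals(cluster.clusterUUID())); // true: UUID survives
            cluster.replaceAllNodes();
            System.out.println(before.equals(cluster.clusterUUID())); // false: new cluster
        }
    }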
 
-    private void reduceShardLimits(int maxShardsPerNode, int maxShardsPerCluster) {
-        // Step 3 - Reduce shard limits to hit shard limit with less no of shards
-        try {
-            client().admin()
-                .cluster()
-                .updateSettings(
-                    new ClusterUpdateSettingsRequest().transientSettings(
-                        Settings.builder()
-                            .put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode)
-                            .put(SETTING_MAX_SHARDS_PER_CLUSTER_KEY, maxShardsPerCluster)
-                    )
-                )
-                .get();
-        } catch (InterruptedException | ExecutionException e) {
-            throw new RuntimeException(e);
+    private void validateMetadata(List<String> indexNames) {
+        assertEquals(clusterService().state().metadata().indices().size(), indexNames.size());
+        for (String indexName : indexNames) {
+            assertTrue(clusterService().state().metadata().hasIndex(indexName));
         }
     }
 
-    private void resetShardLimits() {
-        // Step - 5 Reset the cluster settings
-        ClusterUpdateSettingsRequest resetRequest = new ClusterUpdateSettingsRequest();
-        resetRequest.transientSettings(
-            Settings.builder().putNull(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()).putNull(SETTING_MAX_SHARDS_PER_CLUSTER_KEY)
+    private void validateCurrentMetadata() throws Exception {
+        RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance(
+            RemoteClusterStateService.class,
+            internalCluster().getClusterManagerName()
         );
-
-        try {
-            client().admin().cluster().updateSettings(resetRequest).get();
-        } catch (InterruptedException | ExecutionException e) {
-            throw new RuntimeException(e);
-        }
+        assertBusy(() -> {
+            ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest(
+                getClusterState().getClusterName().value(),
+                getClusterState().metadata().clusterUUID()
+            ).get();
+            ClusterState clusterState = getClusterState();
+            Metadata currentMetadata = clusterState.metadata();
+            assertEquals(currentMetadata.indices().size(), manifest.getIndices().size());
+            assertEquals(currentMetadata.coordinationMetadata().term(), manifest.getClusterTerm());
+            assertEquals(clusterState.version(), manifest.getStateVersion());
+            assertEquals(clusterState.stateUUID(), manifest.getStateUUID());
+            assertEquals(currentMetadata.clusterUUIDCommitted(), manifest.isClusterUUIDCommitted());
+            for (UploadedIndexMetadata uploadedIndexMetadata : manifest.getIndices()) {
+                IndexMetadata currentIndexMetadata = currentMetadata.index(uploadedIndexMetadata.getIndexName());
+                assertEquals(currentIndexMetadata.getIndex().getUUID(), uploadedIndexMetadata.getIndexUUID());
+            }
+        });
     }
-
 }
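
A closing note on validateCurrentMetadata(): the manifest upload is asynchronous relative to cluster-state application, which is why the checks sit inside assertBusy, and getLatestClusterMetadataManifest appears to return an Optional here, so the trailing .get() throws until a manifest exists and assertBusy retries. A self-contained sketch of that retry pattern, under those assumptions:

    import java.util.Optional;
    import java.util.function.Supplier;

    class AssertBusySketch {
        // Simplified model of OpenSearchTestCase.assertBusy: re-run the assertion
        // until it stops throwing or the timeout elapses (the real one backs off).
        static void assertBusy(Runnable assertion, long timeoutMillis) throws InterruptedException {
            long deadline = System.currentTimeMillis() + timeoutMillis;
            while (true) {
                try {
                    assertion.run();
                    return;
                } catch (AssertionError | RuntimeException e) {
                    if (System.currentTimeMillis() >= deadline) {
                        throw e;
                    }
                    Thread.sleep(50);
                }
            }
        }

        public static void main(String[] args) throws InterruptedException {
            long start = System.currentTimeMillis();
            // Stand-in for getLatestClusterMetadataManifest: empty until the
            // (simulated) manifest upload lands ~200 ms after startup.
            Supplier<Optional<String>> latestManifest = () ->
                System.currentTimeMillis() - start > 200 ? Optional.of("manifest-v1") : Optional.empty();
            assertBusy(() -> {
                // Optional.get() throws NoSuchElementException while empty, so assertBusy retries.
                String manifest = latestManifest.get().get();
                if (!"manifest-v1".equals(manifest)) {
                    throw new AssertionError("unexpected manifest: " + manifest);
                }
            }, 5_000);
            System.out.println("manifest observed");
        }
    }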