|
24 | 24 | import org.apache.lucene.util.BytesRef;
|
25 | 25 | import org.opensearch.action.admin.indices.alias.Alias;
|
26 | 26 | import org.opensearch.action.admin.indices.flush.FlushRequest;
|
| 27 | +import org.opensearch.action.admin.indices.recovery.RecoveryResponse; |
27 | 28 | import org.opensearch.action.admin.indices.stats.IndicesStatsRequest;
|
28 | 29 | import org.opensearch.action.admin.indices.stats.IndicesStatsResponse;
|
29 | 30 | import org.opensearch.action.get.GetResponse;
|
|
58 | 59 | import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
|
59 | 60 | import org.opensearch.common.settings.Settings;
|
60 | 61 | import org.opensearch.common.unit.TimeValue;
|
| 62 | +import org.opensearch.core.common.bytes.BytesArray; |
61 | 63 | import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
|
62 | 64 | import org.opensearch.core.index.shard.ShardId;
|
63 | 65 | import org.opensearch.core.xcontent.XContentBuilder;
|
|
71 | 73 | import org.opensearch.index.engine.NRTReplicationReaderManager;
|
72 | 74 | import org.opensearch.index.shard.IndexShard;
|
73 | 75 | import org.opensearch.indices.recovery.FileChunkRequest;
|
| 76 | +import org.opensearch.indices.recovery.RecoveryState; |
74 | 77 | import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint;
|
75 | 78 | import org.opensearch.indices.replication.common.ReplicationType;
|
76 | 79 | import org.opensearch.node.NodeClosedException;
|
|
82 | 85 | import org.opensearch.test.InternalTestCluster;
|
83 | 86 | import org.opensearch.test.OpenSearchIntegTestCase;
|
84 | 87 | import org.opensearch.test.transport.MockTransportService;
|
| 88 | +import org.opensearch.transport.TransportRequest; |
85 | 89 | import org.opensearch.transport.TransportService;
|
86 | 90 | import org.junit.Before;
|
87 | 91 |
|
|
94 | 98 | import java.util.Set;
|
95 | 99 | import java.util.concurrent.CountDownLatch;
|
96 | 100 | import java.util.concurrent.TimeUnit;
|
| 101 | +import java.util.concurrent.atomic.AtomicBoolean; |
97 | 102 | import java.util.stream.Collectors;
|
98 | 103 |
|
99 | 104 | import static java.util.Arrays.asList;
|
@@ -1777,4 +1782,134 @@ public void testRealtimeTermVectorRequestsUnSuccessful() throws IOException {
|
1777 | 1782 |
|
1778 | 1783 | }
|
1779 | 1784 |
|
| 1785 | + public void testSendCorruptBytesToReplica() throws Exception { |
| 1786 | + // this test stubs transport calls specific to node-node replication. |
| 1787 | + assumeFalse( |
| 1788 | + "Skipping the test as its not compatible with segment replication with remote store.", |
| 1789 | + segmentReplicationWithRemoteEnabled() |
| 1790 | + ); |
| 1791 | + final String primaryNode = internalCluster().startDataOnlyNode(); |
| 1792 | + createIndex( |
| 1793 | + INDEX_NAME, |
| 1794 | + Settings.builder() |
| 1795 | + .put(indexSettings()) |
| 1796 | + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) |
| 1797 | + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) |
| 1798 | + .put("index.refresh_interval", -1) |
| 1799 | + .build() |
| 1800 | + ); |
| 1801 | + ensureYellow(INDEX_NAME); |
| 1802 | + final String replicaNode = internalCluster().startDataOnlyNode(); |
| 1803 | + ensureGreen(INDEX_NAME); |
| 1804 | + |
| 1805 | + MockTransportService primaryTransportService = ((MockTransportService) internalCluster().getInstance( |
| 1806 | + TransportService.class, |
| 1807 | + primaryNode |
| 1808 | + )); |
| 1809 | + CountDownLatch latch = new CountDownLatch(1); |
| 1810 | + AtomicBoolean failed = new AtomicBoolean(false); |
| 1811 | + primaryTransportService.addSendBehavior( |
| 1812 | + internalCluster().getInstance(TransportService.class, replicaNode), |
| 1813 | + (connection, requestId, action, request, options) -> { |
| 1814 | + if (action.equals(SegmentReplicationTargetService.Actions.FILE_CHUNK) && failed.getAndSet(true) == false) { |
| 1815 | + FileChunkRequest req = (FileChunkRequest) request; |
| 1816 | + logger.info("SENDING CORRUPT file chunk [{}] lastChunk: {}", req, req.lastChunk()); |
| 1817 | + TransportRequest corrupt = new FileChunkRequest( |
| 1818 | + req.recoveryId(), |
| 1819 | + ((FileChunkRequest) request).requestSeqNo(), |
| 1820 | + ((FileChunkRequest) request).shardId(), |
| 1821 | + ((FileChunkRequest) request).metadata(), |
| 1822 | + ((FileChunkRequest) request).position(), |
| 1823 | + new BytesArray("test"), |
| 1824 | + false, |
| 1825 | + 0, |
| 1826 | + 0L |
| 1827 | + ); |
| 1828 | + connection.sendRequest(requestId, action, corrupt, options); |
| 1829 | + latch.countDown(); |
| 1830 | + } else { |
| 1831 | + connection.sendRequest(requestId, action, request, options); |
| 1832 | + } |
| 1833 | + } |
| 1834 | + ); |
| 1835 | + for (int i = 0; i < 100; i++) { |
| 1836 | + client().prepareIndex(INDEX_NAME) |
| 1837 | + .setId(String.valueOf(i)) |
| 1838 | + .setSource(jsonBuilder().startObject().field("field", i).endObject()) |
| 1839 | + .get(); |
| 1840 | + } |
| 1841 | + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); |
| 1842 | + assertNotEquals(originalRecoveryTime, 0); |
| 1843 | + refresh(INDEX_NAME); |
| 1844 | + latch.await(); |
| 1845 | + assertTrue(failed.get()); |
| 1846 | + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); |
| 1847 | + // reset checkIndex to ensure our original shard doesn't throw |
| 1848 | + resetCheckIndexStatus(); |
| 1849 | + waitForSearchableDocs(100, primaryNode, replicaNode); |
| 1850 | + } |
| 1851 | + |
| 1852 | + public void testWipeSegmentBetweenSyncs() throws Exception { |
| 1853 | + internalCluster().startClusterManagerOnlyNode(); |
| 1854 | + final String primaryNode = internalCluster().startDataOnlyNode(); |
| 1855 | + createIndex( |
| 1856 | + INDEX_NAME, |
| 1857 | + Settings.builder() |
| 1858 | + .put(indexSettings()) |
| 1859 | + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) |
| 1860 | + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) |
| 1861 | + .put("index.refresh_interval", -1) |
| 1862 | + .build() |
| 1863 | + ); |
| 1864 | + ensureYellow(INDEX_NAME); |
| 1865 | + final String replicaNode = internalCluster().startDataOnlyNode(); |
| 1866 | + ensureGreen(INDEX_NAME); |
| 1867 | + |
| 1868 | + for (int i = 0; i < 10; i++) { |
| 1869 | + client().prepareIndex(INDEX_NAME) |
| 1870 | + .setId(String.valueOf(i)) |
| 1871 | + .setSource(jsonBuilder().startObject().field("field", i).endObject()) |
| 1872 | + .get(); |
| 1873 | + } |
| 1874 | + refresh(INDEX_NAME); |
| 1875 | + ensureGreen(INDEX_NAME); |
| 1876 | + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); |
| 1877 | + |
| 1878 | + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); |
| 1879 | + waitForSearchableDocs(INDEX_NAME, 10, List.of(replicaNode)); |
| 1880 | + indexShard.store().directory().deleteFile("_0.si"); |
| 1881 | + |
| 1882 | + for (int i = 11; i < 21; i++) { |
| 1883 | + client().prepareIndex(INDEX_NAME) |
| 1884 | + .setId(String.valueOf(i)) |
| 1885 | + .setSource(jsonBuilder().startObject().field("field", i).endObject()) |
| 1886 | + .get(); |
| 1887 | + } |
| 1888 | + refresh(INDEX_NAME); |
| 1889 | + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); |
| 1890 | + resetCheckIndexStatus(); |
| 1891 | + waitForSearchableDocs(20, primaryNode, replicaNode); |
| 1892 | + } |
| 1893 | + |
| 1894 | + private void waitForNewPeerRecovery(String replicaNode, long originalRecoveryTime) throws Exception { |
| 1895 | + assertBusy(() -> { |
| 1896 | + // assert we have a peer recovery after the original |
| 1897 | + final long time = getRecoveryStopTime(replicaNode); |
| 1898 | + assertNotEquals(time, 0); |
| 1899 | + assertNotEquals(originalRecoveryTime, time); |
| 1900 | + |
| 1901 | + }, 1, TimeUnit.MINUTES); |
| 1902 | + } |
| 1903 | + |
| 1904 | + private long getRecoveryStopTime(String nodeName) { |
| 1905 | + final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).get(); |
| 1906 | + final List<RecoveryState> recoveryStates = recoveryResponse.shardRecoveryStates().get(INDEX_NAME); |
| 1907 | + logger.info("Recovery states {}", recoveryResponse); |
| 1908 | + for (RecoveryState recoveryState : recoveryStates) { |
| 1909 | + if (recoveryState.getTargetNode().getName().equals(nodeName)) { |
| 1910 | + return recoveryState.getTimer().stopTime(); |
| 1911 | + } |
| 1912 | + } |
| 1913 | + return 0L; |
| 1914 | + } |
1780 | 1915 | }
|
0 commit comments