1
1
#include " compressed.h"
2
2
#include " wire_format.h"
3
+ #include " output.h"
3
4
4
5
#include < cityhash/city.h>
5
6
#include < lz4/lz4.h>
6
7
#include < stdexcept>
7
8
#include < system_error>
8
9
9
- #define DBMS_MAX_COMPRESSED_SIZE 0x40000000ULL // 1GB
10
+ namespace {
11
+ constexpr size_t HEADER_SIZE = 9 ;
12
+ // see DB::CompressionMethodByte::LZ4 from src/Compression/CompressionInfo.h of ClickHouse project
13
+ constexpr uint8_t COMPRESSION_METHOD = 0x82 ;
14
+ // Documentation says that compression is faster when output buffer is larger than LZ4_compressBound estimation.
15
+ constexpr size_t EXTRA_COMPRESS_BUFFER_SIZE = 4096 ;
16
+ constexpr size_t DBMS_MAX_COMPRESSED_SIZE = 0x40000000ULL ; // 1GB
17
+ }
10
18
11
19
namespace clickhouse {
12
20
13
- CompressedInput::CompressedInput (CodedInputStream * input)
21
+ CompressedInput::CompressedInput (InputStream * input)
14
22
: input_(input)
15
23
{
16
24
}
@@ -22,7 +30,7 @@ CompressedInput::~CompressedInput() {
22
30
#else
23
31
if (!std::uncaught_exceptions ()) {
24
32
#endif
25
- throw std::runtime_error (" some data was not readed " );
33
+ throw std::runtime_error (" some data was not read " );
26
34
}
27
35
}
28
36
}
@@ -50,9 +58,8 @@ bool CompressedInput::Decompress() {
50
58
return false ;
51
59
}
52
60
53
- if (method != 0x82 ) {
54
- throw std::runtime_error (" unsupported compression method " +
55
- std::to_string (int (method)));
61
+ if (method != COMPRESSION_METHOD) {
62
+ throw std::runtime_error (" unsupported compression method " + std::to_string (int (method)));
56
63
} else {
57
64
if (!WireFormat::ReadFixed (input_, &compressed)) {
58
65
return false ;
@@ -75,7 +82,7 @@ bool CompressedInput::Decompress() {
75
82
out.Write (&original, sizeof (original));
76
83
}
77
84
78
- if (!WireFormat::ReadBytes (input_, tmp.data () + 9 , compressed - 9 )) {
85
+ if (!WireFormat::ReadBytes (input_, tmp.data () + HEADER_SIZE , compressed - HEADER_SIZE )) {
79
86
return false ;
80
87
} else {
81
88
if (hash != CityHash128 ((const char *)tmp.data (), compressed)) {
@@ -85,7 +92,7 @@ bool CompressedInput::Decompress() {
85
92
86
93
data_ = Buffer (original);
87
94
88
- if (LZ4_decompress_safe ((const char *)tmp.data () + 9 , (char *)data_.data (), compressed - 9 , original) < 0 ) {
95
+ if (LZ4_decompress_safe ((const char *)tmp.data () + HEADER_SIZE , (char *)data_.data (), compressed - HEADER_SIZE , original) < 0 ) {
89
96
throw std::runtime_error (" can't decompress data" );
90
97
} else {
91
98
mem_.Reset (data_.data (), original);
@@ -95,4 +102,73 @@ bool CompressedInput::Decompress() {
95
102
return true ;
96
103
}
97
104
105
+
106
+ CompressedOutput::CompressedOutput (OutputStream * destination, size_t max_compressed_chunk_size)
107
+ : destination_ (destination)
108
+ , max_compressed_chunk_size_ (max_compressed_chunk_size)
109
+ {
110
+ PreallocateCompressBuffer (max_compressed_chunk_size);
111
+ }
112
+
113
+ CompressedOutput::~CompressedOutput () {
114
+ Flush ();
115
+ }
116
+
117
+ size_t CompressedOutput::DoWrite (const void * data, size_t len) {
118
+ const size_t original_len = len;
119
+ // what if len > max_compressed_chunk_size_ ?
120
+ const size_t max_chunk_size = max_compressed_chunk_size_ > 0 ? max_compressed_chunk_size_ : len;
121
+ if (max_chunk_size > max_compressed_chunk_size_) {
122
+ PreallocateCompressBuffer (len);
123
+ }
124
+
125
+ while (len > 0 ) {
126
+ auto to_compress = std::min (len, max_chunk_size);
127
+ Compress (data, to_compress);
128
+
129
+ len -= to_compress;
130
+ data = reinterpret_cast <const char *>(data) + to_compress;
131
+ }
132
+
133
+ return original_len - len;
134
+ }
135
+
136
+ void CompressedOutput::DoFlush () {
137
+ destination_->Flush ();
138
+ }
139
+
140
+ void CompressedOutput::Compress (const void * data, size_t len) {
141
+ const auto compressed_size = LZ4_compress_default (
142
+ (const char *)data,
143
+ (char *)compressed_buffer_.data () + HEADER_SIZE,
144
+ len,
145
+ compressed_buffer_.size () - HEADER_SIZE);
146
+ if (compressed_size <= 0 )
147
+ throw std::runtime_error (" Failed to compress chunk of " + std::to_string (len) + " bytes, "
148
+ " LZ4 error: " + std::to_string (compressed_size));
149
+
150
+ {
151
+ auto header = compressed_buffer_.data ();
152
+ WriteUnaligned (header, COMPRESSION_METHOD);
153
+ // Compressed data size with header
154
+ WriteUnaligned (header + 1 , static_cast <uint32_t >(compressed_size + HEADER_SIZE));
155
+ // Original data size
156
+ WriteUnaligned (header + 5 , static_cast <uint32_t >(len));
157
+ }
158
+
159
+ WireFormat::WriteFixed (destination_, CityHash128 (
160
+ (const char *)compressed_buffer_.data (), compressed_size + HEADER_SIZE));
161
+ WireFormat::WriteBytes (destination_, compressed_buffer_.data (), compressed_size + HEADER_SIZE);
162
+
163
+ destination_->Flush ();
164
+ }
165
+
166
+ void CompressedOutput::PreallocateCompressBuffer (size_t input_size) {
167
+ const auto estimated_compressed_buffer_size = LZ4_compressBound (input_size);
168
+ if (estimated_compressed_buffer_size <= 0 )
169
+ throw std::runtime_error (" Failed to estimate compressed buffer size, LZ4 error: " + std::to_string (estimated_compressed_buffer_size));
170
+
171
+ compressed_buffer_.resize (estimated_compressed_buffer_size + HEADER_SIZE + EXTRA_COMPRESS_BUFFER_SIZE);
172
+ }
173
+
98
174
}
0 commit comments