Skip to content

Commit f90cc39

Browse files
authored
Merge pull request #757 from kiwix/gzip_compression
2 parents dc42f83 + fba0f09 commit f90cc39

File tree

5 files changed

+69
-43
lines changed

5 files changed

+69
-43
lines changed

src/server/internalServer.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ extern "C" {
7878
#include "response.h"
7979

8080
#define MAX_SEARCH_LEN 140
81-
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
8281
#define DEFAULT_CACHE_SIZE 2
8382

8483
namespace kiwix {

src/server/request_context.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -75,15 +75,15 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
7575
method(str2RequestMethod(_method)),
7676
version(version),
7777
requestIndex(s_requestIndex++),
78-
acceptEncodingDeflate(false),
78+
acceptEncodingGzip(false),
7979
byteRange_()
8080
{
8181
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
8282
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
8383

8484
try {
85-
acceptEncodingDeflate =
86-
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
85+
acceptEncodingGzip =
86+
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
8787
} catch (const std::out_of_range&) {}
8888

8989
try {
@@ -127,7 +127,7 @@ void RequestContext::print_debug_info() const {
127127
printf("Parsed : \n");
128128
printf("full_url: %s\n", full_url.c_str());
129129
printf("url : %s\n", url.c_str());
130-
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
130+
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
131131
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
132132
printf("is_valid_url : %d\n", is_valid_url());
133133
printf(".............\n");

src/server/request_context.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class RequestContext {
9292

9393
ByteRange get_range() const;
9494

95-
bool can_compress() const { return acceptEncodingDeflate; }
95+
bool can_compress() const { return acceptEncodingGzip; }
9696

9797
std::string get_user_language() const;
9898

@@ -103,7 +103,7 @@ class RequestContext {
103103
std::string version;
104104
unsigned long long requestIndex;
105105

106-
bool acceptEncodingDeflate;
106+
bool acceptEncodingGzip;
107107

108108
ByteRange byteRange_;
109109
std::map<std::string, std::string> headers;

src/server/response.cpp

+51-25
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,17 @@
3131
#include <mustache.hpp>
3232
#include <zlib.h>
3333

34-
35-
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
34+
#include <array>
35+
36+
// This is somewhat of a magic value.
37+
// If this value is too small, we will compress (and lose CPU time on) too much
38+
// content.
39+
// If this value is too big, we will not compress enough content and send too
40+
// much data.
41+
// If we assume that MTU is 1500 Bytes it is useless to compress
42+
// content smaller as the content will be sent in one packet anyway.
43+
// 1400 bytes seems to be a commonly accepted limit.
44+
#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400
3645

3746
namespace kiwix {
3847

@@ -58,6 +67,41 @@ bool is_compressible_mime_type(const std::string& mimeType)
5867
|| mimeType.find("application/json") != string::npos;
5968
}
6069

70+
bool compress(std::string &content) {
71+
z_stream strm;
72+
strm.zalloc = Z_NULL;
73+
strm.zfree = Z_NULL;
74+
strm.opaque = Z_NULL;
75+
76+
auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
77+
Z_DEFAULT_STRATEGY);
78+
if (ret != Z_OK) { return false; }
79+
80+
strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
81+
strm.next_in =
82+
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));
83+
84+
std::string compressed;
85+
86+
std::array<char, 16384> buff{};
87+
do {
88+
strm.avail_out = buff.size();
89+
strm.next_out = reinterpret_cast<Bytef *>(buff.data());
90+
ret = deflate(&strm, Z_FINISH);
91+
assert(ret != Z_STREAM_ERROR);
92+
compressed.append(buff.data(), buff.size() - strm.avail_out);
93+
} while (strm.avail_out == 0);
94+
95+
assert(ret == Z_STREAM_END);
96+
assert(strm.avail_in == 0);
97+
98+
content.swap(compressed);
99+
100+
deflateEnd(&strm);
101+
return true;
102+
}
103+
104+
61105

62106
} // unnamed namespace
63107

@@ -331,7 +375,7 @@ ContentResponse::can_compress(const RequestContext& request) const
331375
{
332376
return request.can_compress()
333377
&& is_compressible_mime_type(m_mimeType)
334-
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
378+
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
335379
}
336380

337381
bool
@@ -365,35 +409,17 @@ ContentResponse::create_mhd_response(const RequestContext& request)
365409
}
366410
}
367411

368-
bool shouldCompress = can_compress(request);
369-
if (shouldCompress) {
370-
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
371-
uLongf comprLen = compr_buffer.capacity();
372-
int err = compress(&compr_buffer[0],
373-
&comprLen,
374-
(const Bytef*)(m_content.data()),
375-
m_content.size());
376-
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
377-
/* /!\ Internet Explorer has a bug with deflate compression.
378-
It can not handle the first two bytes (compression headers)
379-
We need to chunk them off (move the content 2bytes)
380-
It has no incidence on other browsers
381-
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
382-
m_content = string((char*)&compr_buffer[2], comprLen - 2);
383-
m_etag.set_option(ETag::COMPRESSED_CONTENT);
384-
} else {
385-
shouldCompress = false;
386-
}
387-
}
412+
const bool isCompressed = can_compress(request) && compress(m_content);
388413

389414
MHD_Response* response = MHD_create_response_from_buffer(
390415
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
391416

392-
if (shouldCompress) {
417+
if (isCompressed) {
418+
m_etag.set_option(ETag::COMPRESSED_CONTENT);
393419
MHD_add_response_header(
394420
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
395421
MHD_add_response_header(
396-
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
422+
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
397423
}
398424
return response;
399425
}

test/server.cpp

+12-11
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11

2+
#define CPPHTTPLIB_ZLIB_SUPPORT 1
23
#include "./httplib.h"
34
#include "gtest/gtest.h"
45

@@ -184,13 +185,10 @@ const ResourceCollection resources200Compressible{
184185
{ WITH_ETAG, "/ROOT/skin/taskbar.css" },
185186
{ WITH_ETAG, "/ROOT/skin/block_external.js" },
186187

187-
{ NO_ETAG, "/ROOT/catalog/root.xml" },
188-
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
189188
{ NO_ETAG, "/ROOT/catalog/search" },
190189

191190
{ NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" },
192191

193-
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
194192
{ NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" },
195193

196194
{ NO_ETAG, "/ROOT/catch/external?source=www.example.com" },
@@ -223,7 +221,10 @@ const ResourceCollection resources200Uncompressible{
223221
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.css" },
224222
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.js" },
225223

226-
// The title and creator are too small to be compressed
224+
// The following url's responses are too small to be compressed
225+
{ NO_ETAG, "/ROOT/catalog/root.xml" },
226+
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
227+
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
227228
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" },
228229
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" },
229230
};
@@ -271,17 +272,17 @@ TEST_F(ServerTest, 200)
271272
TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
272273
{
273274
for ( const Resource& res : resources200Compressible ) {
274-
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
275+
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
275276
EXPECT_EQ(200, x->status) << res;
276-
EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res;
277+
EXPECT_EQ("gzip", x->get_header_value("Content-Encoding")) << res;
277278
EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
278279
}
279280
}
280281

281282
TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
282283
{
283284
for ( const Resource& res : resources200Uncompressible ) {
284-
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
285+
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
285286
EXPECT_EQ(200, x->status) << res;
286287
EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
287288
}
@@ -1062,7 +1063,7 @@ TEST_F(ServerTest, CompressionInfluencesETag)
10621063
if ( ! res.etag_expected ) continue;
10631064
const auto g1 = zfs1_->GET(res.url);
10641065
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
1065-
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
1066+
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
10661067
const auto etag = g1->get_header_value("ETag");
10671068
EXPECT_EQ(etag, g2->get_header_value("ETag"));
10681069
EXPECT_NE(etag, g3->get_header_value("ETag"));
@@ -1075,7 +1076,7 @@ TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
10751076
if ( ! res.etag_expected ) continue;
10761077
const auto g1 = zfs1_->GET(res.url);
10771078
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
1078-
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
1079+
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
10791080
const auto etag = g1->get_header_value("ETag");
10801081
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
10811082
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
@@ -1114,7 +1115,7 @@ std::string make_etag_list(const std::string& etag)
11141115

11151116
TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
11161117
{
1117-
const char* const encodings[] = { "", "deflate" };
1118+
const char* const encodings[] = { "", "gzip" };
11181119
for ( const Resource& res : all200Resources() ) {
11191120
for ( const char* enc: encodings ) {
11201121
if ( ! res.etag_expected ) continue;
@@ -1245,7 +1246,7 @@ TEST_F(ServerTest, RangeHasPrecedenceOverCompression)
12451246

12461247
const Headers onlyRange{ {"Range", "bytes=123-456"} };
12471248
Headers rangeAndCompression(onlyRange);
1248-
rangeAndCompression.insert({"Accept-Encoding", "deflate"});
1249+
rangeAndCompression.insert({"Accept-Encoding", "gzip"});
12491250

12501251
const auto p1 = zfs1_->GET(url, onlyRange);
12511252
const auto p2 = zfs1_->GET(url, rangeAndCompression);

0 commit comments

Comments
 (0)