Skip to content

[SERVER] Support gzip encoding instead of deflate. #757

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ extern "C" {
#include "response.h"

#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
#define DEFAULT_CACHE_SIZE 2

namespace kiwix {
Expand Down
8 changes: 4 additions & 4 deletions src/server/request_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,15 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
method(str2RequestMethod(_method)),
version(version),
requestIndex(s_requestIndex++),
acceptEncodingDeflate(false),
acceptEncodingGzip(false),
byteRange_()
{
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);

try {
acceptEncodingDeflate =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
acceptEncodingGzip =
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
} catch (const std::out_of_range&) {}

try {
Expand Down Expand Up @@ -127,7 +127,7 @@ void RequestContext::print_debug_info() const {
printf("Parsed : \n");
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
printf("is_valid_url : %d\n", is_valid_url());
printf(".............\n");
Expand Down
4 changes: 2 additions & 2 deletions src/server/request_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class RequestContext {

ByteRange get_range() const;

bool can_compress() const { return acceptEncodingDeflate; }
bool can_compress() const { return acceptEncodingGzip; }

std::string get_user_language() const;

Expand All @@ -103,7 +103,7 @@ class RequestContext {
std::string version;
unsigned long long requestIndex;

bool acceptEncodingDeflate;
bool acceptEncodingGzip;

ByteRange byteRange_;
std::map<std::string, std::string> headers;
Expand Down
76 changes: 51 additions & 25 deletions src/server/response.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,17 @@
#include <mustache.hpp>
#include <zlib.h>


#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
#include <array>

// This is somehow a magic value.
// If this value is too small, we will compress (and lost cpu time) too much
// content.
// If this value is too big, we will not compress enough content and send too
// much data.
// If we assume that MTU is 1500 Bytes it is useless to compress
// content smaller as the content will be sent in one packet anyway.
// 1400 Bytes seems to be a common accepted limit.
#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400

namespace kiwix {

Expand All @@ -58,6 +67,41 @@ bool is_compressible_mime_type(const std::string& mimeType)
|| mimeType.find("application/json") != string::npos;
}

bool compress(std::string &content) {
z_stream strm;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;

auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
Z_DEFAULT_STRATEGY);
if (ret != Z_OK) { return false; }

strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
strm.next_in =
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));

std::string compressed;

std::array<char, 16384> buff{};
do {
strm.avail_out = buff.size();
strm.next_out = reinterpret_cast<Bytef *>(buff.data());
ret = deflate(&strm, Z_FINISH);
assert(ret != Z_STREAM_ERROR);
compressed.append(buff.data(), buff.size() - strm.avail_out);
} while (strm.avail_out == 0);

assert(ret == Z_STREAM_END);
assert(strm.avail_in == 0);

content.swap(compressed);

deflateEnd(&strm);
return true;
}



} // unnamed namespace

Expand Down Expand Up @@ -331,7 +375,7 @@ ContentResponse::can_compress(const RequestContext& request) const
{
return request.can_compress()
&& is_compressible_mime_type(m_mimeType)
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
}

bool
Expand Down Expand Up @@ -365,35 +409,17 @@ ContentResponse::create_mhd_response(const RequestContext& request)
}
}

bool shouldCompress = can_compress(request);
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
int err = compress(&compr_buffer[0],
&comprLen,
(const Bytef*)(m_content.data()),
m_content.size());
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
/* /!\ Internet Explorer has a bug with deflate compression.
It can not handle the first two bytes (compression headers)
We need to chunk them off (move the content 2bytes)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
m_etag.set_option(ETag::COMPRESSED_CONTENT);
} else {
shouldCompress = false;
}
}
const bool isCompressed = can_compress(request) && compress(m_content);

MHD_Response* response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);

if (shouldCompress) {
if (isCompressed) {
m_etag.set_option(ETag::COMPRESSED_CONTENT);
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
}
return response;
}
Expand Down
23 changes: 12 additions & 11 deletions test/server.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

#define CPPHTTPLIB_ZLIB_SUPPORT 1
#include "./httplib.h"
#include "gtest/gtest.h"

Expand Down Expand Up @@ -184,13 +185,10 @@ const ResourceCollection resources200Compressible{
{ WITH_ETAG, "/ROOT/skin/taskbar.css" },
{ WITH_ETAG, "/ROOT/skin/block_external.js" },

{ NO_ETAG, "/ROOT/catalog/root.xml" },
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
{ NO_ETAG, "/ROOT/catalog/search" },

{ NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" },

{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
{ NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" },

{ NO_ETAG, "/ROOT/catch/external?source=www.example.com" },
Expand Down Expand Up @@ -223,7 +221,10 @@ const ResourceCollection resources200Uncompressible{
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.css" },
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.js" },

// The title and creator are too small to be compressed
// The following url's responses are too small to be compressed
{ NO_ETAG, "/ROOT/catalog/root.xml" },
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" },
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" },
};
Expand Down Expand Up @@ -271,17 +272,17 @@ TEST_F(ServerTest, 200)
TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
{
for ( const Resource& res : resources200Compressible ) {
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
EXPECT_EQ(200, x->status) << res;
EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res;
EXPECT_EQ("gzip", x->get_header_value("Content-Encoding")) << res;
EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
}
}

TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
{
for ( const Resource& res : resources200Uncompressible ) {
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
EXPECT_EQ(200, x->status) << res;
EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
}
Expand Down Expand Up @@ -1062,7 +1063,7 @@ TEST_F(ServerTest, CompressionInfluencesETag)
if ( ! res.etag_expected ) continue;
const auto g1 = zfs1_->GET(res.url);
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
const auto etag = g1->get_header_value("ETag");
EXPECT_EQ(etag, g2->get_header_value("ETag"));
EXPECT_NE(etag, g3->get_header_value("ETag"));
Expand All @@ -1075,7 +1076,7 @@ TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
if ( ! res.etag_expected ) continue;
const auto g1 = zfs1_->GET(res.url);
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
const auto etag = g1->get_header_value("ETag");
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
Expand Down Expand Up @@ -1114,7 +1115,7 @@ std::string make_etag_list(const std::string& etag)

TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
{
const char* const encodings[] = { "", "deflate" };
const char* const encodings[] = { "", "gzip" };
for ( const Resource& res : all200Resources() ) {
for ( const char* enc: encodings ) {
if ( ! res.etag_expected ) continue;
Expand Down Expand Up @@ -1245,7 +1246,7 @@ TEST_F(ServerTest, RangeHasPrecedenceOverCompression)

const Headers onlyRange{ {"Range", "bytes=123-456"} };
Headers rangeAndCompression(onlyRange);
rangeAndCompression.insert({"Accept-Encoding", "deflate"});
rangeAndCompression.insert({"Accept-Encoding", "gzip"});

const auto p1 = zfs1_->GET(url, onlyRange);
const auto p2 = zfs1_->GET(url, rangeAndCompression);
Expand Down