-
Notifications
You must be signed in to change notification settings - Fork 7.4k
Base64 functions #3350
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
alexey-milovidov
merged 9 commits into
ClickHouse:master
from
alex-krash:base64_functions
Nov 23, 2018
Merged
Base64 functions #3350
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
5ff30eb
Initial implementation
41281ad
Make compilation on-demand
alex-krash c1b8f7c
Change repository clone method
alex-krash 92916d3
Merge https://github.com/yandex/ClickHouse into base64_functions
alex-krash 468ca70
Minor review fixes
alex-krash 6ab45d0
Arch-based compilation
alex-krash 050abdf
Auto detect of codec
alex-krash 177a00f
Merge https://github.com/yandex/ClickHouse into base64_functions
alex-krash af01328
Merge branch 'master' into base64_functions
alexey-milovidov File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Optional base64 support built from the bundled contrib/base64 sources.
# When the sources are available this sets BASE64_INCLUDE_DIR, BASE64_LIBRARY
# and USE_BASE64; otherwise only a warning is printed and USE_BASE64 stays unset.
option (ENABLE_BASE64 "Enable base64" ON)

if (ENABLE_BASE64)
    # An uninitialised git submodule still leaves an (empty) contrib/base64
    # directory behind, so probe for a source file the library build compiles
    # instead of probing for the directory itself.
    if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64/lib/lib.c")
        message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init --recursive")
    else ()
        set (BASE64_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64/include")
        set (BASE64_LIBRARY base64)
        set (USE_BASE64 1)
    endif ()
endif ()
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
config.h |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Builds the bundled base64 library (contrib/base64) as the `base64` target.
#
# Inputs read from the enclosing scope:
#   ClickHouse_SOURCE_DIR  - root of the source tree
#   LINK_MODE              - library type passed through to add_library()
#   HAVE_SSSE3, HAVE_SSE41, HAVE_SSE42, HAVE_AVX, HAVE_AVX2
#                          - instruction-set availability flags
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64")

# cast_to_bool(<var> <instruction>)
#
# Normalises HAVE_<var> into a literal 0/1 stored in base64_<var>, and, when
# the feature is available, stores the matching compiler flag in
# base64_<var>_opt. Kept as a macro because it must set variables in the
# caller's scope.
macro(cast_to_bool var instruction)
    if (HAVE_${var})
        set(base64_${var} 1)
        set(base64_${var}_opt ${instruction})
    else()
        set(base64_${var} 0)
    endif()
endmacro()

cast_to_bool(SSSE3 "-mssse3")
cast_to_bool(SSE41 "-msse4.1")
cast_to_bool(SSE42 "-msse4.2")
cast_to_bool(AVX "-mavx")
cast_to_bool(AVX2 "-mavx2")

# Write config.h next to this CMakeLists.txt: the template first, then one
# HAVE_* define per instruction set.
# NOTE(review): this writes into the source tree on every configure; consider
# moving the generated header to the binary directory.
set(base64_config_header "${CMAKE_CURRENT_SOURCE_DIR}/config.h")
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/config-header.tpl" header)
file(WRITE "${base64_config_header}" "${header}")
file(APPEND "${base64_config_header}" "#define HAVE_SSSE3 ${base64_SSSE3}\n")
file(APPEND "${base64_config_header}" "#define HAVE_SSE41 ${base64_SSE41}\n")
file(APPEND "${base64_config_header}" "#define HAVE_SSE42 ${base64_SSE42}\n")
file(APPEND "${base64_config_header}" "#define HAVE_AVX ${base64_AVX}\n")
file(APPEND "${base64_config_header}" "#define HAVE_AVX2 ${base64_AVX2}\n")

# HAVE_FAST_UNALIGNED_ACCESS is 1 as soon as any of the SIMD sets is available.
set(HAVE_FAST_UNALIGNED_ACCESS 0)
if (base64_SSSE3 OR base64_SSE41 OR base64_SSE42 OR base64_AVX OR base64_AVX2)
    set(HAVE_FAST_UNALIGNED_ACCESS 1)
endif ()

file(APPEND "${base64_config_header}" "#define HAVE_FAST_UNALIGNED_ACCESS ${HAVE_FAST_UNALIGNED_ACCESS}\n")

add_library(base64 ${LINK_MODE}
    ${LIBRARY_DIR}/lib/lib.c
    ${LIBRARY_DIR}/lib/codec_choose.c
    ${LIBRARY_DIR}/lib/arch/avx/codec.c
    ${LIBRARY_DIR}/lib/arch/avx2/codec.c
    ${LIBRARY_DIR}/lib/arch/generic/codec.c
    ${LIBRARY_DIR}/lib/arch/neon32/codec.c
    ${LIBRARY_DIR}/lib/arch/neon64/codec.c
    ${LIBRARY_DIR}/lib/arch/sse41/codec.c
    ${LIBRARY_DIR}/lib/arch/sse42/codec.c
    ${LIBRARY_DIR}/lib/arch/ssse3/codec.c

    ${LIBRARY_DIR}/lib/codecs.h
    ${base64_config_header})

# NOTE(review): every enabled -m<isa> flag is applied to all sources of the
# target, including the generic codec and the codec chooser. If the library is
# expected to pick a codec at run time, per-source flags may be needed so the
# generic path stays runnable on CPUs without these extensions - confirm.
target_compile_options(base64 PRIVATE ${base64_SSSE3_opt} ${base64_SSE41_opt} ${base64_SSE42_opt} ${base64_AVX_opt} ${base64_AVX2_opt})
target_include_directories(base64 PRIVATE "${LIBRARY_DIR}/include" "${CMAKE_CURRENT_SOURCE_DIR}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
#define HAVE_NEON32 0 | ||
#define HAVE_NEON64 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#include <Common/config.h> | ||
#if USE_BASE64 | ||
#include <Columns/ColumnConst.h> | ||
#include <Columns/ColumnString.h> | ||
#include <DataTypes/DataTypeString.h> | ||
#include <Functions/FunctionFactory.h> | ||
#include <Functions/FunctionHelpers.h> | ||
#include <Functions/GatherUtils/Algorithms.h> | ||
#include <IO/WriteHelpers.h> | ||
#include <libbase64.h> | ||
|
||
|
||
namespace DB | ||
{ | ||
using namespace GatherUtils; | ||
|
||
namespace ErrorCodes | ||
{ | ||
extern const int ILLEGAL_COLUMN; | ||
extern const int ILLEGAL_TYPE_OF_ARGUMENT; | ||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; | ||
extern const int INCORRECT_DATA; | ||
} | ||
|
||
/// Traits for the encoding direction: function name plus output buffer sizing.
struct Base64Encode
{
    static constexpr auto name = "base64Encode";

    /// Upper bound on the bytes needed to hold the encoded form of a string column.
    /// `string_length` is the size of the column's character buffer, which the caller
    /// treats as containing one extra byte per string; `string_count` is the number of strings.
    static size_t getBufferSize(size_t string_length, size_t string_count)
    {
        const size_t payload_bytes = string_length - string_count;
        /// One additional group per string covers a trailing partial 3-byte group.
        const size_t groups = payload_bytes / 3 + string_count;
        /// Each group yields 4 output characters; one more byte per string is reserved on top.
        return groups * 4 + string_count;
    }
};
|
||
/// Traits for the strict decoding direction: function name plus output buffer sizing.
struct Base64Decode
{
    static constexpr auto name = "base64Decode";

    /// Upper bound on the bytes needed to hold the decoded form of a string column.
    /// `string_length` is the size of the column's character buffer, which the caller
    /// treats as containing one extra byte per string; `string_count` is the number of strings.
    static size_t getBufferSize(size_t string_length, size_t string_count)
    {
        const size_t payload_bytes = string_length - string_count;
        /// One additional group per string covers input that is not a multiple of 4 characters.
        const size_t groups = payload_bytes / 4 + string_count;
        /// Each group yields at most 3 output bytes; one more byte per string is reserved on top.
        return groups * 3 + string_count;
    }
};
|
||
struct TryBase64Decode | ||
{ | ||
static constexpr auto name = "tryBase64Decode"; | ||
|
||
static size_t getBufferSize(size_t string_length, size_t string_count) | ||
{ | ||
return Base64Decode::getBufferSize(string_length, string_count); | ||
} | ||
}; | ||
|
||
template <typename Func> | ||
class FunctionBase64Conversion : public IFunction | ||
{ | ||
public: | ||
static constexpr auto name = Func::name; | ||
|
||
static FunctionPtr create(const Context &) | ||
{ | ||
return std::make_shared<FunctionBase64Conversion>(); | ||
} | ||
|
||
String getName() const override | ||
{ | ||
return Func::name; | ||
} | ||
|
||
size_t getNumberOfArguments() const override | ||
{ | ||
return 1; | ||
} | ||
|
||
bool useDefaultImplementationForConstants() const override | ||
{ | ||
return true; | ||
} | ||
|
||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override | ||
{ | ||
if (!WhichDataType(arguments[0].type).isString()) | ||
throw Exception( | ||
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.", | ||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); | ||
|
||
return std::make_shared<DataTypeString>(); | ||
} | ||
|
||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override | ||
{ | ||
const ColumnPtr column_string = block.getByPosition(arguments[0]).column; | ||
const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get()); | ||
|
||
if (!input) | ||
throw Exception( | ||
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), | ||
ErrorCodes::ILLEGAL_COLUMN); | ||
|
||
auto dst_column = ColumnString::create(); | ||
auto & dst_data = dst_column->getChars(); | ||
auto & dst_offsets = dst_column->getOffsets(); | ||
|
||
size_t reserve = Func::getBufferSize(input->getChars().size(), input->size()); | ||
dst_data.resize(reserve); | ||
dst_offsets.resize(input_rows_count); | ||
|
||
const ColumnString::Offsets & src_offsets = input->getOffsets(); | ||
|
||
auto source = reinterpret_cast<const char *>(input->getChars().data()); | ||
auto dst = reinterpret_cast<char *>(dst_data.data()); | ||
auto dst_pos = dst; | ||
|
||
size_t src_offset_prev = 0; | ||
|
||
int codec = getCodec(); | ||
for (size_t row = 0; row < input_rows_count; ++row) | ||
{ | ||
size_t srclen = src_offsets[row] - src_offset_prev - 1; | ||
size_t outlen = 0; | ||
|
||
if constexpr (std::is_same_v<Func, Base64Encode>) | ||
{ | ||
base64_encode(source, srclen, dst_pos, &outlen, codec); | ||
} | ||
else if constexpr (std::is_same_v<Func, Base64Decode>) | ||
{ | ||
if (!base64_decode(source, srclen, dst_pos, &outlen, codec)) | ||
{ | ||
throw Exception("Failed to " + getName() + " input '" + String(source, srclen) + "'", ErrorCodes::INCORRECT_DATA); | ||
} | ||
} | ||
else | ||
{ | ||
// during decoding character array can be partially polluted | ||
// if fail, revert back and clean | ||
auto savepoint = dst_pos; | ||
if (!base64_decode(source, srclen, dst_pos, &outlen, codec)) | ||
{ | ||
outlen = 0; | ||
dst_pos = savepoint; | ||
// clean the symbol | ||
dst_pos[0] = 0; | ||
} | ||
} | ||
|
||
source += srclen + 1; | ||
alex-krash marked this conversation as resolved.
Show resolved
Hide resolved
|
||
dst_pos += outlen + 1; | ||
|
||
dst_offsets[row] = dst_pos - dst; | ||
src_offset_prev = src_offsets[row]; | ||
} | ||
|
||
dst_data.resize(dst_pos - dst); | ||
|
||
block.getByPosition(result).column = std::move(dst_column); | ||
} | ||
|
||
private: | ||
static int getCodec() | ||
{ | ||
return 0; | ||
} | ||
}; | ||
} | ||
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#include <Functions/FunctionBase64Conversion.h> | ||
#if USE_BASE64 | ||
#include <DataTypes/DataTypeString.h> | ||
#include <Functions/FunctionFactory.h> | ||
|
||
namespace DB | ||
{ | ||
|
||
/// Registers the Base64Decode instantiation of FunctionBase64Conversion in `factory`.
void registerFunctionBase64Decode(FunctionFactory & factory)
{
    using Base64DecodeFunction = FunctionBase64Conversion<Base64Decode>;
    factory.registerFunction<Base64DecodeFunction>();
}
} | ||
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#include <Functions/FunctionBase64Conversion.h> | ||
#if USE_BASE64 | ||
#include <DataTypes/DataTypeString.h> | ||
#include <Functions/FunctionFactory.h> | ||
|
||
namespace DB | ||
{ | ||
|
||
/// Registers the Base64Encode instantiation of FunctionBase64Conversion in `factory`.
void registerFunctionBase64Encode(FunctionFactory & factory)
{
    using Base64EncodeFunction = FunctionBase64Conversion<Base64Encode>;
    factory.registerFunction<Base64EncodeFunction>();
}
} | ||
#endif |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.