Skip to content

modules/zstd: add BlockDecoder #1214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a8c0510
modules/zstd: Add buffer library
rw1nkler Oct 24, 2023
7828870
modules/zstd: Add Buffer use-case example
rw1nkler Oct 31, 2023
3748388
modules/zstd/buffer: Add benchmarking rules
lpawelcz Jan 25, 2024
fc57626
modules/zstd: Add library for parsing magic number
rw1nkler Oct 24, 2023
1d11225
modules/zstd: Add library for parsing frame header
rw1nkler Oct 24, 2023
374b000
modules/zstd/frame_header: Add benchmarking rules
lpawelcz Jan 9, 2024
4f0a064
dependency_support/libzstd: Make zstd_errors.h public
lpawelcz Jan 10, 2024
6ddea32
dependency_support: Add decodecorpus binary
rw1nkler Nov 9, 2023
b4fb1ec
modules/zstd: Add data generator library
rw1nkler Nov 14, 2023
e25cbe1
modules/zstd: Add zstd frame header tests
rw1nkler Nov 7, 2023
dd34d02
modules/zstd: Add common zstd definitions
rw1nkler Nov 22, 2023
e6a0e0f
modules/zstd: Add raw block decoder
rw1nkler Nov 22, 2023
09267ae
modules/zstd/raw_block_decoder: Add benchmarking rules
lpawelcz Jan 9, 2024
d31c6eb
modules/zstd: Add rle block decoder
lpawelcz Nov 20, 2023
4c19f10
modules/zstd/rle_block_dec: Specify fifo depths for internal channels
lpawelcz Jan 9, 2024
5689ed9
modules/zstd/rle_block_decoder: Add benchmarking rules
lpawelcz Dec 29, 2023
06616b6
modules/zstd: Add block header parsing library
rw1nkler Nov 14, 2023
a91e03d
modules/zstd: Add SequenceExecutorPacket to common definitions
lpawelcz Jan 4, 2024
75bf542
modules/zstd: Add block data muxer library
rw1nkler Nov 22, 2023
9ae404f
modules/zstd/dec_mux: Add benchmarking rules
lpawelcz Jan 9, 2024
9397622
modules/zstd: Add block demuxer library
mtdudek Nov 27, 2023
26bd29d
modules/zstd/dec_demux: Specify fifo depths for internal channels
lpawelcz Jan 9, 2024
50b730b
modules/zstd/dec_demux: Add benchmarking rules
lpawelcz Jan 9, 2024
4b40857
modules/zstd: Add block decoder module
lpawelcz Nov 28, 2023
a022286
modules/zstd/block_dec: Specify fifo depths for internal channels
lpawelcz Dec 29, 2023
caf2b7c
modules/zstd/block_dec: Add benchmarking rules
lpawelcz Jan 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions dependency_support/com_github_facebook_zstd/bundled.BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cc_library(
]),
hdrs = [
"lib/zstd.h",
"lib/common/zstd_errors.h",
],
strip_include_prefix = "lib",
local_defines = [
Expand All @@ -48,3 +49,42 @@ cc_library(
],
visibility = ["//visibility:public"],
)

# Helper target that exposes lib/compress/zstd_compress.c through `hdrs`
# rather than `srcs`, so that dependents can #include the .c file.
# NOTE: Required because of direct zstd_compress.c include in decodecorpus sources
cc_library(
name = "decodecorpus_includes",
hdrs = [
"lib/compress/zstd_compress.c",
],
)

# Builds the `decodecorpus` binary from tests/decodecorpus.c together with the
# .c/.h files found under programs/ (minus zstdcli.c).
cc_binary(
name = "decodecorpus",
srcs = [
"tests/decodecorpus.c",
] + glob(
[
"programs/*.c",
"programs/*.h",
],
# NOTE(review): zstdcli.c is presumably excluded because it provides its
# own main() for the zstd CLI -- confirm against the upstream sources.
exclude = [
"programs/zstdcli.c",
],
),
deps = [
":zstd",
# Makes lib/compress/zstd_compress.c available for direct inclusion.
":decodecorpus_includes",
],
# Directories added to the include search path for this target.
includes = [
"lib/",
"lib/common/",
"lib/compress/",
"lib/dictBuilder/",
"lib/deprecated/",
"programs/",
],
# NOTE(review): presumably has to match the xxhash symbol prefix used when
# building the :zstd library -- confirm against that target's local_defines.
local_defines = [
"XXH_NAMESPACE=ZSTD_",
],
visibility = ["//visibility:public"],
)
152 changes: 152 additions & 0 deletions dependency_support/com_github_facebook_zstd/decodecorpus.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
diff --git tests/decodecorpus.c tests/decodecorpus.c
index 50935d31..522b3769 100644
--- tests/decodecorpus.c
+++ tests/decodecorpus.c
@@ -240,6 +240,12 @@ typedef enum {
gt_block, /* generate compressed blocks without block/frame headers */
} genType_e;

+typedef enum {
+ lt_raw,
+ lt_rle,
+ lt_compressed,
+} literalType_e;
+
/*-*******************************************************
* Global variables (set from command line)
*********************************************************/
@@ -252,7 +258,11 @@ U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX; /* <= 128 KB */

struct {
int contentSize; /* force the content size to be present */
-} opts; /* advanced options on generation */
+ blockType_e *blockType; /* force specific block type */
+ literalType_e *literalType; /* force specific literals type */
+ int frame_header_only; /* generate only frame header */
+ int no_magic; /* do not generate magic number */
+} opts;

/* Generate and write a random frame header */
static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
@@ -317,8 +327,10 @@ static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
}

/* write out the header */
- MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
- pos += 4;
+ if (!opts.no_magic) {
+ MEM_writeLE32(op + pos, ZSTD_MAGICNUMBER);
+ pos += 4;
+ }

{
/*
@@ -363,8 +375,10 @@ static void writeFrameHeader(U32* seed, frame_t* frame, dictInfo info)
/* Write a literal block in either raw or RLE form, return the literals size */
static size_t writeLiteralsBlockSimple(U32* seed, frame_t* frame, size_t contentSize)
{
+ int force_literal_type = opts.literalType != NULL;
+ int const type = (force_literal_type) ? *(opts.literalType) : RAND(seed) % 2;
+
BYTE* op = (BYTE*)frame->data;
- int const type = RAND(seed) % 2;
int const sizeFormatDesc = RAND(seed) % 8;
size_t litSize;
size_t maxLitSize = MIN(contentSize, g_maxBlockSize);
@@ -612,8 +626,15 @@ static size_t writeLiteralsBlockCompressed(U32* seed, frame_t* frame, size_t con

static size_t writeLiteralsBlock(U32* seed, frame_t* frame, size_t contentSize)
{
- /* only do compressed for larger segments to avoid compressibility issues */
- if (RAND(seed) & 7 && contentSize >= 64) {
+ int select_compressed = 0;
+ if (opts.literalType) {
+ select_compressed = *(opts.literalType) == lt_compressed;
+ } else {
+ /* only do compressed for larger segments to avoid compressibility issues */
+ select_compressed = RAND(seed) & 7 && contentSize >= 64;
+ }
+
+ if (select_compressed) {
return writeLiteralsBlockCompressed(seed, frame, contentSize);
} else {
return writeLiteralsBlockSimple(seed, frame, contentSize);
@@ -1030,7 +1051,8 @@ static size_t writeCompressedBlock(U32* seed, frame_t* frame, size_t contentSize
static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,
int lastBlock, dictInfo info)
{
- int const blockTypeDesc = RAND(seed) % 8;
+ int force_block_type = opts.blockType != NULL;
+ int const blockTypeDesc = (force_block_type) ? *(opts.blockType) : RAND(seed) % 8;
size_t blockSize;
int blockType;

@@ -1069,7 +1091,7 @@ static void writeBlock(U32* seed, frame_t* frame, size_t contentSize,

frame->data = op;
compressedSize = writeCompressedBlock(seed, frame, contentSize, info);
- if (compressedSize >= contentSize) { /* compressed block must be strictly smaller than uncompressed one */
+ if (compressedSize >= contentSize && !force_block_type) { /* compressed block must be strictly smaller than uncompressed one */
blockType = 0;
memcpy(op, frame->src, contentSize);

@@ -1240,7 +1262,11 @@ static U32 generateFrame(U32 seed, frame_t* fr, dictInfo info)
DISPLAYLEVEL(3, "frame seed: %u\n", (unsigned)seed);
initFrame(fr);

+
writeFrameHeader(&seed, fr, info);
+ if (opts.frame_header_only)
+ return seed;
+
writeBlocks(&seed, fr, info);
writeChecksum(fr);

@@ -1768,6 +1794,9 @@ static void advancedUsage(const char* programName)
DISPLAY( " --max-block-size-log=# : max block size log, must be in range [2, 17]\n");
DISPLAY( " --max-content-size-log=# : max content size log, must be <= 20\n");
DISPLAY( " (this is ignored with gen-blocks)\n");
+ DISPLAY( " --block-type=# : force certain block type (raw=0, rle=1, compressed=2)\n");
+ DISPLAY( " --frame-header-only : dump only frame header\n");
+ DISPLAY( " --no-magic : do not add magic number\n");
}

/*! readU32FromChar() :
@@ -1889,6 +1918,18 @@ int main(int argc, char** argv)
U32 value = readU32FromChar(&argument);
g_maxDecompressedSizeLog =
MIN(MAX_DECOMPRESSED_SIZE_LOG, value);
+ } else if (longCommandWArg(&argument, "block-type=")) {
+ U32 value = readU32FromChar(&argument);
+ opts.blockType = malloc(sizeof(blockType_e));
+ *(opts.blockType) = value;
+ } else if (longCommandWArg(&argument, "literal-type=")) {
+ U32 value = readU32FromChar(&argument);
+ opts.literalType = malloc(sizeof(literalType_e));
+ *(opts.literalType) = value;
+ } else if (strcmp(argument, "frame-header-only") == 0) {
+ opts.frame_header_only = 1;
+ } else if (strcmp(argument, "no-magic") == 0) {
+ opts.no_magic = 1;
} else {
advancedUsage(argv[0]);
return 1;
@@ -1900,6 +1941,18 @@ int main(int argc, char** argv)
return 1;
} } } } /* for (argNb=1; argNb<argc; argNb++) */

+ if (opts.blockType) {
+ if ((opts.contentSize == 0) && (*(opts.blockType) == bt_rle)) {
+ DISPLAY("Error: content-size has to be used together with blockType=1 (rle block)\n");
+ return 1;
+ }
+
+ if (opts.literalType && (*(opts.blockType) != bt_compressed)) {
+ DISPLAY("Error: literal-type can be used only with blockType=2 (compressed block)\n");
+ return 1;
+ }
+ }
+
if (!seedset) {
seed = makeSeed();
}
2 changes: 2 additions & 0 deletions dependency_support/load_external.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -297,4 +297,6 @@ def load_external_repositories():
strip_prefix = "zstd-1.4.7",
urls = ["https://github.com/facebook/zstd/releases/download/v1.4.7/zstd-1.4.7.tar.gz"],
build_file = "@//dependency_support/com_github_facebook_zstd:bundled.BUILD.bazel",
# Patch decodecorpus to add options for forcing block/literal types, emitting only the frame header, and omitting the magic number
patches = ["@com_google_xls//dependency_support/com_github_facebook_zstd:decodecorpus.patch"],
)
Loading