Skip to content

Commit 6c2ac28

Browse files
authored
zstd: Improve Best compression (#404)
* zstd: Improve Best compression. Compare expected literal encoding to predefined table code size and increase long table size.
* Skip best when race testing.
* Remove println
* Skip more on race test
* Reduce more memory.
* Update benchmarks
1 parent af25d77 commit 6c2ac28

File tree

7 files changed

+128
-45
lines changed

7 files changed

+128
-45
lines changed

.github/workflows/go.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
- name: Test Race
3838
env:
3939
CGO_ENABLED: 1
40-
run: go test -cpu="1,4" -short -race ./...
40+
run: go test -cpu="1,4" -short -race -v ./...
4141

4242
build-special:
4343
env:

zstd/README.md

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ file out level insize outsize millis mb/s
152152
silesia.tar zskp 1 211947520 73101992 643 313.87
153153
silesia.tar zskp 2 211947520 67504318 969 208.38
154154
silesia.tar zskp 3 211947520 64595893 2007 100.68
155-
silesia.tar zskp 4 211947520 60995370 7691 26.28
155+
silesia.tar zskp 4 211947520 60995370 8825 22.90
156156
157157
cgo zstd:
158158
silesia.tar zstd 1 211947520 73605392 543 371.56
@@ -162,7 +162,7 @@ silesia.tar zstd 9 211947520 60212393 5063 39.92
162162
163163
gzip, stdlib/this package:
164164
silesia.tar gzstd 1 211947520 80007735 1654 122.21
165-
silesia.tar gzkp 1 211947520 80369488 1168 173.06
165+
silesia.tar gzkp 1 211947520 80136201 1152 175.45
166166
167167
GOB stream of binary data. Highly compressible.
168168
https://files.klauspost.com/compress/gob-stream.7z
@@ -171,13 +171,15 @@ file out level insize outsize millis mb/s
171171
gob-stream zskp 1 1911399616 235022249 3088 590.30
172172
gob-stream zskp 2 1911399616 205669791 3786 481.34
173173
gob-stream zskp 3 1911399616 175034659 9636 189.17
174-
gob-stream zskp 4 1911399616 167273881 29337 62.13
174+
gob-stream zskp 4 1911399616 165609838 50369 36.19
175+
175176
gob-stream zstd 1 1911399616 249810424 2637 691.26
176177
gob-stream zstd 3 1911399616 208192146 3490 522.31
177178
gob-stream zstd 6 1911399616 193632038 6687 272.56
178179
gob-stream zstd 9 1911399616 177620386 16175 112.70
180+
179181
gob-stream gzstd 1 1911399616 357382641 10251 177.82
180-
gob-stream gzkp 1 1911399616 362156523 5695 320.08
182+
gob-stream gzkp 1 1911399616 359753026 5438 335.20
181183
182184
The test data for the Large Text Compression Benchmark is the first
183185
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
@@ -187,11 +189,13 @@ file out level insize outsize millis mb/s
187189
enwik9 zskp 1 1000000000 343848582 3609 264.18
188190
enwik9 zskp 2 1000000000 317276632 5746 165.97
189191
enwik9 zskp 3 1000000000 292243069 12162 78.41
190-
enwik9 zskp 4 1000000000 275241169 36430 26.18
192+
enwik9 zskp 4 1000000000 262183768 82837 11.51
193+
191194
enwik9 zstd 1 1000000000 358072021 3110 306.65
192195
enwik9 zstd 3 1000000000 313734672 4784 199.35
193196
enwik9 zstd 6 1000000000 295138875 10290 92.68
194197
enwik9 zstd 9 1000000000 278348700 28549 33.40
198+
195199
enwik9 gzstd 1 1000000000 382578136 9604 99.30
196200
enwik9 gzkp 1 1000000000 383825945 6544 145.73
197201
@@ -202,13 +206,15 @@ file out level insize outsize millis mb/s
202206
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
203207
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
204208
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
205-
github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
209+
github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16
210+
206211
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
207212
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
208213
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
209214
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
215+
210216
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
211-
github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
217+
github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61
212218
213219
VM Image, Linux mint with a few installed applications:
214220
https://files.klauspost.com/compress/rawstudio-mint14.7z
@@ -217,13 +223,15 @@ file out level insize outsize millis mb/s
217223
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
218224
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
219225
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
220-
rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
226+
rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52
227+
221228
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
222229
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
223230
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
224231
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
232+
225233
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
226-
rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
234+
rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92
227235
228236
CSV data:
229237
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
@@ -232,13 +240,15 @@ file out level insize outsize millis mb/s
232240
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
233241
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
234242
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
235-
nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
243+
nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33
244+
236245
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
237246
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
238247
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
239248
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
249+
240250
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
241-
nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
251+
nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00
242252
```
243253

244254
## Decompressor

zstd/dict_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ func TestEncoder_SmallDict(t *testing.T) {
103103
}
104104
dicts = append(dicts, in)
105105
for level := SpeedFastest; level < speedLast; level++ {
106+
if isRaceTest && level >= SpeedBestCompression {
107+
break
108+
}
106109
enc, err := NewWriter(nil, WithEncoderConcurrency(1), WithEncoderDict(in), WithEncoderLevel(level), WithWindowSize(1<<17))
107110
if err != nil {
108111
t.Fatal(err)

zstd/enc_best.go

Lines changed: 75 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,59 @@ package zstd
66

77
import (
88
"fmt"
9-
"math/bits"
9+
10+
"github.com/klauspost/compress"
1011
)
1112

1213
const (
13-
bestLongTableBits = 20 // Bits used in the long match table
14+
bestLongTableBits = 22 // Bits used in the long match table
1415
bestLongTableSize = 1 << bestLongTableBits // Size of the table
1516
bestLongLen = 8 // Bytes used for table hash
1617

1718
// Note: Increasing the short table bits or making the hash shorter
1819
// can actually lead to compression degradation since it will 'steal' more from the
1920
// long match table and match offsets are quite big.
2021
// This greatly depends on the type of input.
21-
bestShortTableBits = 16 // Bits used in the short match table
22+
bestShortTableBits = 18 // Bits used in the short match table
2223
bestShortTableSize = 1 << bestShortTableBits // Size of the table
2324
bestShortLen = 4 // Bytes used for table hash
2425

2526
)
2627

28+
type match struct {
29+
offset int32
30+
s int32
31+
length int32
32+
rep int32
33+
est int32
34+
}
35+
36+
const highScore = 25000
37+
38+
// estBits will estimate output bits from predefined tables.
39+
func (m *match) estBits(bitsPerByte int32) {
40+
mlc := mlCode(uint32(m.length - zstdMinMatch))
41+
var ofc uint8
42+
if m.rep < 0 {
43+
ofc = ofCode(uint32(m.s-m.offset) + 3)
44+
} else {
45+
ofc = ofCode(uint32(m.rep))
46+
}
47+
// Look up the predefined offset and match length table entries used for the cost estimate.
48+
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
49+
50+
// Add cost of match encoding...
51+
m.est = int32(ofTT.outBits + mlTT.outBits)
52+
m.est += int32(ofTT.deltaNbBits>>16 + mlTT.deltaNbBits>>16)
53+
// Subtract savings compared to literal encoding...
54+
m.est -= (m.length * bitsPerByte) >> 10
55+
if m.est > 0 {
56+
// Unlikely gain..
57+
m.length = 0
58+
m.est = highScore
59+
}
60+
}
61+
2762
// bestFastEncoder uses 2 tables, one for short matches (4 bytes) and one for long matches.
2863
// The long match table contains the previous entry with the same hash,
2964
// effectively making it a "chain" of length 2.
@@ -112,6 +147,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
112147
return
113148
}
114149

150+
// Use this to estimate literal cost.
151+
// Scaled by 10 bits.
152+
bitsPerByte := int32((compress.ShannonEntropyBits(src) * 1024) / len(src))
153+
// Huffman can never go < 1 bit/byte
154+
if bitsPerByte < 1024 {
155+
bitsPerByte = 1024
156+
}
157+
115158
// Override src
116159
src = e.hist
117160
sLimit := int32(len(src)) - inputMargin
@@ -148,29 +191,8 @@ encodeLoop:
148191
panic("offset0 was 0")
149192
}
150193

151-
type match struct {
152-
offset int32
153-
s int32
154-
length int32
155-
rep int32
156-
}
157-
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
158-
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
159-
return match{offset: offset, s: s}
160-
}
161-
return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
162-
}
163-
164194
bestOf := func(a, b match) match {
165-
aScore := b.s - a.s + a.length
166-
bScore := a.s - b.s + b.length
167-
if a.rep < 0 {
168-
aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
169-
}
170-
if b.rep < 0 {
171-
bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
172-
}
173-
if aScore >= bScore {
195+
if a.est+(a.s-b.s)*bitsPerByte>>10 < b.est+(b.s-a.s)*bitsPerByte>>10 {
174196
return a
175197
}
176198
return b
@@ -182,17 +204,31 @@ encodeLoop:
182204
candidateL := e.longTable[nextHashL]
183205
candidateS := e.table[nextHashS]
184206

207+
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
208+
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
209+
return match{s: s, est: highScore}
210+
}
211+
m := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
212+
m.estBits(bitsPerByte)
213+
return m
214+
}
215+
185216
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
186217
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
187218
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
219+
188220
if canRepeat && best.length < goodEnough {
189-
best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
190-
best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
191-
best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
221+
cv := uint32(cv >> 8)
222+
spp := s + 1
223+
best = bestOf(best, matchAt(spp-offset1, spp, cv, 1))
224+
best = bestOf(best, matchAt(spp-offset2, spp, cv, 2))
225+
best = bestOf(best, matchAt(spp-offset3, spp, cv, 3))
192226
if best.length > 0 {
193-
best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
194-
best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
195-
best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
227+
cv >>= 16
228+
spp += 2
229+
best = bestOf(best, matchAt(spp-offset1, spp, cv, 1))
230+
best = bestOf(best, matchAt(spp-offset2, spp, cv, 2))
231+
best = bestOf(best, matchAt(spp-offset3, spp, cv, 3))
196232
}
197233
}
198234
// Load next and check...
@@ -218,12 +254,18 @@ encodeLoop:
218254
candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
219255
candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
220256

257+
// Short at s+1
221258
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
259+
// Long at s+1, s+2
222260
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
223261
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
224262
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
225263
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
226-
264+
if false {
265+
// Short at s+3.
266+
// Too often worse...
267+
best = bestOf(best, matchAt(e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1))
268+
}
227269
// See if we can find a better match by checking where the current best ends.
228270
// Use that offset to see if we can find a better full match.
229271
if sAt := best.s + best.length; sAt < sLimit {
@@ -428,7 +470,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
428470
e.Encode(blk, src)
429471
}
430472

431-
// ResetDict will reset and set a dictionary if not nil
473+
// Reset will reset and set a dictionary if not nil
432474
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
433475
e.resetBase(d, singleBlock)
434476
if d == nil {

zstd/encoder_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ type testEncOpt struct {
3131
func getEncOpts(cMax int) []testEncOpt {
3232
var o []testEncOpt
3333
for level := speedNotSet + 1; level < speedLast; level++ {
34+
if isRaceTest && level >= SpeedBestCompression {
35+
break
36+
}
3437
for conc := 1; conc <= 4; conc *= 2 {
3538
for _, wind := range testWindowSizes {
3639
addOpt := func(name string, options ...EOption) {
@@ -75,6 +78,7 @@ func TestEncoder_EncodeAllSimple(t *testing.T) {
7578
in = append(in, in...)
7679
for _, opts := range getEncOpts(4) {
7780
t.Run(opts.name, func(t *testing.T) {
81+
runtime.GC()
7882
e, err := NewWriter(nil, opts.o...)
7983
if err != nil {
8084
t.Fatal(err)
@@ -172,6 +176,9 @@ func TestEncoder_EncodeAllEncodeXML(t *testing.T) {
172176

173177
for level := speedNotSet + 1; level < speedLast; level++ {
174178
t.Run(level.String(), func(t *testing.T) {
179+
if isRaceTest && level >= SpeedBestCompression {
180+
t.SkipNow()
181+
}
175182
e, err := NewWriter(nil, WithEncoderLevel(level))
176183
if err != nil {
177184
t.Fatal(err)
@@ -291,6 +298,9 @@ func TestEncoder_EncodeAllTwain(t *testing.T) {
291298

292299
for level := speedNotSet + 1; level < speedLast; level++ {
293300
t.Run(level.String(), func(t *testing.T) {
301+
if isRaceTest && level >= SpeedBestCompression {
302+
t.SkipNow()
303+
}
294304
for _, windowSize := range testWindowSizes {
295305
t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
296306
e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))
@@ -337,6 +347,9 @@ func TestEncoder_EncodeAllPi(t *testing.T) {
337347

338348
for level := speedNotSet + 1; level < speedLast; level++ {
339349
t.Run(level.String(), func(t *testing.T) {
350+
if isRaceTest && level >= SpeedBestCompression {
351+
t.SkipNow()
352+
}
340353
for _, windowSize := range testWindowSizes {
341354
t.Run(fmt.Sprintf("window:%d", windowSize), func(t *testing.T) {
342355
e, err := NewWriter(nil, WithEncoderLevel(level), WithWindowSize(windowSize))

zstd/race_enabled_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Copyright 2019+ Klaus Post. All rights reserved.
2+
// License information can be found in the LICENSE file.
3+
4+
// +build race
5+
6+
package zstd
7+
8+
func init() {
9+
isRaceTest = true
10+
}

zstd/zstd_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// Copyright 2019+ Klaus Post. All rights reserved.
2+
// License information can be found in the LICENSE file.
3+
14
package zstd
25

36
import (
@@ -9,6 +12,8 @@ import (
912
"time"
1013
)
1114

15+
var isRaceTest bool
16+
1217
func TestMain(m *testing.M) {
1318
ec := m.Run()
1419
if ec == 0 && runtime.NumGoroutine() > 1 {

0 commit comments

Comments
 (0)