@@ -6,24 +6,59 @@ package zstd
6
6
7
7
import (
8
8
"fmt"
9
- "math/bits"
9
+
10
+ "github.com/klauspost/compress"
10
11
)
11
12
12
13
const (
13
- bestLongTableBits = 20 // Bits used in the long match table
14
+ bestLongTableBits = 22 // Bits used in the long match table
14
15
bestLongTableSize = 1 << bestLongTableBits // Size of the table
15
16
bestLongLen = 8 // Bytes used for table hash
16
17
17
18
// Note: Increasing the short table bits or making the hash shorter
18
19
// can actually lead to compression degradation since it will 'steal' more from the
19
20
// long match table and match offsets are quite big.
20
21
// This greatly depends on the type of input.
21
- bestShortTableBits = 16 // Bits used in the short match table
22
+ bestShortTableBits = 18 // Bits used in the short match table
22
23
bestShortTableSize = 1 << bestShortTableBits // Size of the table
23
24
bestShortLen = 4 // Bytes used for table hash
24
25
25
26
)
26
27
28
// match describes one match candidate considered by the encoder.
//
//   - offset: position in the source buffer of the earlier data being matched.
//   - s:      position in the source buffer where the match starts.
//   - length: match length in bytes; 0 means there is no usable match.
//   - rep:    negative for a regular (table) match, otherwise the index of
//     the repeat offset that produced the match.
//   - est:    estimated cost in bits as computed by estBits; lower is better.
type match struct {
	offset, s, length, rep, est int32
}

// highScore is the estimate assigned to candidates that are invalid or not
// expected to give any gain, so they lose against any real match.
const highScore = 25000
37
+
38
+ // estBits will estimate output bits from predefined tables.
39
+ func (m * match ) estBits (bitsPerByte int32 ) {
40
+ mlc := mlCode (uint32 (m .length - zstdMinMatch ))
41
+ var ofc uint8
42
+ if m .rep < 0 {
43
+ ofc = ofCode (uint32 (m .s - m .offset ) + 3 )
44
+ } else {
45
+ ofc = ofCode (uint32 (m .rep ))
46
+ }
47
+ // Cost, excluding
48
+ ofTT , mlTT := fsePredefEnc [tableOffsets ].ct .symbolTT [ofc ], fsePredefEnc [tableMatchLengths ].ct .symbolTT [mlc ]
49
+
50
+ // Add cost of match encoding...
51
+ m .est = int32 (ofTT .outBits + mlTT .outBits )
52
+ m .est += int32 (ofTT .deltaNbBits >> 16 + mlTT .deltaNbBits >> 16 )
53
+ // Subtract savings compared to literal encoding...
54
+ m .est -= (m .length * bitsPerByte ) >> 10
55
+ if m .est > 0 {
56
+ // Unlikely gain..
57
+ m .length = 0
58
+ m .est = highScore
59
+ }
60
+ }
61
+
27
62
// bestFastEncoder uses 2 tables, one for short matches (4 bytes) and one for long matches.
28
63
// The long match table contains the previous entry with the same hash,
29
64
// effectively making it a "chain" of length 2.
@@ -112,6 +147,14 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
112
147
return
113
148
}
114
149
150
+ // Use this to estimate literal cost.
151
+ // Scaled by 10 bits.
152
+ bitsPerByte := int32 ((compress .ShannonEntropyBits (src ) * 1024 ) / len (src ))
153
+ // Huffman can never go < 1 bit/byte
154
+ if bitsPerByte < 1024 {
155
+ bitsPerByte = 1024
156
+ }
157
+
115
158
// Override src
116
159
src = e .hist
117
160
sLimit := int32 (len (src )) - inputMargin
@@ -148,29 +191,8 @@ encodeLoop:
148
191
panic ("offset0 was 0" )
149
192
}
150
193
151
- type match struct {
152
- offset int32
153
- s int32
154
- length int32
155
- rep int32
156
- }
157
- matchAt := func (offset int32 , s int32 , first uint32 , rep int32 ) match {
158
- if s - offset >= e .maxMatchOff || load3232 (src , offset ) != first {
159
- return match {offset : offset , s : s }
160
- }
161
- return match {offset : offset , s : s , length : 4 + e .matchlen (s + 4 , offset + 4 , src ), rep : rep }
162
- }
163
-
164
194
bestOf := func (a , b match ) match {
165
- aScore := b .s - a .s + a .length
166
- bScore := a .s - b .s + b .length
167
- if a .rep < 0 {
168
- aScore = aScore - int32 (bits .Len32 (uint32 (a .offset )))/ 8
169
- }
170
- if b .rep < 0 {
171
- bScore = bScore - int32 (bits .Len32 (uint32 (b .offset )))/ 8
172
- }
173
- if aScore >= bScore {
195
+ if a .est + (a .s - b .s )* bitsPerByte >> 10 < b .est + (b .s - a .s )* bitsPerByte >> 10 {
174
196
return a
175
197
}
176
198
return b
@@ -182,17 +204,31 @@ encodeLoop:
182
204
candidateL := e .longTable [nextHashL ]
183
205
candidateS := e .table [nextHashS ]
184
206
207
+ matchAt := func (offset int32 , s int32 , first uint32 , rep int32 ) match {
208
+ if s - offset >= e .maxMatchOff || load3232 (src , offset ) != first {
209
+ return match {s : s , est : highScore }
210
+ }
211
+ m := match {offset : offset , s : s , length : 4 + e .matchlen (s + 4 , offset + 4 , src ), rep : rep }
212
+ m .estBits (bitsPerByte )
213
+ return m
214
+ }
215
+
185
216
best := bestOf (matchAt (candidateL .offset - e .cur , s , uint32 (cv ), - 1 ), matchAt (candidateL .prev - e .cur , s , uint32 (cv ), - 1 ))
186
217
best = bestOf (best , matchAt (candidateS .offset - e .cur , s , uint32 (cv ), - 1 ))
187
218
best = bestOf (best , matchAt (candidateS .prev - e .cur , s , uint32 (cv ), - 1 ))
219
+
188
220
if canRepeat && best .length < goodEnough {
189
- best = bestOf (best , matchAt (s - offset1 + 1 , s + 1 , uint32 (cv >> 8 ), 1 ))
190
- best = bestOf (best , matchAt (s - offset2 + 1 , s + 1 , uint32 (cv >> 8 ), 2 ))
191
- best = bestOf (best , matchAt (s - offset3 + 1 , s + 1 , uint32 (cv >> 8 ), 3 ))
221
+ cv := uint32 (cv >> 8 )
222
+ spp := s + 1
223
+ best = bestOf (best , matchAt (spp - offset1 , spp , cv , 1 ))
224
+ best = bestOf (best , matchAt (spp - offset2 , spp , cv , 2 ))
225
+ best = bestOf (best , matchAt (spp - offset3 , spp , cv , 3 ))
192
226
if best .length > 0 {
193
- best = bestOf (best , matchAt (s - offset1 + 3 , s + 3 , uint32 (cv >> 24 ), 1 ))
194
- best = bestOf (best , matchAt (s - offset2 + 3 , s + 3 , uint32 (cv >> 24 ), 2 ))
195
- best = bestOf (best , matchAt (s - offset3 + 3 , s + 3 , uint32 (cv >> 24 ), 3 ))
227
+ cv >>= 16
228
+ spp += 2
229
+ best = bestOf (best , matchAt (spp - offset1 , spp , cv , 1 ))
230
+ best = bestOf (best , matchAt (spp - offset2 , spp , cv , 2 ))
231
+ best = bestOf (best , matchAt (spp - offset3 , spp , cv , 3 ))
196
232
}
197
233
}
198
234
// Load next and check...
@@ -218,12 +254,18 @@ encodeLoop:
218
254
candidateL = e .longTable [hashLen (cv , bestLongTableBits , bestLongLen )]
219
255
candidateL2 := e .longTable [hashLen (cv2 , bestLongTableBits , bestLongLen )]
220
256
257
+ // Short at s+1
221
258
best = bestOf (best , matchAt (candidateS .offset - e .cur , s , uint32 (cv ), - 1 ))
259
+ // Long at s+1, s+2
222
260
best = bestOf (best , matchAt (candidateL .offset - e .cur , s , uint32 (cv ), - 1 ))
223
261
best = bestOf (best , matchAt (candidateL .prev - e .cur , s , uint32 (cv ), - 1 ))
224
262
best = bestOf (best , matchAt (candidateL2 .offset - e .cur , s + 1 , uint32 (cv2 ), - 1 ))
225
263
best = bestOf (best , matchAt (candidateL2 .prev - e .cur , s + 1 , uint32 (cv2 ), - 1 ))
226
-
264
+ if false {
265
+ // Short at s+3.
266
+ // Too often worse...
267
+ best = bestOf (best , matchAt (e .table [hashLen (cv2 >> 8 , bestShortTableBits , bestShortLen )].offset - e .cur , s + 2 , uint32 (cv2 >> 8 ), - 1 ))
268
+ }
227
269
// See if we can find a better match by checking where the current best ends.
228
270
// Use that offset to see if we can find a better full match.
229
271
if sAt := best .s + best .length ; sAt < sLimit {
@@ -428,7 +470,7 @@ func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
428
470
e .Encode (blk , src )
429
471
}
430
472
431
- // ResetDict will reset and set a dictionary if not nil
473
+ // Reset will reset and set a dictionary if not nil
432
474
func (e * bestFastEncoder ) Reset (d * dict , singleBlock bool ) {
433
475
e .resetBase (d , singleBlock )
434
476
if d == nil {
0 commit comments