Skip to content

Commit 31f56cd

Browse files
committed
chunkenc: improve readability of compression benchmarks
For BenchmarkWrite: Use Go runtime facilities to report MB/s for each operation and the compression ratio. Stop collecting all compressed chunks, as this blows up the memory of the benchmark and creates unrealistic test conditions for the later encodings. For BenchmarkRead: Replace the reporting of MB/s, which gave several values for each encoding as Go hunted for the right number of benchmark iterations. Add stats to the context for decoding; otherwise a new stats object is created each time round the loop. Re-order so that all unsampled results come before all sampled results.
1 parent 557fdf2 commit 31f56cd

File tree

1 file changed

+16
-33
lines changed

1 file changed

+16
-33
lines changed

pkg/chunkenc/memchunk_test.go

Lines changed: 16 additions & 33 deletions
Original file line number · Diff line number · Diff line change
@@ -643,11 +643,7 @@ func TestIteratorClose(t *testing.T) {
643643
}
644644
}
645645

646-
var result []Chunk
647-
648646
func BenchmarkWrite(b *testing.B) {
649-
chunks := []Chunk{}
650-
651647
entry := &logproto.Entry{
652648
Timestamp: time.Unix(0, 0),
653649
Line: testdata.LogString(0),
@@ -657,6 +653,7 @@ func BenchmarkWrite(b *testing.B) {
657653
for _, f := range HeadBlockFmts {
658654
for _, enc := range testEncoding {
659655
b.Run(fmt.Sprintf("%v-%v", f, enc), func(b *testing.B) {
656+
uncompressedBytes, compressedBytes := 0, 0
660657
for n := 0; n < b.N; n++ {
661658
c := NewMemChunk(enc, f, testBlockSize, testTargetSize)
662659
// adds until full so we trigger cut which serialize using gzip
@@ -666,9 +663,11 @@ func BenchmarkWrite(b *testing.B) {
666663
entry.Line = testdata.LogString(i)
667664
i++
668665
}
669-
chunks = append(chunks, c)
666+
uncompressedBytes += c.UncompressedSize()
667+
compressedBytes += c.CompressedSize()
670668
}
671-
result = chunks
669+
b.SetBytes(int64(uncompressedBytes) / int64(b.N))
670+
b.ReportMetric(float64(compressedBytes)/float64(uncompressedBytes)*100, "%compressed")
672671
})
673672
}
674673
}
@@ -685,23 +684,17 @@ func (nomatchPipeline) ProcessString(_ int64, line string) (string, log.LabelsRe
685684
}
686685

687686
func BenchmarkRead(b *testing.B) {
688-
type res struct {
689-
name string
690-
speed float64
691-
}
692-
result := []res{}
693687
for _, bs := range testBlockSizes {
694688
for _, enc := range testEncoding {
695689
name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
696690
b.Run(name, func(b *testing.B) {
697691
chunks, size := generateData(enc, 5, bs, testTargetSize)
692+
_, ctx := stats.NewContext(context.Background())
698693
b.ResetTimer()
699-
bytesRead := uint64(0)
700-
now := time.Now()
701694
for n := 0; n < b.N; n++ {
702695
for _, c := range chunks {
703696
// use forward iterator for benchmark -- backward iterator does extra allocations by keeping entries in memory
704-
iterator, err := c.Iterator(context.Background(), time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
697+
iterator, err := c.Iterator(ctx, time.Unix(0, 0), time.Now(), logproto.FORWARD, nomatchPipeline{})
705698
if err != nil {
706699
panic(err)
707700
}
@@ -712,24 +705,23 @@ func BenchmarkRead(b *testing.B) {
712705
b.Fatal(err)
713706
}
714707
}
715-
bytesRead += size
716708
}
717-
result = append(result, res{
718-
name: name,
719-
speed: float64(bytesRead) / time.Since(now).Seconds(),
720-
})
709+
b.SetBytes(int64(size))
721710
})
711+
}
712+
}
722713

723-
name = fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
724-
714+
for _, bs := range testBlockSizes {
715+
for _, enc := range testEncoding {
716+
name := fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs)))
725717
b.Run(name, func(b *testing.B) {
726718
chunks, size := generateData(enc, 5, bs, testTargetSize)
719+
_, ctx := stats.NewContext(context.Background())
727720
b.ResetTimer()
728721
bytesRead := uint64(0)
729-
now := time.Now()
730722
for n := 0; n < b.N; n++ {
731723
for _, c := range chunks {
732-
iterator := c.SampleIterator(context.Background(), time.Unix(0, 0), time.Now(), countExtractor)
724+
iterator := c.SampleIterator(ctx, time.Unix(0, 0), time.Now(), countExtractor)
733725
for iterator.Next() {
734726
_ = iterator.Sample()
735727
}
@@ -739,19 +731,10 @@ func BenchmarkRead(b *testing.B) {
739731
}
740732
bytesRead += size
741733
}
742-
result = append(result, res{
743-
name: name,
744-
speed: float64(bytesRead) / time.Since(now).Seconds(),
745-
})
734+
b.SetBytes(int64(bytesRead) / int64(b.N))
746735
})
747736
}
748737
}
749-
sort.Slice(result, func(i, j int) bool {
750-
return result[i].speed > result[j].speed
751-
})
752-
for _, r := range result {
753-
fmt.Printf("%s: %.2f MB/s\n", r.name, r.speed/1024/1024)
754-
}
755738
}
756739

757740
func BenchmarkBackwardIterator(b *testing.B) {

0 commit comments

Comments (0)