Skip to content

Commit 3fd6913

Browse files
committed
add a negative cache to regexp decoder
Often, the regexp decoder runs repeatedly on the same inputs and skips them via the regexp filter. A common example is matching a cgroup path in a chain like so: ``` - name: cgroup - name: regexp regexps: - ^.*(system.slice).*$ ``` Anything that is not in system.slice will be skipped. In some cases where we filter only a small subset of inputs, the overhead of regexp matching can often be noticeable. We add a cache here that records inputs that produce ErrSkipLabelSet, so that regexp matching is skipped for them on subsequent calls, reducing the work done on regexp matching. The cache size is customizable with the `skip_cache_size` configuration option. Signed-off-by: Daniel Dao <[email protected]>
1 parent 8f471b0 commit 3fd6913

File tree

7 files changed

+42
-1
lines changed

7 files changed

+42
-1
lines changed

.vscode/config-schema.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,10 @@ definitions:
145145
type: object
146146
allow_unknown:
147147
type: boolean
148+
skip_cache_size:
149+
type: number
148150
regexps:
149151
type: array
150152
items:
151153
type: string
154+

config/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ type Decoder struct {
7373
StaticMap map[string]string `yaml:"static_map"`
7474
Regexps []string `yaml:"regexps"`
7575
AllowUnknown bool `yaml:"allow_unknown"`
76+
// SkipCacheSize is the size of the cache of inputs that resulted in ErrSkipLabelSet (0 disables the cache)
77+
SkipCacheSize uint `yaml:"skip_cache_size"`
7678
}
7779

7880
// HistogramBucketType is an enum to define how to interpret histogram

decoder/regexp.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,32 @@ import (
66
"regexp"
77

88
"github.com/cloudflare/ebpf_exporter/v2/config"
9+
lru "github.com/hashicorp/golang-lru/v2"
910
)
1011

1112
// Regexp is a decoder that only allows inputs matching regexp
1213
type Regexp struct {
13-
cache map[string]*regexp.Regexp
14+
cache map[string]*regexp.Regexp
15+
skipCache *lru.Cache[string, struct{}]
1416
}
1517

1618
// Decode only allows inputs matching regexp
1719
func (r *Regexp) Decode(in []byte, conf config.Decoder) ([]byte, error) {
1820
if conf.Regexps == nil {
1921
return nil, errors.New("no regexps defined in config")
2022
}
23+
inputStr := string(in)
2124

2225
if r.cache == nil {
2326
r.cache = map[string]*regexp.Regexp{}
2427
}
28+
if conf.SkipCacheSize > 0 && r.skipCache == nil {
29+
skipCache, err := lru.New[string, struct{}](int(conf.SkipCacheSize))
30+
if err != nil {
31+
return nil, err
32+
}
33+
r.skipCache = skipCache
34+
}
2535

2636
for _, expr := range conf.Regexps {
2737
if _, ok := r.cache[expr]; !ok {
@@ -33,6 +43,12 @@ func (r *Regexp) Decode(in []byte, conf config.Decoder) ([]byte, error) {
3343
r.cache[expr] = compiled
3444
}
3545

46+
if r.skipCache != nil {
47+
if _, ok := r.skipCache.Get(inputStr); ok {
48+
return nil, ErrSkipLabelSet
49+
}
50+
}
51+
3652
matches := r.cache[expr].FindSubmatch(in)
3753

3854
// First sub-match if present
@@ -46,5 +62,9 @@ func (r *Regexp) Decode(in []byte, conf config.Decoder) ([]byte, error) {
4662
}
4763
}
4864

65+
if r.skipCache != nil {
66+
r.skipCache.Add(inputStr, struct{}{})
67+
}
68+
4969
return nil, ErrSkipLabelSet
5070
}

decoder/regexp_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,15 @@ func TestRegexpDecoder(t *testing.T) {
5454
}
5555
}
5656
}
57+
58+
func TestRegexpDecoderWithSkipCache(t *testing.T) {
59+
d := &Regexp{}
60+
input := []byte("whatever")
61+
_, err := d.Decode(input, config.Decoder{Regexps: []string{"^(systemd).*$", "^syslog-ng$"}, SkipCacheSize: 100})
62+
if !errors.Is(err, ErrSkipLabelSet) {
63+
t.Errorf("Error decoding %s: %v", input, err)
64+
}
65+
if !d.skipCache.Contains("whatever") {
66+
t.Errorf("failed to add to skipcache %s: kets=%v", input, d.skipCache.Keys())
67+
}
68+
}

examples/regexp.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ metrics:
88
decoders:
99
- name: string
1010
- name: regexp
11+
skip_cache_size: 100
1112
regexps:
1213
- ^(kswapd).*$ # anything matching kswapd prefix, mapping to one kswapd bucket
1314
- ^(systemd.*)$ # anything matching systemd prefix, mapping to one systemd bucket

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ require (
77
github.com/aquasecurity/libbpfgo v0.8.0-libbpf-1.5
88
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
99
github.com/elastic/go-perf v0.0.0-20191212140718-9c656876f595
10+
github.com/hashicorp/golang-lru/v2 v2.0.7
1011
github.com/iovisor/gobpf v0.2.0
1112
github.com/jaypipes/pcidb v1.0.1
1213
github.com/mdlayher/sdnotify v1.0.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
3030
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
3131
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
3232
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
33+
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
34+
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
3335
github.com/iovisor/gobpf v0.2.0 h1:34xkQxft+35GagXBk3n23eqhm0v7q0ejeVirb8sqEOQ=
3436
github.com/iovisor/gobpf v0.2.0/go.mod h1:WSY9Jj5RhdgC3ci1QaacvbFdQ8cbrEjrpiZbLHLt2s4=
3537
github.com/jaypipes/pcidb v1.0.1 h1:WB2zh27T3nwg8AE8ei81sNRb9yWBii3JGNJtT7K9Oic=

0 commit comments

Comments
 (0)