Skip to content

Commit 9102759

Browse files
authored
Make CT log selection simpler and more robust (#8152)
Simplify the way we load and handle CT logs: rather than keeping them grouped by operator, simply keep a flat list and annotate each log with its operator's name. At submission time, instead of shuffling operator groups and submitting to one log from each group, shuffle the whole set of individual logs. Support tiled logs by similarly annotating each log with whether it is tiled or not. Also make the way we know when to stop getting SCTs more robust. Previously we would stop as soon as we had two SCTs, since we knew that they would be from different operator groups and didn't care about tiled logs. Instead, introduce an explicit CT policy compliance evaluation function which tells us if the set of SCTs we have so far forms a compliant set. This is not our desired end-state for CT log submission. Ideally we'd like to: simplify things even further (don't race all the logs, simply try to submit to two at a time), improve selection (intelligently pick the next log to submit to, rather than just a random shuffle), and fine-tune latency (tiled logs should have longer timeouts than classic ones). Those improvements will come in future PRs. Part of #7872
1 parent e01bc22 commit 9102759

File tree

6 files changed

+422
-494
lines changed

6 files changed

+422
-494
lines changed

ctpolicy/ctpolicy.go

+107-107
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package ctpolicy
22

33
import (
44
"context"
5+
"encoding/base64"
56
"fmt"
67
"strings"
78
"time"
@@ -23,15 +24,14 @@ const (
2324
// CTPolicy is used to hold information about SCTs required from various
2425
// groupings
2526
type CTPolicy struct {
26-
pub pubpb.PublisherClient
27-
sctLogs loglist.List
28-
infoLogs loglist.List
29-
finalLogs loglist.List
30-
stagger time.Duration
31-
log blog.Logger
32-
winnerCounter *prometheus.CounterVec
33-
operatorGroupsGauge *prometheus.GaugeVec
34-
shardExpiryGauge *prometheus.GaugeVec
27+
pub pubpb.PublisherClient
28+
sctLogs loglist.List
29+
infoLogs loglist.List
30+
finalLogs loglist.List
31+
stagger time.Duration
32+
log blog.Logger
33+
winnerCounter *prometheus.CounterVec
34+
shardExpiryGauge *prometheus.GaugeVec
3535
}
3636

3737
// New creates a new CTPolicy struct
@@ -45,15 +45,6 @@ func New(pub pubpb.PublisherClient, sctLogs loglist.List, infoLogs loglist.List,
4545
)
4646
stats.MustRegister(winnerCounter)
4747

48-
operatorGroupsGauge := prometheus.NewGaugeVec(
49-
prometheus.GaugeOpts{
50-
Name: "ct_operator_group_size_gauge",
51-
Help: "Gauge for CT operators group size, by operator and log source (capable of providing SCT, informational logs, logs we submit final certs to).",
52-
},
53-
[]string{"operator", "source"},
54-
)
55-
stats.MustRegister(operatorGroupsGauge)
56-
5748
shardExpiryGauge := prometheus.NewGaugeVec(
5849
prometheus.GaugeOpts{
5950
Name: "ct_shard_expiration_seconds",
@@ -63,43 +54,30 @@ func New(pub pubpb.PublisherClient, sctLogs loglist.List, infoLogs loglist.List,
6354
)
6455
stats.MustRegister(shardExpiryGauge)
6556

66-
for op, group := range sctLogs {
67-
operatorGroupsGauge.WithLabelValues(op, "sctLogs").Set(float64(len(group)))
68-
69-
for _, log := range group {
70-
if log.EndExclusive.IsZero() {
71-
// Handles the case for non-temporally sharded logs too.
72-
shardExpiryGauge.WithLabelValues(op, log.Name).Set(float64(0))
73-
} else {
74-
shardExpiryGauge.WithLabelValues(op, log.Name).Set(float64(log.EndExclusive.Unix()))
75-
}
57+
for _, log := range sctLogs {
58+
if log.EndExclusive.IsZero() {
59+
// Handles the case for non-temporally sharded logs too.
60+
shardExpiryGauge.WithLabelValues(log.Operator, log.Name).Set(float64(0))
61+
} else {
62+
shardExpiryGauge.WithLabelValues(log.Operator, log.Name).Set(float64(log.EndExclusive.Unix()))
7663
}
7764
}
7865

79-
for op, group := range infoLogs {
80-
operatorGroupsGauge.WithLabelValues(op, "infoLogs").Set(float64(len(group)))
81-
}
82-
83-
for op, group := range finalLogs {
84-
operatorGroupsGauge.WithLabelValues(op, "finalLogs").Set(float64(len(group)))
85-
}
86-
8766
return &CTPolicy{
88-
pub: pub,
89-
sctLogs: sctLogs,
90-
infoLogs: infoLogs,
91-
finalLogs: finalLogs,
92-
stagger: stagger,
93-
log: log,
94-
winnerCounter: winnerCounter,
95-
operatorGroupsGauge: operatorGroupsGauge,
96-
shardExpiryGauge: shardExpiryGauge,
67+
pub: pub,
68+
sctLogs: sctLogs,
69+
infoLogs: infoLogs,
70+
finalLogs: finalLogs,
71+
stagger: stagger,
72+
log: log,
73+
winnerCounter: winnerCounter,
74+
shardExpiryGauge: shardExpiryGauge,
9775
}
9876
}
9977

10078
type result struct {
79+
log loglist.Log
10180
sct []byte
102-
url string
10381
err error
10482
}
10583

@@ -115,73 +93,68 @@ func (ctp *CTPolicy) GetSCTs(ctx context.Context, cert core.CertDER, expiration
11593
subCtx, cancel := context.WithCancel(ctx)
11694
defer cancel()
11795

118-
// This closure will be called in parallel once for each operator group.
119-
getOne := func(i int, g string) ([]byte, string, error) {
120-
// Sleep a little bit to stagger our requests to the later groups. Use `i-1`
121-
// to compute the stagger duration so that the first two groups (indices 0
96+
// This closure will be called in parallel once for each log.
97+
getOne := func(i int, l loglist.Log) ([]byte, error) {
98+
// Sleep a little bit to stagger our requests to the later logs. Use `i-1`
99+
// to compute the stagger duration so that the first two logs (indices 0
122100
// and 1) get negative or zero (i.e. instant) sleep durations. If the
123-
// context gets cancelled (most likely because two logs from other operator
124-
// groups returned SCTs already) before the sleep is complete, quit instead.
101+
// context gets cancelled (most likely because we got enough SCTs from other
102+
// logs already) before the sleep is complete, quit instead.
125103
select {
126104
case <-subCtx.Done():
127-
return nil, "", subCtx.Err()
105+
return nil, subCtx.Err()
128106
case <-time.After(time.Duration(i-1) * ctp.stagger):
129107
}
130108

131-
// Pick a random log from among those in the group. In practice, very few
132-
// operator groups have more than one log, so this loses little flexibility.
133-
url, key, err := ctp.sctLogs.PickOne(g, expiration)
134-
if err != nil {
135-
return nil, "", fmt.Errorf("unable to get log info: %w", err)
136-
}
137-
138109
sct, err := ctp.pub.SubmitToSingleCTWithResult(ctx, &pubpb.Request{
139-
LogURL: url,
140-
LogPublicKey: key,
110+
LogURL: l.Url,
111+
LogPublicKey: base64.StdEncoding.EncodeToString(l.Key),
141112
Der: cert,
142113
Kind: pubpb.SubmissionType_sct,
143114
})
144115
if err != nil {
145-
return nil, url, fmt.Errorf("ct submission to %q (%q) failed: %w", g, url, err)
116+
return nil, fmt.Errorf("ct submission to %q (%q) failed: %w", l.Name, l.Url, err)
146117
}
147118

148-
return sct.Sct, url, nil
119+
return sct.Sct, nil
149120
}
150121

151-
// Ensure that this channel has a buffer equal to the number of goroutines
152-
// we're kicking off, so that they're all guaranteed to be able to write to
153-
// it and exit without blocking and leaking.
154-
results := make(chan result, len(ctp.sctLogs))
122+
// Identify the set of candidate logs whose temporal interval includes this
123+
// cert's expiry. Randomize the order of the logs so that we're not always
124+
// trying to submit to the same two.
125+
logs := ctp.sctLogs.ForTime(expiration).Permute()
155126

156127
// Kick off a collection of goroutines to try to submit the precert to each
157-
// log operator group. Randomize the order of the groups so that we're not
158-
// always trying to submit to the same two operators.
159-
for i, group := range ctp.sctLogs.Permute() {
160-
go func(i int, g string) {
161-
sctDER, url, err := getOne(i, g)
162-
results <- result{sct: sctDER, url: url, err: err}
163-
}(i, group)
128+
// log. Ensure that the results channel has a buffer equal to the number of
129+
// goroutines we're kicking off, so that they're all guaranteed to be able to
130+
// write to it and exit without blocking and leaking.
131+
resChan := make(chan result, len(logs))
132+
for i, log := range logs {
133+
go func(i int, l loglist.Log) {
134+
sctDER, err := getOne(i, l)
135+
resChan <- result{log: l, sct: sctDER, err: err}
136+
}(i, log)
164137
}
165138

166139
go ctp.submitPrecertInformational(cert, expiration)
167140

168141
// Finally, collect SCTs and/or errors from our results channel. We know that
169-
// we will collect len(ctp.sctLogs) results from the channel because every
170-
// goroutine is guaranteed to write one result to the channel.
171-
scts := make(core.SCTDERs, 0)
142+
// we can collect len(logs) results from the channel because every goroutine
143+
// is guaranteed to write one result (either sct or error) to the channel.
144+
results := make([]result, 0)
172145
errs := make([]string, 0)
173-
for range len(ctp.sctLogs) {
174-
res := <-results
146+
for range len(logs) {
147+
res := <-resChan
175148
if res.err != nil {
176149
errs = append(errs, res.err.Error())
177-
if res.url != "" {
178-
ctp.winnerCounter.WithLabelValues(res.url, failed).Inc()
179-
}
150+
ctp.winnerCounter.WithLabelValues(res.log.Url, failed).Inc()
180151
continue
181152
}
182-
scts = append(scts, res.sct)
183-
ctp.winnerCounter.WithLabelValues(res.url, succeeded).Inc()
184-
if len(scts) >= 2 {
153+
results = append(results, res)
154+
ctp.winnerCounter.WithLabelValues(res.log.Url, succeeded).Inc()
155+
156+
scts := compliantSet(results)
157+
if scts != nil {
185158
return scts, nil
186159
}
187160
}
@@ -196,6 +169,36 @@ func (ctp *CTPolicy) GetSCTs(ctx context.Context, cert core.CertDER, expiration
196169
return nil, berrors.MissingSCTsError("failed to get 2 SCTs, got %d error(s): %s", len(errs), strings.Join(errs, "; "))
197170
}
198171

172+
// compliantSet returns a slice of SCTs which complies with all relevant CT Log
173+
// Policy requirements, namely that the set of SCTs:
174+
// - contain at least two SCTs, which
175+
// - come from logs run by at least two different operators, and
176+
// - contain at least one RFC6962-compliant (i.e. non-static/tiled) log.
177+
//
178+
// If no such set of SCTs exists, returns nil.
179+
func compliantSet(results []result) core.SCTDERs {
180+
for _, first := range results {
181+
if first.err != nil {
182+
continue
183+
}
184+
for _, second := range results {
185+
if second.err != nil {
186+
continue
187+
}
188+
if first.log.Operator == second.log.Operator {
189+
// The two SCTs must come from different operators.
190+
continue
191+
}
192+
if first.log.Tiled && second.log.Tiled {
193+
// At least one must come from a non-tiled log.
194+
continue
195+
}
196+
return core.SCTDERs{first.sct, second.sct}
197+
}
198+
}
199+
return nil
200+
}
201+
199202
// submitAllBestEffort submits the given certificate or precertificate to every
200203
// log ("informational" for precerts, "final" for certs) configured in the policy.
201204
// It neither waits for these submission to complete, nor tracks their success.
@@ -205,29 +208,26 @@ func (ctp *CTPolicy) submitAllBestEffort(blob core.CertDER, kind pubpb.Submissio
205208
logs = ctp.infoLogs
206209
}
207210

208-
for _, group := range logs {
209-
for _, log := range group {
210-
if log.StartInclusive.After(expiry) || log.EndExclusive.Equal(expiry) || log.EndExclusive.Before(expiry) {
211-
continue
212-
}
213-
214-
go func(log loglist.Log) {
215-
_, err := ctp.pub.SubmitToSingleCTWithResult(
216-
context.Background(),
217-
&pubpb.Request{
218-
LogURL: log.Url,
219-
LogPublicKey: log.Key,
220-
Der: blob,
221-
Kind: kind,
222-
},
223-
)
224-
if err != nil {
225-
ctp.log.Warningf("ct submission of cert to log %q failed: %s", log.Url, err)
226-
}
227-
}(log)
211+
for _, log := range logs {
212+
if log.StartInclusive.After(expiry) || log.EndExclusive.Equal(expiry) || log.EndExclusive.Before(expiry) {
213+
continue
228214
}
229-
}
230215

216+
go func(log loglist.Log) {
217+
_, err := ctp.pub.SubmitToSingleCTWithResult(
218+
context.Background(),
219+
&pubpb.Request{
220+
LogURL: log.Url,
221+
LogPublicKey: base64.StdEncoding.EncodeToString(log.Key),
222+
Der: blob,
223+
Kind: kind,
224+
},
225+
)
226+
if err != nil {
227+
ctp.log.Warningf("ct submission of cert to log %q failed: %s", log.Url, err)
228+
}
229+
}(log)
230+
}
231231
}
232232

233233
// submitPrecertInformational submits precertificates to any configured

0 commit comments

Comments
 (0)