Skip to content

Commit 6310972

Browse files
jcrossleyjenkins authored and jenkins committed
finagle: Allow request classification for different SLOs in SLOStatsFilter
Problem: The SLOStatsFilter could only be configured with a single target latency, but different server methods (or different arguments to those methods) may have different latency profiles that we want to account for.

Solution: Allow the user to define a partial function of request => SLODefinition.

Differential Revision: https://phabricator.twitter.biz/D1192061
1 parent 786f06c commit 6310972

File tree

2 files changed

+196
-71
lines changed

2 files changed

+196
-71
lines changed

finagle-core/src/main/scala/com/twitter/finagle/filter/SLOStatsFilter.scala

+70-37
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package com.twitter.finagle.filter
22

33
import com.twitter.finagle.FailureFlags
4-
import com.twitter.finagle.stats.StatsReceiver
5-
import com.twitter.finagle.param
4+
import com.twitter.finagle.Filter
5+
import com.twitter.finagle.Filter.TypeAgnostic
66
import com.twitter.finagle.Service
77
import com.twitter.finagle.ServiceFactory
88
import com.twitter.finagle.SimpleFilter
@@ -11,6 +11,7 @@ import com.twitter.finagle.Stackable
1111
import com.twitter.finagle.service.ReqRep
1212
import com.twitter.finagle.service.ResponseClass
1313
import com.twitter.finagle.service.ResponseClassifier
14+
import com.twitter.finagle.stats.StatsReceiver
1415
import com.twitter.util.Duration
1516
import com.twitter.util.Future
1617
import com.twitter.util.Stopwatch
@@ -26,7 +27,7 @@ private[twitter] object SLOStatsFilter {
2627

2728
object Param {
2829
case class Configured(
29-
latency: Duration)
30+
requestToSLODefinition: PartialFunction[Any, SLODefinition])
3031
extends Param
3132

3233
case object Disabled extends Param
@@ -36,71 +37,103 @@ private[twitter] object SLOStatsFilter {
3637

3738
val Disabled: Param = Param.Disabled
3839

39-
def configured(latency: Duration): Param = {
40-
Param.Configured(latency)
40+
def configured(
41+
requestToSLODefinition: PartialFunction[Any, SLODefinition],
42+
): Param = {
43+
Param.Configured(requestToSLODefinition)
44+
}
45+
46+
def configured(SLODefinition: SLODefinition): Param = {
47+
Param.Configured({
48+
case _ => SLODefinition
49+
})
50+
}
51+
52+
def typeAgnostic(
53+
statsReceiver: StatsReceiver,
54+
requestToSLODefinition: PartialFunction[Any, SLODefinition],
55+
responseClassifier: ResponseClassifier,
56+
nowNanos: () => Long = Stopwatch.systemNanos
57+
): TypeAgnostic = new TypeAgnostic {
58+
def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] =
59+
new SLOStatsFilter[Req, Rep](
60+
requestToSLODefinition,
61+
responseClassifier,
62+
statsReceiver,
63+
nowNanos)
4164
}
4265

4366
def module[Req, Rep]: Stackable[ServiceFactory[Req, Rep]] =
44-
new Stack.Module3[param.Stats, param.ResponseClassifier, Param, ServiceFactory[Req, Rep]] {
67+
new Stack.Module3[
68+
com.twitter.finagle.param.Stats,
69+
com.twitter.finagle.param.ResponseClassifier,
70+
Param,
71+
ServiceFactory[Req, Rep]
72+
] {
4573
val role = SLOStatsFilter.role
4674
val description =
4775
"Record number of SLO violations of underlying service"
4876
override def make(
49-
_stats: param.Stats,
50-
_responseClassifier: param.ResponseClassifier,
77+
_stats: com.twitter.finagle.param.Stats,
78+
_responseClassifier: com.twitter.finagle.param.ResponseClassifier,
5179
params: Param,
5280
next: ServiceFactory[Req, Rep]
5381
): ServiceFactory[Req, Rep] = {
5482
params match {
5583
case Param.Disabled => next
56-
case Param.Configured(latency) =>
57-
val param.Stats(statsReceiver) = _stats
58-
val param.ResponseClassifier(responseClassifier) = _responseClassifier
84+
case Param.Configured(requestToSLODefinition) =>
85+
val com.twitter.finagle.param.Stats(statsReceiver) = _stats
86+
val com.twitter.finagle.param.ResponseClassifier(responseClassifier) =
87+
_responseClassifier
88+
5989
new SLOStatsFilter(
60-
statsReceiver.scope("slo"),
61-
latency.inNanoseconds,
62-
responseClassifier).andThen(next)
90+
requestToSLODefinition,
91+
responseClassifier,
92+
statsReceiver.scope("slo")).andThen(next)
6393
}
6494
}
6595
}
6696
}
6797

98+
case class SLODefinition(scope: String, latency: Duration)
99+
68100
/**
69-
* A [[com.twitter.finagle.Filter]] that records the number of slo violations from the underlying
70-
* service. A request is classified as violating the slo if any of the following occur:
101+
* A [[com.twitter.finagle.Filter]] that records the number of slo violations (as determined from
102+
* `requestToSLODefinition`) from the underlying service. A request is classified as violating the
103+
* slo if any of the following occur:
71104
* - The response returns after the `latency` duration of the request's matching `SLODefinition` has elapsed
72105
* - The response is classified as a failure according to the ResponseClassifier (but is not
73106
* ignorable or interrupted)
74107
*/
75-
private[finagle] class SLOStatsFilter[Req, Rep](
76-
statsReceiver: StatsReceiver,
77-
latencyNanos: Long,
108+
class SLOStatsFilter[Req, Rep](
109+
requestToSLODefinition: PartialFunction[Any, SLODefinition],
78110
responseClassifier: ResponseClassifier,
111+
statsReceiver: StatsReceiver,
79112
nowNanos: () => Long = Stopwatch.systemNanos)
80113
extends SimpleFilter[Req, Rep] {
81114

82-
private[this] val violationsScope = statsReceiver.scope("violations")
83-
private[this] val violationsTotalCounter = violationsScope.counter("total")
84-
private[this] val violationsFailuresCounter = violationsScope.counter("failures")
85-
private[this] val violationsLatencyCounter = violationsScope.counter("latency")
86-
87115
def apply(request: Req, service: Service[Req, Rep]): Future[Rep] = {
88116
val start = nowNanos()
89117
service(request).respond { response =>
90118
if (!isIgnorable(response)) {
91-
var violated = false
92-
if (nowNanos() - start > latencyNanos) {
93-
violated = true
94-
violationsLatencyCounter.incr()
95-
}
96-
97-
if (isFailure(request, response)) {
98-
violated = true
99-
violationsFailuresCounter.incr()
100-
}
101-
102-
if (violated) {
103-
violationsTotalCounter.incr()
119+
if (requestToSLODefinition.isDefinedAt(request)) {
120+
val sloDefinition = requestToSLODefinition(request)
121+
var violated = false
122+
if (nowNanos() - start > sloDefinition.latency.inNanoseconds) {
123+
violated = true
124+
statsReceiver.counter(sloDefinition.scope, "violations", "latency").incr()
125+
}
126+
127+
if (isFailure(request, response)) {
128+
violated = true
129+
statsReceiver.counter(sloDefinition.scope, "violations", "failures").incr()
130+
}
131+
132+
if (violated) {
133+
statsReceiver.counter(sloDefinition.scope, "violations", "total").incr()
134+
}
135+
136+
statsReceiver.counter(sloDefinition.scope, "total").incr()
104137
}
105138
}
106139
}

0 commit comments

Comments
 (0)