Skip to content

Commit f246cb3

Browse files
authored
shardController: revisit shard shutdown order, add latency metrics (#314)
1 parent 85ba1df commit f246cb3

File tree

4 files changed

+18
-12
lines changed

4 files changed

+18
-12
lines changed

common/metrics/defs.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,9 @@ const (
488488
ShardItemRemovedCounter
489489
MembershipChangedCounter
490490
NumShardsGauge
491+
GetEngineForShardErrorCounter
492+
GetEngineForShardLatency
493+
RemoveEngineForShardLatency
491494
)
492495

493496
// Matching metrics enum
@@ -558,6 +561,9 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
558561
ShardItemRemovedCounter: {metricName: "sharditem-removed-count", metricType: Counter},
559562
MembershipChangedCounter: {metricName: "membership-changed-count", metricType: Counter},
560563
NumShardsGauge: {metricName: "numshards-gauge", metricType: Gauge},
564+
GetEngineForShardErrorCounter: {metricName: "get-engine-for-shard-errors", metricType: Counter},
565+
GetEngineForShardLatency: {metricName: "get-engine-for-shard-latency", metricType: Timer},
566+
RemoveEngineForShardLatency: {metricName: "remove-engine-for-shard-latency", metricType: Timer},
561567
},
562568
Matching: {
563569
PollSuccessCounter: {metricName: "poll.success"},

glide.lock

Lines changed: 0 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

glide.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,6 @@ import:
2020
- package: github.com/dgryski/go-farm
2121
- package: github.com/emirpasic/gods
2222
- package: github.com/davecgh/go-spew
23-
- package: github.com/uber-go/timer
24-
subpackages:
25-
- twheel
2623
- package: github.com/urfave/cli
2724
- package: gopkg.in/yaml.v2
2825
- package: gopkg.in/validator.v2

service/history/shardController.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,15 +167,18 @@ func (c *shardController) GetEngine(workflowID string) (Engine, error) {
167167
}
168168

169169
func (c *shardController) getEngineForShard(shardID int) (Engine, error) {
170+
sw := c.metricsClient.StartTimer(metrics.HistoryShardControllerScope, metrics.GetEngineForShardLatency)
171+
defer sw.Stop()
170172
item, err := c.getOrCreateHistoryShardItem(shardID)
171173
if err != nil {
172174
return nil, err
173175
}
174-
175176
return item.getOrCreateEngine(c.shardClosedCh)
176177
}
177178

178179
func (c *shardController) removeEngineForShard(shardID int) {
180+
sw := c.metricsClient.StartTimer(metrics.HistoryShardControllerScope, metrics.RemoveEngineForShardLatency)
181+
defer sw.Stop()
179182
item, _ := c.removeHistoryShardItem(shardID)
180183
if item != nil {
181184
item.stopEngine()
@@ -297,6 +300,7 @@ AcquireLoop:
297300
if info.Identity() == c.host.Identity() {
298301
_, err1 := c.getEngineForShard(shardID)
299302
if err1 != nil {
303+
c.metricsClient.IncCounter(metrics.HistoryShardControllerScope, metrics.GetEngineForShardErrorCounter)
300304
logging.LogOperationFailedEvent(c.logger, fmt.Sprintf("Unable to create history shard engine: %v", shardID),
301305
err1)
302306
continue AcquireLoop
@@ -381,16 +385,19 @@ func (i *historyShardsItem) stopEngine() {
381385
i.Lock()
382386
defer i.Unlock()
383387

384-
if i.executionMgr != nil {
385-
i.executionMgr.Close()
386-
}
387-
388388
if i.engine != nil {
389389
logging.LogShardEngineStoppingEvent(i.logger, i.host.Identity(), i.shardID)
390390
i.engine.Stop()
391391
i.engine = nil
392392
logging.LogShardEngineStoppedEvent(i.logger, i.host.Identity(), i.shardID)
393393
}
394+
395+
// Shutting down executionMgr will close all connections
396+
// to cassandra for this engine. So, make sure to
397+
// close executionMgr only after stopping the engine
398+
if i.executionMgr != nil {
399+
i.executionMgr.Close()
400+
}
394401
}
395402

396403
func isShardOwnershiptLostError(err error) bool {

0 commit comments

Comments
 (0)