Skip to content

Commit 9224d64

Browse files
author
Tamer Eldeeb
authored
Turn RangeID into a non-static Cassandra column (#293)
We observed that when executing CAS queries on static fields, the coordinator issues a range query during the read phase of paxos with LIMIT 1. This is problematic, as it causes table scans when only a point query is needed. This change turns RangeID into a non-static column to avoid this problem. This change also reverts the gc_grace_seconds setting back to default 10 days to avoid masking similar issues by fast removal of tombstones. Issue #277
1 parent 3560320 commit 9224d64

File tree

8 files changed

+160
-118
lines changed

8 files changed

+160
-118
lines changed

common/persistence/cassandraPersistence.go

Lines changed: 127 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,12 @@ const (
246246
templateUpdateLeaseQuery = `UPDATE executions ` +
247247
`SET range_id = ? ` +
248248
`WHERE shard_id = ? ` +
249+
`and type = ? ` +
250+
`and domain_id = ? ` +
251+
`and workflow_id = ? ` +
252+
`and run_id = ? ` +
253+
`and visibility_ts = ? ` +
254+
`and task_id = ? ` +
249255
`IF range_id = ?`
250256

251257
templateGetWorkflowExecutionQuery = `SELECT execution, activity_map, timer_map, child_executions_map, request_cancel_map ` +
@@ -277,7 +283,7 @@ const (
277283
`and run_id = ? ` +
278284
`and visibility_ts = ? ` +
279285
`and task_id = ? ` +
280-
`IF next_event_id = ? and range_id = ?`
286+
`IF next_event_id = ?`
281287

282288
templateUpdateActivityInfoQuery = `UPDATE executions ` +
283289
`SET activity_map[ ? ] =` + templateActivityInfoType + ` ` +
@@ -288,7 +294,7 @@ const (
288294
`and run_id = ? ` +
289295
`and visibility_ts = ? ` +
290296
`and task_id = ? ` +
291-
`IF next_event_id = ? and range_id = ?`
297+
`IF next_event_id = ?`
292298

293299
templateUpdateTimerInfoQuery = `UPDATE executions ` +
294300
`SET timer_map[ ? ] =` + templateTimerInfoType + ` ` +
@@ -299,7 +305,7 @@ const (
299305
`and run_id = ? ` +
300306
`and visibility_ts = ? ` +
301307
`and task_id = ? ` +
302-
`IF next_event_id = ? and range_id = ?`
308+
`IF next_event_id = ?`
303309

304310
templateUpdateChildExecutionInfoQuery = `UPDATE executions ` +
305311
`SET child_executions_map[ ? ] =` + templateChildExecutionInfoType + ` ` +
@@ -310,7 +316,7 @@ const (
310316
`and run_id = ? ` +
311317
`and visibility_ts = ? ` +
312318
`and task_id = ? ` +
313-
`IF next_event_id = ? and range_id = ?`
319+
`IF next_event_id = ?`
314320

315321
templateUpdateRequestCancelInfoQuery = `UPDATE executions ` +
316322
`SET request_cancel_map[ ? ] =` + templateRequestCancelInfoType + ` ` +
@@ -321,7 +327,7 @@ const (
321327
`and run_id = ? ` +
322328
`and visibility_ts = ? ` +
323329
`and task_id = ? ` +
324-
`IF next_event_id = ? and range_id = ?`
330+
`IF next_event_id = ?`
325331

326332
templateDeleteActivityInfoQuery = `DELETE activity_map[ ? ] ` +
327333
`FROM executions ` +
@@ -332,7 +338,7 @@ const (
332338
`and run_id = ? ` +
333339
`and visibility_ts = ? ` +
334340
`and task_id = ? ` +
335-
`IF next_event_id = ? and range_id = ?`
341+
`IF next_event_id = ?`
336342

337343
templateDeleteTimerInfoQuery = `DELETE timer_map[ ? ] ` +
338344
`FROM executions ` +
@@ -343,7 +349,7 @@ const (
343349
`and run_id = ? ` +
344350
`and visibility_ts = ? ` +
345351
`and task_id = ? ` +
346-
`IF next_event_id = ? and range_id = ?`
352+
`IF next_event_id = ?`
347353

348354
templateDeleteChildExecutionInfoQuery = `DELETE child_executions_map[ ? ] ` +
349355
`FROM executions ` +
@@ -354,7 +360,7 @@ const (
354360
`and run_id = ? ` +
355361
`and visibility_ts = ? ` +
356362
`and task_id = ? ` +
357-
`IF next_event_id = ? and range_id = ?`
363+
`IF next_event_id = ?`
358364

359365
templateDeleteRequestCancelInfoQuery = `DELETE request_cancel_map[ ? ] ` +
360366
`FROM executions ` +
@@ -365,7 +371,7 @@ const (
365371
`and run_id = ? ` +
366372
`and visibility_ts = ? ` +
367373
`and task_id = ? ` +
368-
`IF next_event_id = ? and range_id = ?`
374+
`IF next_event_id = ?`
369375

370376
templateDeleteWorkflowExecutionQuery = `DELETE FROM executions ` +
371377
`WHERE shard_id = ? ` +
@@ -680,11 +686,18 @@ func (d *cassandraPersistence) CreateWorkflowExecution(request *CreateWorkflowEx
680686
batch.Query(templateUpdateLeaseQuery,
681687
request.RangeID,
682688
d.shardID,
689+
rowTypeShard,
690+
rowTypeShardDomainID,
691+
rowTypeShardWorkflowID,
692+
rowTypeShardRunID,
693+
defaultVisibilityTimestamp,
694+
rowTypeShardTaskID,
683695
request.RangeID,
684696
)
685697

686698
previous := make(map[string]interface{})
687-
applied, _, err := d.session.MapExecuteBatchCAS(batch, previous)
699+
applied, iter, err := d.session.MapExecuteBatchCAS(batch, previous)
700+
defer iter.Close()
688701
if err != nil {
689702
if isTimeoutError(err) {
690703
// Write may have succeeded, but we don't know
@@ -694,34 +707,62 @@ func (d *cassandraPersistence) CreateWorkflowExecution(request *CreateWorkflowEx
694707
return nil, &workflow.InternalServiceError{
695708
Message: fmt.Sprintf("CreateWorkflowExecution operation failed. Error: %v", err),
696709
}
710+
697711
}
698712

699713
if !applied {
700-
if rangeID, ok := previous["range_id"].(int64); ok && rangeID != request.RangeID {
701-
// CreateWorkflowExecution failed because rangeID was modified
702-
return nil, &ShardOwnershipLostError{
703-
ShardID: d.shardID,
704-
Msg: fmt.Sprintf("Failed to create workflow execution. Request RangeID: %v, Actual RangeID: %v",
705-
request.RangeID, rangeID),
714+
// There can be two reasons why the query does not get applied. Either the RangeID has changed, or
715+
// the workflow is already started. Check the row info returned by Cassandra to figure out which one it is.
716+
GetFailureReasonLoop:
717+
for {
718+
rowType, ok := previous["type"].(int)
719+
if !ok {
720+
// This should never happen, as all our rows have the type field.
721+
break GetFailureReasonLoop
722+
}
723+
724+
if rowType == rowTypeShard {
725+
if rangeID, ok := previous["range_id"].(int64); ok && rangeID != request.RangeID {
726+
// CreateWorkflowExecution failed because rangeID was modified
727+
return nil, &ShardOwnershipLostError{
728+
ShardID: d.shardID,
729+
Msg: fmt.Sprintf("Failed to create workflow execution. Request RangeID: %v, Actual RangeID: %v",
730+
request.RangeID, rangeID),
731+
}
732+
}
733+
734+
} else {
735+
var columns []string
736+
for k, v := range previous {
737+
columns = append(columns, fmt.Sprintf("%s=%v", k, v))
738+
}
739+
740+
if execution, ok := previous["execution"].(map[string]interface{}); ok {
741+
// CreateWorkflowExecution failed because it already exists
742+
msg := fmt.Sprintf("Workflow execution already running. WorkflowId: %v, RunId: %v, rangeID: %v, columns: (%v)",
743+
execution["workflow_id"], execution["run_id"], request.RangeID, strings.Join(columns, ","))
744+
return nil, &workflow.WorkflowExecutionAlreadyStartedError{
745+
Message: common.StringPtr(msg),
746+
StartRequestId: common.StringPtr(fmt.Sprintf("%v", execution["create_request_id"])),
747+
RunId: common.StringPtr(fmt.Sprintf("%v", execution["run_id"])),
748+
}
749+
}
750+
}
751+
752+
previous = make(map[string]interface{})
753+
if !iter.MapScan(previous) {
754+
// Cassandra returns the actual row that caused a condition failure, so we should always return
755+
// from the checks above, but just in case.
756+
break GetFailureReasonLoop
706757
}
707758
}
708759

760+
// At this point we only know that the write was not applied.
761+
// Return the row information returned by Cassandra.
709762
var columns []string
710763
for k, v := range previous {
711764
columns = append(columns, fmt.Sprintf("%s=%v", k, v))
712765
}
713-
714-
if execution, ok := previous["execution"].(map[string]interface{}); ok {
715-
// CreateWorkflowExecution failed because it already exists
716-
msg := fmt.Sprintf("Workflow execution already running. WorkflowId: %v, RunId: %v, rangeID: %v, columns: (%v)",
717-
execution["workflow_id"], execution["run_id"], request.RangeID, strings.Join(columns, ","))
718-
return nil, &workflow.WorkflowExecutionAlreadyStartedError{
719-
Message: common.StringPtr(msg),
720-
StartRequestId: common.StringPtr(fmt.Sprintf("%v", execution["create_request_id"])),
721-
RunId: common.StringPtr(fmt.Sprintf("%v", execution["run_id"])),
722-
}
723-
}
724-
725766
return nil, &ConditionFailedError{
726767
Msg: fmt.Sprintf("Failed to create workflow execution. Request RangeID: %v, columns: (%v)",
727768
request.RangeID, strings.Join(columns, ",")),
@@ -910,8 +951,7 @@ func (d *cassandraPersistence) UpdateWorkflowExecution(request *UpdateWorkflowEx
910951
executionInfo.RunID,
911952
defaultVisibilityTimestamp,
912953
rowTypeExecutionTaskID,
913-
request.Condition,
914-
request.RangeID)
954+
request.Condition)
915955

916956
d.createTransferTasks(batch, request.TransferTasks, executionInfo.DomainID, executionInfo.WorkflowID,
917957
executionInfo.RunID, cqlNowTimestamp)
@@ -948,8 +988,22 @@ func (d *cassandraPersistence) UpdateWorkflowExecution(request *UpdateWorkflowEx
948988
rowTypeExecutionTaskID)
949989
}
950990

991+
// Verifies that the RangeID has not changed
992+
batch.Query(templateUpdateLeaseQuery,
993+
request.RangeID,
994+
d.shardID,
995+
rowTypeShard,
996+
rowTypeShardDomainID,
997+
rowTypeShardWorkflowID,
998+
rowTypeShardRunID,
999+
defaultVisibilityTimestamp,
1000+
rowTypeShardTaskID,
1001+
request.RangeID,
1002+
)
1003+
9511004
previous := make(map[string]interface{})
952-
applied, _, err := d.session.MapExecuteBatchCAS(batch, previous)
1005+
applied, iter, err := d.session.MapExecuteBatchCAS(batch, previous)
1006+
defer iter.Close()
9531007
if err != nil {
9541008
if isTimeoutError(err) {
9551009
// Write may have succeeded, but we don't know
@@ -962,23 +1016,45 @@ func (d *cassandraPersistence) UpdateWorkflowExecution(request *UpdateWorkflowEx
9621016
}
9631017

9641018
if !applied {
965-
if rangeID, ok := previous["range_id"].(int64); ok && rangeID != request.RangeID {
966-
// UpdateWorkflowExecution failed because rangeID was modified
967-
return &ShardOwnershipLostError{
968-
ShardID: d.shardID,
969-
Msg: fmt.Sprintf("Failed to update workflow execution. Request RangeID: %v, Actual RangeID: %v",
970-
request.RangeID, rangeID),
1019+
// There can be two reasons why the query does not get applied. Either the RangeID has changed, or
1020+
// the next_event_id check failed. Check the row info returned by Cassandra to figure out which one it is.
1021+
GetFailureReasonLoop:
1022+
for {
1023+
rowType, ok := previous["type"].(int)
1024+
if !ok {
1025+
// This should never happen, as all our rows have the type field.
1026+
break GetFailureReasonLoop
1027+
}
1028+
1029+
if rowType == rowTypeShard {
1030+
if rangeID, ok := previous["range_id"].(int64); ok && rangeID != request.RangeID {
1031+
// UpdateWorkflowExecution failed because rangeID was modified
1032+
return &ShardOwnershipLostError{
1033+
ShardID: d.shardID,
1034+
Msg: fmt.Sprintf("Failed to update workflow execution. Request RangeID: %v, Actual RangeID: %v",
1035+
request.RangeID, rangeID),
1036+
}
1037+
}
1038+
} else {
1039+
if nextEventID, ok := previous["next_event_id"].(int64); ok && nextEventID != request.Condition {
1040+
// CreateWorkflowExecution failed because next event ID is unexpected
1041+
return &ConditionFailedError{
1042+
Msg: fmt.Sprintf("Failed to update workflow execution. Request Condition: %v, Actual Value: %v",
1043+
request.Condition, nextEventID),
1044+
}
1045+
}
9711046
}
972-
}
9731047

974-
if nextEventID, ok := previous["next_event_id"].(int64); ok && nextEventID != request.Condition {
975-
// CreateWorkflowExecution failed because next event ID is unexpected
976-
return &ConditionFailedError{
977-
Msg: fmt.Sprintf("Failed to update workflow execution. Request Condition: %v, Actual Value: %v",
978-
request.Condition, nextEventID),
1048+
previous = make(map[string]interface{})
1049+
if !iter.MapScan(previous) {
1050+
// Cassandra returns the actual row that caused a condition failure, so we should always return
1051+
// from the checks above, but just in case.
1052+
break GetFailureReasonLoop
9791053
}
9801054
}
9811055

1056+
// At this point we only know that the write was not applied.
1057+
// Return the row information returned by Cassandra.
9821058
var columns []string
9831059
for k, v := range previous {
9841060
columns = append(columns, fmt.Sprintf("%s=%v", k, v))
@@ -1549,8 +1625,7 @@ func (d *cassandraPersistence) updateActivityInfos(batch *gocql.Batch, activityI
15491625
runID,
15501626
defaultVisibilityTimestamp,
15511627
rowTypeExecutionTaskID,
1552-
condition,
1553-
rangeID)
1628+
condition)
15541629
}
15551630

15561631
if deleteInfo != nil {
@@ -1563,8 +1638,7 @@ func (d *cassandraPersistence) updateActivityInfos(batch *gocql.Batch, activityI
15631638
runID,
15641639
defaultVisibilityTimestamp,
15651640
rowTypeExecutionTaskID,
1566-
condition,
1567-
rangeID)
1641+
condition)
15681642
}
15691643
}
15701644

@@ -1585,8 +1659,7 @@ func (d *cassandraPersistence) updateTimerInfos(batch *gocql.Batch, timerInfos [
15851659
runID,
15861660
defaultVisibilityTimestamp,
15871661
rowTypeExecutionTaskID,
1588-
condition,
1589-
rangeID)
1662+
condition)
15901663
}
15911664

15921665
for _, t := range deleteInfos {
@@ -1599,8 +1672,7 @@ func (d *cassandraPersistence) updateTimerInfos(batch *gocql.Batch, timerInfos [
15991672
runID,
16001673
defaultVisibilityTimestamp,
16011674
rowTypeExecutionTaskID,
1602-
condition,
1603-
rangeID)
1675+
condition)
16041676
}
16051677
}
16061678

@@ -1622,8 +1694,7 @@ func (d *cassandraPersistence) updateChildExecutionInfos(batch *gocql.Batch, chi
16221694
runID,
16231695
defaultVisibilityTimestamp,
16241696
rowTypeExecutionTaskID,
1625-
condition,
1626-
rangeID)
1697+
condition)
16271698
}
16281699

16291700
// deleteInfo is the initiatedID for ChildInfo being deleted
@@ -1637,8 +1708,7 @@ func (d *cassandraPersistence) updateChildExecutionInfos(batch *gocql.Batch, chi
16371708
runID,
16381709
defaultVisibilityTimestamp,
16391710
rowTypeExecutionTaskID,
1640-
condition,
1641-
rangeID)
1711+
condition)
16421712
}
16431713
}
16441714

@@ -1657,8 +1727,7 @@ func (d *cassandraPersistence) updateRequestCancelInfos(batch *gocql.Batch, requ
16571727
runID,
16581728
defaultVisibilityTimestamp,
16591729
rowTypeExecutionTaskID,
1660-
condition,
1661-
rangeID)
1730+
condition)
16621731
}
16631732

16641733
// deleteInfo is the initiatedID for RequestCancelInfo being deleted
@@ -1672,8 +1741,7 @@ func (d *cassandraPersistence) updateRequestCancelInfos(batch *gocql.Batch, requ
16721741
runID,
16731742
defaultVisibilityTimestamp,
16741743
rowTypeExecutionTaskID,
1675-
condition,
1676-
rangeID)
1744+
condition)
16771745
}
16781746
}
16791747

0 commit comments

Comments
 (0)