Skip to content

Validate TaskList partition updates via CLI are safe #6682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tools/cli/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,11 @@ func newAdminTaskListCommands() []*cli.Command {
Aliases: []string{"nwp"},
Usage: "Number of write partitions",
},
&cli.BoolFlag{
Name: FlagForce,
Aliases: []string{"f"},
Usage: "Force an update operation that may be unsafe",
},
},
Action: AdminUpdateTaskListPartitionConfig,
},
Expand Down
76 changes: 69 additions & 7 deletions tools/cli/admin_task_list_commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,16 @@
package cli

import (
"context"
"fmt"
"io"
"os"
"strings"

"github.com/urfave/cli/v2"

"github.com/uber/cadence/client/frontend"
"github.com/uber/cadence/common"
"github.com/uber/cadence/common/types"
"github.com/uber/cadence/tools/common/commoncli"
)
Expand Down Expand Up @@ -171,6 +174,10 @@ func AdminUpdateTaskListPartitionConfig(c *cli.Context) error {
if err != nil {
return err
}
frontendClient, err := getDeps(c).ServerFrontendClient(c)
if err != nil {
return err
}
domain, err := getRequiredOption(c, FlagDomain)
if err != nil {
return commoncli.Problem("Required flag not found: ", err)
Expand All @@ -179,6 +186,7 @@ func AdminUpdateTaskListPartitionConfig(c *cli.Context) error {
if err != nil {
return commoncli.Problem("Required flag not found: ", err)
}
force := c.Bool(FlagForce)
var taskListType *types.TaskListType
if strings.ToLower(c.String(FlagTaskListType)) == "activity" {
taskListType = types.TaskListTypeActivity.Ptr()
Expand All @@ -200,25 +208,79 @@ func AdminUpdateTaskListPartitionConfig(c *cli.Context) error {
if err != nil {
return commoncli.Problem("Error in creating context:", err)
}
cfg := &types.TaskListPartitionConfig{
ReadPartitions: createPartitions(numReadPartitions),
WritePartitions: createPartitions(numWritePartitions),
}
tl := &types.TaskList{Name: taskList, Kind: types.TaskListKindNormal.Ptr()}
if !force {
err = validateChange(ctx, frontendClient, domain, tl, taskListType, cfg)
if err != nil {
return commoncli.Problem("Potentially unsafe operation. Specify '--force' to proceed anyway: ", err)
}
}
_, err = adminClient.UpdateTaskListPartitionConfig(ctx, &types.UpdateTaskListPartitionConfigRequest{
Domain: domain,
TaskList: &types.TaskList{Name: taskList, Kind: types.TaskListKindNormal.Ptr()},
TaskListType: taskListType,
PartitionConfig: &types.TaskListPartitionConfig{
ReadPartitions: createPartitions(numReadPartitions),
WritePartitions: createPartitions(numWritePartitions),
},
Domain: domain,
TaskList: tl,
TaskListType: taskListType,
PartitionConfig: cfg,
})
if err != nil {
return commoncli.Problem("Operation UpdateTaskListPartitionConfig failed.", err)
}
return nil
}

func validateChange(ctx context.Context, client frontend.Client, domain string, tl *types.TaskList, tlt *types.TaskListType, newCfg *types.TaskListPartitionConfig) error {
description, err := client.DescribeTaskList(ctx, &types.DescribeTaskListRequest{
Domain: domain,
TaskList: tl,
TaskListType: tlt,
})
if err != nil {
return fmt.Errorf("DescribeTaskList failed: %w", err)
}
// Illegal operations are rejected by the server (read < write), but unsafe ones are still allowed
if description.PartitionConfig != nil {
oldCfg := description.PartitionConfig
// Ensure they're not removing active write partitions
if len(newCfg.ReadPartitions) < len(oldCfg.WritePartitions) {
return fmt.Errorf("remove write partitions, then read partitions. Removing an active write partition risks losing tasks. Proposed read count is less than current write count (%d < %d)", len(newCfg.ReadPartitions), len(oldCfg.WritePartitions))
}
// Ensure removed read partitions are drained
for i := len(newCfg.ReadPartitions); i < len(oldCfg.ReadPartitions); i++ {
partition, err := client.DescribeTaskList(ctx, &types.DescribeTaskListRequest{
Domain: domain,
TaskList: &types.TaskList{Name: getPartitionTaskListName(tl.Name, i), Kind: tl.Kind},
TaskListType: tlt,
IncludeTaskListStatus: true,
})
if err != nil {
return fmt.Errorf("DescribeTaskList failed for partition %d: %w", i, err)
}
if partition.TaskListStatus.BacklogCountHint != 0 {
return fmt.Errorf("partition %d still has %d tasks remaining", i, partition.TaskListStatus.BacklogCountHint)
}
}
}
// If it's otherwise valid but there are no pollers, they might have mistyped the name
if len(description.Pollers) == 0 {
return fmt.Errorf("'%s' has no pollers of type '%s'", tl.Name, tlt.String())
}
return nil
}

func createPartitions(num int) map[int]*types.TaskListPartition {
result := make(map[int]*types.TaskListPartition, num)
for i := 0; i < num; i++ {
result[i] = &types.TaskListPartition{}
}
return result
}

func getPartitionTaskListName(root string, partition int) string {
if partition <= 0 {
return root
}
return fmt.Sprintf("%v%v/%v", common.ReservedTaskListPrefix, root, partition)
}
Loading
Loading