Skip to content

Commit e1e436d

Browse files
committed
Add support to run repair on specific nodes or specific datacenter
1 parent ae8bce7 commit e1e436d

26 files changed

+881
-476
lines changed

src/packaging/bin/spreaper

+19-4
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,13 @@ def _arguments_for_repair_and_schedule(parser):
196196
parser.add_argument("--incremental", default="false",
197197
help=("Incremental repair (true or false), "
198198
"or use the configured default if not given (false)"))
199+
parser.add_argument("--datacenters", default=None,
200+
help=("a comma separated list of datacenters to repair (do not use spaces after commas). "
201+
"Cannot be used in conjunction with --nodes."))
202+
parser.add_argument("--nodes", default=None,
203+
help=("a comma separated list of nodes to repair, "
204+
"appropriate for repairing a specific list of nodes after an outage (do not use spaces after commas). "
205+
"Cannot be used in conjunction with --datacenters"))
199206

200207

201208
def _arguments_for_scheduling(parser):
@@ -453,7 +460,9 @@ class ReaperCLI(object):
453460
segmentCount=args.segment_count,
454461
repairParallelism=args.repair_parallelism,
455462
intensity=args.intensity,
456-
incrementalRepair=args.incremental)
463+
incrementalRepair=args.incremental,
464+
nodes=args.nodes,
465+
datacenters=args.datacenters)
457466
else:
458467
print ("# Registering repair run for cluster '{0}', and keyspace '{1}', "
459468
"targeting all tables in the keyspace").format(args.cluster_name,
@@ -463,7 +472,9 @@ class ReaperCLI(object):
463472
segmentCount=args.segment_count,
464473
repairParallelism=args.repair_parallelism,
465474
intensity=args.intensity,
466-
incrementalRepair=args.incremental)
475+
incrementalRepair=args.incremental,
476+
nodes=args.nodes,
477+
datacenters=args.datacenters)
467478
repair_run = json.loads(reply)
468479
print "# Repair run with id={0} created".format(repair_run.get('id'))
469480
if not args.dont_start_repair:
@@ -500,7 +511,9 @@ class ReaperCLI(object):
500511
intensity=args.intensity,
501512
scheduleDaysBetween=args.schedule_days_between,
502513
scheduleTriggerTime=args.schedule_trigger_time,
503-
incrementalRepair=args.incremental)
514+
incrementalRepair=args.incremental,
515+
nodes=args.nodes,
516+
datacenters=args.datacenters)
504517
else:
505518
print ("# Registering repair schedule for cluster '{0}', and keyspace '{1}', "
506519
"targeting all tables in the keyspace, with scheduled days between "
@@ -514,7 +527,9 @@ class ReaperCLI(object):
514527
intensity=args.intensity,
515528
scheduleDaysBetween=args.schedule_days_between,
516529
scheduleTriggerTime=args.schedule_trigger_time,
517-
incrementalRepair=args.incremental)
530+
incrementalRepair=args.incremental,
531+
nodes=args.nodes,
532+
datacenters=args.datacenters)
518533
repair_schedule = json.loads(reply)
519534
print "# Repair schedule with id={0} created:".format(repair_schedule.get('id'))
520535
print json.dumps(repair_schedule, indent=2, sort_keys=True)

src/server/src/main/java/com/spotify/reaper/cassandra/JmxProxy.java

+22-13
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,8 @@ public String getCassandraVersion(){
562562
* @throws ReaperException
563563
*/
564564
public int triggerRepair(BigInteger beginToken, BigInteger endToken, String keyspace,
565-
RepairParallelism repairParallelism, Collection<String> columnFamilies, boolean fullRepair) throws ReaperException {
565+
RepairParallelism repairParallelism, Collection<String> columnFamilies, boolean fullRepair,
566+
Collection<String> datacenters) throws ReaperException {
566567
checkNotNull(ssProxy, "Looks like the proxy is not connected");
567568
String cassandraVersion = getCassandraVersion();
568569
boolean canUseDatacenterAware = false;
@@ -586,11 +587,13 @@ public int triggerRepair(BigInteger beginToken, BigInteger endToken, String keys
586587
}
587588
try {
588589
if (cassandraVersion.startsWith("2.0") || cassandraVersion.startsWith("1.")) {
589-
return triggerRepairPre2dot1(repairParallelism, keyspace, columnFamilies, beginToken, endToken);
590+
return triggerRepairPre2dot1(repairParallelism, keyspace, columnFamilies, beginToken, endToken, datacenters.size() > 0 ? datacenters : null);
590591
} else if (cassandraVersion.startsWith("2.1")){
591-
return triggerRepair2dot1(fullRepair, repairParallelism, keyspace, columnFamilies, beginToken, endToken, cassandraVersion);
592+
return triggerRepair2dot1(fullRepair, repairParallelism, keyspace, columnFamilies, beginToken, endToken,
593+
cassandraVersion, datacenters.size() > 0 ? datacenters : null);
592594
} else {
593-
return triggerRepairPost2dot2(fullRepair, repairParallelism, keyspace, columnFamilies, beginToken, endToken, cassandraVersion);
595+
return triggerRepairPost2dot2(fullRepair, repairParallelism, keyspace, columnFamilies, beginToken, endToken,
596+
cassandraVersion, datacenters);
594597
}
595598
} catch (Exception e) {
596599
LOG.error("Segment repair failed", e);
@@ -599,7 +602,9 @@ public int triggerRepair(BigInteger beginToken, BigInteger endToken, String keys
599602
}
600603

601604

602-
public int triggerRepairPost2dot2(boolean fullRepair, RepairParallelism repairParallelism, String keyspace, Collection<String> columnFamilies, BigInteger beginToken, BigInteger endToken, String cassandraVersion) {
605+
public int triggerRepairPost2dot2(boolean fullRepair, RepairParallelism repairParallelism, String keyspace,
606+
Collection<String> columnFamilies, BigInteger beginToken, BigInteger endToken, String cassandraVersion,
607+
Collection<String> datacenters) {
603608
Map<String, String> options = new HashMap<>();
604609

605610
options.put(RepairOption.PARALLELISM_KEY, repairParallelism.getName());
@@ -613,26 +618,29 @@ public int triggerRepairPost2dot2(boolean fullRepair, RepairParallelism repairPa
613618
options.put(RepairOption.RANGES_KEY, beginToken.toString() + ":" + endToken.toString());
614619
}
615620

616-
//options.put(RepairOption.DATACENTERS_KEY, StringUtils.join(specificDataCenters, ","));
621+
options.put(RepairOption.DATACENTERS_KEY, StringUtils.join(datacenters, ","));
617622
//options.put(RepairOption.HOSTS_KEY, StringUtils.join(specificHosts, ","));
618623

619624
return ((StorageServiceMBean) ssProxy).repairAsync(keyspace, options);
620625
}
621626

622-
public int triggerRepair2dot1(boolean fullRepair, RepairParallelism repairParallelism, String keyspace, Collection<String> columnFamilies, BigInteger beginToken, BigInteger endToken, String cassandraVersion) {
627+
public int triggerRepair2dot1(boolean fullRepair, RepairParallelism repairParallelism, String keyspace,
628+
Collection<String> columnFamilies, BigInteger beginToken, BigInteger endToken, String cassandraVersion,
629+
Collection<String> datacenters) {
623630
if (fullRepair) {
624631
// full repair
625632
if (repairParallelism.equals(RepairParallelism.DATACENTER_AWARE)) {
626633
return ((StorageServiceMBean) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(),
627-
keyspace, repairParallelism.ordinal(), cassandraVersion.startsWith("2.2")?new HashSet<String>():null, cassandraVersion.startsWith("2.2")?new HashSet<String>():null, fullRepair,
634+
keyspace, repairParallelism.ordinal(), datacenters,
635+
cassandraVersion.startsWith("2.2") ? new HashSet<String>() : null, fullRepair,
628636
columnFamilies.toArray(new String[columnFamilies.size()]));
629637
}
630638

631639
boolean snapshotRepair = repairParallelism.equals(RepairParallelism.SEQUENTIAL);
632640

633641
return ((StorageServiceMBean) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(),
634642
keyspace, snapshotRepair ? RepairParallelism.SEQUENTIAL.ordinal() : RepairParallelism.PARALLEL.ordinal(),
635-
cassandraVersion.startsWith("2.2")?new HashSet<String>():null, cassandraVersion.startsWith("2.2")?new HashSet<String>():null, fullRepair,
643+
datacenters, cassandraVersion.startsWith("2.2") ? new HashSet<String>() : null, fullRepair,
636644
columnFamilies.toArray(new String[columnFamilies.size()]));
637645

638646
}
@@ -642,11 +650,12 @@ public int triggerRepair2dot1(boolean fullRepair, RepairParallelism repairParall
642650
fullRepair, columnFamilies.toArray(new String[columnFamilies.size()]));
643651
}
644652

645-
public int triggerRepairPre2dot1(RepairParallelism repairParallelism, String keyspace, Collection<String> columnFamilies, BigInteger beginToken, BigInteger endToken) {
653+
public int triggerRepairPre2dot1(RepairParallelism repairParallelism, String keyspace,
654+
Collection<String> columnFamilies, BigInteger beginToken, BigInteger endToken, Collection<String> datacenters) {
646655
// Cassandra 1.2 and 2.0 compatibility
647656
if (repairParallelism.equals(RepairParallelism.DATACENTER_AWARE)) {
648657
return ((StorageServiceMBean20) ssProxy).forceRepairRangeAsync(beginToken.toString(), endToken.toString(),
649-
keyspace, repairParallelism.ordinal(), null, null,
658+
keyspace, repairParallelism.ordinal(), datacenters, null,
650659
columnFamilies.toArray(new String[columnFamilies.size()]));
651660
}
652661
boolean snapshotRepair = repairParallelism.equals(RepairParallelism.SEQUENTIAL);
@@ -855,8 +864,8 @@ private static RMIClientSocketFactory getRMIClientSocketFactory() {
855864
class ColumnFamilyStoreMBeanIterator
856865
implements Iterator<Map.Entry<String, ColumnFamilyStoreMBean>> {
857866

858-
private Iterator<ObjectName> resIter;
859-
private MBeanServerConnection mbeanServerConn;
867+
private final Iterator<ObjectName> resIter;
868+
private final MBeanServerConnection mbeanServerConn;
860869

861870
public ColumnFamilyStoreMBeanIterator(MBeanServerConnection mbeanServerConn)
862871
throws MalformedObjectNameException, NullPointerException, IOException {

src/server/src/main/java/com/spotify/reaper/core/RepairUnit.java

+23-4
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,18 @@ public class RepairUnit {
2222
private final String clusterName;
2323
private final String keyspaceName;
2424
private final Set<String> columnFamilies;
25-
private final Boolean incrementalRepair;
25+
private final Boolean incrementalRepair;
26+
private final Set<String> nodes;
27+
private final Set<String> datacenters;
2628

2729
private RepairUnit(Builder builder, UUID id) {
2830
this.id = id;
2931
this.clusterName = builder.clusterName;
3032
this.keyspaceName = builder.keyspaceName;
3133
this.columnFamilies = builder.columnFamilies;
3234
this.incrementalRepair = builder.incrementalRepair;
35+
this.nodes = builder.nodes;
36+
this.datacenters = builder.datacenters;
3337
}
3438

3539
public UUID getId() {
@@ -47,9 +51,17 @@ public String getKeyspaceName() {
4751
public Set<String> getColumnFamilies() {
4852
return columnFamilies;
4953
}
50-
54+
5155
public Boolean getIncrementalRepair() {
52-
return incrementalRepair;
56+
return incrementalRepair;
57+
}
58+
59+
public Set<String> getNodes() {
60+
return nodes;
61+
}
62+
63+
public Set<String> getDatacenters() {
64+
return datacenters;
5365
}
5466

5567
public Builder with() {
@@ -62,19 +74,26 @@ public static class Builder {
6274
public final String keyspaceName;
6375
public final Set<String> columnFamilies;
6476
public final boolean incrementalRepair;
77+
public final Set<String> nodes;
78+
public final Set<String> datacenters;
6579

66-
public Builder(String clusterName, String keyspaceName, Set<String> columnFamilies, Boolean incrementalRepair) {
80+
public Builder(String clusterName, String keyspaceName, Set<String> columnFamilies, Boolean incrementalRepair,
81+
Set<String> nodes, Set<String> datacenters) {
6782
this.clusterName = clusterName;
6883
this.keyspaceName = keyspaceName;
6984
this.columnFamilies = columnFamilies;
7085
this.incrementalRepair = incrementalRepair;
86+
this.nodes = nodes;
87+
this.datacenters = datacenters;
7188
}
7289

7390
private Builder(RepairUnit original) {
7491
clusterName = original.clusterName;
7592
keyspaceName = original.keyspaceName;
7693
columnFamilies = original.columnFamilies;
7794
incrementalRepair = original.incrementalRepair;
95+
nodes = original.nodes;
96+
datacenters = original.datacenters;
7897
}
7998

8099
public RepairUnit build(UUID id) {

0 commit comments

Comments
 (0)