@@ -160,7 +160,7 @@ public void run() {
160
160
context .repairManager .scheduleRetry (this );
161
161
break ;
162
162
}
163
- } catch (RuntimeException e ) {
163
+ } catch (RuntimeException | ReaperException e ) {
164
164
LOG .error ("RepairRun FAILURE, scheduling retry" );
165
165
LOG .error (e .toString ());
166
166
LOG .error (Arrays .toString (e .getStackTrace ()));
@@ -173,7 +173,7 @@ public void run() {
173
173
/**
174
174
* Starts the repair run.
175
175
*/
176
- private void start () {
176
+ private void start () throws ReaperException {
177
177
LOG .info ("Repairs for repair run #{} starting" , repairRunId );
178
178
synchronized (this ) {
179
179
RepairRun repairRun = context .storage .getRepairRun (repairRunId ).get ();
@@ -198,13 +198,24 @@ private void endRepairRun() {
198
198
}
199
199
}
200
200
201
+ private void confirmJMXConnectionIsOpen () throws ReaperException {
202
+ if (jmxConnection == null || !jmxConnection .isConnectionAlive ()) {
203
+ LOG .debug ("connecting JMX proxy for repair runner on run id: {}" , repairRunId );
204
+ Cluster cluster = context .storage .getCluster (this .clusterName ).get ();
205
+ jmxConnection = context .jmxConnectionFactory .connectAny (cluster );
206
+ LOG .debug ("successfully reestablished JMX proxy for repair runner" );
207
+ }
208
+ }
209
+
201
210
/**
202
211
* Get the next segment and repair it. If there is none, we're done.
203
212
*/
204
- private void startNextSegment () {
213
+ private void startNextSegment () throws ReaperException {
205
214
boolean scheduleRetry = true ;
206
215
boolean anythingRunningStill = false ;
207
216
217
+ confirmJMXConnectionIsOpen ();
218
+
208
219
// We want to know whether a repair was started,
209
220
// so that a rescheduling of this runner will happen.
210
221
boolean repairStarted = false ;
@@ -221,6 +232,12 @@ private void startNextSegment() {
221
232
if (startTime != null && startTime .isBefore (DateTime .now ().minusDays (1 ))) {
222
233
LOG .warn ("Looks like segment #{} has been running more than a day. Start time: {}" ,
223
234
supposedlyRunningSegment .getId (), supposedlyRunningSegment .getStartTime ());
235
+ } else if (startTime != null && startTime .isBefore (DateTime .now ().minusHours (1 ))) {
236
+ LOG .info ("Looks like segment #{} has been running more than an hour. Start time: {}" ,
237
+ supposedlyRunningSegment .getId (), supposedlyRunningSegment .getStartTime ());
238
+ } else if (startTime != null && startTime .isBefore (DateTime .now ().minusMinutes (2 ))) {
239
+ LOG .debug ("Looks like segment #{} has been running more than two minutes. Start time: {}" ,
240
+ supposedlyRunningSegment .getId (), supposedlyRunningSegment .getStartTime ());
224
241
}
225
242
// No need to try starting new repair for already active slot.
226
243
continue ;
@@ -287,19 +304,13 @@ private boolean repairSegment(final int rangeIndex, final long segmentId, RingRa
287
304
String keyspace = repairUnit .getKeyspaceName ();
288
305
LOG .debug ("preparing to repair segment {} on run with id {}" , segmentId , repairRunId );
289
306
290
- if (jmxConnection == null || !jmxConnection .isConnectionAlive ()) {
291
- try {
292
- LOG .debug ("connecting JMX proxy for repair runner on run id: {}" , repairRunId );
293
- Cluster cluster = context .storage .getCluster (repairUnit .getClusterName ()).get ();
294
- jmxConnection = context .jmxConnectionFactory .connectAny (cluster );
295
- } catch (ReaperException e ) {
296
- e .printStackTrace ();
297
- LOG .warn ("Failed to reestablish JMX connection in runner #{}, retrying" , repairRunId );
298
- currentlyRunningSegments .set (rangeIndex , -1 );
299
- return true ;
300
- }
301
- LOG .debug ("successfully reestablished JMX proxy for repair runner on run id: {}" ,
302
- repairRunId );
307
+ try {
308
+ confirmJMXConnectionIsOpen ();
309
+ } catch (ReaperException e ) {
310
+ e .printStackTrace ();
311
+ LOG .warn ("Failed to reestablish JMX connection in runner #{}, retrying" , repairRunId );
312
+ currentlyRunningSegments .set (rangeIndex , -1 );
313
+ return true ;
303
314
}
304
315
305
316
List <String > potentialCoordinators ;
0 commit comments