Skip to content

Commit d044dd5

Browse files
committed
deffer hashing when necessary
1 parent 0326444 commit d044dd5

File tree

13 files changed

+130
-87
lines changed

13 files changed

+130
-87
lines changed

src/main/java/com/instaclustr/esop/azure/AzureBackuper.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ protected void cleanup() throws Exception {
6868
}
6969

7070
@Override
71-
public FreshenResult freshenRemoteObject(ManifestEntry manifestEntry, final RemoteObjectReference object) throws Exception {
71+
public RefreshingOutcome freshenRemoteObject(ManifestEntry manifestEntry, final RemoteObjectReference object) throws Exception {
7272
final CloudBlockBlob blob = ((AzureRemoteObjectReference) object).blob;
7373

7474
final Instant now = Instant.now();
@@ -78,16 +78,16 @@ public FreshenResult freshenRemoteObject(ManifestEntry manifestEntry, final Remo
7878
blob.getMetadata().put(DATE_TIME_METADATA_KEY, now.toString());
7979
blob.uploadMetadata();
8080

81-
return FreshenResult.FRESHENED;
81+
return new RefreshingOutcome(FreshenResult.FRESHENED, null);
8282
} else {
83-
return blob.exists() ? FreshenResult.FRESHENED : FreshenResult.UPLOAD_REQUIRED;
83+
return blob.exists() ? new RefreshingOutcome(FreshenResult.FRESHENED, null) : new RefreshingOutcome(FreshenResult.UPLOAD_REQUIRED, null);
8484
}
8585
} catch (final StorageException e) {
8686
if (e.getHttpStatusCode() != 404) {
8787
throw e;
8888
}
8989

90-
return FreshenResult.UPLOAD_REQUIRED;
90+
return new RefreshingOutcome(FreshenResult.UPLOAD_REQUIRED, null);
9191
}
9292
}
9393

src/main/java/com/instaclustr/esop/gcp/GCPBackuper.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public RemoteObjectReference objectKeyToNodeAwareRemoteReference(final Path obje
5353
}
5454

5555
@Override
56-
public FreshenResult freshenRemoteObject(ManifestEntry manifestEntry, final RemoteObjectReference object) {
56+
public RefreshingOutcome freshenRemoteObject(ManifestEntry manifestEntry, final RemoteObjectReference object) {
5757
final BlobId blobId = ((GCPRemoteObjectReference) object).blobId;
5858

5959
try {
@@ -63,21 +63,21 @@ public FreshenResult freshenRemoteObject(ManifestEntry manifestEntry, final Remo
6363
.setTarget(BlobInfo.newBuilder(blobId).build(), Storage.BlobTargetOption.predefinedAcl(BUCKET_OWNER_FULL_CONTROL))
6464
.build());
6565

66-
return FreshenResult.FRESHENED;
66+
return new RefreshingOutcome(FreshenResult.FRESHENED, null);
6767
} else {
6868
final Blob blob = storage.get(blobId);
6969
if (blob == null || !blob.exists()) {
70-
return FreshenResult.UPLOAD_REQUIRED;
70+
return new RefreshingOutcome(FreshenResult.UPLOAD_REQUIRED, null);
7171
} else {
72-
return FreshenResult.FRESHENED;
72+
return new RefreshingOutcome(FreshenResult.FRESHENED, null);
7373
}
7474
}
7575
} catch (final StorageException e) {
7676
if (e.getCode() != 404) {
7777
throw e;
7878
}
7979

80-
return FreshenResult.UPLOAD_REQUIRED;
80+
return new RefreshingOutcome(FreshenResult.UPLOAD_REQUIRED, null);
8181
}
8282
}
8383

src/main/java/com/instaclustr/esop/impl/AbstractTracker.java

+9-8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import com.google.common.util.concurrent.ListenableFuture;
2020
import com.google.common.util.concurrent.ListeningExecutorService;
2121

22+
import com.instaclustr.esop.impl.hash.HashService;
2223
import org.slf4j.Logger;
2324
import org.slf4j.LoggerFactory;
2425

@@ -48,7 +49,7 @@ public abstract class AbstractTracker<UNIT extends Unit, SESSION extends Session
4849

4950
protected final ListeningExecutorService finisherExecutorService;
5051
protected final OperationsService operationsService;
51-
protected final HashSpec hashSpec;
52+
protected final HashService hashService;
5253

5354
protected final List<UNIT> units = Collections.synchronizedList(new ArrayList<>());
5455
protected final Set<Session<UNIT>> sessions = Collections.synchronizedSet(new HashSet<>());
@@ -58,10 +59,10 @@ public abstract class AbstractTracker<UNIT extends Unit, SESSION extends Session
5859

5960
public AbstractTracker(final ListeningExecutorService finisherExecutorService,
6061
final OperationsService operationsService,
61-
final HashSpec hashSpec) {
62+
final HashService hashService) {
6263
this.finisherExecutorService = finisherExecutorService;
6364
this.operationsService = operationsService;
64-
this.hashSpec = hashSpec;
65+
this.hashService = hashService;
6566

6667
}
6768

@@ -90,7 +91,7 @@ public abstract UNIT constructUnitToSubmit(final INTERACTOR interactor,
9091
final ManifestEntry manifestEntry,
9192
final AtomicBoolean shouldCancel,
9293
final String snapshotTag,
93-
final HashSpec hashSpec);
94+
final HashService hashService);
9495

9596
public abstract Session<UNIT> constructSession();
9697

@@ -131,7 +132,7 @@ public synchronized Session<UNIT> submit(final INTERACTOR interactor,
131132
}
132133

133134
if (alreadySubmitted == null) {
134-
final UNIT unit = constructUnitToSubmit(interactor, entry, operation.getShouldCancel(), snapshotTag, hashSpec);
135+
final UNIT unit = constructUnitToSubmit(interactor, entry, operation.getShouldCancel(), snapshotTag, hashService);
135136

136137
units.add(unit);
137138
futures.put(executorService.submit(unit), unit);
@@ -226,7 +227,7 @@ public static abstract class Unit implements java.util.concurrent.Callable<Void>
226227
@JsonIgnore
227228
protected String snapshotTag;
228229
@JsonIgnore
229-
protected HashSpec hashSpec;
230+
protected HashService hashService;
230231
protected final ManifestEntry manifestEntry;
231232
protected volatile State state = NOT_STARTED;
232233
protected Throwable throwable = null;
@@ -235,10 +236,10 @@ public static abstract class Unit implements java.util.concurrent.Callable<Void>
235236

236237
public Unit(final ManifestEntry manifestEntry,
237238
final AtomicBoolean shouldCancel,
238-
final HashSpec hashSpec) {
239+
final HashService hashService) {
239240
this.manifestEntry = manifestEntry;
240241
this.shouldCancel = shouldCancel;
241-
this.hashSpec = hashSpec;
242+
this.hashService = hashService;
242243
}
243244

244245
public enum State {

src/main/java/com/instaclustr/esop/impl/SSTableUtils.java

+2-7
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ public class SSTableUtils {
4444
private static final int SSTABLE_PREFIX_IDX = 1;
4545
private static final int SSTABLE_GENERATION_IDX = 2;
4646
private static final Pattern CHECKSUM_RE = Pattern.compile("^([a-zA-Z0-9]+).*");
47-
private static final HashService hashService = new HashServiceImpl(new HashSpec());
4847

4948
public static String sstableHash(Path path) throws IOException {
5049
final Matcher matcher = SSTABLE_RE.matcher(path.getFileName().toString());
@@ -105,14 +104,11 @@ public static String calculateChecksum(final Path filePath) throws IOException {
105104
public static Map<String, List<ManifestEntry>> getSSTables(String keyspace,
106105
String table,
107106
Path snapshotDirectory,
108-
Path tableBackupPath,
109-
HashSpec hashSpec) throws IOException {
107+
Path tableBackupPath) throws IOException {
110108
if (!Files.exists(snapshotDirectory)) {
111109
return Collections.emptyMap();
112110
}
113111

114-
final HashService hashService = new HashServiceImpl(hashSpec);
115-
116112
return Files.list(snapshotDirectory)
117113
.flatMap(path -> {
118114
if (isCassandra22SecIndex(path)) {
@@ -151,12 +147,11 @@ public static Map<String, List<ManifestEntry>> getSSTables(String keyspace,
151147
}
152148

153149
backupPath = backupPath.resolve(hash).resolve(manifestComponentFileName.getFileName());
154-
final String hashOfFile = hashService.hash(sstableComponent);
155150

156151
entries.add(new ManifestEntry(backupPath,
157152
sstableComponent,
158153
ManifestEntry.Type.FILE,
159-
hashOfFile,
154+
null, // don't hash on listing, make it faster
160155
new KeyspaceTable(keyspace, table),
161156
null));
162157
}

src/main/java/com/instaclustr/esop/impl/Snapshots.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ public static Table parse(final String keyspace, final String table, final List<
571571
final Path tablePath = Paths.get("data").resolve(Paths.get(keyspace, table));
572572

573573
for (final Path path : value) {
574-
tb.sstables.putAll(SSTableUtils.getSSTables(keyspace, table, path, tablePath, Snapshots.hashSpec));
574+
tb.sstables.putAll(SSTableUtils.getSSTables(keyspace, table, path, tablePath));
575575
}
576576

577577
final Optional<Path> schemaPath = value.stream().map(p -> p.resolve("schema.cql")).filter(Files::exists).findFirst();

src/main/java/com/instaclustr/esop/impl/backup/Backuper.java

+13-1
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,24 @@ protected Backuper(final BaseBackupOperationRequest request) {
1919
this.retrier = RetrierFactory.getRetrier(request.retry);
2020
}
2121

22+
public static class RefreshingOutcome
23+
{
24+
public FreshenResult result;
25+
public String hash;
26+
27+
public RefreshingOutcome(FreshenResult result, String hash)
28+
{
29+
this.result = result;
30+
this.hash = hash;
31+
}
32+
}
33+
2234
public enum FreshenResult {
2335
FRESHENED,
2436
UPLOAD_REQUIRED
2537
}
2638

27-
public abstract FreshenResult freshenRemoteObject(ManifestEntry manifestEntry, final RemoteObjectReference object) throws Exception;
39+
public abstract RefreshingOutcome freshenRemoteObject(ManifestEntry manifestEntry, final RemoteObjectReference object) throws Exception;
2840

2941
public abstract void uploadFile(final ManifestEntry manifestEntry,
3042
final InputStream localFileStream,

src/main/java/com/instaclustr/esop/impl/backup/UploadTracker.java

+22-10
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import com.google.common.util.concurrent.ListeningExecutorService;
1313
import com.google.common.util.concurrent.RateLimiter;
1414

15+
import com.instaclustr.esop.impl.hash.HashService;
16+
import com.instaclustr.esop.impl.hash.HashServiceImpl;
1517
import org.slf4j.Logger;
1618
import org.slf4j.LoggerFactory;
1719

@@ -45,17 +47,17 @@ public class UploadTracker extends AbstractTracker<UploadUnit, UploadSession, Ba
4547
@Inject
4648
public UploadTracker(final @UploadingFinisher ListeningExecutorService finisherExecutorService,
4749
final OperationsService operationsService,
48-
final HashSpec hashSpec) {
49-
super(finisherExecutorService, operationsService, hashSpec);
50+
final HashService hashService) {
51+
super(finisherExecutorService, operationsService, hashService);
5052
}
5153

5254
@Override
5355
public UploadUnit constructUnitToSubmit(final Backuper backuper,
5456
final ManifestEntry manifestEntry,
5557
final AtomicBoolean shouldCancel,
5658
final String snapshotTag,
57-
final HashSpec hashSpec) {
58-
return new UploadUnit(backuper, manifestEntry, shouldCancel, snapshotTag, hashSpec);
59+
final HashService hashService) {
60+
return new UploadUnit(backuper, manifestEntry, shouldCancel, snapshotTag, hashService);
5961
}
6062

6163
@Override
@@ -96,8 +98,8 @@ public UploadUnit(final Backuper backuper,
9698
final ManifestEntry manifestEntry,
9799
final AtomicBoolean shouldCancel,
98100
final String snapshotTag,
99-
final HashSpec hashSpec) {
100-
super(manifestEntry, shouldCancel, hashSpec);
101+
final HashService hashService) {
102+
super(manifestEntry, shouldCancel, hashService);
101103
this.backuper = backuper;
102104
this.snapshotTag = snapshotTag;
103105
}
@@ -118,23 +120,29 @@ public Void call() {
118120
final RemoteObjectReference ref = getRemoteObjectReference(manifestEntry.objectKey);
119121

120122
// try to refresh object / decide if it is required to upload it
121-
Callable<Boolean> condition = () -> {
123+
Callable<Backuper.RefreshingOutcome> condition = () -> {
122124
try {
123-
return backuper.freshenRemoteObject(manifestEntry, ref) == FRESHENED;
125+
126+
return backuper.freshenRemoteObject(manifestEntry, ref);
124127
} catch (final Exception ex) {
125128
throw new RetriableException("Failed to refresh remote object" + ref.objectKey, ex);
126129
}
127130
};
128131

129-
if (manifestEntry.type != MANIFEST_FILE && getRetrier(backuper.request.retry).submit(condition)) {
130-
logger.info("{}skipping the upload of alredy uploaded file {}",
132+
Backuper.RefreshingOutcome refreshmentOutcome = getRetrier(backuper.request.retry).submit(condition);
133+
134+
if (manifestEntry.type != MANIFEST_FILE && refreshmentOutcome.result == FRESHENED) {
135+
logger.info("{}skipping the upload of already uploaded file {}",
131136
snapshotTag != null ? "Snapshot " + snapshotTag + " - " : "",
132137
ref.canonicalPath);
133138

134139
state = State.FINISHED;
135140
return null;
136141
}
137142

143+
if (refreshmentOutcome.hash != null)
144+
manifestEntry.hash = refreshmentOutcome.hash;
145+
138146
// do the upload
139147
getRetrier(backuper.request.retry).submit(() -> {
140148
try (final InputStream fileStream = new BufferedInputStream(new FileInputStream(manifestEntry.localFile.toFile()))) {
@@ -144,6 +152,10 @@ public Void call() {
144152
snapshotTag != null ? "Snapshot " + snapshotTag + " - " : "",
145153
manifestEntry.objectKey,
146154
DataSize.bytesToHumanReadable(manifestEntry.size)));
155+
156+
if (manifestEntry.hash == null)
157+
manifestEntry.hash = hashService.hash(manifestEntry.localFile);
158+
147159
// never encrypt manifest
148160
if (manifestEntry.type == MANIFEST_FILE) {
149161
backuper.uploadFile(manifestEntry, rateLimitedStream, ref);

src/main/java/com/instaclustr/esop/impl/hash/HashServiceImpl.java

+3
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ public void verify(final Path path, final String expectedHash) throws HashVerifi
6969

7070
final String hashOfFile = getHash(path.toAbsolutePath().toFile());
7171

72+
if (hashOfFile == null)
73+
return;
74+
7275
if (!hashOfFile.equals(expectedHash)) {
7376
throw new HashVerificationException(format("hash of %s (%s) does not match with expected hash %s",
7477
path,

src/main/java/com/instaclustr/esop/impl/restore/DownloadTracker.java

+9-10
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import com.google.common.util.concurrent.ListeningExecutorService;
99

10+
import com.instaclustr.esop.impl.hash.HashService;
1011
import org.slf4j.Logger;
1112
import org.slf4j.LoggerFactory;
1213

@@ -17,8 +18,6 @@
1718
import com.instaclustr.esop.impl.ManifestEntry.Type;
1819
import com.instaclustr.esop.impl.RemoteObjectReference;
1920
import com.instaclustr.esop.impl.hash.HashService.HashVerificationException;
20-
import com.instaclustr.esop.impl.hash.HashServiceImpl;
21-
import com.instaclustr.esop.impl.hash.HashSpec;
2221
import com.instaclustr.esop.impl.restore.DownloadTracker.DownloadSession;
2322
import com.instaclustr.esop.impl.restore.DownloadTracker.DownloadUnit;
2423
import com.instaclustr.esop.impl.restore.RestoreModules.DownloadingFinisher;
@@ -36,17 +35,17 @@ public class DownloadTracker extends AbstractTracker<DownloadUnit, DownloadSessi
3635
@Inject
3736
public DownloadTracker(final @DownloadingFinisher ListeningExecutorService finisherExecutorService,
3837
final OperationsService operationsService,
39-
final HashSpec hashSpec) {
40-
super(finisherExecutorService, operationsService, hashSpec);
38+
final HashService hashService) {
39+
super(finisherExecutorService, operationsService, hashService);
4140
}
4241

4342
@Override
4443
public DownloadUnit constructUnitToSubmit(final Restorer restorer,
4544
final ManifestEntry manifestEntry,
4645
final AtomicBoolean shouldCancel,
4746
final String snapshotTag,
48-
final HashSpec hashSpec) {
49-
return new DownloadUnit(restorer, manifestEntry, shouldCancel, snapshotTag, hashSpec);
47+
final HashService hashService) {
48+
return new DownloadUnit(restorer, manifestEntry, shouldCancel, snapshotTag, hashService);
5049
}
5150

5251
@Override
@@ -80,8 +79,8 @@ public DownloadUnit(final Restorer restorer,
8079
final ManifestEntry manifestEntry,
8180
final AtomicBoolean shouldCancel,
8281
final String snapshotTag,
83-
final HashSpec hashSpec) {
84-
super(manifestEntry, shouldCancel, hashSpec);
82+
final HashService hashService) {
83+
super(manifestEntry, shouldCancel, hashService);
8584
this.restorer = restorer;
8685
super.snapshotTag = snapshotTag;
8786
}
@@ -106,7 +105,7 @@ public Void call() {
106105
// hash upon downloading
107106
try {
108107
if (manifestEntry.type == Type.FILE) {
109-
new HashServiceImpl(hashSpec).verify(localPath, manifestEntry.hash);
108+
hashService.verify(localPath, manifestEntry.hash);
110109
}
111110
} catch (final HashVerificationException ex) {
112111
// delete it if has is wrong so on the next try, it will be missing and we will download it again
@@ -123,7 +122,7 @@ public Void call() {
123122
logger.info(String.format("Skipping download of file %s to %s, file already exists locally.",
124123
remoteObjectReference.getObjectKey(), manifestEntry.localFile));
125124
// if it exists, verify its hash to be sure it was not altered
126-
new HashServiceImpl(hashSpec).verify(localPath, manifestEntry.hash);
125+
hashService.verify(localPath, manifestEntry.hash);
127126
state = FINISHED;
128127
} else {
129128
// if it exists and manifest does not have hash field, consider it to be finished without any check

0 commit comments

Comments
 (0)