Skip to content

Commit bd9dbee

Browse files
chroma-droidrescrv
andauthored
[HOTFIX] applying PR #4855 to release/2025-06-13 (#4860)
This PR cherry-picks the commit 9331c11 onto release/2025-06-13. If there are unresolved conflicts, please resolve them manually. Co-authored-by: Robert Escriva <[email protected]>
1 parent 9319994 commit bd9dbee

File tree

2 files changed

+61
-32
lines changed

2 files changed

+61
-32
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Seeds for failure cases proptest has generated in the past. It is
2+
# automatically read and these particular cases re-run before any
3+
# novel cases are generated.
4+
#
5+
# It is recommended to check this file in to source control so that
6+
# everyone who runs the test benefits from these saved cases.
7+
cc 224805d1b23398d59a037d3ae192e5d32fc114957b64ea0b8209d67ef642035b # shrinks to operations_before_seal = [], operations_after_migrate = [OperationRecord { id: "", document: None, embedding: Some("[...]"), metadata: None, operation: Add }]
8+
cc 6c09d8d5a095fde4c6263f24e208c2db1ac203d8f8f7d899573659268255c239 # shrinks to initial_operations = [], source_operations = [], fork_operations = []
9+
cc 85a395fbfca69e7c9197fce89252b273c98389e3f5a773c443f405639f137e49 # shrinks to operations = [(0, OperationRecord { id: "", document: None, embedding: Some("[...]"), metadata: None, operation: Add })]
10+
cc 9fbbfa4c87cfe3d41253eadb2bc048aa36bc939208cbe23e3ee5eb8094df91d4 # shrinks to read_offset = 1, batch_size = 1, operations = [OperationRecord { id: "", document: None, embedding: Some("[...]"), metadata: None, operation: Add }]
11+
cc ba6cef35853bca82d7527a7f42b2dbb8a6d60f3084f249993b56cf945fd83b0e # shrinks to read_offset = 1, batch_size = 1, operations = [OperationRecord { id: "", document: None, embedding: Some("[...]"), metadata: None, operation: Add }]
12+
cc bec924432fdfb2bd91273126b0cd30300a6988be3bbb3cf4ceb928eb8a37bacd # shrinks to initial_operations = [], source_operations = [], fork_operations = []

rust/log-service/src/lib.rs

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -308,14 +308,18 @@ struct RollupPerCollection {
308308
}
309309

310310
impl RollupPerCollection {
311-
fn new(first_observation: LogPosition, num_records: u64) -> Self {
311+
fn new(
312+
first_observation: LogPosition,
313+
num_records: u64,
314+
initial_insertion_epoch_us: u64,
315+
) -> Self {
312316
Self {
313317
start_log_position: first_observation,
314318
limit_log_position: LogPosition::from_offset(
315319
first_observation.offset().saturating_add(num_records),
316320
),
317321
reinsert_count: 0,
318-
initial_insertion_epoch_us: 0,
322+
initial_insertion_epoch_us,
319323
}
320324
}
321325

@@ -329,14 +333,15 @@ impl RollupPerCollection {
329333
if log_position < self.start_log_position {
330334
self.start_log_position = log_position;
331335
}
332-
if log_position + num_records > self.limit_log_position {
333-
self.limit_log_position = log_position + num_records;
336+
if log_position.offset().saturating_add(num_records) > self.limit_log_position.offset() {
337+
self.limit_log_position =
338+
LogPosition::from_offset(log_position.offset().saturating_add(num_records));
334339
}
335340
// Take the biggest reinsert count.
336341
self.reinsert_count = std::cmp::max(self.reinsert_count, reinsert_count);
337342
// Consider the most recent initial insertion time so if we've compacted earlier we drop.
338343
self.initial_insertion_epoch_us =
339-
std::cmp::max(self.initial_insertion_epoch_us, initial_insertion_epoch_us);
344+
std::cmp::min(self.initial_insertion_epoch_us, initial_insertion_epoch_us);
340345
}
341346

342347
fn witness_cursor(&mut self, witness: Option<&Witness>) {
@@ -349,6 +354,8 @@ impl RollupPerCollection {
349354
self.start_log_position = witness
350355
.map(|x| x.1.position)
351356
.unwrap_or(LogPosition::from_offset(1));
357+
assert!(self.start_log_position <= self.limit_log_position);
358+
self.limit_log_position = self.limit_log_position.max(self.start_log_position);
352359
}
353360

354361
fn is_empty(&self) -> bool {
@@ -359,14 +366,20 @@ impl RollupPerCollection {
359366
DirtyMarker::MarkDirty {
360367
collection_id,
361368
log_position: self.start_log_position,
362-
num_records: self.limit_log_position - self.start_log_position,
363-
reinsert_count: self.reinsert_count,
369+
num_records: self
370+
.limit_log_position
371+
.offset()
372+
.saturating_sub(self.start_log_position.offset()),
373+
reinsert_count: self.reinsert_count.saturating_add(1),
364374
initial_insertion_epoch_us: self.initial_insertion_epoch_us,
365375
}
366376
}
367377

368378
fn requires_backpressure(&self, threshold: u64) -> bool {
369-
self.limit_log_position - self.start_log_position >= threshold
379+
self.limit_log_position
380+
.offset()
381+
.saturating_sub(self.start_log_position.offset())
382+
>= threshold
370383
}
371384
}
372385

@@ -430,9 +443,13 @@ impl DirtyMarker {
430443
reinsert_count,
431444
initial_insertion_epoch_us,
432445
} => {
433-
let position = rollups
434-
.entry(*collection_id)
435-
.or_insert_with(|| RollupPerCollection::new(*log_position, *num_records));
446+
let position = rollups.entry(*collection_id).or_insert_with(|| {
447+
RollupPerCollection::new(
448+
*log_position,
449+
*num_records,
450+
*initial_insertion_epoch_us,
451+
)
452+
});
436453
position.observe_dirty_marker(
437454
*log_position,
438455
*num_records,
@@ -2461,22 +2478,21 @@ mod tests {
24612478
fn rollup_per_collection_new() {
24622479
let start_position = LogPosition::from_offset(10);
24632480
let num_records = 5;
2464-
let rollup = RollupPerCollection::new(start_position, num_records);
2481+
let rollup = RollupPerCollection::new(start_position, num_records, 0);
24652482

24662483
assert_eq!(start_position, rollup.start_log_position);
24672484
assert_eq!(LogPosition::from_offset(15), rollup.limit_log_position);
24682485
assert_eq!(0, rollup.reinsert_count);
2469-
assert_eq!(0, rollup.initial_insertion_epoch_us);
24702486
}
24712487

24722488
#[test]
24732489
fn rollup_per_collection_observe_dirty_marker() {
24742490
let start_position = LogPosition::from_offset(10);
2475-
let mut rollup = RollupPerCollection::new(start_position, 5);
24762491
let now = SystemTime::now()
24772492
.duration_since(SystemTime::UNIX_EPOCH)
24782493
.unwrap()
24792494
.as_micros() as u64;
2495+
let mut rollup = RollupPerCollection::new(start_position, 5, now);
24802496

24812497
// Observe a marker that extends the range
24822498
rollup.observe_dirty_marker(LogPosition::from_offset(20), 10, 3, now);
@@ -2489,22 +2505,22 @@ mod tests {
24892505
rollup.observe_dirty_marker(LogPosition::from_offset(5), 2, 1, now - 1000);
24902506
assert_eq!(LogPosition::from_offset(5), rollup.start_log_position);
24912507
assert_eq!(LogPosition::from_offset(30), rollup.limit_log_position);
2492-
assert_eq!(3, rollup.reinsert_count); // Should keep max
2493-
assert_eq!(now, rollup.initial_insertion_epoch_us); // Should keep max
2508+
assert_eq!(3, rollup.reinsert_count); // Same
2509+
assert_eq!(now - 1000, rollup.initial_insertion_epoch_us); // Should move to min
24942510
}
24952511

24962512
#[test]
24972513
fn rollup_per_collection_is_empty() {
2498-
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 0);
2514+
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 0, 42);
24992515
assert!(rollup.is_empty());
25002516

2501-
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5);
2517+
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5, 42);
25022518
assert!(!rollup.is_empty());
25032519
}
25042520

25052521
#[test]
25062522
fn rollup_per_collection_requires_backpressure() {
2507-
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 100);
2523+
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 100, 42);
25082524
assert!(rollup.requires_backpressure(50));
25092525
assert!(!rollup.requires_backpressure(150));
25102526
assert!(rollup.requires_backpressure(100)); // Equal case
@@ -2518,7 +2534,7 @@ mod tests {
25182534
.unwrap()
25192535
.as_micros() as u64;
25202536

2521-
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5);
2537+
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5, now);
25222538
rollup.observe_dirty_marker(LogPosition::from_offset(10), 5, 2, now);
25232539

25242540
let marker = rollup.dirty_marker(collection_id);
@@ -2533,7 +2549,7 @@ mod tests {
25332549
assert_eq!(collection_id, cid);
25342550
assert_eq!(LogPosition::from_offset(10), log_position);
25352551
assert_eq!(5, num_records);
2536-
assert_eq!(2, reinsert_count);
2552+
assert_eq!(3, reinsert_count);
25372553
assert_eq!(now, initial_insertion_epoch_us);
25382554
}
25392555
_ => panic!("Expected MarkDirty variant"),
@@ -2674,7 +2690,7 @@ mod tests {
26742690
assert_eq!(LogPosition::from_offset(10), rollup1.start_log_position);
26752691
assert_eq!(LogPosition::from_offset(33), rollup1.limit_log_position);
26762692
assert_eq!(1, rollup1.reinsert_count); // max of 1 and 0
2677-
assert_eq!(now, rollup1.initial_insertion_epoch_us); // max of now and now-1000
2693+
assert_eq!(now - 1000, rollup1.initial_insertion_epoch_us); // max of now and now-1000
26782694

26792695
// Check collection_id2 rollup
26802696
let rollup2 = rollup.get(&collection_id2).unwrap();
@@ -2827,7 +2843,7 @@ mod tests {
28272843

28282844
#[test]
28292845
fn rollup_per_collection_witness_functionality() {
2830-
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5);
2846+
let rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5, 42);
28312847

28322848
// Test that the rollup can handle boundary conditions
28332849
assert_eq!(LogPosition::from_offset(10), rollup.start_log_position);
@@ -2837,11 +2853,11 @@ mod tests {
28372853

28382854
#[test]
28392855
fn rollup_per_collection_backpressure_boundary_conditions() {
2840-
let rollup = RollupPerCollection::new(LogPosition::from_offset(0), u64::MAX);
2856+
let rollup = RollupPerCollection::new(LogPosition::from_offset(0), u64::MAX, 42);
28412857
assert!(rollup.requires_backpressure(u64::MAX - 1));
28422858
assert!(rollup.requires_backpressure(u64::MAX));
28432859

2844-
let rollup = RollupPerCollection::new(LogPosition::from_offset(u64::MAX - 100), 50);
2860+
let rollup = RollupPerCollection::new(LogPosition::from_offset(u64::MAX - 100), 50, 42);
28452861
assert!(!rollup.requires_backpressure(100));
28462862
assert!(rollup.requires_backpressure(25));
28472863
}
@@ -2997,11 +3013,11 @@ mod tests {
29973013

29983014
#[test]
29993015
fn rollup_per_collection_gap_handling() {
3000-
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5);
30013016
let now = SystemTime::now()
30023017
.duration_since(SystemTime::UNIX_EPOCH)
30033018
.unwrap()
30043019
.as_micros() as u64;
3020+
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5, now + 1);
30053021

30063022
rollup.observe_dirty_marker(LogPosition::from_offset(20), 5, 1, now);
30073023

@@ -3059,7 +3075,7 @@ mod tests {
30593075
collection_rollup.limit_log_position
30603076
);
30613077
assert_eq!(99, collection_rollup.reinsert_count);
3062-
assert_eq!(now + 999, collection_rollup.initial_insertion_epoch_us);
3078+
assert_eq!(now, collection_rollup.initial_insertion_epoch_us);
30633079
}
30643080

30653081
#[test]
@@ -3118,7 +3134,7 @@ mod tests {
31183134
#[test]
31193135
fn rollup_per_collection_extreme_positions() {
31203136
let start_position = LogPosition::from_offset(u64::MAX - 10);
3121-
let rollup = RollupPerCollection::new(start_position, 5);
3137+
let rollup = RollupPerCollection::new(start_position, 5, 42);
31223138

31233139
assert_eq!(start_position, rollup.start_log_position);
31243140
assert!(!rollup.is_empty());
@@ -3127,7 +3143,7 @@ mod tests {
31273143

31283144
#[test]
31293145
fn rollup_per_collection_zero_epoch() {
3130-
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5);
3146+
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(10), 5, u64::MAX);
31313147

31323148
rollup.observe_dirty_marker(LogPosition::from_offset(15), 5, 1, 0);
31333149

@@ -3193,23 +3209,24 @@ mod tests {
31933209

31943210
#[test]
31953211
fn rollup_per_collection_edge_case_positions() {
3196-
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(100), 0);
3212+
let mut rollup = RollupPerCollection::new(LogPosition::from_offset(100), 0, 1042);
31973213

31983214
rollup.observe_dirty_marker(LogPosition::from_offset(50), 25, 1, 1000);
31993215

32003216
assert_eq!(LogPosition::from_offset(50), rollup.start_log_position);
32013217
assert_eq!(LogPosition::from_offset(100), rollup.limit_log_position);
3218+
assert_eq!(1000, rollup.initial_insertion_epoch_us);
32023219
}
32033220

32043221
#[test]
32053222
fn backpressure_threshold_verification() {
3206-
let rollup = RollupPerCollection::new(LogPosition::from_offset(0), 100);
3223+
let rollup = RollupPerCollection::new(LogPosition::from_offset(0), 100, 42);
32073224

32083225
assert!(rollup.requires_backpressure(99));
32093226
assert!(rollup.requires_backpressure(100));
32103227
assert!(!rollup.requires_backpressure(101));
32113228

3212-
let zero_rollup = RollupPerCollection::new(LogPosition::from_offset(10), 0);
3229+
let zero_rollup = RollupPerCollection::new(LogPosition::from_offset(10), 0, 42);
32133230
assert!(!zero_rollup.requires_backpressure(1));
32143231
assert!(zero_rollup.requires_backpressure(0));
32153232
}

0 commit comments

Comments
 (0)