Skip to content

Commit f42abec

Browse files
committed
Clean up and add comments
1 parent 96ff2eb commit f42abec

File tree

1 file changed

+13
-6
lines changed

1 file changed

+13
-6
lines changed

src/indexer/log_merge_policy.rs

+13-6
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ impl LogMergePolicy {
8989

9090
impl MergePolicy for LogMergePolicy {
9191
fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
92+
// Filter for segments that have less than the target number of docs, count total unmerged
93+
// docs, and sort in descending order
9294
let mut unmerged_docs = 0;
9395
let mut levels = segments
9496
.iter()
@@ -98,11 +100,13 @@ impl MergePolicy for LogMergePolicy {
98100
.sorted_by(|(a, _), (b, _)| b.cmp(a))
99101
.collect_vec();
100102

103+
// If there are enough unmerged documents to create a new segment of the target size,
104+
// then create a merge candidate for them.
101105
let mut candidates = Vec::new();
102106
if unmerged_docs >= self.target_segment_size {
103107
let mut batch_docs = 0;
104108
let mut batch = Vec::new();
105-
// Pop segments segments from levels, smallest first due to sort at start
109+
// Start with the smallest segments and add them to the batch until we reach the target
106110
while let Some((docs, seg)) = levels.pop() {
107111
batch_docs += docs;
108112
batch.push(seg);
@@ -116,6 +120,8 @@ impl MergePolicy for LogMergePolicy {
116120
// drain to reuse the buffer
117121
batch.drain(..).map(|seg| seg.id()).collect(),
118122
));
123+
// If there aren't enough documents to create another segment of the target size
124+
// then break
119125
if unmerged_docs <= self.target_segment_size {
120126
break;
121127
}
@@ -127,19 +133,18 @@ impl MergePolicy for LogMergePolicy {
127133
let mut batch = Vec::new();
128134
levels
129135
.iter()
130-
.map(|(docs, seg)| {
136+
.chunk_by(|(docs, _)| {
131137
let segment_log_size = f64::from(self.clip_min_size(*docs as u32)).log2();
132138
if segment_log_size < (current_max_log_size - self.level_log_size) {
133139
// update current_max_log_size to create a new group
134140
current_max_log_size = segment_log_size;
135141
}
136-
(current_max_log_size, seg)
142+
current_max_log_size
137143
})
138-
.chunk_by(|(level, _)| *level)
139144
.into_iter()
140145
.for_each(|(_, group)| {
141146
let mut hit_delete_threshold = false;
142-
group.into_iter().for_each(|(_, seg)| {
147+
group.for_each(|(_, seg)| {
143148
batch.push(seg.id());
144149
if !hit_delete_threshold && self.segment_above_deletes_threshold(seg) {
145150
hit_delete_threshold = true;
@@ -354,12 +359,14 @@ mod tests {
354359

355360
#[test]
356361
fn test_skip_merge_large_segments() {
362+
// All of these should be merged into a single segment since 2 * 49_999 < 100_000
357363
let test_input_merge_all = vec![
358364
create_random_segment_meta(49_999),
359365
create_random_segment_meta(49_999),
360366
create_random_segment_meta(49_999),
361367
];
362368

369+
// Only two of these should be merged since 2 * 50_000 >= 100_000, then the third is left
363370
let test_input_merge_two = vec![
364371
create_random_segment_meta(50_000),
365372
create_random_segment_meta(50_000),
@@ -371,8 +378,8 @@ mod tests {
371378
let result_list_merge_two =
372379
test_merge_policy().compute_merge_candidates(&test_input_merge_two);
373380

374-
assert_eq!(result_list_merge_two[0].0.len(), 2);
375381
assert_eq!(result_list_merge_all[0].0.len(), 3);
382+
assert_eq!(result_list_merge_two[0].0.len(), 2);
376383
}
377384

378385
#[test]

0 commit comments

Comments
 (0)