Skip to content

Commit 1489950

Browse files
lbergelson authored and tomwhite committed
Refactor GVCFWriter to allow push/pull iteration.
1 parent 39206f8 commit 1489950

File tree

10 files changed

+469
-255
lines changed

10 files changed

+469
-255
lines changed

src/main/java/org/broadinstitute/hellbender/utils/downsampling/Downsampler.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
package org.broadinstitute.hellbender.utils.downsampling;
22

33
import org.broadinstitute.hellbender.utils.Utils;
4+
import org.broadinstitute.hellbender.utils.iterators.PushPullTransformer;
45

56
import java.util.Collection;
67
import java.util.List;
78

89
/**
910
* The basic downsampler API, with no reads-specific operations.
1011
*/
11-
public abstract class Downsampler<T> {
12+
public abstract class Downsampler<T> implements PushPullTransformer<T> {
1213

1314
/**
1415
* Number of items discarded by this downsampler since the last call to resetStats()
@@ -22,6 +23,7 @@ public abstract class Downsampler<T> {
2223
*
2324
* @param item the individual item to submit to the downsampler for consideration
2425
*/
26+
@Override
2527
public abstract void submit( final T item );
2628

2729
/**
@@ -30,7 +32,8 @@ public abstract class Downsampler<T> {
3032
*
3133
* @param items the collection of items to submit to the downsampler for consideration
3234
*/
33-
public void submit( final Collection<T> items ) {
35+
@Override
36+
public void submit(final Collection<T> items) {
3437
Utils.nonNull(items, "submitted items must not be null");
3538

3639
for ( final T item : items ) {
@@ -43,13 +46,15 @@ public void submit( final Collection<T> items ) {
4346
*
4447
* @return true if this downsampler has > 0 finalized items, otherwise false
4548
*/
49+
@Override
4650
public abstract boolean hasFinalizedItems();
4751

4852
/**
4953
* Return (and *remove*) all items that have survived downsampling and are waiting to be retrieved.
5054
*
5155
* @return a list of all finalized items this downsampler contains, or an empty list if there are none
5256
*/
57+
@Override
5358
public abstract List<T> consumeFinalizedItems();
5459

5560
/**
@@ -112,6 +117,7 @@ protected void incrementNumberOfDiscardedItems( final int newlyDiscardedItems )
112117
* Used to tell the downsampler that no more items will be submitted to it, and that it should
113118
* finalize any pending items.
114119
*/
120+
@Override
115121
public abstract void signalEndOfInput();
116122

117123
/**
Original file line numberDiff line numberDiff line change
@@ -1,91 +1,23 @@
11
package org.broadinstitute.hellbender.utils.downsampling;
22

3-
import org.broadinstitute.hellbender.utils.Utils;
3+
import org.broadinstitute.hellbender.utils.iterators.PushToPullIterator;
44
import org.broadinstitute.hellbender.utils.read.GATKRead;
55

6-
import java.util.Collection;
76
import java.util.Iterator;
8-
import java.util.NoSuchElementException;
97

108
/**
119
* Iterator wrapper around our generic {@link ReadsDownsampler} interface. Wraps an iterator of reads,
1210
* and downsamples the reads from that iterator using the provided downsampler.
1311
*
1412
* Converts the push-style {@link ReadsDownsampler} interface to a pull model.
1513
*/
16-
public final class ReadsDownsamplingIterator implements Iterator<GATKRead>, Iterable<GATKRead> {
17-
18-
private final Iterator<GATKRead> nestedReadIterator;
19-
private final ReadsDownsampler downsampler;
20-
private Iterator<GATKRead> cachedDownsampledReads = null;
21-
private GATKRead nextRead = null;
14+
public final class ReadsDownsamplingIterator extends PushToPullIterator<GATKRead> {
2215

2316
/**
24-
* @param iter wrapped iterator from which this iterator will pull reads to be downsampled
17+
* @param iter wrapped iterator from which this iterator will pull reads to be downsampled
2518
* @param downsampler downsampler through which the reads from the wrapped iterator will be fed
2619
*/
27-
public ReadsDownsamplingIterator( Iterator<GATKRead> iter, ReadsDownsampler downsampler ) {
28-
Utils.nonNull(iter, "iterator must not be null");
29-
Utils.nonNull(downsampler, "downsampler must not be null");
30-
31-
this.nestedReadIterator = iter;
32-
this.downsampler = downsampler;
33-
34-
advanceToNextRead();
35-
}
36-
37-
@Override
38-
public boolean hasNext() {
39-
return nextRead != null;
40-
}
41-
42-
@Override
43-
public GATKRead next() {
44-
if ( nextRead == null ) {
45-
throw new NoSuchElementException("next() called when there are no more items");
46-
}
47-
48-
final GATKRead toReturn = nextRead;
49-
advanceToNextRead();
50-
51-
return toReturn;
52-
}
53-
54-
private void advanceToNextRead() {
55-
if ( readyToReleaseReads() || fillDownsampledReadsCache() ) {
56-
nextRead = cachedDownsampledReads.next();
57-
}
58-
else {
59-
nextRead = null;
60-
}
61-
}
62-
63-
private boolean readyToReleaseReads() {
64-
return cachedDownsampledReads != null && cachedDownsampledReads.hasNext();
65-
}
66-
67-
private boolean fillDownsampledReadsCache() {
68-
while ( nestedReadIterator.hasNext() && ! downsampler.hasFinalizedItems() ) {
69-
downsampler.submit(nestedReadIterator.next());
70-
}
71-
72-
if ( ! nestedReadIterator.hasNext() ) {
73-
downsampler.signalEndOfInput();
74-
}
75-
76-
final Collection<GATKRead> downsampledReads = downsampler.consumeFinalizedItems();
77-
cachedDownsampledReads = downsampledReads.iterator();
78-
79-
return cachedDownsampledReads.hasNext();
80-
}
81-
82-
@Override
83-
public void remove() {
84-
throw new UnsupportedOperationException("Cannot remove records via a ReadsDownsamplingIterator");
85-
}
86-
87-
@Override
88-
public Iterator<GATKRead> iterator() {
89-
return this;
20+
public ReadsDownsamplingIterator(Iterator<GATKRead> iter, ReadsDownsampler downsampler) {
21+
super(iter, downsampler);
9022
}
9123
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package org.broadinstitute.hellbender.utils.iterators;
2+
3+
import org.broadinstitute.hellbender.utils.Utils;
4+
5+
import java.util.Collection;
6+
import java.util.List;
7+
8+
/**
9+
* A class that receives a stream of elements and transforms or filters them in some way, such as by downsampling with
10+
* a {@link org.broadinstitute.hellbender.utils.downsampling.Downsampler}. Elements are submitted in a push-style model,
11+
* in contrast to Java's pull-style {@link java.util.Iterator}. A transformer may be used to transform an iterator of
12+
* elements using {@link PushToPullIterator}.
13+
*
14+
* @param <T> type of items to be submitted
15+
* @see PushToPullIterator
16+
* @see org.broadinstitute.hellbender.utils.downsampling.Downsampler
17+
*/
18+
public interface PushPullTransformer<T> {
19+
/**
20+
* Submit one item to the transformer for consideration. Some transformers will be able to determine
21+
* immediately whether the item survives the transformation process, while others will need to see
22+
* more items before making that determination.
23+
*
24+
* @param item the individual item to submit to the transformer for consideration
25+
*/
26+
void submit(T item);
27+
28+
/**
29+
* Are there items that have survived the transformation process waiting to be retrieved?
30+
*
31+
* @return true if this transformer has > 0 finalized items, otherwise false
32+
*/
33+
boolean hasFinalizedItems();
34+
35+
/**
36+
* Return (and *remove*) all items that have survived transformation and are waiting to be retrieved.
37+
*
38+
* @return a list of all finalized items this transformer contains, or an empty list if there are none
39+
*/
40+
List<T> consumeFinalizedItems();
41+
42+
/**
43+
* Used to tell the transformer that no more items will be submitted to it, and that it should
44+
* finalize any pending items.
45+
*/
46+
void signalEndOfInput();
47+
48+
default void submit(final Collection<T> items) {
49+
Utils.nonNull(items, "submitted items must not be null");
50+
items.forEach(this::submit);
51+
}
52+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package org.broadinstitute.hellbender.utils.iterators;
2+
3+
import org.broadinstitute.hellbender.utils.Utils;
4+
5+
import java.util.Collection;
6+
import java.util.Iterator;
7+
import java.util.NoSuchElementException;
8+
9+
/**
10+
* Iterator wrapper around our generic {@link PushPullTransformer)} interface. Wraps an iterator of input elements,
11+
* and transforms the reads from that iterator using the provided transformer.
12+
*
13+
* Converts the push-style {@link PushPullTransformer)} interface to a pull model.
14+
*
15+
* @param <T> type of items to be iterated over
16+
*/
17+
public class PushToPullIterator<T> implements Iterator<T>, Iterable<T> {
18+
19+
private final Iterator<T> inputElements;
20+
private final PushPullTransformer<T> transformer;
21+
private Iterator<T> cachedElements = null;
22+
private T nextElement = null;
23+
24+
/**
25+
* @param inputElements wrapped iterator from which this iterator will pull elements
26+
* @param transformer transformer through which the reads from the wrapped iterator will be fed
27+
*/
28+
public PushToPullIterator(Iterator<T> inputElements, PushPullTransformer<T> transformer ) {
29+
Utils.nonNull(inputElements, "iterator must not be null");
30+
Utils.nonNull(transformer, "transformer must not be null");
31+
32+
this.inputElements = inputElements;
33+
this.transformer = transformer;
34+
35+
advanceToNextElement();
36+
}
37+
38+
@Override
39+
public boolean hasNext() {
40+
return nextElement != null;
41+
}
42+
43+
@Override
44+
public T next() {
45+
if ( nextElement == null ) {
46+
throw new NoSuchElementException("next() called when there are no more items");
47+
}
48+
49+
final T toReturn = nextElement;
50+
advanceToNextElement();
51+
52+
return toReturn;
53+
}
54+
55+
private void advanceToNextElement() {
56+
if ( readyToReleaseReads() || fillCache() ) {
57+
nextElement = cachedElements.next();
58+
}
59+
else {
60+
nextElement = null;
61+
}
62+
}
63+
64+
private boolean readyToReleaseReads() {
65+
return cachedElements != null && cachedElements.hasNext();
66+
}
67+
68+
private boolean fillCache() {
69+
while ( inputElements.hasNext() && ! transformer.hasFinalizedItems() ) {
70+
transformer.submit(inputElements.next());
71+
}
72+
73+
if ( ! inputElements.hasNext() ) {
74+
transformer.signalEndOfInput();
75+
}
76+
77+
final Collection<T> transformedElements = transformer.consumeFinalizedItems();
78+
cachedElements = transformedElements.iterator();
79+
80+
return cachedElements.hasNext();
81+
}
82+
83+
@Override
84+
public void remove() {
85+
throw new UnsupportedOperationException("Cannot remove records via a Push");
86+
}
87+
88+
@Override
89+
public Iterator<T> iterator() {
90+
return this;
91+
}
92+
}

0 commit comments

Comments
 (0)