Skip to content

Commit ad25001

Browse files
hannahhoward
and
Alan Shaw
authored
feat: add ordering option to getPath (#19)
# Goals This adds support for depth-first traversals to Dagula. It's designed to be in conformance with ipfs/specs#412. (will update if parameter names change) # Implementation - adds order parameter to options struct for getPath and get - renames breadthFirstSearch to blockLinks (it isn't really "breadth first search" so much as "all the links in this block"). - handles ordering change inside of `get`, using recursion for depth-first search - adds tests for both orders. --------- Co-authored-by: Alan Shaw <[email protected]>
1 parent ceca20a commit ad25001

File tree

2 files changed

+181
-8
lines changed

2 files changed

+181
-8
lines changed

index.js

+16-8
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { CID } from 'multiformats/cid'
33
import * as dagPB from '@ipld/dag-pb'
44
import * as Block from 'multiformats/block'
55
import { exporter, walkPath } from 'ipfs-unixfs-exporter'
6-
import { transform } from 'streaming-iterables'
6+
import { parallelMap, transform } from 'streaming-iterables'
77
import { Decoders, Hashers } from './defaults.js'
88
import { identity } from 'multiformats/hashes/identity'
99

@@ -38,9 +38,10 @@ export class Dagula {
3838
*/
3939
async * get (cid, options = {}) {
4040
cid = typeof cid === 'string' ? CID.parse(cid) : cid
41+
const order = options.order ?? 'rnd'
4142
log('getting DAG %s', cid)
4243
let cids = Array.isArray(cid) ? cid : [cid]
43-
const search = options.search || breadthFirstSearch()
44+
const search = options.search || blockLinks()
4445

4546
/** @type {AbortController[]} */
4647
let aborters = []
@@ -49,7 +50,8 @@ export class Dagula {
4950

5051
while (cids.length > 0) {
5152
log('fetching %d CIDs', cids.length)
52-
const fetchBlocks = transform(cids.length, async cid => {
53+
const parallelFn = order === 'dfs' ? parallelMap : transform
54+
const fetchBlocks = parallelFn(cids.length, async cid => {
5355
if (signal) {
5456
const aborter = new AbortController()
5557
aborters.push(aborter)
@@ -77,7 +79,12 @@ export class Dagula {
7779
// createUnsafe here.
7880
const block = await Block.create({ bytes, cid, codec: decoder, hasher })
7981
yield block
80-
nextCids = nextCids.concat(search(block))
82+
const blockCids = search(block)
83+
if (order === 'dfs') {
84+
yield * this.get(blockCids, options)
85+
} else {
86+
nextCids = nextCids.concat(blockCids)
87+
}
8188
}
8289
log('%d CIDs in links', nextCids.length)
8390
cids = nextCids
@@ -94,6 +101,7 @@ export class Dagula {
94101
* @param {string} cidPath
95102
* @param {object} [options]
96103
* @param {AbortSignal} [options.signal]
104+
* @param {'dfs'|'unk'} [options.order] Specify desired block ordering. `dfs` - Depth First Search, `unk` - unknown ordering.
97105
* @param {'all'|'file'|'block'} [options.carScope] control how many layers of the dag are returned
98106
* 'all': return the entire dag starting at path. (default)
99107
* 'block': return the block identified by the path.
@@ -142,7 +150,7 @@ export class Dagula {
142150
const links = getLinks(base, this.#decoders)
143151
// fetch the entire dag rooted at the end of the provided path
144152
if (links.length) {
145-
yield * this.get(links, { signal: options.signal })
153+
yield * this.get(links, { signal: options.signal, order: options.order })
146154
}
147155
}
148156
// non-files, like directories, and IPLD Maps only return blocks necessary for their enumeration
@@ -152,7 +160,7 @@ export class Dagula {
152160
if (base.unixfs.type === 'hamt-sharded-directory') {
153161
const hamtLinks = base.node.Links?.filter(l => l.Name.length === 2).map(l => l.Hash) || []
154162
if (hamtLinks.length) {
155-
yield * this.get(hamtLinks, { search: hamtSearch, signal: options.signal })
163+
yield * this.get(hamtLinks, { search: hamtSearch, signal: options.signal, order: options.order })
156164
}
157165
}
158166
}
@@ -221,7 +229,7 @@ export class Dagula {
221229
*
222230
* @param {([name, cid]: [string, Link]) => boolean} linkFilter
223231
*/
224-
export function breadthFirstSearch (linkFilter = () => true) {
232+
export function blockLinks (linkFilter = () => true) {
225233
/**
226234
* @param {import('multiformats').BlockView} block
227235
*/
@@ -245,7 +253,7 @@ export function breadthFirstSearch (linkFilter = () => true) {
245253
}
246254
}
247255

248-
export const hamtSearch = breadthFirstSearch(([name]) => name.length === 2)
256+
export const hamtSearch = blockLinks(([name]) => name.length === 2)
249257

250258
/**
251259
* Get links as array of CIDs for a UnixFS entry.

test/getPath.test.js

+165
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,171 @@ test('should getPath on file with carScope=file', async t => {
200200
t.deepEqual(blocks.at(3).bytes, filePart2.bytes)
201201
})
202202

203+
test('should getPath on large file with carScope=file, default ordering', async t => {
204+
// return all blocks in path and all blocks for resolved target of path
205+
const filePart1 = await Block.decode({ codec: raw, bytes: fromString(`MORE TEST DATA ${Date.now()}`), hasher: sha256 })
206+
const filePart2 = await Block.decode({ codec: raw, bytes: fromString(`EVEN MORE TEST DATA ${Date.now()}`), hasher: sha256 })
207+
const filePart3 = await Block.decode({ codec: raw, bytes: fromString(`SO MUCH TEST DATA ${Date.now()}`), hasher: sha256 })
208+
const filePart4 = await Block.decode({ codec: raw, bytes: fromString(`TEST DATA DOING THE MOST ${Date.now()}`), hasher: sha256 })
209+
const fileSubNode1 = await Block.encode({
210+
codec: dagPB,
211+
hasher: sha256,
212+
value: {
213+
Data: new UnixFSv1({ type: 'file' }).marshal(),
214+
Links: [
215+
{ Name: '0', Hash: filePart1.cid },
216+
{ Name: '1', Hash: filePart2.cid }
217+
]
218+
}
219+
})
220+
const fileSubNode2 = await Block.encode({
221+
codec: dagPB,
222+
hasher: sha256,
223+
value: {
224+
Data: new UnixFSv1({ type: 'file' }).marshal(),
225+
Links: [
226+
{ Name: '0', Hash: filePart3.cid },
227+
{ Name: '1', Hash: filePart4.cid }
228+
]
229+
}
230+
})
231+
232+
const fileNode = await Block.encode({
233+
codec: dagPB,
234+
hasher: sha256,
235+
value: {
236+
Data: new UnixFSv1({ type: 'file' }).marshal(),
237+
Links: [
238+
{ Name: '0', Hash: fileSubNode1.cid },
239+
{ Name: '1', Hash: fileSubNode2.cid }
240+
]
241+
}
242+
})
243+
244+
const dirNode = await Block.encode({
245+
codec: dagPB,
246+
hasher: sha256,
247+
value: {
248+
Data: new UnixFSv1({ type: 'directory' }).marshal(),
249+
Links: [
250+
{ Name: 'foo', Hash: fileNode.cid },
251+
{ Name: 'other', Hash: CID.parse('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') }
252+
]
253+
}
254+
})
255+
256+
const peer = await startBitswapPeer([filePart1, filePart2, filePart3, filePart4, fileSubNode1, fileSubNode2, fileNode, dirNode])
257+
258+
const libp2p = await getLibp2p()
259+
const dagula = await fromNetwork(libp2p, { peer: peer.libp2p.getMultiaddrs()[0] })
260+
261+
const blocks = []
262+
const carScope = 'file'
263+
for await (const entry of dagula.getPath(`${dirNode.cid}/foo`, { carScope })) {
264+
blocks.push(entry)
265+
}
266+
// did not try and return block for `other`
267+
t.is(blocks.length, 8)
268+
t.deepEqual(blocks.at(0).cid, dirNode.cid)
269+
t.deepEqual(blocks.at(0).bytes, dirNode.bytes)
270+
t.deepEqual(blocks.at(1).cid, fileNode.cid)
271+
t.deepEqual(blocks.at(1).bytes, fileNode.bytes)
272+
t.deepEqual(blocks.at(2).cid, fileSubNode1.cid)
273+
t.deepEqual(blocks.at(2).bytes, fileSubNode1.bytes)
274+
t.deepEqual(blocks.at(3).cid, fileSubNode2.cid)
275+
t.deepEqual(blocks.at(3).bytes, fileSubNode2.bytes)
276+
t.deepEqual(blocks.at(4).cid, filePart1.cid)
277+
t.deepEqual(blocks.at(4).bytes, filePart1.bytes)
278+
t.deepEqual(blocks.at(5).cid, filePart2.cid)
279+
t.deepEqual(blocks.at(5).bytes, filePart2.bytes)
280+
t.deepEqual(blocks.at(6).cid, filePart3.cid)
281+
t.deepEqual(blocks.at(6).bytes, filePart3.bytes)
282+
t.deepEqual(blocks.at(7).cid, filePart4.cid)
283+
t.deepEqual(blocks.at(7).bytes, filePart4.bytes)
284+
})
285+
286+
test('should getPath on large file with carScope=file, dfs ordering', async t => {
287+
// return all blocks in path and all blocks for resolved target of path
288+
const filePart1 = await Block.decode({ codec: raw, bytes: fromString(`MORE TEST DATA ${Date.now()}`), hasher: sha256 })
289+
const filePart2 = await Block.decode({ codec: raw, bytes: fromString(`EVEN MORE TEST DATA ${Date.now()}`), hasher: sha256 })
290+
const filePart3 = await Block.decode({ codec: raw, bytes: fromString(`SO MUCH TEST DATA ${Date.now()}`), hasher: sha256 })
291+
const filePart4 = await Block.decode({ codec: raw, bytes: fromString(`TEST DATA DOING THE MOST ${Date.now()}`), hasher: sha256 })
292+
const fileSubNode1 = await Block.encode({
293+
codec: dagPB,
294+
hasher: sha256,
295+
value: {
296+
Data: new UnixFSv1({ type: 'file' }).marshal(),
297+
Links: [
298+
{ Name: '0', Hash: filePart1.cid },
299+
{ Name: '1', Hash: filePart2.cid }
300+
]
301+
}
302+
})
303+
const fileSubNode2 = await Block.encode({
304+
codec: dagPB,
305+
hasher: sha256,
306+
value: {
307+
Data: new UnixFSv1({ type: 'file' }).marshal(),
308+
Links: [
309+
{ Name: '0', Hash: filePart3.cid },
310+
{ Name: '1', Hash: filePart4.cid }
311+
]
312+
}
313+
})
314+
315+
const fileNode = await Block.encode({
316+
codec: dagPB,
317+
hasher: sha256,
318+
value: {
319+
Data: new UnixFSv1({ type: 'file' }).marshal(),
320+
Links: [
321+
{ Name: '0', Hash: fileSubNode1.cid },
322+
{ Name: '1', Hash: fileSubNode2.cid }
323+
]
324+
}
325+
})
326+
327+
const dirNode = await Block.encode({
328+
codec: dagPB,
329+
hasher: sha256,
330+
value: {
331+
Data: new UnixFSv1({ type: 'directory' }).marshal(),
332+
Links: [
333+
{ Name: 'foo', Hash: fileNode.cid },
334+
{ Name: 'other', Hash: CID.parse('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') }
335+
]
336+
}
337+
})
338+
339+
const peer = await startBitswapPeer([filePart1, filePart2, filePart3, filePart4, fileSubNode1, fileSubNode2, fileNode, dirNode])
340+
341+
const libp2p = await getLibp2p()
342+
const dagula = await fromNetwork(libp2p, { peer: peer.libp2p.getMultiaddrs()[0] })
343+
344+
const blocks = []
345+
const carScope = 'file'
346+
for await (const entry of dagula.getPath(`${dirNode.cid}/foo`, { carScope, order: 'dfs' })) {
347+
blocks.push(entry)
348+
}
349+
// did not try and return block for `other`
350+
t.is(blocks.length, 8)
351+
t.deepEqual(blocks.at(0).cid, dirNode.cid)
352+
t.deepEqual(blocks.at(0).bytes, dirNode.bytes)
353+
t.deepEqual(blocks.at(1).cid, fileNode.cid)
354+
t.deepEqual(blocks.at(1).bytes, fileNode.bytes)
355+
t.deepEqual(blocks.at(2).cid, fileSubNode1.cid)
356+
t.deepEqual(blocks.at(2).bytes, fileSubNode1.bytes)
357+
t.deepEqual(blocks.at(3).cid, filePart1.cid)
358+
t.deepEqual(blocks.at(3).bytes, filePart1.bytes)
359+
t.deepEqual(blocks.at(4).cid, filePart2.cid)
360+
t.deepEqual(blocks.at(4).bytes, filePart2.bytes)
361+
t.deepEqual(blocks.at(5).cid, fileSubNode2.cid)
362+
t.deepEqual(blocks.at(5).bytes, fileSubNode2.bytes)
363+
t.deepEqual(blocks.at(6).cid, filePart3.cid)
364+
t.deepEqual(blocks.at(6).bytes, filePart3.bytes)
365+
t.deepEqual(blocks.at(7).cid, filePart4.cid)
366+
t.deepEqual(blocks.at(7).bytes, filePart4.bytes)
367+
})
203368
test('should getPath on file with carScope=block', async t => {
204369
// return all blocks in path and all blocks for resolved target of path
205370
const filePart1 = await Block.decode({ codec: raw, bytes: fromString(`MORE TEST DATA ${Date.now()}`), hasher: sha256 })

0 commit comments

Comments (0)