Skip to content
This repository was archived by the owner on Apr 29, 2020. It is now read-only.

Commit 0873691

Browse files
committed
feat: default to using raw leaves
Builds on #49. A separate PR as it's slightly contentious. Sets default DAG construction to be a `dag-pb` root node, `dag-pb` intermediate nodes and `ipld-raw` nodes for leaves. This will make parsing ever so slightly faster and DAG sizes ever so slightly smaller as there is no protobuf wrapper for the actual file data. Currently you may end up with `ipld-raw` leaves or `dag-pb` leaves that contain UnixFS entries with type 'file' or 'raw' depending on where the importer is invoked from. E.g. to generate the same CIDs as go-ipfs, `ipfs.add` will result in a balanced DAG with UnixFS leaf nodes with a type 'file', `ipfs.files.write` will result in a trickle DAG with UnixFS leaf nodes of a type `raw`, and specifying CID version 1 will get you `ipld-raw` leaf nodes and whatever tree strategy you specified (balanced by default). I think this is chaos; we should use `ipld-raw` leaf types everywhere and only offer options to change the DAG structure, not leaf types.
1 parent f6882f8 commit 0873691

File tree

6 files changed

+36
-28
lines changed

6 files changed

+36
-28
lines changed

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,8 @@ The input's file paths and directory structure will be preserved in the [`dag-pb
141141
- `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk
142142
- `hashAlg` (string): multihash hashing algorithm to use
143143
- `cidVersion` (integer, default 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ its version)
144-
- `rawLeaves` (boolean, defaults to false): When a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes
145-
- `leafType` (string, defaults to `'file'`) what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is `true`)
144+
- `rawLeaves` (boolean, defaults to true): When a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes
145+
- `leafType` (string, defaults to `'file'`): what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is explicitly set to `true`)
146146
- `blockWriteConcurrency` (positive integer, defaults to 10) How many blocks to hash and write to the block store concurrently. For small numbers of large files this should be high (e.g. 50).
147147
- `fileImportConcurrency` (number, defaults to 50) How many files to import concurrently. For large numbers of small files this should be high (e.g. 50).
148148

src/dag-builder/file/index.js

+4-3
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ const reduce = (file, ipld, options) => {
5353
if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
5454
const leaf = leaves[0]
5555

56-
if (leaf.cid.codec === 'raw') {
56+
// TODO: fix this for when onlyHash is passed
57+
if (leaf.cid.codec === 'raw' && !options.onlyHash) {
5758
// only one leaf node which is a buffer
5859
const buffer = await ipld.get(leaf.cid)
5960

@@ -87,8 +88,8 @@ const reduce = (file, ipld, options) => {
8788

8889
const links = leaves
8990
.filter(leaf => {
90-
if (leaf.cid.codec === 'raw' && leaf.size) {
91-
return true
91+
if (leaf.cid.codec === 'raw') {
92+
return Boolean(leaf.size)
9293
}
9394

9495
if (!leaf.unixfs.data && leaf.unixfs.fileSize()) {

src/index.js

+8-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ const mergeOptions = require('merge-options').bind({ ignoreUndefined: true })
66
const defaultOptions = {
77
chunker: 'fixed',
88
strategy: 'balanced', // 'flat', 'trickle'
9-
rawLeaves: false,
9+
rawLeaves: true,
1010
onlyHash: false,
1111
reduceSingleLeafToSelf: true,
1212
codec: 'dag-pb',
@@ -48,7 +48,7 @@ module.exports = async function * (source, ipld, options = {}) {
4848
opts.rawLeaves = true
4949
}
5050

51-
// go-ifps trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves
51+
// go-ipfs trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves
5252
if (options.strategy === 'trickle') {
5353
opts.leafType = 'raw'
5454
opts.reduceSingleLeafToSelf = false
@@ -58,6 +58,12 @@ module.exports = async function * (source, ipld, options = {}) {
5858
opts.codec = options.format
5959
}
6060

61+
if (options.leafType && options.rawLeaves == null) {
62+
// if the user has specified a custom UnixFS leaf type and not specified
63+
// raw leaves, really do not use raw leaves
64+
opts.rawLeaves = false
65+
}
66+
6167
let dagBuilder
6268

6369
if (typeof options.dagBuilder === 'function') {

test/chunker-custom.spec.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ describe('custom chunker', function () {
4646
for await (const part of importer([{ path: 'test', content }], inmem, {
4747
chunker
4848
})) {
49-
expect(part.size).to.equal(116)
49+
expect(part.size).to.equal(104)
5050
}
5151
})
5252

test/hash-parity-with-go-ipfs.spec.js

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ const expectedHashes = {
2525

2626
strategies.forEach(strategy => {
2727
const options = {
28-
strategy: strategy
28+
strategy: strategy,
29+
rawLeaves: false
2930
}
3031

3132
describe('go-ipfs interop using importer:' + strategy, () => {

test/importer.spec.js

+19-19
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ const baseFiles = {
4747
path: '200Bytes.txt'
4848
},
4949
'1.2MiB.txt': {
50-
cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q',
50+
cid: 'QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY',
5151
size: 1258000,
5252
type: 'file',
5353
path: '1.2MiB.txt'
@@ -64,19 +64,19 @@ const strategyBaseFiles = {
6464
flat: baseFiles,
6565
balanced: extend({}, baseFiles, {
6666
'1.2MiB.txt': {
67-
cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q',
67+
cid: 'QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY',
6868
type: 'file'
6969
}
7070
}),
7171
trickle: extend({}, baseFiles, {
7272
'200Bytes.txt': {
73-
cid: 'QmY8bwnoKAKvJ8qtyPhWNxSS6sxiGVTJ9VpdQffs2KB5pE',
73+
cid: 'QmagyRwMfYhczYNv5SvcJc8xxXjZQBTTHS2jEqNMva2mYT',
7474
size: 200,
7575
type: 'file',
7676
path: '200Bytes.txt'
7777
},
7878
'1.2MiB.txt': {
79-
cid: 'QmfAxsHrpaLLuhbqqbo9KQyvQNawMnVSwutYoJed75pnco',
79+
cid: 'QmQLTvhjmSa7657mKdSfTjxFBdwxmK8n9tZC9Xdp9DtxWY',
8080
type: 'file'
8181
}
8282
})
@@ -91,51 +91,51 @@ const strategies = [
9191
const strategyOverrides = {
9292
balanced: {
9393
'foo-big': {
94-
cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj',
94+
cid: 'QmR6jJgszuuWxVDVc4PCRa6domvNf6guQK7pboZnbwnht1',
9595
path: 'foo-big',
9696
size: 1335478,
9797
type: 'directory'
9898
},
9999
pim: {
100-
cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
100+
cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK',
101101
path: 'pim',
102102
size: 1335744,
103103
type: 'directory'
104104
},
105105
'pam/pum': {
106-
cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
106+
cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK',
107107
path: 'pam/pum',
108108
size: 1335744,
109109
type: 'directory'
110110
},
111111
pam: {
112-
cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av',
112+
cid: 'QmTQcBCQvVR68R3r4eJwKL4VwDci1TCH5cj1njmcQ95Eif',
113113
path: 'pam',
114114
size: 2671269,
115115
type: 'directory'
116116
}
117117
},
118118
trickle: {
119119
'foo-big': {
120-
cid: 'QmaKbhFRy9kcCbcwrLsqYHWMiY44BDYkqTCMpAxDdd2du2',
120+
cid: 'QmR6jJgszuuWxVDVc4PCRa6domvNf6guQK7pboZnbwnht1',
121121
path: 'foo-big',
122122
size: 1334657,
123123
type: 'directory'
124124
},
125125
pim: {
126-
cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt',
126+
cid: 'QmZmrY8nLks6N66f68c1biFpay3dSVy9PGvVRZLgT2T2gD',
127127
path: 'pim',
128128
size: 1334923,
129129
type: 'directory'
130130
},
131131
'pam/pum': {
132-
cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt',
132+
cid: 'QmZmrY8nLks6N66f68c1biFpay3dSVy9PGvVRZLgT2T2gD',
133133
path: 'pam/pum',
134134
size: 1334923,
135135
type: 'directory'
136136
},
137137
pam: {
138-
cid: 'QmSuh47G9Qm3PFv1zziojtHxqCjuurSdtWAzxLxoKJPq2U',
138+
cid: 'Qmf1necujkKZHrYyj56eKFKAi9DPH8UyZg44KaTciso2Sc',
139139
path: 'pam',
140140
size: 2669627,
141141
type: 'directory'
@@ -147,19 +147,19 @@ const strategyOverrides = {
147147
type: 'file'
148148
},
149149
'foo/bar': {
150-
cid: 'QmTGMxKPzSGNBDp6jhTwnZxGW6w1S9ciyycRJ4b2qcQaHK',
150+
cid: 'QmcKJQd9cH6sip78HbjkGVLshjxtjfvnZ5ih169kZUK3Yg',
151151
size: 0,
152152
path: 'foo/bar',
153153
type: 'directory'
154154
},
155155
foo: {
156-
cid: 'Qme4A8fZmwfZESappfPcxSMTZVACiEzhHKtYRMuM1hbkDp',
156+
cid: 'QmQG6CVVTiz1TwnnueRMBZxB6d5WGJakRocHe4KGz5yHzg',
157157
size: 0,
158158
path: 'foo',
159159
type: 'directory'
160160
},
161161
'small.txt': {
162-
cid: 'QmXmZ3qT328JxWtQXqrmvma2FmPp7tMdNiSuYvVJ5QRhKs',
162+
cid: 'QmX2YDaeaAFbyNK4gat5jQMJhZL171n5Qummc5jcMsmXUu',
163163
size: 15,
164164
type: 'file',
165165
path: 'small.txt'
@@ -225,7 +225,7 @@ strategies.forEach((strategy) => {
225225
}),
226226
'foo-big': {
227227
path: 'foo-big',
228-
cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj',
228+
cid: 'QmR6jJgszuuWxVDVc4PCRa6domvNf6guQK7pboZnbwnht1',
229229
size: 1328120,
230230
type: 'directory'
231231
},
@@ -237,7 +237,7 @@ strategies.forEach((strategy) => {
237237
}),
238238
pim: {
239239
path: 'pim',
240-
cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
240+
cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK',
241241
size: 1328386,
242242
type: 'directory'
243243
},
@@ -248,13 +248,13 @@ strategies.forEach((strategy) => {
248248
type: 'directory'
249249
},
250250
'pam/pum': {
251-
cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
251+
cid: 'QmSA5QoZVieErY7TuQihgUiVc8BB6ZzgwnKvZSHcdTyFiK',
252252
path: 'pam/pum',
253253
size: 1328386,
254254
type: 'directory'
255255
},
256256
pam: {
257-
cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av',
257+
cid: 'QmTQcBCQvVR68R3r4eJwKL4VwDci1TCH5cj1njmcQ95Eif',
258258
path: 'pam',
259259
size: 2656553,
260260
type: 'directory'

0 commit comments

Comments
 (0)