Commit 74da6a0

weiihann authored and sivaratrisrinivas committed
triedb/pathdb: improve perf by separating nodes map (ethereum#31306)
This PR refactors the `nodeSet` structure in the path database to use separate maps for account and storage trie nodes, resulting in performance improvements. The change maintains the same API while optimizing the internal data structure.
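
For context, the heart of the change is the shape of the internal maps. Previously, account trie nodes lived under the zero owner hash (`common.Hash{}`) inside a single two-level map, so every account-node access paid an extra map lookup plus per-owner special-casing; now account nodes get a dedicated flat map. A simplified sketch of the before/after layout (field names mirror the real struct, but `Node` stands in for `*trienode.Node` and the types are trimmed for illustration):

```go
// Before: one two-level map, with common.Hash{} acting as a magic
// key that designates the account trie.
type nodeSetOld struct {
	size  uint64
	nodes map[common.Hash]map[string]*Node // owner -> path -> node
}

// After: account nodes live in a dedicated flat map, while storage
// nodes keep the owner-keyed two-level map.
type nodeSetNew struct {
	size         uint64
	accountNodes map[string]*Node                 // path -> node
	storageNodes map[common.Hash]map[string]*Node // owner -> path -> node
}
```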
1 parent 87e1c64 commit 74da6a0

File tree: 1 file changed (+125 −68 lines)

triedb/pathdb/nodes.go (+125 −68)
```diff
@@ -36,8 +36,9 @@ import (
 // transition, typically corresponding to a block execution. It can also represent
 // the combined trie node set from several aggregated state transitions.
 type nodeSet struct {
-	size  uint64                                    // aggregated size of the trie node
-	nodes map[common.Hash]map[string]*trienode.Node // node set, mapped by owner and path
+	size         uint64                                    // aggregated size of the trie node
+	accountNodes map[string]*trienode.Node                 // account trie nodes, mapped by path
+	storageNodes map[common.Hash]map[string]*trienode.Node // storage trie nodes, mapped by owner and path
 }

 // newNodeSet constructs the set with the provided dirty trie nodes.
```
```diff
@@ -46,21 +47,30 @@ func newNodeSet(nodes map[common.Hash]map[string]*trienode.Node) *nodeSet {
 	if nodes == nil {
 		nodes = make(map[common.Hash]map[string]*trienode.Node)
 	}
-	s := &nodeSet{nodes: nodes}
+	s := &nodeSet{
+		accountNodes: make(map[string]*trienode.Node),
+		storageNodes: make(map[common.Hash]map[string]*trienode.Node),
+	}
+	for owner, subset := range nodes {
+		if owner == (common.Hash{}) {
+			s.accountNodes = subset
+		} else {
+			s.storageNodes[owner] = subset
+		}
+	}
 	s.computeSize()
 	return s
 }

 // computeSize calculates the database size of the held trie nodes.
 func (s *nodeSet) computeSize() {
 	var size uint64
-	for owner, subset := range s.nodes {
-		var prefix int
-		if owner != (common.Hash{}) {
-			prefix = common.HashLength // owner (32 bytes) for storage trie nodes
-		}
+	for path, n := range s.accountNodes {
+		size += uint64(len(n.Blob) + len(path))
+	}
+	for _, subset := range s.storageNodes {
 		for path, n := range subset {
-			size += uint64(prefix + len(n.Blob) + len(path))
+			size += uint64(common.HashLength + len(n.Blob) + len(path))
 		}
 	}
 	s.size = size
```
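
For intuition, here is a minimal, self-contained sketch of the size accounting above; the 100-byte blob and 2-byte path are made-up values, and `hashLength` stands in for `common.HashLength`:

```go
package main

import "fmt"

// Stand-in for common.HashLength; illustrative values only.
const hashLength = 32

func main() {
	blob := make([]byte, 100) // made-up 100-byte node blob
	path := []byte{0x1, 0x2}  // made-up 2-byte trie path

	// Account trie node: keyed by path alone.
	fmt.Println(len(blob) + len(path)) // 102

	// Storage trie node: the 32-byte owner hash is part of the key.
	fmt.Println(hashLength + len(blob) + len(path)) // 134
}
```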
```diff
@@ -79,15 +89,18 @@ func (s *nodeSet) updateSize(delta int64) {

 // node retrieves the trie node with node path and its trie identifier.
 func (s *nodeSet) node(owner common.Hash, path []byte) (*trienode.Node, bool) {
-	subset, ok := s.nodes[owner]
-	if !ok {
-		return nil, false
+	// Account trie node
+	if owner == (common.Hash{}) {
+		n, ok := s.accountNodes[string(path)]
+		return n, ok
 	}
-	n, ok := subset[string(path)]
+	// Storage trie node
+	subset, ok := s.storageNodes[owner]
 	if !ok {
 		return nil, false
 	}
-	return n, true
+	n, ok := subset[string(path)]
+	return n, ok
 }

 // merge integrates the provided dirty nodes into the set. The provided nodeset
```
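
Callers are unchanged: the zero owner hash still selects the account trie. A minimal sketch of the two lookup paths, as it would read inside the pathdb package (`s` is a `*nodeSet`, `owner` some non-zero storage-trie owner hash, and the paths are placeholder bytes, not real trie paths):

```go
accNode, ok := s.node(common.Hash{}, []byte{0x1}) // account trie: one flat map lookup
stNode, ok2 := s.node(owner, []byte{0x1})         // storage trie: two-level lookup, as before
_, _, _, _ = accNode, ok, stNode, ok2
```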
```diff
@@ -97,35 +110,44 @@ func (s *nodeSet) merge(set *nodeSet) {
 		delta     int64   // size difference resulting from node merging
 		overwrite counter // counter of nodes being overwritten
 	)
-	for owner, subset := range set.nodes {
-		var prefix int
-		if owner != (common.Hash{}) {
-			prefix = common.HashLength
+
+	// Merge account nodes
+	for path, n := range set.accountNodes {
+		if orig, exist := s.accountNodes[path]; !exist {
+			delta += int64(len(n.Blob) + len(path))
+		} else {
+			delta += int64(len(n.Blob) - len(orig.Blob))
+			overwrite.add(len(orig.Blob) + len(path))
 		}
-		current, exist := s.nodes[owner]
+		s.accountNodes[path] = n
+	}
+
+	// Merge storage nodes
+	for owner, subset := range set.storageNodes {
+		current, exist := s.storageNodes[owner]
 		if !exist {
 			for path, n := range subset {
-				delta += int64(prefix + len(n.Blob) + len(path))
+				delta += int64(common.HashLength + len(n.Blob) + len(path))
 			}
 			// Perform a shallow copy of the map for the subset instead of claiming it
 			// directly from the provided nodeset to avoid potential concurrent map
 			// read/write issues. The nodes belonging to the original diff layer remain
 			// accessible even after merging. Therefore, ownership of the nodes map
 			// should still belong to the original layer, and any modifications to it
 			// should be prevented.
-			s.nodes[owner] = maps.Clone(subset)
+			s.storageNodes[owner] = maps.Clone(subset)
 			continue
 		}
 		for path, n := range subset {
 			if orig, exist := current[path]; !exist {
-				delta += int64(prefix + len(n.Blob) + len(path))
+				delta += int64(common.HashLength + len(n.Blob) + len(path))
 			} else {
 				delta += int64(len(n.Blob) - len(orig.Blob))
-				overwrite.add(prefix + len(orig.Blob) + len(path))
+				overwrite.add(common.HashLength + len(orig.Blob) + len(path))
 			}
 			current[path] = n
 		}
-		s.nodes[owner] = current
+		s.storageNodes[owner] = current
 	}
 	overwrite.report(gcTrieNodeMeter, gcTrieNodeBytesMeter)
 	s.updateSize(delta)
```
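
A worked example of the delta bookkeeping above, with made-up sizes: overwriting an existing account node replaces the old blob in place, so only the blob-size difference changes the live size, while the displaced bytes are charged to the garbage-collection meters.

```go
package main

import "fmt"

func main() {
	// Made-up sizes tracing merge's bookkeeping for one overwritten
	// account node: 50-byte old blob, 70-byte new blob, 3-byte path.
	oldBlob, newBlob, pathLen := 50, 70, 3

	delta := newBlob - oldBlob       // the set grows by 20 live bytes
	overwritten := oldBlob + pathLen // 53 bytes reported to the GC meters
	fmt.Println(delta, overwritten)  // 20 53
}
```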
```diff
@@ -136,34 +158,38 @@ func (s *nodeSet) merge(set *nodeSet) {
 func (s *nodeSet) revertTo(db ethdb.KeyValueReader, nodes map[common.Hash]map[string]*trienode.Node) {
 	var delta int64
 	for owner, subset := range nodes {
-		current, ok := s.nodes[owner]
-		if !ok {
-			panic(fmt.Sprintf("non-existent subset (%x)", owner))
-		}
-		for path, n := range subset {
-			orig, ok := current[path]
-			if !ok {
-				// There is a special case in merkle tree that one child is removed
-				// from a fullNode which only has two children, and then a new child
-				// with different position is immediately inserted into the fullNode.
-				// In this case, the clean child of the fullNode will also be marked
-				// as dirty because of node collapse and expansion. In case of database
-				// rollback, don't panic if this "clean" node occurs which is not
-				// present in buffer.
-				var blob []byte
-				if owner == (common.Hash{}) {
-					blob = rawdb.ReadAccountTrieNode(db, []byte(path))
-				} else {
-					blob = rawdb.ReadStorageTrieNode(db, owner, []byte(path))
+		if owner == (common.Hash{}) {
+			// Account trie nodes
+			for path, n := range subset {
+				orig, ok := s.accountNodes[path]
+				if !ok {
+					blob := rawdb.ReadAccountTrieNode(db, []byte(path))
+					if bytes.Equal(blob, n.Blob) {
+						continue
+					}
+					panic(fmt.Sprintf("non-existent account node (%v) blob: %v", path, crypto.Keccak256Hash(n.Blob).Hex()))
 				}
-				// Ignore the clean node in the case described above.
-				if bytes.Equal(blob, n.Blob) {
-					continue
+				s.accountNodes[path] = n
+				delta += int64(len(n.Blob)) - int64(len(orig.Blob))
+			}
+		} else {
+			// Storage trie nodes
+			current, ok := s.storageNodes[owner]
+			if !ok {
+				panic(fmt.Sprintf("non-existent subset (%x)", owner))
+			}
+			for path, n := range subset {
+				orig, ok := current[path]
+				if !ok {
+					blob := rawdb.ReadStorageTrieNode(db, owner, []byte(path))
+					if bytes.Equal(blob, n.Blob) {
+						continue
+					}
+					panic(fmt.Sprintf("non-existent storage node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex()))
 				}
-				panic(fmt.Sprintf("non-existent node (%x %v) blob: %v", owner, path, crypto.Keccak256Hash(n.Blob).Hex()))
+				current[path] = n
+				delta += int64(len(n.Blob)) - int64(len(orig.Blob))
 			}
-			current[path] = n
-			delta += int64(len(n.Blob)) - int64(len(orig.Blob))
 		}
 	}
 	s.updateSize(delta)
```
```diff
@@ -184,8 +210,21 @@ type journalNodes struct {

 // encode serializes the content of trie nodes into the provided writer.
 func (s *nodeSet) encode(w io.Writer) error {
-	nodes := make([]journalNodes, 0, len(s.nodes))
-	for owner, subset := range s.nodes {
+	nodes := make([]journalNodes, 0, len(s.storageNodes)+1)
+
+	// Encode account nodes
+	if len(s.accountNodes) > 0 {
+		entry := journalNodes{Owner: common.Hash{}}
+		for path, node := range s.accountNodes {
+			entry.Nodes = append(entry.Nodes, journalNode{
+				Path: []byte(path),
+				Blob: node.Blob,
+			})
+		}
+		nodes = append(nodes, entry)
+	}
+	// Encode storage nodes
+	for owner, subset := range s.storageNodes {
 		entry := journalNodes{Owner: owner}
 		for path, node := range subset {
 			entry.Nodes = append(entry.Nodes, journalNode{
```
```diff
@@ -204,43 +243,61 @@ func (s *nodeSet) decode(r *rlp.Stream) error {
 	if err := r.Decode(&encoded); err != nil {
 		return fmt.Errorf("load nodes: %v", err)
 	}
-	nodes := make(map[common.Hash]map[string]*trienode.Node)
+	s.accountNodes = make(map[string]*trienode.Node)
+	s.storageNodes = make(map[common.Hash]map[string]*trienode.Node)
+
 	for _, entry := range encoded {
-		subset := make(map[string]*trienode.Node)
-		for _, n := range entry.Nodes {
-			if len(n.Blob) > 0 {
-				subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob)
-			} else {
-				subset[string(n.Path)] = trienode.NewDeleted()
+		if entry.Owner == (common.Hash{}) {
+			// Account nodes
+			for _, n := range entry.Nodes {
+				if len(n.Blob) > 0 {
+					s.accountNodes[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob)
+				} else {
+					s.accountNodes[string(n.Path)] = trienode.NewDeleted()
+				}
+			}
+		} else {
+			// Storage nodes
+			subset := make(map[string]*trienode.Node)
+			for _, n := range entry.Nodes {
+				if len(n.Blob) > 0 {
+					subset[string(n.Path)] = trienode.New(crypto.Keccak256Hash(n.Blob), n.Blob)
+				} else {
+					subset[string(n.Path)] = trienode.NewDeleted()
+				}
 			}
+			s.storageNodes[entry.Owner] = subset
 		}
-		nodes[entry.Owner] = subset
 	}
-	s.nodes = nodes
 	s.computeSize()
 	return nil
 }

 // write flushes nodes into the provided database batch as a whole.
 func (s *nodeSet) write(batch ethdb.Batch, clean *fastcache.Cache) int {
-	return writeNodes(batch, s.nodes, clean)
+	nodes := make(map[common.Hash]map[string]*trienode.Node)
+	if len(s.accountNodes) > 0 {
+		nodes[common.Hash{}] = s.accountNodes
+	}
+	for owner, subset := range s.storageNodes {
+		nodes[owner] = subset
+	}
+	return writeNodes(batch, nodes, clean)
 }

 // reset clears all cached trie node data.
 func (s *nodeSet) reset() {
-	s.nodes = make(map[common.Hash]map[string]*trienode.Node)
+	s.accountNodes = make(map[string]*trienode.Node)
+	s.storageNodes = make(map[common.Hash]map[string]*trienode.Node)
 	s.size = 0
 }

 // dbsize returns the approximate size of db write.
 func (s *nodeSet) dbsize() int {
 	var m int
-	for owner, nodes := range s.nodes {
-		if owner == (common.Hash{}) {
-			m += len(nodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix
-		} else {
-			m += len(nodes) * (len(rawdb.TrieNodeStoragePrefix)) // database key prefix
-		}
+	m += len(s.accountNodes) * len(rawdb.TrieNodeAccountPrefix) // database key prefix
+	for _, nodes := range s.storageNodes {
+		m += len(nodes) * (len(rawdb.TrieNodeStoragePrefix)) // database key prefix
 	}
 	return m + int(s.size)
 }
```
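
Since decode rebuilds both maps and then recomputes the size, a journal round-trip through encode/decode should be lossless. A hedged sketch of such a check, as it might look inside the pathdb package tests; `newTestNodeSet` is a made-up helper that returns a `nodeSet` populated with both account and storage nodes:

```go
func TestNodeSetJournalRoundTrip(t *testing.T) {
	src := newTestNodeSet()

	var buf bytes.Buffer
	if err := src.encode(&buf); err != nil {
		t.Fatal(err)
	}
	dst := newNodeSet(nil)
	if err := dst.decode(rlp.NewStream(&buf, 0)); err != nil {
		t.Fatal(err)
	}
	// A faithful journal should reproduce the aggregated size exactly.
	if dst.size != src.size {
		t.Fatalf("size mismatch: have %d, want %d", dst.size, src.size)
	}
}
```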
