Skip to content

Commit 5b36986

Browse files
Add graph extraction helpers to make it easier to use NVL (#517)
* add graph extraction helpers * clean up APIs * add better docs * Update packages/query-tools/src/cypher-execution/extract-unique-nodes-and-relationships.test.ts Co-authored-by: Isak Nilsson <[email protected]> * review comments --------- Co-authored-by: Isak Nilsson <[email protected]>
1 parent 039d1c5 commit 5b36986

File tree

5 files changed

+423
-1
lines changed

5 files changed

+423
-1
lines changed

packages/query-tools/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
"build-esm": "tsc --module esnext --outDir dist/esm",
3535
"build-commonjs": "tsc --module commonjs --outDir dist/cjs",
3636
"dev": "concurrently 'npm:build-esm -- --watch' 'npm:build-commonjs -- --watch'",
37-
"clean": "rm -rf {dist,tsconfig.tsbuildinfo}"
37+
"clean": "rm -rf {dist,tsconfig.tsbuildinfo}",
38+
"test": "vitest"
3839
}
3940
}
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
import type { Record } from 'neo4j-driver';
2+
import { Node, Path, PathSegment, Relationship } from 'neo4j-driver';
3+
4+
import { describe, expect, test } from 'vitest';
5+
import { extractUniqueNodesAndRels } from './extract-unique-nodes-and-relationships';
6+
7+
describe('extractNodesAndRels', () => {
  /**
   * Builds a minimal stand-in for a bolt record that exposes a single key
   * and returns `value` from every `get` call.
   */
  const recordReturning = (key: string, value: unknown): Record => {
    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
    const stub = { keys: [key], get: () => value } as unknown as Record;
    return stub;
  };

  /** Builds the path (:Person 'node1')-[:ACTED_IN 'rel1']->(:Movie 'node2'). */
  const buildActedInPath = () => {
    const person = new Node(1, ['Person'], { prop1: 'prop1' }, 'node1');
    const movie = new Node(2, ['Movie'], { prop2: 'prop2' }, 'node2');
    const actedIn = new Relationship(
      3,
      1,
      2,
      'ACTED_IN',
      {},
      'rel1',
      'node1',
      'node2',
    );
    const segment = new PathSegment(person, actedIn, movie);
    return new Path(person, movie, [segment]);
  };

  test('should map bolt records with a path to nodes and relationships', () => {
    const { nodes, relationships } = extractUniqueNodesAndRels([
      recordReturning('p', buildActedInPath()),
    ]);

    const graphNodeStart = nodes.find(
      (node) => node.elementId.toString() === 'node1',
    );
    const graphNodeEnd = nodes.find(
      (node) => node.elementId.toString() === 'node2',
    );
    const firstRel = relationships[0];

    const anyMissing = [graphNodeStart, graphNodeEnd, firstRel].includes(
      undefined,
    );
    if (
      anyMissing ||
      graphNodeStart === undefined ||
      graphNodeEnd === undefined ||
      firstRel === undefined
    ) {
      throw new Error('Error in test data, got undefined');
    }

    expect(nodes.length).toBe(2);

    expect(graphNodeStart.labels).toEqual(['Person']);
    expect(graphNodeStart.properties).toEqual({ prop1: 'prop1' });

    expect(graphNodeEnd.labels).toEqual(['Movie']);
    expect(graphNodeEnd.properties).toEqual({ prop2: 'prop2' });
    expect(relationships.length).toBe(1);

    expect(firstRel.elementId.toString()).toEqual('rel1');
    expect(firstRel.startNodeElementId.toString()).toEqual('node1');
    expect(firstRel.endNodeElementId.toString()).toEqual('node2');
    expect(firstRel.type).toEqual('ACTED_IN');
    expect(firstRel.properties).toEqual({});
  });

  test('should deduplicate bolt records based on node id and filter out dangling relationships', () => {
    // Two separate objects that describe the same node (identical element ID).
    const makePerson = () =>
      new Node(1, ['Person'], { prop1: 'prop1' }, 'node1');
    // Ends at 'node34', which never appears among the returned nodes.
    const danglingRel = new Relationship(
      2,
      1,
      34,
      'ACTED_IN',
      {},
      'rel1',
      'node1',
      'node34',
    );

    const result = extractUniqueNodesAndRels([
      recordReturning('n', [makePerson(), makePerson(), danglingRel]),
    ]);

    expect(result.limitHit).toBe(false);
    expect(result.nodes.length).toBe(1);
    expect(result.relationships.length).toBe(0);
  });

  test('should respect the max nodes limit and filter out dangling relations', () => {
    const result = extractUniqueNodesAndRels(
      [recordReturning('p', buildActedInPath())],
      { nodeLimit: 1 },
    );

    expect(result.limitHit).toBe(true);
    expect(result.nodes.length).toBe(1);

    const graphNodeStart = result.nodes[0];
    expect(graphNodeStart).toBeDefined();
    if (graphNodeStart === undefined) {
      throw new Error('Error in test data, got undefined');
    }

    expect(graphNodeStart.labels).toEqual(['Person']);
    expect(graphNodeStart.properties).toEqual({ prop1: 'prop1' });
    expect(result.relationships.length).toBe(0);
  });

  test('should respect the max nodes limit and not filter out dangling relations when asked to keep them', () => {
    const options = { nodeLimit: 1, keepDanglingRels: true };
    const result = extractUniqueNodesAndRels(
      [recordReturning('p', buildActedInPath())],
      options,
    );

    expect(result.limitHit).toBe(true);
    expect(result.nodes.length).toBe(1);

    const graphNodeStart = result.nodes[0];
    expect(graphNodeStart).toBeDefined();
    if (graphNodeStart === undefined) {
      throw new Error('Error in test data, got undefined');
    }

    expect(graphNodeStart.labels).toEqual(['Person']);
    expect(graphNodeStart.properties).toEqual({ prop1: 'prop1' });
    expect(result.relationships.length).toBe(1);
  });

  test('should handle empty results', () => {
    const result = extractUniqueNodesAndRels([]);

    expect(result.limitHit).toBe(false);
    expect(result.nodes.length).toBe(0);
    expect(result.relationships.length).toBe(0);
  });
});
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import type {
2+
Integer,
3+
Node,
4+
Path,
5+
Record,
6+
RecordShape,
7+
Relationship,
8+
} from 'neo4j-driver';
9+
import { isNode, isPath, isRelationship } from 'neo4j-driver';
10+
import { CypherProperty } from '../data-types/cypher-data-types';
11+
12+
/** Property map used for the nodes and relationships in this module: string keys with `CypherProperty` values. */
export type Properties = RecordShape<string, CypherProperty>;
13+
14+
/**
 * Outcome of extracting graph entities from Neo4j records: every node and
 * relationship appears at most once.
 */
export type DeduplicatedNodesAndRels = {
  /** The unique nodes found in the records */
  nodes: Array<Node<Integer, Properties, string>>;
  /** The unique relationships found in the records */
  relationships: Array<Relationship<Integer, Properties, string>>;
  /** `true` when the max node limit was reached during extraction */
  limitHit: boolean;
};
25+
26+
/**
 * Extracts and deduplicates nodes and relationships from Neo4j query records.
 *
 * Every value of every record is walked recursively: nodes and relationships
 * are collected wherever they appear (directly, inside arrays, or as values of
 * nested objects), and paths are expanded into their start/end nodes and
 * segments. Entities are deduplicated by element ID; the first occurrence wins.
 *
 * Nodes found directly in the record values are registered before nodes that
 * only occur inside paths, so they are the ones kept when `nodeLimit` applies.
 *
 * @param records - Array of Neo4j records to process
 * @param options - Configuration options for extraction
 * @param options.nodeLimit - Maximum number of unique nodes to extract (optional).
 *   Once that many nodes are collected, further nodes are ignored and `limitHit`
 *   is set. A limit of `0` or less yields no nodes at all.
 * @param options.keepDanglingRels - Whether to keep relationships whose start/end nodes are missing (default: false)
 *
 * @returns The {@link DeduplicatedNodesAndRels} containing unique nodes and relationships
 */
export const extractUniqueNodesAndRels = (
  records: Record[],
  {
    nodeLimit,
    keepDanglingRels = false,
  }: { nodeLimit?: number; keepDanglingRels?: boolean } = {},
): DeduplicatedNodesAndRels => {
  let limitHit = false;

  // Collect the raw record values first; the Set drops values that are the
  // very same object reference.
  const items = new Set<unknown>();
  for (const record of records) {
    for (const key of record.keys) {
      items.add(record.get(key));
    }
  }

  // Paths are expanded only after the direct values have been walked.
  const paths: Path[] = [];

  const nodeMap = new Map<string, Node>();
  const nodeLimitReached = (): boolean =>
    typeof nodeLimit === 'number' && nodeMap.size >= nodeLimit;

  function addNode(n: Node) {
    if (limitHit) {
      return;
    }
    // Check before inserting as well, so that a limit of 0 (or any limit the
    // map size never equals exactly) is honoured instead of being ignored.
    if (nodeLimitReached()) {
      limitHit = true;
      return;
    }

    const id = n.elementId.toString();
    if (!nodeMap.has(id)) {
      nodeMap.set(id, n);
    }
    // Flag the limit as soon as it is reached, even if no further node follows.
    if (nodeLimitReached()) {
      limitHit = true;
    }
  }

  const relMap = new Map<string, Relationship>();
  function addRel(r: Relationship) {
    const id = r.elementId.toString();
    if (!relMap.has(id)) {
      relMap.set(id, r);
    }
  }

  const findAllEntities = (item: unknown) => {
    // Primitives and null cannot contain graph entities.
    if (typeof item !== 'object' || !item) {
      return;
    }

    if (isRelationship(item)) {
      addRel(item);
    } else if (isNode(item)) {
      addNode(item);
    } else if (isPath(item)) {
      paths.push(item);
    } else if (Array.isArray(item)) {
      item.forEach(findAllEntities);
    } else {
      // Any other object: look through its values.
      Object.values(item).forEach(findAllEntities);
    }
  };

  findAllEntities(Array.from(items));

  for (const path of paths) {
    addNode(path.start);
    addNode(path.end);
    for (const segment of path.segments) {
      addNode(segment.start);
      addNode(segment.end);
      addRel(segment.relationship);
    }
  }

  const nodes = Array.from(nodeMap.values());

  const relationships = Array.from(relMap.values()).filter((item) => {
    if (keepDanglingRels) {
      return true;
    }

    // We'd get dangling relationships from
    // match ()-[a:ACTED_IN]->() return a;
    // or from hitting the node limit
    const start = item.startNodeElementId.toString();
    const end = item.endNodeElementId.toString();
    return nodeMap.has(start) && nodeMap.has(end);
  });

  return { nodes, relationships, limitHit };
};

packages/query-tools/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
export { FRIENDLY_ERROR_MESSAGES } from './connectionErrorHandler';
22
export type { ConnectionError } from './connectionErrorHandler';
3+
export * from './cypher-execution/extract-unique-nodes-and-relationships';
34
export {
45
deserializeTypeAnnotations,
56
serializeTypeAnnotations,
@@ -19,6 +20,7 @@ export type {
1920
} from './metadataPoller';
2021
export type { Neo4jConnection, QueryResultWithLimit } from './neo4jConnection';
2122
export type { Database } from './queries/databases';
23+
export { graphResultTransformer } from './result-transformers/graph-result-transformer';
2224
export { Neo4jSchemaPoller } from './schemaPoller';
2325
export type { ConnnectionResult } from './schemaPoller';
2426
export type { CypherDataType } from './types/cypher-data-types';

0 commit comments

Comments
 (0)