Skip to content

Commit 2175362

Browse files
authored
Merge pull request #433 from dyashuni/filter_warning
Add code comments that python filter works slow in multi-threaded mode
2 parents d86f8f9 + 32f4b02 commit 2175362

File tree

6 files changed

+11
-5
lines changed

6 files changed

+11
-5
lines changed

.github/workflows/build.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ jobs:
2020

2121
- name: Test
2222
timeout-minutes: 15
23-
run: python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py"
23+
run: |
24+
python -m unittest discover -v --start-directory examples --pattern "example*.py"
25+
python -m unittest discover -v --start-directory tests/python --pattern "bindings_test*.py"
2426
2527
test_cpp:
2628
runs-on: ${{matrix.os}}

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ For other spaces use the nmslib library https://github.com/nmslib/nmslib.
8080
* `knn_query(data, k = 1, num_threads = -1, filter = None)` make a batch query for `k` closest elements for each element of the
8181
* `data` (shape:`N*dim`). Returns a numpy array of (shape:`N*k`).
8282
* `num_threads` sets the number of cpu threads to use (-1 means use default).
83-
* `filter` filters elements by its labels, returns elements with allowed ids
83+
* `filter` filters elements by their labels and returns only elements with allowed ids. Note that search with a filter is slow in Python in multithreaded mode, so it is recommended to set `num_threads=1`.
8484
* Thread-safe with other `knn_query` calls, but not with `add_items`.
8585

8686
* `load_index(path_to_index, max_elements = 0, allow_replace_deleted = False)` loads the index from persistence to the uninitialized index.

examples/EXAMPLES.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ print("Querying only even elements")
147147
# Define filter function that allows only even ids
148148
filter_function = lambda idx: idx%2 == 0
149149
# Query the elements for themselves and search only for even elements:
150-
labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function)
150+
# Warning: search with a Python filter is slow in multithreaded mode, so we set num_threads=1
151+
labels, distances = hnsw_index.knn_query(data, k=1, num_threads=1, filter=filter_function)
151152
# labels contain only elements with even id
152153
```
153154

examples/example_filter.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -41,5 +41,6 @@
4141
# Define filter function that allows only even ids
4242
filter_function = lambda idx: idx%2 == 0
4343
# Query the elements for themselves and search only for even elements:
44-
labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function)
44+
# Warning: search with a filter is slow in Python in multithreaded mode, so we set num_threads=1
45+
labels, distances = hnsw_index.knn_query(data, k=1, num_threads=1, filter=filter_function)
4546
# labels contain only elements with even id

python_bindings/bindings.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,7 @@ class Index {
623623
data_numpy_l = new hnswlib::labeltype[rows * k];
624624
data_numpy_d = new dist_t[rows * k];
625625

626+
// Warning: search with a filter is slow in Python in multithreaded mode. For best performance, set num_threads=1.
626627
CustomFilterFunctor idFilter(filter);
627628
CustomFilterFunctor* p_idFilter = filter ? &idFilter : nullptr;
628629

tests/python/bindings_test_filter.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ def testRandomSelf(self):
4747
print("Querying only even elements")
4848
# Query the even elements for themselves and measure recall:
4949
filter_function = lambda id: id%2 == 0
50-
labels, distances = hnsw_index.knn_query(data, k=1, filter=filter_function)
50+
# Warning: search with a filter is slow in Python in multithreaded mode, so we set num_threads=1
51+
labels, distances = hnsw_index.knn_query(data, k=1, num_threads=1, filter=filter_function)
5152
self.assertAlmostEqual(np.mean(labels.reshape(-1) == np.arange(len(data))), .5, 3)
5253
# Verify that there are only even elements:
5354
self.assertTrue(np.max(np.mod(labels, 2)) == 0)

0 commit comments

Comments
 (0)