Skip to content

test: create e2e environment; stop testing spacy in unit tests #9212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 11, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
run: pip install hatch==${{ env.HATCH_VERSION }}

- name: Run tests
run: hatch run test:e2e
run: hatch run e2e:test

- name: Send event to Datadog
if: failure() && github.event_name == 'schedule'
Expand Down
4 changes: 2 additions & 2 deletions haystack/components/extractors/named_entity_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
from transformers import Pipeline as HfPipeline

with LazyImport(message="Run 'pip install spacy'") as spacy_import:
import spacy
from spacy import Language as SpacyPipeline
import spacy # pylint: disable=import-error
from spacy import Language as SpacyPipeline # pylint: disable=import-error


class NamedEntityExtractorBackend(Enum):
Expand Down
28 changes: 19 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ format = "ruff format {args}"
format-check = "ruff format --check {args}"

[tool.hatch.envs.test]
extra-dependencies = [
# this is a self-referential environment: we don't need to inherit from default
# https://hatch.pypa.io/1.13/config/environment/overview/#self-referential-environments
template = "test"

dependencies = [
"numpy>=2", # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x
"numba>=0.54.0", # This pin helps uv resolve the dependency tree. See https://github.com/astral-sh/uv/issues/7881

Expand All @@ -95,13 +99,6 @@ extra-dependencies = [
"openai-whisper>=20231106", # LocalWhisperTranscriber
"arrow>=1.3.0", # Jinja2TimeExtension

# NamedEntityExtractor
"spacy>=3.8,<3.9",
"spacy-curated-transformers>=0.2,<=0.3",
Copy link
Member Author

@anakin87 anakin87 Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`spacy-curated-transformers>=0.2,<=0.3`: this dependency is no longer needed.

"en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",
# spacy requires thinc, which depends on blis. We pin blis because version 1.2.1 does not have wheels for python 3.9
# and compiling it from source takes much time.
"blis<1.2.1; python_version < '3.10'",

# Converters
"pypdf", # PyPDFToDocument
Expand Down Expand Up @@ -161,14 +158,27 @@ extra-dependencies = [
]

[tool.hatch.envs.test.scripts]
e2e = "pytest e2e"
unit = 'pytest --cov-report xml:coverage.xml --cov="haystack" -m "not integration" {args:test}'
integration = 'pytest --maxfail=5 -m "integration" {args:test}'
integration-mac = 'pytest --maxfail=5 -m "integration" -k "not tika" {args:test}'
integration-windows = 'pytest --maxfail=5 -m "integration" -k "not tika" {args:test}'
types = "mypy --install-types --non-interactive --cache-dir=.mypy_cache/ {args:haystack}"
lint = "pylint -ry -j 0 {args:haystack}"

[tool.hatch.envs.e2e]
template = "test"
extra-dependencies = [
# NamedEntityExtractor
"spacy>=3.8,<3.9",
"en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl",
# spacy requires thinc, which depends on blis. We pin blis because version 1.2.1 does not have wheels for python 3.9
# and compiling it from source takes much time.
"blis<1.2.1; python_version < '3.10'",
]

[tool.hatch.envs.e2e.scripts]
test = "pytest e2e"

[tool.hatch.envs.readme]
installer = "uv"
detached = true # To avoid installing the dependencies from the default environment
Expand Down
9 changes: 5 additions & 4 deletions test/components/extractors/test_named_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

# Note: We do not test the Spacy backend in this module.
# Spacy is not installed in the test environment to keep the CI fast.
# We test the Spacy backend in e2e/pipelines/test_named_entity_extractor.py.

from haystack.utils.auth import Secret
import pytest

Expand All @@ -17,10 +22,6 @@ def test_named_entity_extractor_backend():

_ = NamedEntityExtractor(backend="hugging_face", model="dslim/bert-base-NER")

_ = NamedEntityExtractor(backend=NamedEntityExtractorBackend.SPACY, model="en_core_web_sm")

_ = NamedEntityExtractor(backend="spacy", model="en_core_web_sm")

with pytest.raises(ComponentError, match=r"Invalid backend"):
NamedEntityExtractor(backend="random_backend", model="dslim/bert-base-NER")

Expand Down
Loading