Skip to content

Commit 05aad96

Browse files
authored
Reimplement file exclusion logic (#3507)
1 parent c4e37e4 commit 05aad96

File tree

8 files changed

+80
-142
lines changed

8 files changed

+80
-142
lines changed

.config/dictionary.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ parseable
247247
pathex
248248
pathlib
249249
pathspec
250+
pathspecs
250251
pbrun
251252
pfexec
252253
pickleable

.config/requirements.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ black>=22.8.0 # MIT
77
filelock>=3.3.0 # The Unlicense
88
jsonschema>=4.10.0 # MIT, version needed for improved errors
99
packaging>=21.3 # Apache-2.0,BSD-2-Clause
10+
pathspec>=0.9.0 # Mozilla Public License 2.0 (MPL 2.0)
1011
pyyaml>=5.4.1 # MIT (centos 9 has 5.3.1)
1112
rich>=12.0.0 # MIT
1213
ruamel.yaml>=0.17.0,<0.18,!=0.17.29,!=0.17.30 # MIT, next version is planned to have breaking changes

.github/workflows/tox.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ jobs:
5959
WSLENV: FORCE_COLOR:PYTEST_REQPASS:TOXENV:GITHUB_STEP_SUMMARY
6060
# Number of expected test passes, safety measure for accidental skip of
6161
# tests. Update value if you add/remove tests.
62-
PYTEST_REQPASS: 806
62+
PYTEST_REQPASS: 803
6363
steps:
6464
- name: Activate WSL1
6565
if: "contains(matrix.shell, 'wsl')"

src/ansiblelint/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,7 @@ def path_inject() -> None:
365365
# We do know that finding ansible in PATH does not guarantee that it is
366366
# functioning or that it is in fact the same version that was installed as
367367
# our dependency, but addressing this would be done by ansible-compat.
368-
for cmd in ("ansible", "git"):
368+
for cmd in ("ansible",):
369369
if not shutil.which(cmd):
370370
msg = f"Failed to find runtime dependency '{cmd}' in PATH"
371371
raise RuntimeError(msg)

src/ansiblelint/file_utils.py

Lines changed: 67 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,20 @@
44
import copy
55
import logging
66
import os
7-
import subprocess
87
import sys
9-
from collections import OrderedDict, defaultdict
8+
from collections import defaultdict
109
from contextlib import contextmanager
1110
from pathlib import Path
1211
from tempfile import NamedTemporaryFile
1312
from typing import TYPE_CHECKING, Any, cast
1413

14+
import pathspec
1515
import wcmatch.pathlib
1616
import wcmatch.wcmatch
1717
from yaml.error import YAMLError
1818

1919
from ansiblelint.config import BASE_KINDS, Options, options
20-
from ansiblelint.constants import CONFIG_FILENAMES, GIT_CMD, FileType, States
21-
from ansiblelint.logger import warn_or_fail
20+
from ansiblelint.constants import CONFIG_FILENAMES, FileType, States
2221

2322
if TYPE_CHECKING:
2423
from collections.abc import Iterator, Sequence
@@ -419,93 +418,22 @@ def data(self) -> Any:
419418

420419

421420
# pylint: disable=redefined-outer-name
422-
def discover_lintables(options: Options) -> dict[str, Any]:
421+
def discover_lintables(options: Options) -> list[str]:
423422
"""Find all files that we know how to lint.
424423
425424
Return format is normalized, relative for stuff below cwd, ~/ for content
426425
under current user and absolute for everything else.
427426
"""
428-
# git is preferred as it also considers .gitignore
429-
# As --recurse-submodules is incompatible with --others we need to run
430-
# twice to get combined results.
431-
commands = {
432-
"tracked": {
433-
"cmd": [
434-
*GIT_CMD,
435-
"ls-files",
436-
"--cached",
437-
"--exclude-standard",
438-
"--recurse-submodules",
439-
"-z",
440-
],
441-
"remove": False,
442-
},
443-
"others": {
444-
"cmd": [
445-
*GIT_CMD,
446-
"ls-files",
447-
"--cached",
448-
"--others",
449-
"--exclude-standard",
450-
"-z",
451-
],
452-
"remove": False,
453-
},
454-
"absent": {
455-
"cmd": [
456-
*GIT_CMD,
457-
"ls-files",
458-
"--deleted",
459-
"-z",
460-
],
461-
"remove": True,
462-
},
463-
}
464-
465-
out: set[str] = set()
466-
try:
467-
for k, value in commands.items():
468-
if not isinstance(value["cmd"], list):
469-
msg = f"Expected list but got {type(value['cmd'])}"
470-
raise TypeError(msg)
471-
result = subprocess.check_output(
472-
value["cmd"], # noqa: S603
473-
stderr=subprocess.STDOUT,
474-
text=True,
475-
).split("\x00")[:-1]
476-
_logger.info(
477-
"Discovered files to lint using git (%s): %s",
478-
k,
479-
" ".join(value["cmd"]),
480-
)
481-
out = out.union(result) if not value["remove"] else out - set(result)
482-
483-
except subprocess.CalledProcessError as exc:
484-
if not (exc.returncode == 128 and "fatal: not a git repository" in exc.output):
485-
err = exc.output.rstrip("\n")
486-
warn_or_fail(f"Failed to discover lintable files using git: {err}")
487-
except FileNotFoundError as exc:
488-
if options.verbosity:
489-
warn_or_fail(f"Failed to locate command: {exc}")
490-
491-
# Applying exclude patterns
492-
if not out:
493-
out = set(".")
494-
495-
exclude_pattern = "|".join(str(x) for x in options.exclude_paths)
496-
_logger.info("Looking up for files, excluding %s ...", exclude_pattern)
497-
# remove './' prefix from output of WcMatch
498-
out = {
499-
strip_dotslash_prefix(fname)
500-
for fname in wcmatch.wcmatch.WcMatch(
501-
".",
502-
exclude_pattern=exclude_pattern,
503-
flags=wcmatch.wcmatch.RECURSIVE,
504-
limit=256,
505-
).match()
506-
}
507-
508-
return OrderedDict.fromkeys(sorted(out))
427+
if not options.lintables:
428+
options.lintables = ["."]
429+
430+
return [
431+
str(filename)
432+
for filename in get_all_files(
433+
*[Path(s) for s in options.lintables],
434+
exclude_paths=options.exclude_paths,
435+
)
436+
]
509437

510438

511439
def strip_dotslash_prefix(fname: str) -> str:
@@ -598,3 +526,56 @@ def _guess_parent(lintable: Lintable) -> Lintable | None:
598526
except IndexError:
599527
pass
600528
return None
529+
530+
531+
def get_all_files(
532+
*paths: Path,
533+
exclude_paths: list[str] | None = None,
534+
) -> list[Path]:
535+
"""Recursively retrieve all files from given folders."""
536+
all_files: list[Path] = []
537+
exclude_paths = [] if exclude_paths is None else exclude_paths
538+
539+
def is_excluded(path_to_check: Path) -> bool:
540+
"""Check if a file is excluded by current specs."""
541+
return any(spec.match_file(str(path_to_check)) for spec in pathspecs)
542+
543+
for path in paths:
544+
pathspecs = [
545+
pathspec.GitIgnoreSpec.from_lines(
546+
[
547+
".git",
548+
".tox",
549+
".mypy_cache",
550+
"__pycache__",
551+
".DS_Store",
552+
".coverage",
553+
".pytest_cache",
554+
".ruff_cache",
555+
*exclude_paths,
556+
],
557+
),
558+
]
559+
gitignore = path / ".gitignore"
560+
if gitignore.exists():
561+
with gitignore.open(encoding="UTF-8") as f:
562+
_logger.info("Loading ignores from %s", gitignore)
563+
pathspecs.append(
564+
pathspec.GitIgnoreSpec.from_lines(f.read().splitlines()),
565+
)
566+
567+
# Iterate over all items in the directory
568+
if path.is_file():
569+
all_files.append(path)
570+
else:
571+
for item in sorted(path.iterdir()):
572+
if is_excluded(item):
573+
_logger.info("Excluded: %s", item)
574+
continue
575+
if item.is_file():
576+
all_files.append(item)
577+
# If it's a directory, recursively call the function
578+
elif item.is_dir():
579+
all_files.extend(get_all_files(item, exclude_paths=exclude_paths))
580+
581+
return all_files

test/test_file_utils.py

Lines changed: 7 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for file utility functions."""
22
from __future__ import annotations
33

4+
import copy
45
import logging
56
import os
67
import time
@@ -10,7 +11,6 @@
1011
import pytest
1112

1213
from ansiblelint import cli, file_utils
13-
from ansiblelint.__main__ import initialize_logger
1414
from ansiblelint.file_utils import (
1515
Lintable,
1616
cwd,
@@ -27,7 +27,6 @@
2727
from _pytest.logging import LogCaptureFixture
2828
from _pytest.monkeypatch import MonkeyPatch
2929

30-
from ansiblelint.config import Options
3130
from ansiblelint.constants import FileType
3231
from ansiblelint.rules import RulesCollection
3332

@@ -73,39 +72,7 @@ def test_expand_paths_vars(
7372
assert expand_paths_vars([test_path]) == [expected] # type: ignore[list-item]
7473

7574

76-
@pytest.mark.parametrize(
77-
("reset_env_var", "message_prefix"),
78-
(
79-
# simulate absence of git command
80-
("PATH", "Failed to locate command: "),
81-
# simulate a missing git repo
82-
("GIT_DIR", "Looking up for files"),
83-
),
84-
ids=("no-git-cli", "outside-git-repo"),
85-
)
86-
def test_discover_lintables_git_verbose(
87-
reset_env_var: str,
88-
message_prefix: str,
89-
monkeypatch: MonkeyPatch,
90-
caplog: LogCaptureFixture,
91-
) -> None:
92-
"""Ensure that autodiscovery lookup failures are logged."""
93-
options = cli.get_config(["-v"])
94-
initialize_logger(options.verbosity)
95-
monkeypatch.setenv(reset_env_var, "")
96-
file_utils.discover_lintables(options)
97-
98-
assert any(m[2].startswith("Looking up for files") for m in caplog.record_tuples)
99-
assert any(m.startswith(message_prefix) for m in caplog.messages)
100-
101-
102-
@pytest.mark.parametrize(
103-
"is_in_git",
104-
(True, False),
105-
ids=("in Git", "outside Git"),
106-
)
10775
def test_discover_lintables_silent(
108-
is_in_git: bool,
10976
monkeypatch: MonkeyPatch,
11077
capsys: CaptureFixture[str],
11178
caplog: LogCaptureFixture,
@@ -119,16 +86,16 @@ def test_discover_lintables_silent(
11986
caplog.set_level(logging.FATAL)
12087
options = cli.get_config([])
12188
test_dir = Path(__file__).resolve().parent
122-
lint_path = test_dir / ".." / "examples" / "roles" / "test-role"
123-
if not is_in_git:
124-
monkeypatch.setenv("GIT_DIR", "")
89+
lint_path = (test_dir / ".." / "examples" / "roles" / "test-role").resolve()
12590

12691
yaml_count = len(list(lint_path.glob("**/*.yml"))) + len(
12792
list(lint_path.glob("**/*.yaml")),
12893
)
12994

13095
monkeypatch.chdir(str(lint_path))
131-
files = file_utils.discover_lintables(options)
96+
my_options = copy.deepcopy(options)
97+
my_options.lintables = [str(lint_path)]
98+
files = file_utils.discover_lintables(my_options)
13299
stderr = capsys.readouterr().err
133100
assert (
134101
not stderr
@@ -144,7 +111,7 @@ def test_discover_lintables_umlaut(monkeypatch: MonkeyPatch) -> None:
144111
"""Verify that filenames containing German umlauts are not garbled by the discover_lintables."""
145112
options = cli.get_config([])
146113
test_dir = Path(__file__).resolve().parent
147-
lint_path = test_dir / ".." / "examples" / "playbooks"
114+
lint_path = (test_dir / ".." / "examples" / "playbooks").resolve()
148115

149116
monkeypatch.chdir(str(lint_path))
150117
files = file_utils.discover_lintables(options)
@@ -293,23 +260,13 @@ def test_discover_lintables_umlaut(monkeypatch: MonkeyPatch) -> None:
293260
), # content should determine it as a play
294261
),
295262
)
296-
def test_kinds(monkeypatch: MonkeyPatch, path: str, kind: FileType) -> None:
263+
def test_kinds(path: str, kind: FileType) -> None:
297264
"""Verify auto-detection logic based on DEFAULT_KINDS."""
298-
options = cli.get_config([])
299-
300-
# pylint: disable=unused-argument
301-
def mockreturn(options: Options) -> dict[str, Any]: # noqa: ARG001
302-
return {normpath(path): kind}
303-
304265
# assert Lintable is able to determine file type
305266
lintable_detected = Lintable(path)
306267
lintable_expected = Lintable(path, kind=kind)
307268
assert lintable_detected == lintable_expected
308269

309-
monkeypatch.setattr(file_utils, "discover_lintables", mockreturn)
310-
result = file_utils.discover_lintables(options)
311-
assert lintable_detected.kind == result[lintable_expected.name]
312-
313270

314271
def test_find_project_root_1(tmp_path: Path) -> None:
315272
"""Verify find_project_root()."""

test/test_runner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ def test_runner_exclude_globs(
9999
)
100100

101101
matches = runner.run()
102-
# we expect to find one 2 matches from the very few .yaml file we have there (most of them have .yml extension)
103-
assert len(matches) == 2
102+
# we expect to find one match from the very few .yaml files we have there (most of them have the .yml extension)
103+
assert len(matches) == 1
104104

105105

106106
@pytest.mark.parametrize(

test/test_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,6 @@ def test_cli_auto_detect(capfd: CaptureFixture[str]) -> None:
300300

301301
out, err = capfd.readouterr()
302302

303-
# Confirmation that it runs in auto-detect mode
304-
assert "Discovered files to lint using git" in err
305303
# An expected rule match from our examples
306304
assert (
307305
"examples/playbooks/empty_playbook.yml:1:1: "

0 commit comments

Comments
 (0)