4
4
# SPDX-License-Identifier: Apache-2.0
5
5
6
6
7
+ import os
8
+ import re
7
9
from enum import Enum
8
10
from pathlib import Path
9
11
@@ -77,7 +79,7 @@ def _prepare_files_labels(
77
79
return filenames , labels
78
80
79
81
80
- def _resolve_path (folder : str | Path , root : str | Path | None = None ) -> Path :
82
+ def resolve_path (folder : str | Path , root : str | Path | None = None ) -> Path :
81
83
"""Combine root and folder and returns the absolute path.
82
84
83
85
This allows users to pass either a root directory and relative paths, or absolute paths to each of the
@@ -98,3 +100,147 @@ def _resolve_path(folder: str | Path, root: str | Path | None = None) -> Path:
98
100
# root provided; prepend root and return absolute path
99
101
path = (Path (root ) / folder ).resolve ()
100
102
return path
103
+
104
+
105
+ def is_path_too_long (path : str | Path , max_length : int = 512 ) -> bool :
106
+ r"""Check if the path contains too long input.
107
+
108
+ Args:
109
+ path (str | Path): Path to check.
110
+ max_length (int): Maximum length a path can be before it is considered too long.
111
+ Defaults to ``512``.
112
+
113
+ Returns:
114
+ bool: True if the path contains too long input, False otherwise.
115
+
116
+ Examples:
117
+ >>> contains_too_long_input("./datasets/MVTec/bottle/train/good/000.png")
118
+ False
119
+
120
+ >>> contains_too_long_input("./datasets/MVTec/bottle/train/good/000.png" + "a" * 4096)
121
+ True
122
+ """
123
+ return len (str (path )) > max_length
124
+
125
+
126
+ def contains_non_printable_characters (path : str | Path ) -> bool :
127
+ r"""Check if the path contains non-printable characters.
128
+
129
+ Args:
130
+ path (str | Path): Path to check.
131
+
132
+ Returns:
133
+ bool: True if the path contains non-printable characters, False otherwise.
134
+
135
+ Examples:
136
+ >>> contains_non_printable_characters("./datasets/MVTec/bottle/train/good/000.png")
137
+ False
138
+
139
+ >>> contains_non_printable_characters("./datasets/MVTec/bottle/train/good/000.png\0")
140
+ True
141
+ """
142
+ printable_pattern = re .compile (r"^[\x20-\x7E]+$" )
143
+ return not printable_pattern .match (str (path ))
144
+
145
+
146
def validate_path(path: str | Path, base_dir: str | Path | None = None) -> Path:
    """Validate the path.

    The path is checked for its type, length and printable characters. It is then resolved to an
    absolute path, which must be located inside ``base_dir``, must exist, and must be readable
    or executable.

    Args:
        path (str | Path): Path to validate.
        base_dir (str | Path | None): Base directory to restrict file access.
            Defaults to ``None``, in which case the home directory of the user is used.

    Returns:
        Path: Validated path, resolved to an absolute path.

    Raises:
        TypeError: If ``path`` is neither a ``str`` nor a ``Path``.
        ValueError: If the path is too long, contains non-printable characters, or is located
            outside ``base_dir``.
        FileNotFoundError: If the resolved path does not exist.
        PermissionError: If the resolved path is neither readable nor executable.

    Examples:
        >>> validate_path("./datasets/MVTec/bottle/train/good/000.png")
        PosixPath('/abs/path/to/anomalib/datasets/MVTec/bottle/train/good/000.png')

        >>> validate_path("./datasets/MVTec/bottle/train/good/000.png", base_dir="./datasets/MVTec")
        PosixPath('/abs/path/to/anomalib/datasets/MVTec/bottle/train/good/000.png')

        Path to an outside file/directory should raise ValueError:

        >>> validate_path("/usr/local/lib")
        Traceback (most recent call last):
        ...
        ValueError: Access denied: Path is outside the allowed directory

        Path to a non-existing file should raise FileNotFoundError:

        >>> validate_path("/path/to/unexisting/file", base_dir="/path/to")
        Traceback (most recent call last):
        ...
        FileNotFoundError: Path does not exist: /path/to/unexisting/file

        Accessing a path without read or execute permission should raise PermissionError:

        .. note::

            ``/path/to/protected/file`` is a placeholder. Replace it with an existing path
            inside ``base_dir`` for which you have neither read nor execute permission.
            If the path does not exist on your system, this will raise
            ``FileNotFoundError`` instead of ``PermissionError``.

        >>> validate_path("/path/to/protected/file", base_dir="/path/to/protected")
        Traceback (most recent call last):
        ...
        PermissionError: Read or execute permissions denied for the path: /path/to/protected/file
    """
    # Check if the path is of an appropriate type
    if not isinstance(path, str | Path):
        msg = f"Expected str, bytes or os.PathLike object, not {type(path).__name__}"
        raise TypeError(msg)

    # Check if the path is too long
    if is_path_too_long(path):
        msg = f"Path is too long: {path}"
        raise ValueError(msg)

    # Check if the path contains non-printable characters
    if contains_non_printable_characters(path):
        msg = f"Path contains non-printable characters: {path}"
        raise ValueError(msg)

    # Sanitize paths. Both sides are resolved so that symlinks and ``..`` components cannot
    # make the comparison below disagree (e.g. when the home directory itself is a symlink).
    path = Path(path).resolve()
    base_dir = Path(base_dir).resolve() if base_dir else Path.home().resolve()

    # Check if the resolved path is within the base directory. The comparison is done on path
    # components rather than on string prefixes; a prefix check would wrongly accept a sibling
    # such as ``/data2/file`` for the base directory ``/data``.
    if not path.is_relative_to(base_dir):
        msg = "Access denied: Path is outside the allowed directory"
        raise ValueError(msg)

    # Check if the path exists
    if not path.exists():
        msg = f"Path does not exist: {path}"
        raise FileNotFoundError(msg)

    # Check the permissions: the path is rejected only when it is neither readable nor executable
    if not (os.access(path, os.R_OK) or os.access(path, os.X_OK)):
        msg = f"Read or execute permissions denied for the path: {path}"
        raise PermissionError(msg)

    return path
229
+
230
+
231
def validate_and_resolve_path(
    folder: str | Path,
    root: str | Path | None = None,
    base_dir: str | Path | None = None,
) -> Path:
    """Resolve ``folder`` against ``root`` and validate the resulting path.

    The path is first built with ``resolve_path(folder, root)`` and the result is then passed
    to ``validate_path`` together with ``base_dir``.

    Args:
        folder (str | Path): Folder location containing image or mask data.
        root (str | Path | None): Root directory for the dataset.
        base_dir (str | Path | None): Base directory to restrict file access.

    Returns:
        Path: Validated and resolved path.
    """
    resolved_path = resolve_path(folder, root)
    return validate_path(resolved_path, base_dir)
0 commit comments