Skip to content

Commit 36e03a3

Browse files
committed
Revert unlabeled auto train detection. Fix integration tests
1 parent 2a3e2c2 commit 36e03a3

File tree

15 files changed

+70
-104
lines changed

15 files changed

+70
-104
lines changed

otx/cli/manager/config_manager.py

+35-36
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
# Copyright (C) 2023 Intel Corporation
44
# SPDX-License-Identifier: Apache-2.0
55
#
6+
import logging
67
import os
78
import shutil
89
from datetime import datetime
910
from pathlib import Path
1011
from typing import Any, Dict, List, Optional
11-
import logging
1212

1313
from datumaro.components.dataset import Dataset
1414
from datumaro.components.dataset_base import IDataset
@@ -251,73 +251,72 @@ def _configure_train_type(self):
251251
If unlabeled_images presented in dataset structure and it is sufficient to start Semi-SL -> Semi-SL
252252
Otherwise set Incremental training type.
253253
"""
254-
255-
def _check_is_only_images(dir):
256-
"""Check if a directory contains only images."""
254+
def _count_imgs_in_dir(dir, recursive=False):
255+
"""Count the number of images in a directory, optionally recursing into subdirectories."""
257256
import glob
258257
valid_suff = ["jpg", "png", "jpeg", "gif"]
259258
num_valid_imgs = 0
260-
for files in glob.iglob(f'{dir}/*'):
259+
for files in glob.iglob(f"{dir}/**", recursive=recursive):
261260
suff = files.split(".")[-1]
262261
if suff.lower() in valid_suff:
263262
num_valid_imgs += 1
264-
return num_valid_imgs > 0
265263

266-
def _check_semisl_requirements(train_dir, unlabeled_dir, thershold=0.07):
264+
return num_valid_imgs
265+
266+
def _check_semisl_requirements(unlabeled_dir):
267267
"""Check if quantity of unlabeled images is sufficient for Semi-SL learning."""
268268
if unlabeled_dir is None:
269-
unlabeled_folder_name = [
270-
os.path.join(train_dir, item)
271-
for item in os.listdir(train_dir)
272-
if (item.startswith("unlabeled") and os.path.isdir(os.path.join(train_dir, item)))
273-
]
274-
if unlabeled_folder_name:
275-
unlabeled_valid_path = unlabeled_folder_name[0]
276-
else:
277-
return False
278-
else:
279-
if not os.path.isdir(unlabeled_dir) or not os.listdir(unlabeled_dir):
280-
raise ValueError(
281-
"unlabeled-data-roots isn't a directory, it doesn't exist or it is empty. "
282-
"Please, check command line and directory path."
283-
)
284-
unlabeled_valid_path = unlabeled_dir
269+
return False
285270

286-
all_unlabeled_images = len(os.listdir(unlabeled_valid_path))
287-
all_train_images = len(os.listdir(train_dir))
271+
if not os.path.isdir(unlabeled_dir) or not os.listdir(unlabeled_dir):
272+
raise ValueError(
273+
"unlabeled-data-roots isn't a directory, it doesn't exist or it is empty. "
274+
"Please, check command line and directory path."
275+
)
276+
277+
all_unlabeled_images = _count_imgs_in_dir(unlabeled_dir, recursive=True)
288278
# check if number of unlabeled images is more than relative threshold
289-
if all_unlabeled_images > 1 and all_unlabeled_images >= thershold * all_train_images:
290-
return unlabeled_valid_path
279+
if all_unlabeled_images > 1:
280+
return unlabeled_dir
291281

292282
logging.warning(
293-
"WARNING: There are none or too litle images to start Semi-SL training. "
294-
"It should be more than relative threshold (at least 7% of labeled images) "
295-
"Start Supervised training instead."
296-
)
283+
"WARNING: There are none or too litle images to start Semi-SL training. "
284+
"It should be more than relative threshold (at least 7% of labeled images) "
285+
"Start Supervised training instead."
286+
)
297287

298288
# if user explicitly passed train type via args
299289
if self.args.train_type is not None:
300290
self.train_type = self.args.train_type
301291
return
302292

303-
path_to_train_data = Path(self.args.train_data_roots)
304-
if not Path.is_dir(path_to_train_data) or not os.listdir(path_to_train_data):
293+
if self.mode == "build" and self.args.train_data_roots is None:
294+
# Case when we want to build an environment with a template but without a dataset path
295+
# Set train_type to Incremental by default
296+
self.train_type = "Incremental"
297+
return
298+
299+
if (
300+
self.args.train_data_roots is None
301+
or not os.path.isdir(self.args.train_data_roots)
302+
or not os.listdir(self.args.train_data_roots)
303+
):
305304
raise ValueError(
306305
"train-data-roots isn't a directory, it doesn't exist or it is empty. "
307306
"Please, check command line and directory path."
308307
)
309308

310-
if _check_is_only_images(path_to_train_data):
309+
if _count_imgs_in_dir(self.args.train_data_roots):
311310
# If train folder with images only was passed to args
312311
# Then we start self-supervised training
313312
print("[*] Selfsupervised training type detected")
314313
self.train_type = "Selfsupervised"
315314
return
316315

317316
# if user explicitly passed unlabeled images folder
318-
valid_unlabeled_path = _check_semisl_requirements(path_to_train_data, self.args.unlabeled_data_roots)
317+
valid_unlabeled_path = _check_semisl_requirements(self.args.unlabeled_data_roots)
319318
if valid_unlabeled_path:
320-
print(f"[*] Semisupervised training type detected with unalabeled data: {valid_unlabeled_path}")
319+
print(f"[*] Semisupervised training type detected with unlabeled data: {valid_unlabeled_path}")
321320
self.train_type = "Semisupervised"
322321
return
323322

Binary file not shown.
Binary file not shown.
Binary file not shown.

tests/integration/cli/classification/test_classification.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -50,15 +50,8 @@
5050
# Warmstart using data w/ 'intel', 'openvino', 'opencv' classes
5151
args_selfsl = {
5252
"--train-data-roots": "tests/assets/classification_dataset",
53-
"train_params": [
54-
"params",
55-
"--learning_parameters.num_iters",
56-
"1",
57-
"--learning_parameters.batch_size",
58-
"4",
59-
"--algo_backend.train_type",
60-
"Selfsupervised",
61-
],
53+
"--train-type": "Selfsupervised",
54+
"train_params": ["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "4"],
6255
}
6356

6457
# Training params for resume, num_iters*2
@@ -241,8 +234,9 @@ def test_otx_train_semisl(self, template, tmp_dir_path):
241234
tmp_dir_path = tmp_dir_path / "multi_class_cls/test_semisl"
242235
args_semisl = copy.deepcopy(args)
243236
args_semisl["--unlabeled-data-roots"] = args["--train-data-roots"]
244-
args_semisl["train_params"].extend(["--algo_backend.train_type", "Semisupervised"])
245237
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl)
238+
template_dir = get_template_dir(template, tmp_dir_path)
239+
assert os.path.exists(f"{template_dir}/semisl")
246240

247241
@e2e_pytest_component
248242
@pytest.mark.skipif(MULTI_GPU_UNAVAILABLE, reason="The number of gpu is insufficient")
@@ -251,15 +245,18 @@ def test_otx_multi_gpu_train_semisl(self, template, tmp_dir_path):
251245
tmp_dir_path = tmp_dir_path / "multi_class_cls/test_multi_gpu_semisl"
252246
args_semisl_multigpu = copy.deepcopy(args)
253247
args_semisl_multigpu["--unlabeled-data-roots"] = args["--train-data-roots"]
254-
args_semisl_multigpu["train_params"].extend(["--algo_backend.train_type", "Semisupervised"])
255248
args_semisl_multigpu["--gpus"] = "0,1"
256249
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl_multigpu)
250+
template_dir = get_template_dir(template, tmp_dir_path)
251+
assert os.path.exists(f"{template_dir}/semisl")
257252

258253
@e2e_pytest_component
259254
@pytest.mark.parametrize("template", default_templates, ids=default_templates_ids)
260255
def test_otx_train_selfsl(self, template, tmp_dir_path):
261256
tmp_dir_path = tmp_dir_path / "multi_class_cls/test_selfsl"
262257
otx_train_testing(template, tmp_dir_path, otx_dir, args_selfsl)
258+
template_dir = get_template_dir(template, tmp_dir_path)
259+
assert os.path.exists(f"{template_dir}/selfsl")
263260

264261
@e2e_pytest_component
265262
@pytest.mark.skipif(MULTI_GPU_UNAVAILABLE, reason="The number of gpu is insufficient")
@@ -269,6 +266,8 @@ def test_otx_multi_gpu_train_selfsl(self, template, tmp_dir_path):
269266
args_selfsl_multigpu = copy.deepcopy(args_selfsl)
270267
args_selfsl_multigpu["--gpus"] = "0,1"
271268
otx_train_testing(template, tmp_dir_path, otx_dir, args_selfsl_multigpu)
269+
template_dir = get_template_dir(template, tmp_dir_path)
270+
assert os.path.exists(f"{template_dir}/selfsl")
272271

273272
@e2e_pytest_component
274273
@pytest.mark.parametrize("template", templates, ids=templates_ids)
@@ -426,8 +425,9 @@ def test_otx_train_semisl(self, template, tmp_dir_path):
426425
tmp_dir_path = tmp_dir_path / "multi_label_cls" / "test_semisl"
427426
args_semisl = copy.deepcopy(args_m)
428427
args_semisl["--unlabeled-data-roots"] = args_m["--train-data-roots"]
429-
args_semisl["train_params"].extend(["--algo_backend.train_type", "Semisupervised"])
430428
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl)
429+
template_dir = get_template_dir(template, tmp_dir_path)
430+
assert os.path.exists(f"{template_dir}/semisl")
431431

432432

433433
args_h = {

tests/integration/cli/detection/test_detection.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,7 @@
4444
"--test-data-roots": "tests/assets/car_tree_bug",
4545
"--unlabeled-data-roots": "tests/assets/car_tree_bug",
4646
"--input": "tests/assets/car_tree_bug/images/train",
47-
"train_params": [
48-
"params",
49-
"--learning_parameters.num_iters",
50-
"1",
51-
"--learning_parameters.batch_size",
52-
"4",
53-
"--algo_backend.train_type",
54-
"Semisupervised",
55-
],
47+
"train_params": ["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "4"],
5648
}
5749

5850
# Training params for resume, num_iters*2
@@ -206,6 +198,8 @@ def test_otx_multi_gpu_train(self, template, tmp_dir_path):
206198
def test_otx_train_semisl(self, template, tmp_dir_path):
207199
tmp_dir_path = tmp_dir_path / "detection/test_semisl"
208200
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl)
201+
template_dir = get_template_dir(template, tmp_dir_path)
202+
assert os.path.exists(f"{template_dir}/semisl")
209203

210204
@e2e_pytest_component
211205
@pytest.mark.skipif(MULTI_GPU_UNAVAILABLE, reason="The number of gpu is insufficient")
@@ -215,6 +209,8 @@ def test_otx_multi_gpu_train_semisl(self, template, tmp_dir_path):
215209
args_semisl_multigpu = copy.deepcopy(args_semisl)
216210
args_semisl_multigpu["--gpus"] = "0,1"
217211
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl_multigpu)
212+
template_dir = get_template_dir(template, tmp_dir_path)
213+
assert os.path.exists(f"{template_dir}/semisl")
218214

219215
@e2e_pytest_component
220216
@pytest.mark.parametrize("template", default_templates, ids=default_templates_ids)

tests/integration/cli/semantic_segmentation/test_segmentation.py

+5-30
Original file line numberDiff line numberDiff line change
@@ -46,36 +46,14 @@
4646
"--val-data-roots": "tests/assets/common_semantic_segmentation_dataset/val",
4747
"--test-data-roots": "tests/assets/common_semantic_segmentation_dataset/val",
4848
"--unlabeled-data-roots": "tests/assets/common_semantic_segmentation_dataset/train",
49-
"train_params": [
50-
"params",
51-
"--learning_parameters.num_iters",
52-
"1",
53-
"--learning_parameters.batch_size",
54-
"4",
55-
"--algo_backend.train_type",
56-
"Semisupervised",
57-
],
58-
}
59-
60-
args_semisl_auto_config = {
61-
"--train-data-roots": "tests/assets/common_semantic_segmentation_dataset/train_with_unlabeled",
62-
"--val-data-roots": "tests/assets/common_semantic_segmentation_dataset/val",
63-
"--test-data-roots": "tests/assets/common_semantic_segmentation_dataset/val",
6449
"train_params": ["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "4"],
6550
}
6651

6752
args_selfsl = {
6853
"--train-data-roots": "tests/assets/common_semantic_segmentation_dataset/train",
6954
"--input": "tests/assets/segmentation/custom/images/training",
70-
"train_params": [
71-
"params",
72-
"--learning_parameters.num_iters",
73-
"1",
74-
"--learning_parameters.batch_size",
75-
"4",
76-
"--algo_backend.train_type",
77-
"Selfsupervised",
78-
],
55+
"--train-type": "Selfsupervised",
56+
"train_params": ["params", "--learning_parameters.num_iters", "1", "--learning_parameters.batch_size", "4"],
7957
}
8058

8159
# Training params for resume, num_iters*2
@@ -205,12 +183,6 @@ def test_otx_multi_gpu_train(self, template, tmp_dir_path):
205183
def test_otx_train_semisl(self, template, tmp_dir_path):
206184
tmp_dir_path = tmp_dir_path / "segmentation/test_semisl"
207185
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl)
208-
209-
@e2e_pytest_component
210-
@pytest.mark.parametrize("template", templates_inc_segnext, ids=templates_ids_inc_segnext)
211-
def test_otx_train_semisl_auto_config(self, template, tmp_dir_path):
212-
tmp_dir_path = tmp_dir_path / "segmentation/test_semisl_auto"
213-
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl_auto_config)
214186
template_dir = get_template_dir(template, tmp_dir_path)
215187
# Check that semi-sl launched
216188
assert os.path.exists(f"{template_dir}/semisl")
@@ -223,6 +195,9 @@ def test_otx_multi_gpu_train_semisl(self, template, tmp_dir_path):
223195
args_semisl_multigpu = copy.deepcopy(args_semisl)
224196
args_semisl_multigpu["--gpus"] = "0,1"
225197
otx_train_testing(template, tmp_dir_path, otx_dir, args_semisl_multigpu)
198+
template_dir = get_template_dir(template, tmp_dir_path)
199+
# Check that semi-sl launched
200+
assert os.path.exists(f"{template_dir}/semisl")
226201

227202
@e2e_pytest_component
228203
@pytest.mark.parametrize("template", templates, ids=templates_ids)

tests/test_suite/run_test_command.py

+2
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ def otx_train_testing(template, root, otx_dir, args, deterministic=True):
140140
command_line.extend(["--gpus", args["--gpus"]])
141141
if "--multi-gpu-port" in args:
142142
command_line.extend(["--multi-gpu-port", args["--multi-gpu-port"]])
143+
if "--train-type" in args:
144+
command_line.extend(["--train-type", args["--train-type"]])
143145
if deterministic:
144146
command_line.extend(["--deterministic"])
145147
if "train_params" in args:

tests/unit/cli/manager/test_config_manager.py

+11-17
Original file line numberDiff line numberDiff line change
@@ -502,23 +502,17 @@ def test_auto_semisl_detection(self, mocker):
502502
config_manager.args.unlabeled_data_roots = "non_exist_dir"
503503
with pytest.raises(ValueError):
504504
config_manager._get_train_type(ignore_args=False)
505-
try:
506-
config_manager.args.unlabeled_data_roots = None
507-
os.mkdir("tests/assets/classification_dataset/unlabeled_images")
508-
# unlabeled root is empty
509-
config_manager.args.train_data_roots = "tests/assets/classification_dataset"
510-
assert config_manager._get_train_type(ignore_args=False) == "Incremental"
511-
Path('tests/assets/classification_dataset/unlabeled_images/file.jpg').touch()
512-
# number of images in unlabeled root is insufficient
513-
assert config_manager._get_train_type(ignore_args=False) == "Incremental"
514-
config_manager.args.unlabeled_data_roots = "tests/assets/classification_dataset/unlabeled_images"
515-
assert config_manager._get_train_type(ignore_args=False) == "Incremental"
516-
# number of images in unlabeled root is sufficient
517-
Path('tests/assets/classification_dataset/unlabeled_images/file2.jpg').touch()
518-
Path('tests/assets/classification_dataset/unlabeled_images/file3.jpg').touch()
519-
assert config_manager._get_train_type(ignore_args=False) == "Semisupervised"
520-
finally:
521-
shutil.rmtree("tests/assets/classification_dataset/unlabeled_images")
505+
tempdir = tempfile.mkdtemp()
506+
# unlabeled root is empty
507+
config_manager.args.unlabeled_data_roots = str(tempdir)
508+
with pytest.raises(ValueError):
509+
config_manager._get_train_type(ignore_args=False)
510+
Path(f"{tempdir}/file.jpg").touch()
511+
# number of images in unlabeled root is insufficient
512+
assert config_manager._get_train_type(ignore_args=False) == "Incremental"
513+
Path(f"{tempdir}/file1.jpg").touch()
514+
Path(f"{tempdir}/file2.jpg").touch()
515+
assert config_manager._get_train_type(ignore_args=False) == "Semisupervised"
522516

523517
@e2e_pytest_unit
524518
def test_auto_selfsl_detection(self, mocker):

0 commit comments

Comments
 (0)