open-edge-platform · jaegukhyun · Jul 27, 2023 · Jul 18, 2023 · Jul 18, 2023 · Jul 18, 2023
@@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.
 
 ### New features
 
--
+- Add YOLOX variants as new object detector models (<https://github.com/openvinotoolkit/training_extensions/pull/2402>)
 
 ### Enhancements
 

@@ -102,10 +102,17 @@ In addition to these models, we supports experimental models for object detectio
 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+---------------------+-----------------+
 | `Custom_Object_Detection_Gen3_ResNeXt101_ATSS <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/algorithms/detection/configs/detection/resnext101_atss/template_experimental.yaml>`_           |   ResNeXt101-ATSS   | 434.75              | 344.0           |
 +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+---------------------+-----------------+
+| `Object_Detection_YOLOX_S <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_s/template_experimental.yaml>`_                            |       YOLOX_S       | 33.51               | 46.0            |
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+---------------------+-----------------+
+| `Object_Detection_YOLOX_L <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_l/template_experimental.yaml>`_                            |       YOLOX_L       | 194.57              | 207.0           |
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+---------------------+-----------------+
+| `Object_Detection_YOLOX_X <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template_experimental.yaml>`_                            |       YOLOX_X       | 352.42              | 378.0           |
++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+---------------------+-----------------+
 
 `Deformable_DETR <https://arxiv.org/abs/2010.04159>`_ is a `DETR <https://arxiv.org/abs/2005.12872>`_ based model that solves the slow convergence problem of DETR. `DINO <https://arxiv.org/abs/2203.03605>`_ improves Deformable DETR based methods via denoising anchor boxes. Current SOTA models for object detection are based on DINO.
 Although transformer based models show notable performance on various object detection benchmarks, CNN based models still show good performance with proper latency.
 Therefore, we added a new experimental CNN based method, ResNeXt101-ATSS. ATSS still shows good performance among `RetinaNet <https://arxiv.org/abs/1708.02002>`_ based models. We integrated a large ResNeXt101 backbone into our Custom ATSS head, and it shows good transfer learning performance.
+In addition, we added YOLOX variants to support users' diverse situations.
 
 .. note::
 
@@ -147,6 +154,12 @@ We trained each model with a single Nvidia GeForce RTX3090.
 +----------------------------+------------------+-----------+-----------+-----------+-----------+--------------+
 | ResNet50-DINO              | 49.0 (66.4)      | 47.2      | 99.5      | 62.9      | 93.5      | 99.1         |
 +----------------------------+------------------+-----------+-----------+-----------+-----------+--------------+
+| YOLOX_S                    | 40.3 (59.1)      | 37.1      | 93.6      | 54.8      | 92.7      | 98.8         |
++----------------------------+------------------+-----------+-----------+-----------+-----------+--------------+
+| YOLOX_L                    | 49.4 (67.1)      | 44.5      | 94.6      | 55.8      | 91.8      | 99.0         |
++----------------------------+------------------+-----------+-----------+-----------+-----------+--------------+
+| YOLOX_X                    | 50.9 (68.4)      | 44.2      | 96.3      | 56.2      | 91.5      | 98.9         |
++----------------------------+------------------+-----------+-----------+-----------+-----------+--------------+
 
 ************************
 Semi-supervised Learning

@@ -450,7 +450,7 @@ def load_inferencer(
         ]
         if self.task_type == TaskType.DETECTION:
             if (
-                self.task_environment.model_template.model_template_id == "Custom_Object_Detection_YOLOX"
+                "YOLOX" in self.task_environment.model_template.model_template_id
                 and not self.config.tiling_parameters.enable_tiling
             ):
                 args.append({"resize_type": "fit_to_window_letterbox", "pad_value": 114})

@@ -0,0 +1,4 @@
+"""Initialization of YOLOX_L model for Detection Task."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,33 @@
+{
+  "base": {
+    "find_unused_parameters": true,
+    "nncf_config": {
+      "target_metric_name": "mAP",
+      "input_info": {
+        "sample_size": [1, 3, 640, 640]
+      },
+      "compression": [],
+      "log_dir": "/tmp"
+    }
+  },
+  "nncf_quantization": {
+    "optimizer": {
+      "lr": 0.0005
+    },
+    "nncf_config": {
+      "compression": [
+        {
+          "algorithm": "quantization"
+        }
+      ],
+      "accuracy_aware_training": {
+        "mode": "early_exit",
+        "params": {
+          "maximal_absolute_accuracy_degradation": 0.01,
+          "maximal_total_epochs": 20
+        }
+      }
+    }
+  },
+  "order_of_parts": ["nncf_quantization"]
+}
@@ -0,0 +1,81 @@
+"""Data Pipeline of YOLOX_L model for Detection Task."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# pylint: disable=invalid-name
+
+__img_size = (640, 640)  # shared by Mosaic, RandomAffine, MixUp, Resize and MultiScaleFlipAug below
+__img_norm_cfg = dict(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=False)  # mean 0, std 1, to_rgb off
+
+train_pipeline = [  # no load step here: loading is done by the wrapped dataset's own pipeline (see data.train)
+    dict(type="Mosaic", img_scale=__img_size, pad_val=114.0),  # 114.0 matches the Pad fill value used below
+    dict(
+        type="RandomAffine",
+        scaling_ratio_range=(0.1, 2),
+        border=(-__img_size[0] // 2, -__img_size[1] // 2),  # evaluates to (-320, -320) for a 640x640 size
+    ),
+    dict(type="MixUp", img_scale=__img_size, ratio_range=(0.8, 1.6), pad_val=114.0),
+    dict(type="YOLOXHSVRandomAug"),
+    dict(type="RandomFlip", flip_ratio=0.5),
+    dict(type="Resize", img_scale=__img_size, keep_ratio=True),
+    dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))),
+    dict(type="Normalize", **__img_norm_cfg),
+    dict(type="DefaultFormatBundle"),
+    dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
+]
+
+test_pipeline = [  # used by both data.val and data.test
+    dict(type="LoadImageFromFile"),
+    dict(
+        type="MultiScaleFlipAug",
+        img_scale=__img_size,
+        flip=False,
+        transforms=[
+            dict(type="Resize", keep_ratio=True),
+            dict(type="RandomFlip"),  # NOTE(review): presumably a no-op because flip=False above; confirm
+            dict(type="Pad", pad_to_square=True, pad_val=dict(img=(114.0, 114.0, 114.0))),
+            dict(type="Normalize", **__img_norm_cfg),
+            dict(type="DefaultFormatBundle"),
+            dict(type="Collect", keys=["img"]),  # only the image is collected; no ground-truth keys
+        ],
+    ),
+]
+
+__dataset_type = "CocoDataset"
+__data_root = "data/coco/"  # prefix of every ann_file / img_prefix path below
+__samples_per_gpu = 2
+
+data = dict(
+    samples_per_gpu=__samples_per_gpu,
+    workers_per_gpu=4,
+    train=dict(
+        # make sure to clean up recipe dataset
+        _delete_=True,
+        type="MultiImageMixDataset",
+        dataset=dict(
+            type=__dataset_type,
+            ann_file=__data_root + "annotations/instances_train2017.json",
+            img_prefix=__data_root + "train2017/",
+            pipeline=[  # loading steps for the wrapped dataset; train_pipeline runs on top of these
+                dict(type="LoadImageFromFile", to_float32=False),
+                dict(type="LoadAnnotations", with_bbox=True),
+            ],
+        ),
+        pipeline=train_pipeline,
+    ),
+    val=dict(
+        type=__dataset_type,
+        ann_file=__data_root + "annotations/instances_val2017.json",
+        img_prefix=__data_root + "val2017/",
+        test_mode=True,
+        pipeline=test_pipeline,
+    ),
+    test=dict(  # same annotation file, image prefix and pipeline as `val`
+        type=__dataset_type,
+        ann_file=__data_root + "annotations/instances_val2017.json",
+        img_prefix=__data_root + "val2017/",
+        test_mode=True,
+        pipeline=test_pipeline,
+    ),
+)
@@ -0,0 +1,14 @@
+"""MMDeploy config of YOLOX_L model for Detection Task."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+_base_ = ["../../base/deployments/base_detection_dynamic.py"]  # base deployment config; the dicts below add to it
+
+ir_config = dict(
+    output_names=["boxes", "labels"],  # names given to the two exported outputs
+)
+
+backend_config = dict(
+    model_inputs=[dict(opt_shapes=dict(input=[-1, 3, 640, 640]))],  # NOTE(review): -1 presumably = dynamic batch; confirm
+)
@@ -0,0 +1,16 @@
+metric: mAP
+search_algorithm: asha
+early_stop: None # NOTE(review): YAML reads this as the string "None", not null; confirm the consumer expects that
+hp_space:
+  learning_parameters.learning_rate:
+    param_type: qloguniform
+    range: # NOTE(review): presumably [min, max, step]; confirm against the HPO loader
+      - 0.0001
+      - 0.01
+      - 0.0001
+  learning_parameters.batch_size:
+    param_type: qloguniform
+    range: # NOTE(review): presumably [min, max, step]; confirm against the HPO loader
+      - 4
+      - 16
+      - 2
@@ -0,0 +1,24 @@
+"""Model configuration of YOLOX_L model for Detection Task."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# pylint: disable=invalid-name
+
+_base_ = ["../../../../../recipes/stages/detection/incremental.py", "../../base/models/detector.py"]
+
+model = dict(
+    type="CustomYOLOX",
+    backbone=dict(type="CSPDarknet", deepen_factor=1.0, widen_factor=1.0, out_indices=(2, 3, 4)),
+    neck=dict(type="YOLOXPAFPN", in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3),
+    bbox_head=dict(type="CustomYOLOXHead", num_classes=80, in_channels=256, feat_channels=256),  # in_channels == neck out_channels
+    train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
+    # To stay aligned with the upstream source code, the score threshold is
+    # 0.01 for the val phase and 0.001 for the test phase; 0.01 is set here.
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+)
+load_from = "https://download.openmmlab.com/mmdetection/v2.0/yolox/\
+yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth"  # the backslash above joins both lines into one URL
+
+fp16 = dict(loss_scale=512.0)
+ignore = False  # NOTE(review): the consumer of `ignore` is not visible in this file; confirm its meaning
@@ -0,0 +1,4 @@
+"""Initialization of YOLOX_L model for Semi-SL Detection Task."""
+
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,33 @@
+{
+  "base": {
+    "find_unused_parameters": true,
+    "nncf_config": {
+      "target_metric_name": "mAP",
+      "input_info": {
+        "sample_size": [1, 3, 640, 640]
+      },
+      "compression": [],
+      "log_dir": "/tmp"
+    }
+  },
+  "nncf_quantization": {
+    "optimizer": {
+      "lr": 0.0005
+    },
+    "nncf_config": {
+      "compression": [
+        {
+          "algorithm": "quantization"
+        }
+      ],
+      "accuracy_aware_training": {
+        "mode": "early_exit",
+        "params": {
+          "maximal_absolute_accuracy_degradation": 0.01,
+          "maximal_total_epochs": 20
+        }
+      }
+    }
+  },
+  "order_of_parts": ["nncf_quantization"]
+}
-Original file line number
+Diff line change
@@ Expand Up @@
     ### New features
-    -
+    - Add YOLOX variants as new object detector models (<https://github.com/openvinotoolkit/training_extensions/pull/2402>)
     ### Enhancements
@@ Expand Down @@