
Commit 3ae10f2

Author: Benjamin
[QuantizationModifier] freeze bn stats and disable observers for QAT finetuning support

1 parent 07e7446
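
For context, the two new options attach directly to the modifier's constructor. A minimal usage sketch with illustrative epoch values, assuming QuantizationModifier is importable from the module changed below:

    from sparseml.pytorch.sparsification.quantization.modifier_quantization import (
        QuantizationModifier,
    )

    # illustrative schedule: QAT starts at epoch 0, batch norm running stats
    # freeze at epoch 2, and observer updates stop at epoch 3 so quantization
    # scales and zero points stay fixed for the remaining finetuning epochs
    modifier = QuantizationModifier(
        start_epoch=0.0,
        freeze_bn_stats_epoch=2.0,
        disable_quantization_observer_epoch=3.0,
    )

Both epochs must be at or after start_epoch; the new _validate_params check below rejects earlier values.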

File tree: 2 files changed (+167, -3 lines)

src/sparseml/pytorch/sparsification/quantization/modifier_quantization.py (+121)

@@ -21,6 +21,7 @@

 from typing import Any, Dict, List, Optional, Type

+import torch
 from torch.nn import Module
 from torch.optim.optimizer import Optimizer

@@ -31,6 +32,7 @@
 )
 from sparseml.pytorch.sparsification.quantization.helpers import (
     configure_module_bn_wrappers,
+    freeze_bn_stats,
     fuse_module_conv_bn_relus,
 )
 from sparseml.pytorch.sparsification.quantization.legacy_modifier_quantization import (

@@ -89,6 +91,8 @@ class QuantizationModifier(ScheduledModifier):
 |           num_bits: 8
 |           symmetric: True
 |   exclude_module_types: ["ReLU"]
+|   disable_quantization_observer_epoch: 2.0
+|   freeze_bn_stats_epoch: 3.0

 :param start_epoch: The epoch to start the modifier at
 :param default_scheme: Default QuantizationScheme to use when enabling quantization

@@ -108,6 +112,11 @@ class QuantizationModifier(ScheduledModifier):
     specification to quantize that module type with. Default is None
 :param exclude_module_types: optional list of module class names
     to not quantize. Default is None
+:param disable_quantization_observer_epoch: Epoch to disable updates to the module
+    quantization observers. At this point, quantized weights and zero points will
+    not be updated. Leave None to not disable observers during QAT. Default is None
+:param freeze_bn_stats_epoch: Epoch to stop the tracking of batch norm stats. Leave
+    None to not stop tracking batch norm stats during QAT. Default is None
 :param end_epoch: Disabled, setting to anything other than -1 will raise an
     exception. For compatibility with YAML serialization only.
 """

@@ -119,6 +128,8 @@ def __init__(
     submodule_schemes: Optional[Dict[str, QuantizationSchemeLoadable]] = None,
     module_type_schemes: Optional[Dict[str, QuantizationSchemeLoadable]] = None,
     exclude_module_types: Optional[List[str]] = None,
+    disable_quantization_observer_epoch: Optional[float] = None,
+    freeze_bn_stats_epoch: Optional[float] = None,
     end_epoch: float = -1.0,
 ):
     raise_if_torch_quantization_not_available()

@@ -137,8 +148,12 @@ def __init__(
         module_type_schemes, self._default_scheme
     )
     self._exclude_module_types = exclude_module_types
+    self._disable_quantization_observer_epoch = disable_quantization_observer_epoch
+    self._freeze_bn_stats_epoch = freeze_bn_stats_epoch

     self._qat_enabled = False
+    self._quantization_observer_disabled = False
+    self._bn_stats_frozen = False

 @BaseModifier.sparsification_types.getter
 def sparsification_types(self) -> List[SparsificationTypes]:

@@ -231,6 +246,42 @@ def exclude_module_types(self, value: Optional[List[str]]):
     """
     self._exclude_module_types = value

+@ModifierProp()
+def disable_quantization_observer_epoch(self) -> Optional[float]:
+    """
+    :return: Epoch to disable updates to the module
+        quantization observers. At this point, quantized weights and zero points
+        will not be updated. When None, observers are never disabled during QAT
+    """
+    return self._disable_quantization_observer_epoch
+
+@disable_quantization_observer_epoch.setter
+def disable_quantization_observer_epoch(self, value: Optional[float]):
+    """
+    :param value: Epoch to disable updates to the module
+        quantization observers. At this point, quantized weights and zero points
+        will not be updated. Set None to not disable observers during QAT
+    """
+    self._disable_quantization_observer_epoch = value
+    self._validate_params()
+
+@ModifierProp()
+def freeze_bn_stats_epoch(self) -> Optional[float]:
+    """
+    :return: Epoch to stop the tracking of batch norm stats. When
+        None, batch norm stats are tracked for all of training
+    """
+    return self._freeze_bn_stats_epoch
+
+@freeze_bn_stats_epoch.setter
+def freeze_bn_stats_epoch(self, value: Optional[float]):
+    """
+    :param value: Epoch to stop the tracking of batch norm stats. Set
+        None to not stop tracking batch norm stats during QAT
+    """
+    self._freeze_bn_stats_epoch = value
+    self._validate_params()
+
 def initialize(
     self,
     module: Module,

@@ -285,15 +336,61 @@ def update_ready(self, epoch: float, steps_per_epoch: int) -> bool:
         return False

     pending = self.start_pending(epoch, steps_per_epoch)
+    pending |= self._freeze_bn_stats_update_ready(epoch)
+    pending |= self._disable_quantization_observer_update_ready(epoch)

     return pending

+def advance_epochs(self, ref_start_epoch: float = None):
+    """
+    Advance epoch attributes given a reference start epoch
+
+    :param ref_start_epoch: the reference, i.e. new, start epoch
+    """
+    if ref_start_epoch is None:
+        return
+
+    super().advance_epochs(ref_start_epoch=ref_start_epoch)
+
+    if self._disable_quantization_observer_epoch is not None:
+        self._disable_quantization_observer_epoch = (
+            max(0.0, self._disable_quantization_observer_epoch) + ref_start_epoch
+        )
+
+    if self._freeze_bn_stats_epoch is not None:
+        self._freeze_bn_stats_epoch = (
+            max(0.0, self._freeze_bn_stats_epoch) + ref_start_epoch
+        )
+    self._validate_params()
+
 def _check_quantization_update(
     self, module: Module, epoch: float, steps_per_epoch: int
 ):
     if self.start_pending(epoch, steps_per_epoch) and not self._qat_enabled:
         self._enable_module_qat(module)

+    if self._disable_quantization_observer_update_ready(epoch):
+        module.apply(torch.quantization.disable_observer)
+        self._quantization_observer_disabled = True
+
+    if self._freeze_bn_stats_update_ready(epoch):
+        module.apply(freeze_bn_stats)
+        self._bn_stats_frozen = True
+
+def _disable_quantization_observer_update_ready(self, epoch: float) -> bool:
+    return (
+        self._disable_quantization_observer_epoch is not None
+        and epoch >= self._disable_quantization_observer_epoch
+        and not self._quantization_observer_disabled
+    )
+
+def _freeze_bn_stats_update_ready(self, epoch: float) -> bool:
+    return (
+        self._freeze_bn_stats_epoch is not None
+        and epoch >= self._freeze_bn_stats_epoch
+        and not self._bn_stats_frozen
+    )
+
 def _enable_module_qat(self, module: Module):
     # fuse conv-bn-relu blocks prior to quantization emulation
     fuse_module_conv_bn_relus(module, inplace=True)

@@ -318,6 +415,30 @@ def _enable_module_qat(self, module: Module):

     self._qat_enabled = True

+def _validate_params(self):
+    self.validate_schedule()
+    if (
+        self._disable_quantization_observer_epoch is not None
+        and self._disable_quantization_observer_epoch < self._start_epoch
+    ):
+        raise ValueError(
+            f"disable_quantization_observer_epoch may not be less than "
+            f"start_epoch for QuantizationModifier, received: "
+            f"{self._disable_quantization_observer_epoch} with start_epoch "
+            f"{self._start_epoch}"
+        )
+
+    if (
+        self._freeze_bn_stats_epoch is not None
+        and self._freeze_bn_stats_epoch < self._start_epoch
+    ):
+        raise ValueError(
+            "freeze_bn_stats_epoch may not be less than start_epoch"
+            " for QuantizationModifier, received: {} with start_epoch {}".format(
+                self._freeze_bn_stats_epoch, self._start_epoch
+            )
+        )
+

 class _QuantizationSchemesDict(dict):
     # wrapper class for dict to override the __str__ method for yaml serialization
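
The import of freeze_bn_stats points at the helpers module, whose body is outside this diff. As a rough sketch of what such a helper can look like (an assumption, not the actual SparseML implementation): PyTorch's fused QAT modules expose a freeze_bn_stats() method, so a per-module callable passed to Module.apply only needs to invoke it where it exists:

    import torch.nn.intrinsic.qat as nniqat

    def freeze_bn_stats(mod):
        # hypothetical sketch: fused QAT conv-bn modules keep updating their
        # batch norm running stats until freeze_bn_stats() is called on them
        if isinstance(mod, (nniqat.ConvBn2d, nniqat.ConvBnReLU2d)):
            mod.freeze_bn_stats()

module.apply(freeze_bn_stats) then visits every submodule once, mirroring how _check_quantization_update above applies torch.quantization.disable_observer to stop observer updates across the whole model.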

tests/sparseml/pytorch/sparsification/quantization/test_modifier_quantization.py (+46, -3)

@@ -122,6 +122,21 @@ def _test_qat_applied(modifier, model):
         assert not hasattr(module, "qconfig")


+def _test_freeze_bn_stats_observer_applied(modifier, epoch):
+    if modifier.disable_quantization_observer_epoch is not None and (
+        epoch >= modifier.disable_quantization_observer_epoch
+    ):
+        assert modifier._quantization_observer_disabled
+    else:
+        assert not modifier._quantization_observer_disabled
+    if modifier.freeze_bn_stats_epoch is not None and (
+        epoch >= modifier.freeze_bn_stats_epoch
+    ):
+        assert modifier._bn_stats_frozen
+    else:
+        assert not modifier._bn_stats_frozen
+
+
 @pytest.mark.skipif(
     os.getenv("NM_ML_SKIP_PYTORCH_TESTS", False),
     reason="Skipping pytorch tests",

@@ -148,6 +163,8 @@
         lambda: QuantizationModifier(
             start_epoch=0.0,
             submodule_schemes=dict(seq="default"),
+            freeze_bn_stats_epoch=2.0,
+            disable_quantization_observer_epoch=3.0,
         ),
         LinearNet,
     ),

@@ -175,6 +192,8 @@
         lambda: QuantizationModifier(
             start_epoch=2.0,
             module_type_schemes=dict(Conv2d=QuantizationScheme(weights=None)),
+            freeze_bn_stats_epoch=2.5,
+            disable_quantization_observer_epoch=2.2,
         ),
         ConvNet,
     ),

@@ -206,10 +225,16 @@ def test_lifecycle(

         self.initialize_helper(modifier, model)

+        _test_freeze_bn_stats_observer_applied(modifier, 0.0)
         for epoch in range(int(modifier.start_epoch)):
             assert not modifier.update_ready(epoch, test_steps_per_epoch)

         update_epochs = [modifier.start_epoch]
+        if modifier.disable_quantization_observer_epoch is not None:
+            update_epochs.append(modifier.disable_quantization_observer_epoch)
+        if modifier.freeze_bn_stats_epoch is not None:
+            update_epochs.append(modifier.freeze_bn_stats_epoch)
+        update_epochs.sort()
         for epoch in update_epochs:
             assert modifier.update_ready(epoch, test_steps_per_epoch)
         # test update ready is still true after start epoch

@@ -225,9 +250,11 @@ def test_lifecycle(
         _test_qat_applied(modifier, model)
         pass

-        modifier.scheduled_update(
-            model, optimizer, modifier.start_epoch, test_steps_per_epoch
-        )
+        for update_epoch in update_epochs:
+            modifier.scheduled_update(
+                model, optimizer, update_epoch, test_steps_per_epoch
+            )
+            _test_freeze_bn_stats_observer_applied(modifier, update_epoch)

         # test update ready is False after start epoch is applied, before disable epochs
         if (

@@ -270,6 +297,8 @@ def test_quantization_modifier_yaml():
     )
     module_type_schemes = dict(Linear=dict(output_activations=dict(symmetric=False)))
     exclude_module_types = ["LayerNorm", "Tanh"]
+    disable_quantization_observer_epoch = 2.0
+    freeze_bn_stats_epoch = 3.0

     yaml_str = f"""
     !QuantizationModifier

@@ -278,6 +307,8 @@ def test_quantization_modifier_yaml():
         submodule_schemes: {submodule_schemes}
         module_type_schemes: {module_type_schemes}
         exclude_module_types: {exclude_module_types}
+        disable_quantization_observer_epoch: {disable_quantization_observer_epoch}
+        freeze_bn_stats_epoch: {freeze_bn_stats_epoch}
     """
     yaml_modifier = QuantizationModifier.load_obj(
         yaml_str

@@ -291,6 +322,8 @@ def test_quantization_modifier_yaml():
         submodule_schemes=submodule_schemes,
         module_type_schemes=module_type_schemes,
         exclude_module_types=exclude_module_types,
+        disable_quantization_observer_epoch=disable_quantization_observer_epoch,
+        freeze_bn_stats_epoch=freeze_bn_stats_epoch,
     )

     assert isinstance(yaml_modifier, QuantizationModifier)

@@ -322,3 +355,13 @@ def test_quantization_modifier_yaml():
         == serialized_modifier.exclude_module_types
         == obj_modifier.exclude_module_types
     )
+    assert (
+        yaml_modifier.disable_quantization_observer_epoch
+        == serialized_modifier.disable_quantization_observer_epoch
+        == obj_modifier.disable_quantization_observer_epoch
+    )
+    assert (
+        yaml_modifier.freeze_bn_stats_epoch
+        == serialized_modifier.freeze_bn_stats_epoch
+        == obj_modifier.freeze_bn_stats_epoch
+    )
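
The YAML round trip exercised by the new assertions can be reproduced in isolation; a minimal sketch, assuming the load_obj API used in the test above and illustrative epoch values:

    yaml_str = """
    !QuantizationModifier
        start_epoch: 0.0
        disable_quantization_observer_epoch: 2.0
        freeze_bn_stats_epoch: 3.0
    """
    modifier = QuantizationModifier.load_obj(yaml_str)
    assert modifier.disable_quantization_observer_epoch == 2.0
    assert modifier.freeze_bn_stats_epoch == 3.0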
