SeldonIO · ascillitoe · Jan 24, 2023 · Sep 2, 2022 · Sep 7, 2022 · Nov 3, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,6 +8,7 @@
 See the [documentation](https://docs.seldon.io/projects/alibi-detect/en/latest/cd/methods/mmddrift.html) and [example notebook](https://docs.seldon.io/projects/alibi-detect/en/latest/examples/cd_mmd_keops.html) for more info ([#548](https://github.com/SeldonIO/alibi-detect/pull/548)).
 - **New feature** Added support for serializing detectors with PyTorch backends, and detectors containing PyTorch models in their proprocessing functions ([#656](https://github.com/SeldonIO/alibi-detect/pull/656)).
 - **New feature** Added support for serializing detectors with KeOps backends ([#681](https://github.com/SeldonIO/alibi-detect/pull/681)).
+- **New feature** Added support for saving and loading online detectors' state. This allows a detector to be restarted from previously generated checkpoints ([#604](https://github.com/SeldonIO/alibi-detect/pull/604)).
 - **New feature** Added a PyTorch version of the `UAE` preprocessing utility function ([#656](https://github.com/SeldonIO/alibi-detect/pull/656)).
 - If a `categories_per_feature` dictionary is not passed to `TabularDrift`, a warning is now raised to inform the user that all features are assumed to be numerical ([#606](https://github.com/SeldonIO/alibi-detect/pull/606)).
 - For the `ClassifierDrift` and `SpotTheDiffDrift` detectors, we can also return the out-of-fold instances of the reference and test sets. When using `train_size` for training the detector, this allows to associate the returned prediction probabilities with the correct instances.

diff --git a/alibi_detect/base.py b/alibi_detect/base.py
@@ -1,8 +1,9 @@
 from abc import ABC, abstractmethod
+import os
 import copy
 import json
 import numpy as np
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, Union
 from typing_extensions import Protocol, runtime_checkable
 from alibi_detect.version import __version__
 
@@ -153,7 +154,7 @@ def from_config(cls, config: dict):
         detector.config['meta']['version_warning'] = version_warning
         return detector
 
-    def _set_config(self, inputs):  # TODO - move to BaseDetector once config save/load implemented for non-drift
+    def _set_config(self, inputs: dict):  # TODO - move to BaseDetector once config save/load implemented for non-drift
         """
         Set a detectors `config` attribute upon detector instantiation.
 
@@ -216,17 +217,27 @@ def predict(self) -> Any: ...
 class ConfigurableDetector(Detector, Protocol):
     """Type Protocol for detectors that have support for saving via config.
 
-    Used for typing save and load functionality in `alibi_detect.saving.saving.py`.
+    Used for typing save and load functionality in `alibi_detect.saving.saving`.
+    """
+    def get_config(self) -> dict: ...
 
-    Note:
-        This exists to distinguish between detectors with and without support for config saving and loading. Once all
-        detector support this then this protocol will be removed.
+    @classmethod
+    def from_config(cls, config: dict): ...
+
+    def _set_config(self, inputs: dict): ...
+
+
+@runtime_checkable
+class StatefulDetectorOnline(ConfigurableDetector, Protocol):
+    """Type Protocol for detectors that have support for saving/loading of online state.
+
+    Used for typing save and load functionality in `alibi_detect.saving.saving`.
     """
-    def get_config(self): ...
+    t: int = 0
 
-    def from_config(self): ...
+    def save_state(self, filepath: Union[str, os.PathLike]): ...
 
-    def _set_config(self): ...
+    def load_state(self, filepath: Union[str, os.PathLike]): ...
 
 
 class NumpyEncoder(json.JSONEncoder):

diff --git a/alibi_detect/cd/base_online.py b/alibi_detect/cd/base_online.py
@@ -1,23 +1,26 @@
 import logging
+import warnings
 from abc import abstractmethod
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union, Tuple, TYPE_CHECKING
 
 import numpy as np
 from alibi_detect.base import BaseDetector, concept_drift_dict
 from alibi_detect.cd.utils import get_input_shape
-from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow
+from alibi_detect.utils.state import StateMixin
+from alibi_detect.utils._types import Literal
 
-if has_pytorch:
+if TYPE_CHECKING:
     import torch
-
-if has_tensorflow:
     import tensorflow as tf
 
 logger = logging.getLogger(__name__)
 
 
-class BaseMultiDriftOnline(BaseDetector):
+class BaseMultiDriftOnline(BaseDetector, StateMixin):
+    t: int = 0
     thresholds: np.ndarray
+    backend: Literal['pytorch', 'tensorflow']
+    online_state_keys: Tuple[str, ...]
 
     def __init__(
             self,
@@ -126,17 +129,46 @@ def _preprocess_xt(self, x_t: Union[np.ndarray, Any]) -> np.ndarray:
         return x_t[None, :]
 
     def get_threshold(self, t: int) -> float:
+        """
+        Return the threshold for timestep `t`.
+
+        Parameters
+        ----------
+        t
+            The timestep to return a threshold for.
+
+        Returns
+        -------
+        The threshold at timestep `t`.
+        """
         return self.thresholds[t] if t < self.window_size else self.thresholds[-1]
 
-    def _initialise(self) -> None:
+    def _initialise_state(self) -> None:
+        """
+        Initialise online state (the stateful attributes updated by `score` and `predict`).
+
+        If a subclassed detector has additional online state, it should override `_initialise_state` and call
+        `super()._initialise_state()` within the override (see `LSDDDriftOnlineTorch._initialise_state()` for
+        an example).
+        """
         self.t = 0  # corresponds to a test set of ref data
         self.test_stats = np.array([])  # type: ignore[var-annotated]
         self.drift_preds = np.array([])  # type: ignore[var-annotated]
-        self._configure_ref_subset()
 
     def reset(self) -> None:
-        "Resets the detector but does not reconfigure thresholds."
-        self._initialise()
+        """
+        Deprecated reset method. This method will be repurposed or removed in the future. To reset the detector to
+        its initial state (`t=0`) use :meth:`reset_state`.
+        """
+        self.reset_state()
+        warnings.warn('This method is deprecated and will be removed/repurposed in the future. To reset the detector '
+                      'to its initial state use `reset_state`.', DeprecationWarning)
+
+    def reset_state(self) -> None:
+        """
+        Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
+        """
+        self._initialise_state()
 
     def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
                 ) -> Dict[Dict[str, str], Dict[str, Union[int, float]]]:
@@ -177,8 +209,10 @@ def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
         return cd
 
 
-class BaseUniDriftOnline(BaseDetector):
+class BaseUniDriftOnline(BaseDetector, StateMixin):
+    t: int = 0
     thresholds: np.ndarray
+    online_state_keys: Tuple[str, ...]
 
     def __init__(
             self,
@@ -291,6 +325,20 @@ def _update_state(self, x_t: np.ndarray):
         pass
 
     def _check_x(self, x: Any, x_ref: bool = False) -> np.ndarray:
+        """
+        Check the type and shape of the data `x`, and coerce it to the correct shape if possible.
+
+        Parameters
+        ----------
+        x
+            The data to be checked.
+        x_ref
+            Whether `x` is a batch of reference data instances (if `True`), or a single test data instance (if `False`).
+
+        Returns
+        -------
+        The checked data, coerced to be a np.ndarray of the correct shape.
+        """
         # Check the type of x
         if isinstance(x, np.ndarray):
             pass
@@ -333,21 +381,51 @@ def _preprocess_xt(self, x_t: Union[np.ndarray, Any]) -> np.ndarray:
         return x_t
 
     def get_threshold(self, t: int) -> np.ndarray:
+        """
+        Return the threshold for timestep `t`.
+
+        Parameters
+        ----------
+        t
+            The timestep to return a threshold for.
+
+        Returns
+        -------
+        The threshold at timestep `t`.
+        """
         return self.thresholds[t] if t < len(self.thresholds) else self.thresholds[-1]
 
-    def _initialise(self) -> None:
+    def _initialise_state(self) -> None:
+        """
+        Initialise online state (the stateful attributes updated by `score` and `predict`).
+
+        If a subclassed detector has additional online state, it should override `_initialise_state` and call
+        `super()._initialise_state()` within the override (see `CVMDriftOnlineTorch._initialise_state()` for
+        an example).
+        """
         self.t = 0
+        self.xs = np.array([])  # type: ignore[var-annotated]
         self.test_stats = np.empty([0, len(self.window_sizes), self.n_features])
         self.drift_preds = np.array([])  # type: ignore[var-annotated]
-        self._configure_ref()
 
     @abstractmethod
     def _check_drift(self, test_stats: np.ndarray, thresholds: np.ndarray) -> int:
         pass
 
     def reset(self) -> None:
-        "Resets the detector but does not reconfigure thresholds."
-        self._initialise()
+        """
+        Deprecated reset method. This method will be repurposed or removed in the future. To reset the detector to
+        its initial state (`t=0`) use :meth:`reset_state`.
+        """
+        self.reset_state()
+        warnings.warn('This method is deprecated and will be removed/repurposed in the future. To reset the detector '
+                      'to its initial state use `reset_state`.', DeprecationWarning)
+
+    def reset_state(self) -> None:
+        """
+        Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
+        """
+        self._initialise_state()
 
     def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True,
                 ) -> Dict[Dict[str, str], Dict[str, Union[int, float]]]:

diff --git a/alibi_detect/cd/cvm_online.py b/alibi_detect/cd/cvm_online.py
@@ -9,6 +9,8 @@
 
 
 class CVMDriftOnline(BaseUniDriftOnline, DriftConfigMixin):
+    online_state_keys = ('t', 'test_stats', 'drift_preds', 'xs', 'ids_ref_wins', 'ids_wins_ref', 'ids_wins_wins')
+
     def __init__(
             self,
             x_ref: Union[np.ndarray, list],
@@ -92,10 +94,14 @@ def __init__(
         self.batch_size = n_bootstraps if batch_size is None else batch_size
 
         # Configure thresholds and initialise detector
-        self._initialise()
+        self._initialise_state()
         self._configure_thresholds()
+        self._configure_ref()
 
     def _configure_ref(self) -> None:
+        """
+        Configure the reference data.
+        """
         ids_ref_ref = self.x_ref[None, :, :] >= self.x_ref[:, None, :]
         self.ref_cdf_ref = np.sum(ids_ref_ref, axis=0) / self.n
 
@@ -162,6 +168,14 @@ def _simulate_streams(self, t_max: int) -> np.ndarray:
         return stats
 
     def _update_state(self, x_t: np.ndarray):
+        """
+        Update online state based on the provided test instance.
+
+        Parameters
+        ----------
+        x_t
+            The test instance.
+        """
         self.t += 1
         if self.t == 1:
             # Initialise stream
@@ -186,6 +200,15 @@ def _update_state(self, x_t: np.ndarray):
                 [self.ids_wins_wins, (x_t <= self.xs[-self.max_ws:, :])[None, :, :]], 0
             )
 
+    def _initialise_state(self) -> None:
+        """
+        Initialise online state (the stateful attributes updated by `score` and `predict`).
+        """
+        super()._initialise_state()
+        self.ids_ref_wins = np.array([])
+        self.ids_wins_ref = np.array([])
+        self.ids_wins_wins = np.array([])
+
     def score(self, x_t: Union[np.ndarray, Any]) -> np.ndarray:
         """
         Compute the test-statistic (CVM) between the reference window(s) and test window.

diff --git a/alibi_detect/cd/fet_online.py b/alibi_detect/cd/fet_online.py
@@ -10,6 +10,8 @@
 
 
 class FETDriftOnline(BaseUniDriftOnline, DriftConfigMixin):
+    online_state_keys = ('t', 'test_stats', 'drift_preds', 'xs')
+
     def __init__(
             self,
             x_ref: Union[np.ndarray, list],
@@ -119,10 +121,14 @@ def __init__(
             raise ValueError("The `x_ref` data consists of all 0's or all 1's. Thresholds cannot be configured.")
 
         # Configure thresholds and initialise detector
-        self._initialise()
+        self._initialise_state()
         self._configure_thresholds()
+        self._configure_ref()
 
     def _configure_ref(self) -> None:
+        """
+        Configure the reference data.
+        """
         self.sum_ref = np.sum(self.x_ref, axis=0)
 
     def _configure_thresholds(self) -> None:
@@ -227,6 +233,14 @@ def _exp_moving_avg(arr: np.ndarray, lam: float) -> np.ndarray:
         return output
 
     def _update_state(self, x_t: np.ndarray):
+        """
+        Update online state based on the provided test instance.
+
+        Parameters
+        ----------
+        x_t
+            The test instance.
+        """
         self.t += 1
         if self.t == 1:
             # Initialise stream

diff --git a/alibi_detect/cd/lsdd_online.py b/alibi_detect/cd/lsdd_online.py
@@ -1,3 +1,4 @@
+import os
 import numpy as np
 from typing import Any, Callable, Dict, Optional, Union
 from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow, BackendValidator, Framework
@@ -112,9 +113,11 @@ def test_stats(self):
     def thresholds(self):
         return [self._detector.thresholds[min(s, self._detector.window_size-1)] for s in range(self.t)]
 
-    def reset(self):
-        "Resets the detector but does not reconfigure thresholds."
-        self._detector.reset()
+    def reset_state(self):
+        """
+        Resets the detector to its initial state (`t=0`). This does not include reconfiguring thresholds.
+        """
+        self._detector.reset_state()
 
     def predict(self, x_t: Union[np.ndarray, Any], return_test_stat: bool = True) \
             -> Dict[Dict[str, str], Dict[str, Union[int, float]]]:
@@ -163,3 +166,26 @@ def get_config(self) -> dict:  # Needed due to need to unnormalize x_ref
         # Unnormalize x_ref
         cfg['x_ref'] = self._detector._unnormalize(cfg['x_ref'])
         return cfg
+
+    def save_state(self, filepath: Union[str, os.PathLike]):
+        """
+        Save a detector's state to disk in order to generate a checkpoint.
+
+        Parameters
+        ----------
+        filepath
+            The directory to save state to.
+        """
+        self._detector.save_state(filepath)
+
+    def load_state(self, filepath: Union[str, os.PathLike]):
+        """
+        Load the detector's state from disk, in order to restart from a checkpoint previously generated with
+        :meth:`~save_state`.
+
+        Parameters
+        ----------
+        filepath
+            The directory to load state from.
+        """
+        self._detector.load_state(filepath)