Skip to content

Commit d5e7607

Browse files
Fixed #1077 Add extra check for window size (#1078)
* Add extra check for window size
* update module to include extra check for self join
* add tests for warning
* revise comment
* ignore coverage
* minor improvement in docstring
* fix flake8
* Revised test function using expected signature
* fixed format
* Revise function to pass test
* Update stumpy/core.py
* improve comments
* improve readability of function
* minor improvement in the description of param
* remove redundant test function
* Revise logic and the comment
* improving comments
* minor change
* minor change in comment
* minor change in comment
* update aamp for checking window size
* improve docstring and comments
* improve docstring
* use smaller input to make test function more understandable
* updated stumped and aamped
* updated maamp and maamped modules
* update different modules to consider the change in core.check_window_size
* minor fix
* improve comments
* improve comments
* improved the explanations
* minor change in the description of function
* improve the clarity of the logic
* improve comment
* improve description of function
* minor change
* improve readability and consistency
* minor change
* minor changes
1 parent 9504301 commit d5e7607

18 files changed

+145
-41
lines changed

stumpy/aamp.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -407,17 +407,17 @@ def aamp(T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
407407
if T_B.ndim != 1: # pragma: no cover
408408
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")
409409

410-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
411-
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
412-
413410
n_A = T_A.shape[0]
414411
n_B = T_B.shape[0]
415412
l = n_A - m + 1
416413

414+
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
417415
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
418-
if ignore_trivial:
416+
if ignore_trivial: # self-join
417+
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
419418
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
420-
else:
419+
else: # AB-join
420+
core.check_window_size(m, max_size=min(n_A, n_B))
421421
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
422422

423423
P, PL, PR, I, IL, IR = _aamp(

stumpy/aamped.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -386,17 +386,17 @@ def aamped(client, T_A, m, T_B=None, ignore_trivial=True, p=2.0, k=1):
386386
if T_B.ndim != 1: # pragma: no cover
387387
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")
388388

389-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
390-
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
391-
392389
n_A = T_A.shape[0]
393390
n_B = T_B.shape[0]
394391

392+
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
395393
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
396394

397395
if ignore_trivial:
396+
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
398397
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
399398
else:
399+
core.check_window_size(m, max_size=min(n_A, n_B))
400400
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
401401

402402
_aamped = core._client_to_func(client)

stumpy/aampi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def __init__(self, T, m, egress=True, p=2.0, k=1, mp=None):
111111
computed internally using `stumpy.aamp`.
112112
"""
113113
self._T = core._preprocess(T)
114-
core.check_window_size(m, max_size=self._T.shape[-1])
114+
core.check_window_size(m, max_size=self._T.shape[0])
115115
self._m = m
116116
self._n = self._T.shape[0]
117117
self._excl_zone = int(np.ceil(self._m / config.STUMPY_EXCL_ZONE_DENOM))

stumpy/core.py

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -554,11 +554,12 @@ def get_max_window_size(n):
554554
return max_m
555555

556556

557-
def check_window_size(m, max_size=None):
557+
def check_window_size(m, max_size=None, n=None):
558558
"""
559559
Check the window size and ensure that it is greater than or equal to 3 and, if
560-
`max_size` is provided, ensure that the window size is less than or equal to the
561-
`max_size`
560+
``max_size`` is provided, ensure that the window size is less than or equal to
561+
the ``max_size``. Furthermore, if ``n`` is provided, then a self-join is assumed
562+
and it checks whether all subsequences have at least one non-trivial neighbor.
562563
563564
Parameters
564565
----------
@@ -568,6 +569,10 @@ def check_window_size(m, max_size=None):
568569
max_size : int, default None
569570
The maximum window size allowed
570571
572+
n : int, default None
573+
The length of the time series in the case of a self-join.
574+
``n`` should not be supplied (or should be set to ``None``) in the case of an AB-join.
575+
571576
Returns
572577
-------
573578
None
@@ -589,6 +594,60 @@ def check_window_size(m, max_size=None):
589594
if max_size is not None and m > max_size:
590595
raise ValueError(f"The window size must be less than or equal to {max_size}")
591596

597+
if n is not None:
598+
# Raise warning if there is at least one subsequence with no eligible
599+
# (non-trivial) neighbor in the case of a self-join.
600+
601+
# For any time series `T`, an "eligible nearest neighbor" subsequence for
602+
# the central-most subsequence must be located outside the `excl_zone`,
603+
# and the central-most subsequence will ALWAYS have the smallest relative
604+
# (index-wise) distance to its farthest neighbor amongst all other subsequences.
605+
# Therefore, we only need to check whether the `excl_zone` eliminates all
606+
# "neighbors" for the central-most subsequence in `T`. In fact, we just need to
607+
# verify whether the `excl_zone` eliminates the "neighbor" that is farthest
608+
# away (index-wise) from the central-most subsequence. If it does not, this
609+
# implies that all subsequences in `T` will have at least one "eligible nearest
610+
# neighbor" that is located outside of their respective excl_zone.
611+
612+
excl_zone = int(math.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
613+
614+
l = n - m + 1
615+
# The start indices of the subsequences are: 0, 1, ..., l-1
616+
617+
# If `l` is odd
618+
# Suppose `l == 5`. So, the start indices of the subsequences
619+
# are: 0, 1, 2, 3, 4
620+
# The central subsequence is located at index position c=2, with two
621+
# farthest neighbors, one located at index 0, and the other is located
622+
# at index 4. In both cases, the relative (index-wise) distance is 2,
623+
# which is simply `5 // 2`. In general, it can be shown that the
624+
# (index-wise) distance from the central subsequence to its farthest
625+
# neighbor is `l // 2`.
626+
627+
# If `l` is even
628+
# Suppose `l == 6`. So, the start indices of the subsequences
629+
# are: 0, 1, 2, 3, 4, 5
630+
# There are two central-most subsequences, located at the index
631+
# positions c=2 and c=3. For the central-most subsequence at index
632+
# position c=2, its farthest neighbor will be located at index 5 (to the
633+
# right of c=2) and, for the central-most subsequence at index position
634+
# c=3, its farthest neighbor will be located at index 0 (to the left of
635+
# c=3). In both cases, the relative (index-wise) distance is 3,
636+
# which is simply `6 // 2`. In general, it can be shown that the
637+
# (index-wise) distance from the central-most subsequence to its
638+
# farthest neighbor is `l // 2`.
639+
640+
# Therefore, regardless of whether `l` is even or odd, for the central
641+
# subsequence for any time series, the index location of its
642+
# farthest neighbor will always be `l // 2` index positions away.
643+
diff_to_farthest_idx = l // 2
644+
if diff_to_farthest_idx <= excl_zone:
645+
msg = (
646+
f"The window size, 'm = {m}', may be too large and could lead to "
647+
+ "meaningless results. Consider reducing 'm' where necessary"
648+
)
649+
warnings.warn(msg)
650+
592651

593652
@njit(fastmath=config.STUMPY_FASTMATH_TRUE)
594653
def _sliding_dot_product(Q, T):
@@ -1354,7 +1413,7 @@ def mass_absolute(Q, T, T_subseq_isfinite=None, p=2.0, query_idx=None):
13541413
raise ValueError(f"`Q` is {Q.ndim}-dimensional and must be 1-dimensional. ")
13551414
Q_isfinite = np.isfinite(Q)
13561415

1357-
check_window_size(m, max_size=Q.shape[-1])
1416+
check_window_size(m, max_size=Q.shape[0])
13581417

13591418
if query_idx is not None: # pragma: no cover
13601419
query_idx = int(query_idx)
@@ -1701,7 +1760,7 @@ def mass(
17011760
raise ValueError(f"Q is {Q.ndim}-dimensional and must be 1-dimensional. ")
17021761
Q_isfinite = np.isfinite(Q)
17031762

1704-
check_window_size(m, max_size=Q.shape[-1])
1763+
check_window_size(m, max_size=Q.shape[0])
17051764

17061765
if query_idx is not None:
17071766
query_idx = int(query_idx)
@@ -1926,7 +1985,7 @@ def mass_distance_matrix(
19261985
T_subseq_isconstant=T_subseq_isconstant,
19271986
)
19281987

1929-
check_window_size(m, max_size=min(Q.shape[-1], T.shape[-1]))
1988+
check_window_size(m, max_size=min(Q.shape[0], T.shape[0]))
19301989

19311990
return _mass_distance_matrix(
19321991
Q,

stumpy/gpu_aamp.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,8 +536,13 @@ def gpu_aamp(T_A, m, T_B=None, ignore_trivial=True, device_id=0, p=2.0, k=1):
536536
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
537537
)
538538

539-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
540539
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
540+
if ignore_trivial: # self-join
541+
core.check_window_size(
542+
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
543+
)
544+
else: # AB-join
545+
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
541546

542547
n = T_B.shape[0]
543548
w = T_A.shape[0] - m + 1

stumpy/gpu_stump.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,8 +666,13 @@ def gpu_stump(
666666
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
667667
)
668668

669-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
670669
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
670+
if ignore_trivial: # self-join
671+
core.check_window_size(
672+
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
673+
)
674+
else: # AB-join
675+
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
671676

672677
n = T_B.shape[0]
673678
w = T_A.shape[0] - m + 1

stumpy/maamp.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def maamp_subspace(
140140
returned.
141141
"""
142142
T = core._preprocess(T)
143-
core.check_window_size(m, max_size=T.shape[-1])
143+
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
144144

145145
subseqs, _ = core.preprocess_non_normalized(T[:, subseq_idx : subseq_idx + m], m)
146146
neighbors, _ = core.preprocess_non_normalized(T[:, nn_idx : nn_idx + m], m)
@@ -269,7 +269,7 @@ def maamp_mdl(
269269
A list of numpy.ndarrays that contains the `k`th-dimensional subspaces
270270
"""
271271
T = core._preprocess(T)
272-
core.check_window_size(m, max_size=T.shape[-1])
272+
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
273273

274274
if discretize_func is None:
275275
T_isfinite = np.isfinite(T)
@@ -441,7 +441,7 @@ def maamp_multi_distance_profile(query_idx, T, m, include=None, discords=False,
441441
err = f"T is {T.ndim}-dimensional and must be at least 1-dimensional"
442442
raise ValueError(f"{err}")
443443

444-
core.check_window_size(m, max_size=T.shape[1])
444+
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
445445

446446
if include is not None: # pragma: no cover
447447
include = core._preprocess_include(include)
@@ -933,7 +933,7 @@ def maamp(T, m, include=None, discords=False, p=2.0):
933933
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
934934
raise ValueError(f"{err}")
935935

936-
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
936+
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])
937937

938938
if include is not None:
939939
include = core._preprocess_include(include)

stumpy/maamped.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ def maamped(client, T, m, include=None, discords=False, p=2.0):
389389
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
390390
raise ValueError(f"{err}")
391391

392-
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
392+
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])
393393

394394
if include is not None:
395395
include = core._preprocess_include(include)

stumpy/mstump.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ def subspace(
217217
array([0, 1])
218218
"""
219219
T = core._preprocess(T)
220-
core.check_window_size(m, max_size=T.shape[-1])
220+
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
221221
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)
222222

223223
if discretize_func is None:
@@ -409,7 +409,7 @@ def mdl(
409409
(array([ 80. , 111.509775]), [array([1]), array([0, 1])])
410410
"""
411411
T = core._preprocess(T)
412-
core.check_window_size(m, max_size=T.shape[-1])
412+
core.check_window_size(m, max_size=T.shape[1], n=T.shape[1])
413413
T_subseq_isconstant = core.process_isconstant(T, m, T_subseq_isconstant)
414414

415415
if discretize_func is None:
@@ -1228,7 +1228,9 @@ def mstump(
12281228
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
12291229
raise ValueError(f"{err}")
12301230

1231-
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
1231+
# mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]`
1232+
# must be passed to the function `core.check_window_size`.
1233+
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])
12321234

12331235
if include is not None:
12341236
include = core._preprocess_include(include)

stumpy/mstumped.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,9 @@ def mstumped(
505505
err = f"T is {T_A.ndim}-dimensional and must be at least 1-dimensional"
506506
raise ValueError(f"{err}")
507507

508-
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]))
508+
# mstump currently only supports self-join. Therefore, the argument `n=T_A.shape[1]`
509+
# must be passed to the function `core.check_window_size`.
510+
core.check_window_size(m, max_size=min(T_A.shape[1], T_B.shape[1]), n=T_A.shape[1])
509511

510512
if include is not None:
511513
include = core._preprocess_include(include)

stumpy/scraamp.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,10 +646,15 @@ def __init__(
646646
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
647647
)
648648

649-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
650649
self._ignore_trivial = core.check_ignore_trivial(
651650
self._T_A, self._T_B, self._ignore_trivial
652651
)
652+
if self._ignore_trivial: # self-join
653+
core.check_window_size(
654+
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
655+
)
656+
else: # AB-join
657+
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
653658

654659
self._n_A = self._T_A.shape[0]
655660
self._n_B = self._T_B.shape[0]

stumpy/scrump.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,10 +905,15 @@ def __init__(
905905
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
906906
)
907907

908-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
909908
self._ignore_trivial = core.check_ignore_trivial(
910909
self._T_A, self._T_B, self._ignore_trivial
911910
)
911+
if self._ignore_trivial:
912+
core.check_window_size(
913+
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
914+
)
915+
else:
916+
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
912917

913918
self._n_A = self._T_A.shape[0]
914919
self._n_B = self._T_B.shape[0]

stumpy/stamp.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,13 +208,14 @@ def stamp(
208208
if T_B.ndim != 1: # pragma: no cover
209209
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")
210210

211-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
212-
213211
subseq_T_A = core.rolling_window(T_A, m)
214212
excl_zone = int(np.ceil(m / 2))
215213

216214
# Add exclusionary zone
217215
if ignore_trivial:
216+
core.check_window_size(
217+
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
218+
)
218219
out = [
219220
_mass_PI(
220221
subseq,
@@ -229,6 +230,7 @@ def stamp(
229230
for i, subseq in enumerate(subseq_T_A)
230231
]
231232
else:
233+
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
232234
out = [
233235
_mass_PI(
234236
subseq,

stumpy/stomp.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,13 @@ def _stomp(T_A, m, T_B=None, ignore_trivial=True):
8181
if T_B.ndim != 1: # pragma: no cover
8282
raise ValueError(f"T_B is {T_B.ndim}-dimensional and must be 1-dimensional. ")
8383

84-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
8584
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
85+
if ignore_trivial: # self-join
86+
core.check_window_size(
87+
m, max_size=min(T_A.shape[0], T_B.shape[0]), n=T_A.shape[0]
88+
)
89+
else: # AB-join
90+
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
8691

8792
n = T_A.shape[0]
8893
l = n - m + 1

stumpy/stump.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -711,18 +711,17 @@ def stump(
711711
"For multidimensional STUMP use `stumpy.mstump` or `stumpy.mstumped`"
712712
)
713713

714-
core.check_window_size(m, max_size=min(T_A.shape[0], T_B.shape[0]))
715-
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
716-
717714
n_A = T_A.shape[0]
718715
n_B = T_B.shape[0]
719716
l = n_A - m + 1
720717

718+
ignore_trivial = core.check_ignore_trivial(T_A, T_B, ignore_trivial)
721719
excl_zone = int(np.ceil(m / config.STUMPY_EXCL_ZONE_DENOM))
722-
723-
if ignore_trivial:
720+
if ignore_trivial: # self-join
721+
core.check_window_size(m, max_size=min(n_A, n_B), n=n_A)
724722
diags = np.arange(excl_zone + 1, n_A - m + 1, dtype=np.int64)
725-
else:
723+
else: # AB-join
724+
core.check_window_size(m, max_size=min(n_A, n_B))
726725
diags = np.arange(-(n_A - m + 1) + 1, n_B - m + 1, dtype=np.int64)
727726

728727
P, PL, PR, I, IL, IR = _stump(

0 commit comments

Comments
 (0)