Merge branch 'main' of github.com:sandialabs/pyttb

dmdunla · dmdunla · commit fbbf5545a261 · 2022-10-12T00:14:15.000-06:00
diff --git a/pyttb/ktensor.py b/pyttb/ktensor.py
@@ -1360,9 +1360,143 @@ def shape(self):
         """
         return tuple([f.shape[0] for f in self.factor_matrices])
 
-    # TODO implement
-    def score(self, other, **kwargs):
-        assert False, "Not yet implemented" # pragma: no cover
+    def score(self, other, weight_penalty=True, threshold=0.99, greedy=True):
+        """
+        Checks if two ktensor instances match except for permutation.
+
+        We define matching as follows. If A (self) and B (other) are single component
+        ktensors that have been normalized so that their weights are weights_a and
+        weights_b, then the score is defined as
+
+            score = penalty * (a1.T*b1) * (a2.T*b2) * ... * (aN.T*bN),
+
+        where the penalty is defined by the weights such that
+
+            penalty = 1 - abs(weights_a - weights_b) / max(weights_a, weights_b).
+
+        The score of multi-component ktensors is a normalized sum of the
+        scores across the best permutation of the components of A. A can have
+        more components than B --- any extra components are ignored in terms of
+        the matching score.
+
+        Parameters
+        ----------
+        other: :class:`pyttb.ktensor`
+            `ktensor` to match against
+        weight_penalty: bool
+            Flag indicating whether or not to consider the weights in the calculations.
+            Default: True
+        threshold: float
+            Score threshold for determining a match (currently unused; flag is always 1).
+            Default: 0.99
+        greedy: bool
+            Flag indicating whether or not to consider all possible matchings
+            (exponentially expensive) or just do a greedy matching. Default: true
+
+        Returns
+        -------
+        float
+            Score (between 0 and 1)
+        :class:`pyttb.ktensor`
+            Copy of `self`, which has been normalized and permuted to best match `other`
+        bool
+            Flag indicating a match according to a user-specified threshold
+        :class:`numpy.ndarray`
+            Permutation (i.e. array of indices of the components of self) of the
+            components of self that was used to best match other
+
+        Example
+        -------
+        Create two `ktensor` instances:
+
+        >>> A = ttb.ktensor.from_data(np.array([2, 1, 3]), np.ones((3,3)), np.ones((4,3)), np.ones((5,3)))
+        >>> B = ttb.ktensor.from_data(np.array([2, 4]), np.ones((3,2)), np.ones((4,2)), np.ones((5,2)))
+
+        Compute `score` using `ktensor.weights`:
+
+        >>> score,Aperm,flag,perm = A.score(B)
+        >>> print(score)
+        0.875
+        >>> print(perm)
+        [0 2 1]
+
+        Compute `score` not using `ktensor.weights`:
+
+        >>> score,Aperm,flag,perm = A.score(B,weight_penalty=False)
+        >>> print(score)
+        1.0
+        >>> print(perm)
+        [0 1 2]
+        """
+
+        if not greedy:
+            assert False, "Not yet implemented. Only greedy method is implemented currently."
+
+        if not isinstance(other, ktensor):
+            assert False, "The first input should be a ktensor"
+
+        if not (self.shape == other.shape):
+            assert False, "Size mismatch"
+
+        # Set-up
+        N = self.ndims
+        RA = self.ncomponents
+        RB = other.ncomponents
+
+        # We're matching components in A to B
+        if (RA < RB):
+            assert False, "Tensor A must have at least as many components as tensor B"
+
+        # Make sure columns of factor matrices are normalized
+        A = ttb.ktensor.from_tensor_type(self).normalize()
+        B = ttb.ktensor.from_tensor_type(other).normalize()
+
+        # Compute all possible vector-vector congruences.
+
+        # Compute every pair for each mode
+        Cbig = ttb.tensor.from_function(np.zeros, (RA,RB,N))
+        for n in range(N):
+            Cbig[:,:,n] = np.abs(A.factor_matrices[n].T @ B.factor_matrices[n])
+
+        # Collapse across all modes using the product
+        C = Cbig.collapse(np.array([2]), np.prod).double()
+
+        # Calculate penalty based on differences in the Lambda's
+        # Note that we are assuming that the lambda values are positive because the
+        # ktensors were previously normalized.
+        if weight_penalty:
+            P = np.zeros((RA, RB))
+            for ra in range(RA):
+                la = A.weights[ra]
+                for rb in range(RB):
+                    lb = B.weights[rb]
+                    if (la == 0) and (lb == 0):
+                        # if both lambda values are zero (0), they match
+                        P[ra, rb] = 1
+                    else:
+                        P[ra, rb] = 1 - (np.abs(la-lb) / np.max([np.abs(la),np.abs(lb)]))
+            C = P * C
+
+        # Option to do greedy matching
+        if greedy:
+            best_perm = -1 * np.ones((RA), dtype=np.int)
+            best_score = 0
+            for r in range(RB):
+                idx = np.argmax(C.reshape(np.prod(C.shape),order='F'))
+                ij = tt_ind2sub((RA, RB), idx)
+                best_score = best_score + C[ij[0], ij[1]]
+                C[ij[0], :] = -10
+                C[:, ij[1]] = -10
+                best_perm[ij[1]] = ij[0]
+            best_score = best_score / RB
+            flag = 1
+
+            # Rearrange the components of A according to the best matching
+            foo = np.arange(RA)
+            tf = np.in1d(foo, best_perm)
+            best_perm[RB:RA+1] = foo[~tf]
+            A.arrange(permutation=best_perm)
+            return best_score, A, flag, best_perm
 
     def symmetrize(self):
         """
diff --git a/pyttb/pyttb_utils.py b/pyttb/pyttb_utils.py
@@ -514,9 +514,9 @@ def tt_ind2sub(shape, idx):
     :class:`numpy.ndarray`
     """
     if idx.size == 0:
-        return np.array([])
+        return np.empty(shape=(0,len(shape)), dtype=int)
 
-    return np.array(np.unravel_index(idx, shape)).transpose()
+    return np.array(np.unravel_index(idx, shape, order='F')).transpose()
 
 
 def tt_subsubsref(obj, s):
@@ -575,7 +575,7 @@ def tt_sub2ind(shape, subs):
     """
     if subs.size == 0:
         return np.array([])
-    idx = np.ravel_multi_index(tuple(subs.transpose()), shape)
+    idx = np.ravel_multi_index(tuple(subs.transpose()), shape, order='F')
     return idx
 
 
diff --git a/pyttb/sptensor.py b/pyttb/sptensor.py
@@ -426,7 +426,7 @@ def extract(self, searchsubs):
             assert False, 'Invalid subscripts'
 
         # Set the default answer to zero
-        a = np.zeros(shape=(p, 1))
+        a = np.zeros(shape=(p, 1), dtype=self.vals.dtype)
 
         # Find which indices already exist and their locations
         loc = ttb.tt_ismember_rows(searchsubs, self.subs)
@@ -1112,18 +1112,20 @@ def __getitem__(self, item):
 
         Examples
         --------
-        >>> X = sptensor(np.array([[4,4,4],[2,2,1],[2,3,2]]),np.array([[3],[5],[1]]),(4,4,4))
-        >>> X[1,2,1] #<-- returns zero
-        >>> X[4,4,4] #<-- returns 3
-        >>> X[3:4,:,:] #<-- returns 1 x 4 x 4 sptensor
+        >>> X = sptensor(np.array([[3,3,3],[1,1,0],[1,2,1]]),np.array([3,5,1]),(4,4,4))
+        >>> X[0,1,0] #<-- returns zero
+        >>> X[3,3,3] #<-- returns 3
+        >>> X[2:3,:,:] #<-- returns 1 x 4 x 4 sptensor
         X = sptensor([6;16;26],[1;1;1],30);
         X([1:6]') <-- extracts a subtensor
-        X([1:6]','extract') %<-- extracts a vector of 6 elements
         """
+        # This does not work like MATLAB TTB; you must call sptensor.extract to get this functionality
+        # X([1:6]','extract') %<-- extracts a vector of 6 elements
+
         #TODO IndexError for value outside of indices
         # TODO Key error if item not in container
         # *** CASE 1: Rectangular Subtensor ***
-        if isinstance(item, tuple) and len(item) == self.ndims and item[len(item)-1] != 'extract':
+        if isinstance(item, tuple) and len(item) == self.ndims:
             # Extract the subdimensions to be extracted from self
             region = item
 
@@ -1160,7 +1162,7 @@ def __getitem__(self, item):
             # Return a single double value for a zero-order sub-tensor
             if newsiz.size == 0:
                 if vals.size == 0:
-                    a = 0
+                    a = np.array([[0]])
                 else:
                     a = vals
                 return a
@@ -1177,21 +1179,22 @@ def __getitem__(self, item):
         # Case 2: EXTRACT
 
         # *** CASE 2a: Subscript indexing ***
-        if len(item) > 1 and isinstance(item[-1], str) and item[-1] == 'extract':
-            # extract array of subscripts
-            srchsubs = np.array(item[0])
-            item = item[0]
+        if isinstance(item, np.ndarray) and len(item.shape) == 2 and item.shape[1] == self.ndims:
+            srchsubs = np.array(item)
 
        # *** CASE 2b: Linear indexing ***
         else:
             # Error checking
-            if not isinstance(item, list) and not isinstance(item, np.ndarray):
+            if isinstance(item, list):
+                idx = np.array(item)
+            elif isinstance(item, np.ndarray):
+                idx = item
+            else:
                 assert False, 'Invalid indexing'
 
-            idx = item
             if len(idx.shape) != 1:
                 assert False, 'Expecting a row index'
-            #idx=np.expand_dims(idx, axis=1)
+
             # extract linear indices and convert to subscripts
             srchsubs = tt_ind2sub(self.shape, idx)
 
diff --git a/pyttb/tensor.py b/pyttb/tensor.py
@@ -298,10 +298,10 @@ def find(self):
 
         :return:
         """
-        idx = np.where(self.data > 0)
-        subs = np.array(idx).transpose()
-        vals = self.data[idx]
-        return subs, vals[:, None]
+        idx = np.nonzero(np.ravel(self.data,order='F'))[0]
+        subs = ttb.tt_ind2sub(self.shape,idx)
+        vals = self.data[tuple(subs.T)][:,None]
+        return subs, vals
 
     def full(self):
         """
@@ -1623,7 +1623,7 @@ def __repr__(self):
                 s += str(self.data)
                 s += '\n'
                 return s
-        for i, j in enumerate(range(0, np.prod(self.shape), self.shape[-1]*self.shape[-2])):
+        for i in np.arange(np.prod(self.shape[:-2])):
             s += 'data'
             if self.ndims == 2:
                 s += '[:, :]'
diff --git a/tests/test_ktensor.py b/tests/test_ktensor.py
@@ -375,7 +375,7 @@ def test_ktensor_issymetric(sample_ktensor_2way, sample_ktensor_symmetric):
 def test_ktensor_mask(sample_ktensor_2way):
     (data, K) = sample_ktensor_2way
     W = ttb.tensor.from_data(np.array([[0, 1], [1, 0]]))
-    assert (K.mask(W) == np.array([[39], [63]])).all()
+    assert (K.mask(W) == np.array([[63], [39]])).all()
 
     # Mask too large
     with pytest.raises(AssertionError) as excinfo:
@@ -614,7 +614,43 @@ def test_ktensor_redistribute(sample_ktensor_2way):
     assert (np.array([[5, 6], [7, 8]]) == K[1]).all()
     assert (np.array([1, 1]) == K.weights).all()
 
-@pytest.mark.indevelopment
+pytest.mark.indevelopment
+def test_ktensor_score():
+    A = ttb.ktensor.from_data(np.array([2, 1, 3]), np.ones((3,3)), np.ones((4,3)), np.ones((5,3)))
+    B = ttb.ktensor.from_data(np.array([2, 4]), np.ones((3,2)), np.ones((4,2)), np.ones((5,2)))
+
+    # defaults
+    score, Aperm, flag, best_perm =  A.score(B)
+    assert score == 0.875
+    assert np.allclose(Aperm.weights, np.array([15.49193338,23.23790008,7.74596669]))
+    assert flag == 1
+    assert (best_perm == np.array([0,2,1])).all()
+
+    # compare just factor matrices (i.e., do not use weights)
+    score, Aperm, flag, best_perm =  A.score(B, weight_penalty=False)
+    assert score == 1.0
+    assert np.allclose(Aperm.weights, np.array([15.49193338,7.74596669,23.23790008]))
+    assert flag == 1
+    assert (best_perm == np.array([0,1,2])).all()
+
+    # compute score using exhaustive search
+    with pytest.raises(AssertionError) as excinfo:
+        score, Aperm, flag, best_perm =  A.score(B, greedy=False)
+    assert "Not yet implemented. Only greedy method is implemented currently." in str(excinfo)
+
+    # try to compute score with tensor type other than ktensor
+    with pytest.raises(AssertionError) as excinfo:
+        score, Aperm, flag, best_perm =  A.score(ttb.tensor.from_tensor_type(B))
+    assert "The first input should be a ktensor" in str(excinfo)
+
+    # try to compute score when ktensor dimensions do not match
+    with pytest.raises(AssertionError) as excinfo:
+        # A is 3x4x5; B is 3x4x4
+        B = ttb.ktensor.from_data(np.array([2, 4]), np.ones((3,2)), np.ones((4,2)), np.ones((4,2)))
+        score, Aperm, flag, best_perm =  A.score(B)
+    assert "Size mismatch" in str(excinfo)
+
+pytest.mark.indevelopment
 def test_ktensor_shape(sample_ktensor_2way, sample_ktensor_3way):
     (data, K0) = sample_ktensor_2way
     assert K0.shape == (2, 2)
diff --git a/tests/test_pyttb_utils.py b/tests/test_pyttb_utils.py
@@ -44,8 +44,8 @@ def test_sptensor_to_sparse_matrix():
     subs = np.array([[1, 1, 1], [1, 1, 3], [2, 2, 2], [3, 3, 3]])
     vals = np.array([[0.5], [1.5], [2.5], [3.5]])
     shape = (4, 4, 4)
-    mode0 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 7, 10, 15], [1, 1, 2, 3])))
-    mode1 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 7, 10, 15], [1, 1, 2, 3])))
+    mode0 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 13, 10, 15], [1, 1, 2, 3])))
+    mode1 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 13, 10, 15], [1, 1, 2, 3])))
     mode2 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 5, 10, 15], [1, 3, 2, 3])))
     Ynt = [mode0, mode1, mode2]
     sptensorInstance = ttb.sptensor().from_data(subs, vals, shape)
@@ -330,15 +330,17 @@ def test_tt_ind2sub_valid():
     subs = np.array([[0, 0, 0], [1, 1, 1], [3, 3, 3]])
     idx = np.array([0, 21, 63])
     shape = (4, 4, 4)
+    print(f'\nttb.tt_ind2sub(shape, idx): {ttb.tt_ind2sub(shape, idx)}')
     assert (ttb.tt_ind2sub(shape, idx) == subs).all()
 
-    subs = np.array([[0, 1], [1, 0]])
+    subs = np.array([[1, 0], [0, 1]])
     idx = np.array([1, 2])
     shape = (2, 2)
+    print(f'\nttb.tt_ind2sub(shape, idx): {ttb.tt_ind2sub(shape, idx)}')
     assert (ttb.tt_ind2sub(shape, idx) == subs).all()
 
     empty = np.array([])
-    assert (ttb.tt_ind2sub(shape, empty) == empty).all()
+    assert (ttb.tt_ind2sub(shape, empty) == np.empty(shape=(0,len(shape)), dtype=int)).all()
 
 @pytest.mark.indevelopment
 def test_tt_subsubsref_valid():
diff --git a/tests/test_sptensor.py b/tests/test_sptensor.py
diff --git a/tests/test_tensor.py b/tests/test_tensor.py