"""Classes and functions for working with matricized sparse tensors."""
# Copyright 2024 National Technology & Engineering Solutions of Sandia,
# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
# U.S. Government retains certain rights in this software.
from __future__ import annotations
from typing import Literal, Optional, Tuple, Union
import numpy as np
from numpy_groupies import aggregate as accumarray
from scipy import sparse
import pyttb as ttb
from pyttb.pyttb_utils import (
gather_wrap_dims,
np_to_python,
tt_ind2sub,
)
[docs]
class sptenmat:
"""Store sparse tensor as a sparse matrix."""
__slots__ = ("tshape", "rdims", "cdims", "subs", "vals")
def __init__( # noqa: PLR0913
self,
subs: Optional[np.ndarray] = None,
vals: Optional[np.ndarray] = None,
rdims: Optional[np.ndarray] = None,
cdims: Optional[np.ndarray] = None,
tshape: Tuple[int, ...] = (),
copy: bool = True,
):
"""Construct a :class:`pyttb.sptenmat`.
Constructed from a set of 2D subscripts (subs)
and values (vals) along with the mappings of the row (rdims) and column
indices (cdims) and the shape of the original tensor (tshape).
If you already have an sparse tensor see :method:`pyttb.sptensor.to_sptenmat`.
Parameters
----------
subs:
Location of non-zero entries, in sptenmat.
vals:
Values for non-zero entries, in sptenmat.
rdims:
Mapping of row indices.
cdims:
Mapping of column indices.
tshape:
Shape of the original tensor.
copy:
Whether to make a copy of provided data or just reference it.
Skips error checking when just setting reference.
Examples
--------
Create an empty :class:`pyttb.sptenmat`:
>>> S = ttb.sptenmat()
>>> S # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape () with 0 nonzeros and order F
rdims = [ ] (modes of sptensor corresponding to rows)
cdims = [ ] (modes of sptensor corresponding to columns)
Create a :class:`pyttb.sptenmat` from subscripts, values, and unwrapping
dimensions:
>>> subs = np.array([[1, 6], [1, 7]])
>>> vals = np.array([[6], [7]])
>>> tshape = (4, 4, 4)
>>> S = ttb.sptenmat(\
subs,\
vals,\
rdims=np.array([0]),\
cdims=np.array([1,2]),\
tshape=tshape\
)
>>> S # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (4, 4, 4) with 2 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[1, 6] = 6
[1, 7] = 7
"""
# Empty case
if rdims is None and cdims is None:
assert (
subs is None and vals is None
), "Must provide rdims or cdims with values"
self.subs = np.array([], ndmin=2, dtype=int)
self.vals = np.array([], ndmin=2)
self.rdims = np.array([], dtype=int)
self.cdims = np.array([], dtype=int)
self.tshape: Union[Tuple[()], Tuple[int, ...]] = ()
return
if subs is None:
subs = np.array([], ndmin=2, dtype=int)
if vals is None:
vals = np.array([], ndmin=2)
n = len(tshape)
alldims = np.array([range(n)])
# Error check
rdims, cdims = gather_wrap_dims(n, rdims, cdims)
# if rdims or cdims is empty, hstack will output an array of float not int
if rdims.size == 0:
dims = cdims.copy()
elif cdims.size == 0:
dims = rdims.copy()
else:
dims = np.hstack([rdims, cdims], dtype=int)
assert len(dims) == n and (alldims == np.sort(dims)).all(), (
"Incorrect specification of dimensions, the sorted concatenation of "
"rdims and cdims must be range(len(tshape))."
)
assert subs.size == 0 or np.prod(np.array(tshape)[rdims]) >= np.max(
subs[:, 0]
), "Invalid row index."
assert subs.size == 0 or np.prod(np.array(tshape)[cdims]) >= np.max(
subs[:, 1]
), "Invalid column index."
# Sum any duplicates
newsubs = subs
newvals = vals
if vals.size == 0:
assert vals.size == 0, "Empty subs requires empty vals"
newsubs = np.array([])
newvals = np.array([])
elif copy:
# Identify only the unique indices
newsubs, loc = np.unique(subs, axis=0, return_inverse=True)
# Sum the corresponding values
# Squeeze to convert from column vector to row vector
newvals = accumarray(
loc.flatten(), np.squeeze(vals, axis=1), size=newsubs.shape[0], func=sum
)
if copy:
# Find the nonzero indices of the new values
nzidx = np.nonzero(newvals)
newsubs = newsubs[nzidx]
# None index to convert from row back to column vector
newvals = newvals[nzidx]
if newvals.size > 0:
newvals = newvals[:, None]
self.tshape = tshape
self.rdims = rdims.copy().astype(int)
self.cdims = cdims.copy().astype(int)
self.subs = newsubs
self.vals = newvals
else:
self.tshape = tshape
self.rdims = rdims
self.cdims = cdims
self.subs = newsubs
self.vals = newvals
[docs]
@classmethod
def from_array(
cls,
array: Union[sparse.coo_matrix, np.ndarray],
rdims: Optional[np.ndarray] = None,
cdims: Optional[np.ndarray] = None,
tshape: Tuple[int, ...] = (),
):
"""Construct a :class:`pyttb.sptenmat`.
Constructed from a coo_matrix
along with the mappings of the row (rdims) and column
indices (cdims) and the shape of the original tensor (tshape).
Parameters
----------
array:
Representation of sparse tensor data (sparse or dense).
rdims:
Mapping of row indices.
cdims:
Mapping of column indices.
tshape:
Shape of the original tensor.
Examples
--------
Create a :class:`pyttb.sptenmat` from a sparse matrix and unwrapping
dimensions. Infer column dimensions from row dimensions specification.
>>> data = np.array([6, 7])
>>> rows = np.array([1, 1])
>>> cols = np.array([6, 7])
>>> sparse_matrix = sparse.coo_matrix((data, (rows, cols)))
>>> tshape = (4, 4, 4)
>>> S = ttb.sptenmat.from_array(\
sparse_matrix,\
rdims=np.array([0]),\
tshape=tshape\
)
>>> S # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (4, 4, 4) with 2 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[1, 6] = 6
[1, 7] = 7
"""
vals = None
if isinstance(array, np.ndarray):
vals = np.expand_dims(array[array.nonzero()], axis=1)
elif sparse.issparse(array):
vals = np.expand_dims(array.tocoo(False).data, axis=1)
else:
raise ValueError(
f"Expected sparse matrix or array but received: {type(array)}"
)
subs = np.vstack(array.nonzero()).transpose()
return ttb.sptenmat(subs, vals, rdims, cdims, tshape)
@property
def order(self) -> Literal["F"]:
"""Return the data layout of the underlying storage."""
return "F"
def _matches_order(self, array: np.ndarray) -> bool:
"""Check if provided array matches tensor memory layout."""
if array.flags["C_CONTIGUOUS"] and self.order == "C":
return True
if array.flags["F_CONTIGUOUS"] and self.order == "F":
return True
return False
[docs]
def copy(self) -> sptenmat:
"""
Return a deep copy of the :class:`pyttb.sptenmat`.
Examples
--------
Create a :class:`pyttb.sptenmat` (ST1) and make a deep copy. Verify
the deep copy (ST3) is not just a reference (like ST2) to the original.
>>> S1 = ttb.sptensor(shape=(2, 2))
>>> S1[0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST2 = ST1
>>> ST3 = ST1.copy()
>>> ST1[0, 0] = 3
>>> ST1.to_sptensor().isequal(ST2.to_sptensor())
True
>>> ST1.to_sptensor().isequal(ST3.to_sptensor())
False
"""
return sptenmat(
self.subs,
self.vals,
self.rdims,
self.cdims,
self.tshape,
copy=True,
)
[docs]
def __deepcopy__(self, memo):
"""Return deepcopy of this sptenmat."""
return self.copy()
[docs]
def to_sptensor(self) -> ttb.sptensor:
"""Construct a :class:`pyttb.sptensor` from `:class:pyttb.sptenmat`.
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> S1 # doctest: +NORMALIZE_WHITESPACE
sparse tensor of shape (2, 2, 2) with 1 nonzeros and order F
[0, 0, 0] = 1.0
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST1 # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 2, 2) with 1 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[0, 0] = 1.0
>>> ST1.to_sptensor() # doctest: +NORMALIZE_WHITESPACE
sparse tensor of shape (2, 2, 2) with 1 nonzeros and order F
[0, 0, 0] = 1.0
"""
vals = None
subs = None
if self.subs.size > 0:
tshape = np.array(self.tshape)
rdims = tt_ind2sub(tshape[self.rdims], self.subs[:, 0])
cdims = tt_ind2sub(tshape[self.cdims], self.subs[:, 1])
subs = np.zeros(
(rdims.shape[0], rdims.shape[1] + cdims.shape[1]), dtype=int
)
subs[:, self.rdims] = rdims
subs[:, self.cdims] = cdims
vals = self.vals
return ttb.sptensor(subs, vals, self.tshape)
@property
def shape(self) -> Tuple[int, ...]:
"""
Return the shape of a :class:`pyttb.sptenmat`.
Examples
--------
>>> ttb.sptenmat().shape # empty sptenmat
()
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST1.shape
(2, 4)
"""
if self.tshape == ():
return ()
else:
m = np.prod(np.array(self.tshape)[self.rdims])
n = np.prod(np.array(self.tshape)[self.cdims])
return int(m), int(n)
[docs]
def double(self) -> sparse.coo_matrix:
"""
Convert a :class:`pyttb.sptenmat` to a COO :class:`scipy.sparse.coo_matrix`.
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> COO = ST1.double()
>>> type(COO) # doctest: +NORMALIZE_WHITESPACE
<class 'scipy.sparse._coo.coo_matrix'>
>>> COO.nnz # doctest: +NORMALIZE_WHITESPACE
1
>>> COO.toarray() # doctest: +NORMALIZE_WHITESPACE
array([[1., 0., 0., 0.],
[0., 0., 0., 0.]])
"""
if self.subs.size == 0:
return sparse.coo_matrix(self.shape)
return sparse.coo_matrix(
(self.vals.transpose()[0], self.subs.transpose()), self.shape
)
[docs]
def full(self) -> ttb.tenmat:
"""
Convert a :class:`pyttb.sptenmat` to a (dense) :class:`pyttb.tenmat`.
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST1.full() # doctest: +NORMALIZE_WHITESPACE
matrix corresponding to a tensor of shape (2, 2, 2) with order F
rindices = [ 0 ] (modes of tensor corresponding to rows)
cindices = [ 1, 2 ] (modes of tensor corresponding to columns)
data[:, :] =
[[1. 0. 0. 0.]
[0. 0. 0. 0.]]
"""
# Create empty dense tenmat
result = ttb.tenmat(
np.zeros(self.shape, order=self.order), self.rdims, self.cdims, self.tshape
)
# Assign nonzero values
result[tuple(self.subs.transpose())] = np.squeeze(self.vals)
return result
@property
def nnz(self) -> int:
"""
Number of nonzero values in the :class:`pyttb.sptenmat`.
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST1.nnz
1
"""
return len(self.vals)
[docs]
def norm(self) -> float:
"""Compute the norm of the :class:`pyttb.sptenmat`.
Frobenius norm, or square root of the sum of
squares of entries.
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST1.norm()
1.0
"""
return np.linalg.norm(self.vals).item()
[docs]
def isequal(self, other: sptenmat) -> bool:
"""
Exact equality for :class:`pyttb.sptenmat`.
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST2 = ttb.sptenmat()
>>> ST1.isequal(ST2)
False
>>> ST1.isequal(ST1)
True
"""
if not isinstance(other, ttb.sptenmat):
raise ValueError(
f"Can only compares against other sptenmat but received: {type(other)}"
)
return (
np.array_equal(self.vals, other.vals)
and np.array_equal(self.subs, other.subs)
and self.tshape == other.tshape
and np.array_equal(self.cdims, other.cdims)
and np.array_equal(self.rdims, other.rdims)
)
[docs]
def __pos__(self):
"""
Unary plus operator (+).
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> +ST1 # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 2, 2) with 1 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[0, 0] = 1.0
"""
return self.copy()
[docs]
def __neg__(self):
"""
Unary minus operator (-).
Examples
--------
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> -ST1 # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 2, 2) with 1 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[0, 0] = -1.0
"""
result = self.copy()
result.vals *= -1
return result
[docs]
def __setitem__(self, key, value): # noqa: PLR0912
"""
Subscripted assignment for the :class:`pyttb.sptenmat`.
Examples
--------
Create an empty :class:`pyttb.sptenmat`.
>>> ST = ttb.sptenmat(rdims=np.array([0]), tshape=(2, 2, 2))
>>> ST # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 4) with 0 nonzeros and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
Insert a new value into it.
>>> ST[0, 0] = 1.0
>>> ST # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 2, 2) with 1 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[0, 0] = 1.0
Update an existing value in it.
>>> ST[0, 0] = 2.0
>>> ST # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 2, 2) with 1 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[0, 0] = 2.0
"""
if not isinstance(key, tuple):
raise IndexError("Sptenmat takes two arguments as a 2D array")
if len(key) != 2:
raise IndexError(
f"Wrong number of indices. Expected 2 received: {len(key)}"
)
rsubs = key[0]
if isinstance(rsubs, slice):
rsubs = np.arange(0, self.shape[0])[rsubs]
rsubs = np.asarray(rsubs, dtype=int)
if rsubs.shape == ():
rsubs = np.array([rsubs])
csubs = key[1]
if isinstance(csubs, slice):
csubs = np.arange(0, self.shape[1])[csubs]
csubs = np.asarray(csubs, dtype=int)
if csubs.shape == ():
csubs = np.array([csubs])
if isinstance(value, (int, float, np.floating)):
value = value * np.ones((len(csubs) * len(rsubs), 1))
value = np.asarray(value)
newsubs = []
newvals = []
k = -1
# Loop over row and column indices, finding appropriate row index for (i,j)
for j in range(len(csubs)):
indxc = np.array([], dtype=int)
if self.subs.size > 0:
indxc = np.where(self.subs[:, 1] == csubs[j])[0]
for i in range(len(rsubs)):
indxr = np.array([], dtype=int)
if self.subs.size > 0:
indxr = np.where(self.subs[indxc, 0] == rsubs[i])[0]
indx = indxc[indxr]
k += 1
if indx.size == 0:
newsubs.append(np.hstack([rsubs[i], csubs[j]]))
newvals.append(value[k])
else:
self.vals[indx] = value[k]
# If there are new values to append, then add them on and sort
if len(newvals) != 0:
if self.subs.size > 0:
self.subs = np.vstack((self.subs, newsubs))
self.vals = np.vstack((self.vals, newvals))
else:
self.subs = np.vstack(newsubs)
self.vals = np.vstack(newvals)
sort_idx = np.lexsort(self.subs.transpose()[::-1])
self.subs = self.subs[sort_idx]
self.vals = self.vals[sort_idx]
[docs]
def __repr__(self):
"""Return string representation of a :class:`pyttb.sptenmat`.
Examples
--------
>>> ttb.sptenmat() # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape () with 0 nonzeros \
and order F
rdims = [ ] (modes of sptensor corresponding to rows)
cdims = [ ] (modes of sptensor corresponding to columns)
>>> S1 = ttb.sptensor(shape=(2, 2, 2))
>>> S1[0, 0, 0] = 1
>>> ST1 = S1.to_sptenmat(np.array([0]))
>>> ST1 # doctest: +NORMALIZE_WHITESPACE
sptenmat corresponding to a sptensor of shape (2, 2, 2) with 1 nonzeros \
and order F
rdims = [ 0 ] (modes of sptensor corresponding to rows)
cdims = [ 1, 2 ] (modes of sptensor corresponding to columns)
[0, 0] = 1.0
Returns
-------
str
Contains the shape, row indices (rindices), column indices (cindices)
and data as strings on different lines.
"""
s = ""
s += "sptenmat corresponding to a sptensor of shape "
if self.vals.size == 0:
s += str(np_to_python(self.shape))
else:
s += f"{np_to_python(self.tshape)!r}"
s += f" with {self.vals.size} nonzeros and order {self.order}\n"
s += "rdims = "
s += "[ " + (", ").join([str(int(d)) for d in self.rdims]) + " ] "
s += "(modes of sptensor corresponding to rows)\n"
s += "cdims = "
s += "[ " + (", ").join([str(int(d)) for d in self.cdims]) + " ] "
s += "(modes of sptensor corresponding to columns)\n"
# Stop insane printouts
if self.vals.size > 10000: # pragma: no cover
r = input("Are you sure you want to print all nonzeros? (Y/N)")
if r.upper() != "Y":
return s
# An empty ndarray with minimum dimensions still has a shape
if self.subs.size > 0:
for i in range(0, self.subs.shape[0]):
s += "\t"
s += "["
idx = self.subs[i, :]
s += str(idx.tolist())[1:]
s += " = "
s += str(self.vals[i][0])
if i < self.subs.shape[0] - 1:
s += "\n"
return s
__str__ = __repr__
if __name__ == "__main__":
import doctest # pragma: no cover
doctest.testmod() # pragma: no cover