Source code for smqtk.algorithms.nn_index.hash_index._interface_hash_index

import abc

from smqtk.algorithms import SmqtkAlgorithm
from smqtk.utils import check_empty_iterable


class HashIndex(SmqtkAlgorithm):
    """
    Specialized ``NearestNeighborsIndex`` for indexing unique hash codes
    (bit-vectors) in memory (numpy arrays) using the hamming distance metric.

    Implementations of this interface cannot be used in place of something
    requiring a ``NearestNeighborsIndex`` implementation due to the speciality
    of this interface.

    Only unique bit vectors should be indexed. The ``nn`` method should not
    return the same bit vector more than once for any query.

    The public methods (``build_index``, ``update_index``,
    ``remove_from_index``, ``nn``) perform shared input/state checks and then
    delegate to the abstract, underscore-prefixed methods that sub-classes
    implement.
    """

    def __len__(self):
        """
        :return: Number of elements in this index (same as ``count()``).
        :rtype: int
        """
        return self.count()

    @staticmethod
    def _empty_iterable_exception():
        """
        Create the exception instance to be thrown when no hash vectors are
        provided to ``build_index``/``update_index``/``remove_from_index``.

        :return: ValueError instance to be thrown.
        :rtype: ValueError
        """
        return ValueError("No hash vectors in provided iterable.")

    def build_index(self, hashes):
        """
        Build the index with the given hash codes (bit-vectors).

        Subsequent calls to this method should rebuild the current index. This
        method shall not add to the existing index nor raise an exception, so
        as to protect the current index.

        :raises ValueError: No data available in the given iterable.

        :param hashes: Iterable of hash vectors to build the index over.
        :type hashes: collections.Iterable[numpy.ndarray[bool]]

        """
        # The emptiness check and the hand-off to ``_build_index`` are both
        # delegated to ``check_empty_iterable`` (imported from smqtk.utils),
        # which is given the exception to raise for an empty iterable.
        check_empty_iterable(hashes, self._build_index,
                             self._empty_iterable_exception())

    def update_index(self, hashes):
        """
        Additively update the current index with the one or more hash vectors
        given.

        If no index exists yet, a new one should be created using the given
        hash vectors.

        :raises ValueError: No data available in the given iterable.

        :param hashes: Iterable of numpy boolean hash vectors to add to this
            index.
        :type hashes: collections.Iterable[numpy.ndarray[bool]]

        """
        # Same delegation pattern as ``build_index``, targeting
        # ``_update_index``.
        check_empty_iterable(hashes, self._update_index,
                             self._empty_iterable_exception())

    def remove_from_index(self, hashes):
        """
        Partially remove hashes from this index.

        :param hashes: Iterable of numpy boolean hash vectors to remove from
            this index.
        :type hashes: collections.Iterable[numpy.ndarray[bool]]

        :raises ValueError: No data available in the given iterable.
        :raises KeyError: One or more hashes provided do not match any stored
            hashes.

        """
        # Same delegation pattern as ``build_index``, targeting
        # ``_remove_from_index``.
        check_empty_iterable(hashes, self._remove_from_index,
                             self._empty_iterable_exception())

    def nn(self, h, n=1):
        """
        Return the nearest `N` neighbor hash codes as bit-vectors to the given
        hash code bit-vector.

        Distances are in the range [0,1] and are the percent different each
        neighbor hash is from the query, based on the number of bits contained
        in the query (normalized hamming distance).

        :raises ValueError: Current index is empty.

        :param h: Hash code to compute the neighbors of. Should be the same
            bit length as indexed hash codes.
        :type h: numpy.ndarray[bool]

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N hash codes and a tuple of the distance
            values to those neighbors.
        :rtype: (tuple[numpy.ndarray[bool]], tuple[float])

        """
        # Only check for count because we're no longer dealing with descriptor
        # elements.
        if not self.count():
            raise ValueError("No index currently set to query from!")
        return self._nn(h, n)

    @abc.abstractmethod
    def count(self):
        """
        :return: Number of elements in this index.
        :rtype: int
        """

    @abc.abstractmethod
    def _build_index(self, hashes):
        """
        Internal method to be implemented by sub-classes to build the index
        with the given hash codes (bit-vectors).

        Subsequent calls to this method should rebuild the current index. This
        method shall not add to the existing index nor raise an exception, so
        as to protect the current index.

        :param hashes: Iterable of hash vectors to build the index over.
        :type hashes: collections.Iterable[numpy.ndarray[bool]]

        """

    @abc.abstractmethod
    def _update_index(self, hashes):
        """
        Internal method to be implemented by sub-classes to additively update
        the current index with the one or more hash vectors given.

        If no index exists yet, a new one should be created using the given
        hash vectors.

        :param hashes: Iterable of numpy boolean hash vectors to add to this
            index.
        :type hashes: collections.Iterable[numpy.ndarray[bool]]

        """

    @abc.abstractmethod
    def _remove_from_index(self, hashes):
        """
        Internal method to be implemented by sub-classes to partially remove
        hashes from this index.

        :param hashes: Iterable of numpy boolean hash vectors to remove from
            this index.
        :type hashes: collections.Iterable[numpy.ndarray[bool]]

        :raises KeyError: One or more hashes provided do not match any stored
            hashes. The index should not be modified.

        """

    @abc.abstractmethod
    def _nn(self, h, n=1):
        """
        Internal method to be implemented by sub-classes to return the nearest
        `N` neighbor hash codes as bit-vectors to the given hash code
        bit-vector.

        Distances are in the range [0,1] and are the percent different each
        neighbor hash is from the query, based on the number of bits contained
        in the query (normalized hamming distance).

        When this internal method is called, we have already checked that our
        index is not empty.

        :param h: Hash code to compute the neighbors of. Should be the same
            bit length as indexed hash codes.
        :type h: numpy.ndarray[bool]

        :param n: Number of nearest neighbors to find.
        :type n: int

        :return: Tuple of nearest N hash codes and a tuple of the distance
            values to those neighbors.
        :rtype: (tuple[numpy.ndarray[bool]], tuple[float])

        """