Source code for sknano.core.analysis.periodic_kdtree

# -*- coding: utf-8 -*-
"""
==========================================================================
Periodic KD-Tree (:mod:`sknano.core.analysis.periodic_kdtree`)
==========================================================================

.. currentmodule:: sknano.core.analysis.periodic_kdtree

.. versionadded:: 0.4.0

A wrapper around scipy.spatial.kdtree to implement periodic boundary
conditions

Original source code written by Patrick Varilly, 6 Jul 2012.

Modified and included with scikit-nano v0.4.0.

"""
from __future__ import absolute_import, division, print_function, \
    unicode_literals
__docformat__ = 'restructuredtext en'

import itertools
import heapq

import numpy as np

try:
    from scipy.spatial import KDTree, cKDTree
except ImportError:
    raise ImportError('Install scipy version >= 0.13.0 to allow '
                      'nearest-neighbor queries between atoms.')

__all__ = ['PeriodicKDTree', 'PeriodicCKDTree']


def _generate_images(x, boxsize, distance_upper_bound):
    # Map x onto the canonical unit cell, then produce the relevant
    # mirror images
    xs = x - np.where(boxsize > 0, np.floor(x / boxsize) * boxsize, 0)
    m = len(x)
    images = [xs]
    for i in range(m):
        # print('boxsize[i]: {}'.format(boxsize[i]))
        if boxsize[i] > 0:
            disp = np.zeros(m)
            disp[i] = boxsize[i]

            if distance_upper_bound == np.inf:
                images = list(
                    itertools.chain.from_iterable(
                        (xi + disp, xi, xi - disp) for xi in images))
            else:
                extra_xi = []

                # Point near lower boundary, include image on upper side
                if abs(xs[i]) < distance_upper_bound:
                    extra_xi.extend(xi + disp for xi in images)

                # Point near upper boundary, include image on lower side
                if abs(boxsize[i] - xs[i]) < distance_upper_bound:
                    extra_xi.extend(xi - disp for xi in images)

                images.extend(extra_xi)

    return images


[docs]class PeriodicKDTree(KDTree): """ kd-tree for quick nearest-neighbor lookup with periodic boundaries See scipy.spatial.kdtree for details on kd-trees. Searches with periodic boundaries are implemented by mapping all initial data points to one canonical periodic image, building an ordinary kd-tree with these points, then querying this kd-tree multiple times, if necessary, with all the relevant periodic images of the query point. Note that to ensure that no two distinct images of the same point appear in the results, it is essential to restrict the maximum distance between a query point and a data point to half the smallest box dimension. """ def __init__(self, data, boxsize=None, lattice=None, center=None, leafsize=10, verbose=False): """Construct a kd-tree. Parameters ---------- data : array_like, shape (n,k) The data points to be indexed. This array is not copied, and so modifying this data will result in bogus results. boxsize : array_like, shape (k,) Size of the periodic box along each spatial dimension. A negative or zero size for dimension k means that space is not periodic along k. leafsize : positive :class:`~python:int`, optional The number of points at which the algorithm switches over to brute-force. verbose : :class:`~python:bool`, optional """ # Map all points to canonical periodic image data = np.asarray(data) boxsize = self.boxsize = np.asarray(boxsize) wrapped_data = data - \ np.where(boxsize > 0.0, (np.floor(data / boxsize) * boxsize), 0.0) self.verbose = verbose # Calculate maximum distance_upper_bound self.max_distance_upper_bound = \ np.amin(np.where(boxsize > 0, 0.5 * boxsize, np.inf)) if verbose: print('wrapped_data:\n{}'.format(wrapped_data)) print('max_distance_upper_bound: {}'.format( self.max_distance_upper_bound)) # Set up underlying kd-tree super().__init__(wrapped_data, leafsize) # The following name is a kludge to override KDTree's private method def _KDTree__query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf): # This is the internal query method, which guarantees that x # is a single point, not an array of points # # A slight complication: k could be "None", which means "return # all neighbors within the given distance_upper_bound". # Cap distance_upper_bound distance_upper_bound = np.amin([distance_upper_bound, self.max_distance_upper_bound]) # Run queries over all relevant images of x hits_list = [] for xi in _generate_images(x, self.boxsize, distance_upper_bound): hits_list.append( super()._KDTree__query(xi, k, eps, p, distance_upper_bound) ) # Now merge results if k is None: return list(heapq.merge(*hits_list)) elif k > 1: return heapq.nsmallest(k, itertools.chain(*hits_list)) elif k == 1: return [min(itertools.chain(*hits_list))] else: raise ValueError("Invalid k in periodic_kdtree._KDTree__query") # The following name is a kludge to override KDTree's private method def _KDTree__query_ball_point(self, x, r, p=2., eps=0): # This is the internal query method, which guarantees that x # is a single point, not an array of points # Cap r r = np.amin([r, self.max_distance_upper_bound]) # Run queries over all relevant images of x results = [] for xi in _generate_images(x, self.boxsize, r): results.extend( super()._KDTree__query_ball_point(xi, r, p, eps)) return results
# def query_ball_tree(self, other, r, p=2., eps=0): # raise NotImplementedError() # def query_pairs(self, r, p=2., eps=0): # raise NotImplementedError() # def count_neighbors(self, other, r, p=2.): # raise NotImplementedError() # def sparse_distance_matrix(self, other, max_distance, p=2.): # raise NotImplementedError()
[docs]class PeriodicCKDTree(cKDTree): """ Cython kd-tree for quick nearest-neighbor lookup with periodic boundaries See scipy.spatial.ckdtree for details on kd-trees. Searches with periodic boundaries are implemented by mapping all initial data points to one canonical periodic image, building an ordinary kd-tree with these points, then querying this kd-tree multiple times, if necessary, with all the relevant periodic images of the query point. Note that to ensure that no two distinct images of the same point appear in the results, it is essential to restrict the maximum distance between a query point and a data point to half the smallest box dimension. """ def __init__(self, data, boxsize=None, leafsize=10): """Construct a kd-tree. Parameters ---------- Construct a kd-tree. Parameters ---------- data : array_like, shape (n,k) The n data points of dimension k to be indexed. This array is not copied unless this is necessary to produce a contiguous array of doubles, and so modifying this data will result in bogus results. boxsize : array_like, shape (k,) Size of the periodic box along each spatial dimension. A negative or zero size for dimension k means that space is not periodic along k. leafsize : positive int The number of points at which the algorithm switches over to brute-force. """ # Map all points to canonical periodic image boxsize = self.boxsize = np.asarray(boxsize) data = np.asarray(data) wrapped_data = data - \ np.where(boxsize > 0.0, np.floor(data / boxsize) * boxsize, 0.0) # Calculate maximum distance_upper_bound self.max_distance_upper_bound = \ np.amin(np.where(boxsize > 0, 0.5 * boxsize, np.inf)) # Set up underlying kd-tree super().__init__(wrapped_data, leafsize) # Ideally, KDTree and cKDTree would expose identical query and __query # interfaces. But they don't, and cKDTree.__query is also inaccessible # from Python. We do our best here to cope. def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf): # This is the internal query method, which guarantees that x # is a single point, not an array of points # # A slight complication: k could be "None", which means "return # all neighbors within the given distance_upper_bound". # Cap distance_upper_bound distance_upper_bound = np.amin([distance_upper_bound, self.max_distance_upper_bound]) # Run queries over all relevant images of x hits_list = [] for xi in _generate_images(x, self.boxsize, distance_upper_bound): d, i = \ super().query(xi, k, eps, p, distance_upper_bound) if k > 1: hits_list.append(list(zip(d, i))) else: hits_list.append([(d, i)]) # Now merge results if k > 1: return heapq.nsmallest(k, itertools.chain(*hits_list)) elif k == 1: return [min(itertools.chain(*hits_list))] else: raise ValueError("Invalid k in periodic_kdtree._KDTree__query")
[docs] def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf): """ Query the kd-tree for nearest neighbors Parameters ---------- x : array_like, last dimension self.m An array of points to query. k : integer The number of nearest neighbors to return. eps : non-negative float Return approximate nearest neighbors; the kth returned value is guaranteed to be no further than (1+eps) times the distance to the real k-th nearest neighbor. p : float, 1<=p<=infinity Which Minkowski p-norm to use. 1 is the sum-of-absolute-values "Manhattan" distance 2 is the usual Euclidean distance infinity is the maximum-coordinate-difference distance distance_upper_bound : nonnegative float Return only neighbors within this distance. This is used to prune tree searches, so if you are doing a series of nearest-neighbor queries, it may help to supply the distance to the nearest neighbor of the most recent point. Returns ------- d : array of floats The distances to the nearest neighbors. If x has shape tuple+(self.m,), then d has shape tuple+(k,). Missing neighbors are indicated with infinite distances. i : ndarray of ints The locations of the neighbors in self.data. If `x` has shape tuple+(self.m,), then `i` has shape tuple+(k,). Missing neighbors are indicated with self.n. """ x = np.asarray(x) if np.shape(x)[-1] != self.m: raise ValueError("x must consist of vectors of length " "{:d} but has shape {!s}".format(self.m, np.shape(x))) if p < 1: raise ValueError("Only p-norms with 1<=p<=infinity permitted") retshape = np.shape(x)[:-1] if retshape != (): if k > 1: dd = np.empty(retshape+(k,), dtype=np.float) dd.fill(np.inf) ii = np.empty(retshape+(k,), dtype=np.int) ii.fill(self.n) elif k == 1: dd = np.empty(retshape, dtype=np.float) dd.fill(np.inf) ii = np.empty(retshape, dtype=np.int) ii.fill(self.n) else: raise ValueError("k must be >= 1, or None") for c in np.ndindex(retshape): hits = self.__query(x[c], k=k, eps=eps, p=p, distance_upper_bound=distance_upper_bound) if k > 1: for j in range(len(hits)): dd[c+(j,)], ii[c+(j,)] = hits[j] elif k == 1: if len(hits) > 0: dd[c], ii[c] = hits[0] else: dd[c] = np.inf ii[c] = self.n return dd, ii else: hits = self.__query(x, k=k, eps=eps, p=p, distance_upper_bound=distance_upper_bound) if k == 1: if len(hits) > 0: return hits[0] else: return np.inf, self.n elif k > 1: dd = np.empty(k, dtype=np.float) dd.fill(np.inf) ii = np.empty(k, dtype=np.int) ii.fill(self.n) for j in range(len(hits)): dd[j], ii[j] = hits[j] return dd, ii else: raise ValueError("k must be >= 1, or None")
# Ideally, KDTree and cKDTree would expose identical __query_ball_point # interfaces. But they don't, and cKDTree.__query_ball_point is also # inaccessible from Python. We do our best here to cope. def __query_ball_point(self, x, r, p=2., eps=0): # This is the internal query method, which guarantees that x # is a single point, not an array of points # Cap r r = min(r, self.max_distance_upper_bound) # Run queries over all relevant images of x results = [] for xi in _generate_images(x, self.boxsize, r): results.extend(super().query_ball_point(xi, r, p, eps)) return results
[docs] def query_ball_point(self, x, r, p=2., eps=0): """ Find all points within distance r of point(s) x. Parameters ---------- x : array_like, shape tuple + (self.m,) The point or points to search for neighbors of. r : positive float The radius of points to return. p : float, optional Which Minkowski p-norm to use. Should be in the range [1, inf]. eps : nonnegative float, optional Approximate search. Branches of the tree are not explored if their nearest points are further than ``r / (1 + eps)``, and branches are added in bulk if their furthest points are nearer than ``r * (1 + eps)``. Returns ------- results : list or array of lists If `x` is a single point, returns a list of the indices of the neighbors of `x`. If `x` is an array of points, returns an object array of shape tuple containing lists of neighbors. Notes ----- If you have many points whose neighbors you want to find, you may save substantial amounts of time by putting them in a PeriodicCKDTree and using query_ball_tree. """ x = np.asarray(x).astype(np.float) if x.shape[-1] != self.m: raise ValueError("Searching for a %d-dimensional point in a " "%d-dimensional KDTree" % (x.shape[-1], self.m)) if len(x.shape) == 1: return self.__query_ball_point(x, r, p, eps) else: retshape = x.shape[:-1] result = np.empty(retshape, dtype=np.object) for c in np.ndindex(retshape): result[c] = self.__query_ball_point(x[c], r, p, eps) return result
# def query_ball_tree(self, other, r, p=2., eps=0): # raise NotImplementedError() # def query_pairs(self, r, p=2., eps=0): # raise NotImplementedError() # def count_neighbors(self, other, r, p=2.): # raise NotImplementedError() # def sparse_distance_matrix(self, other, max_distance, p=2.): # raise NotImplementedError()