# -*- coding: utf-8 -*-
"""
==========================================================================
Periodic KD-Tree (:mod:`sknano.core.analysis.periodic_kdtree`)
==========================================================================
.. currentmodule:: sknano.core.analysis.periodic_kdtree
.. versionadded:: 0.4.0
A wrapper around scipy.spatial.kdtree to implement periodic boundary
conditions
Original source code written by Patrick Varilly, 6 Jul 2012.
Modified and included with scikit-nano v0.4.0.
"""
from __future__ import absolute_import, division, print_function, \
unicode_literals
__docformat__ = 'restructuredtext en'
import itertools
import heapq
import numpy as np
try:
from scipy.spatial import KDTree, cKDTree
except ImportError:
raise ImportError('Install scipy version >= 0.13.0 to allow '
'nearest-neighbor queries between atoms.')
__all__ = ['PeriodicKDTree', 'PeriodicCKDTree']
def _generate_images(x, boxsize, distance_upper_bound):
# Map x onto the canonical unit cell, then produce the relevant
# mirror images
xs = x - np.where(boxsize > 0, np.floor(x / boxsize) * boxsize, 0)
m = len(x)
images = [xs]
for i in range(m):
# print('boxsize[i]: {}'.format(boxsize[i]))
if boxsize[i] > 0:
disp = np.zeros(m)
disp[i] = boxsize[i]
if distance_upper_bound == np.inf:
images = list(
itertools.chain.from_iterable(
(xi + disp, xi, xi - disp) for xi in images))
else:
extra_xi = []
# Point near lower boundary, include image on upper side
if abs(xs[i]) < distance_upper_bound:
extra_xi.extend(xi + disp for xi in images)
# Point near upper boundary, include image on lower side
if abs(boxsize[i] - xs[i]) < distance_upper_bound:
extra_xi.extend(xi - disp for xi in images)
images.extend(extra_xi)
return images
[docs]class PeriodicKDTree(KDTree):
"""
kd-tree for quick nearest-neighbor lookup with periodic boundaries
See scipy.spatial.kdtree for details on kd-trees.
Searches with periodic boundaries are implemented by mapping all
initial data points to one canonical periodic image, building an
ordinary kd-tree with these points, then querying this kd-tree multiple
times, if necessary, with all the relevant periodic images of the
query point.
Note that to ensure that no two distinct images of the same point
appear in the results, it is essential to restrict the maximum
distance between a query point and a data point to half the smallest
box dimension.
"""
def __init__(self, data, boxsize=None, lattice=None, center=None,
leafsize=10, verbose=False):
"""Construct a kd-tree.
Parameters
----------
data : array_like, shape (n,k)
The data points to be indexed. This array is not copied, and
so modifying this data will result in bogus results.
boxsize : array_like, shape (k,)
Size of the periodic box along each spatial dimension. A
negative or zero size for dimension k means that space is not
periodic along k.
leafsize : positive :class:`~python:int`, optional
The number of points at which the algorithm switches over to
brute-force.
verbose : :class:`~python:bool`, optional
"""
# Map all points to canonical periodic image
data = np.asarray(data)
boxsize = self.boxsize = np.asarray(boxsize)
wrapped_data = data - \
np.where(boxsize > 0.0, (np.floor(data / boxsize) * boxsize), 0.0)
self.verbose = verbose
# Calculate maximum distance_upper_bound
self.max_distance_upper_bound = \
np.amin(np.where(boxsize > 0, 0.5 * boxsize, np.inf))
if verbose:
print('wrapped_data:\n{}'.format(wrapped_data))
print('max_distance_upper_bound: {}'.format(
self.max_distance_upper_bound))
# Set up underlying kd-tree
super().__init__(wrapped_data, leafsize)
# The following name is a kludge to override KDTree's private method
def _KDTree__query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
# This is the internal query method, which guarantees that x
# is a single point, not an array of points
#
# A slight complication: k could be "None", which means "return
# all neighbors within the given distance_upper_bound".
# Cap distance_upper_bound
distance_upper_bound = np.amin([distance_upper_bound,
self.max_distance_upper_bound])
# Run queries over all relevant images of x
hits_list = []
for xi in _generate_images(x, self.boxsize, distance_upper_bound):
hits_list.append(
super()._KDTree__query(xi, k, eps, p, distance_upper_bound)
)
# Now merge results
if k is None:
return list(heapq.merge(*hits_list))
elif k > 1:
return heapq.nsmallest(k, itertools.chain(*hits_list))
elif k == 1:
return [min(itertools.chain(*hits_list))]
else:
raise ValueError("Invalid k in periodic_kdtree._KDTree__query")
# The following name is a kludge to override KDTree's private method
def _KDTree__query_ball_point(self, x, r, p=2., eps=0):
# This is the internal query method, which guarantees that x
# is a single point, not an array of points
# Cap r
r = np.amin([r, self.max_distance_upper_bound])
# Run queries over all relevant images of x
results = []
for xi in _generate_images(x, self.boxsize, r):
results.extend(
super()._KDTree__query_ball_point(xi, r, p, eps))
return results
# def query_ball_tree(self, other, r, p=2., eps=0):
# raise NotImplementedError()
# def query_pairs(self, r, p=2., eps=0):
# raise NotImplementedError()
# def count_neighbors(self, other, r, p=2.):
# raise NotImplementedError()
# def sparse_distance_matrix(self, other, max_distance, p=2.):
# raise NotImplementedError()
[docs]class PeriodicCKDTree(cKDTree):
"""
Cython kd-tree for quick nearest-neighbor lookup with periodic boundaries
See scipy.spatial.ckdtree for details on kd-trees.
Searches with periodic boundaries are implemented by mapping all
initial data points to one canonical periodic image, building an
ordinary kd-tree with these points, then querying this kd-tree multiple
times, if necessary, with all the relevant periodic images of the
query point.
Note that to ensure that no two distinct images of the same point
appear in the results, it is essential to restrict the maximum
distance between a query point and a data point to half the smallest
box dimension.
"""
def __init__(self, data, boxsize=None, leafsize=10):
"""Construct a kd-tree.
Parameters
----------
Construct a kd-tree.
Parameters
----------
data : array_like, shape (n,k)
The n data points of dimension k to be indexed. This array is
not copied unless this is necessary to produce a contiguous
array of doubles, and so modifying this data will result in
bogus results.
boxsize : array_like, shape (k,)
Size of the periodic box along each spatial dimension. A
negative or zero size for dimension k means that space is not
periodic along k.
leafsize : positive int
The number of points at which the algorithm switches over to
brute-force.
"""
# Map all points to canonical periodic image
boxsize = self.boxsize = np.asarray(boxsize)
data = np.asarray(data)
wrapped_data = data - \
np.where(boxsize > 0.0, np.floor(data / boxsize) * boxsize, 0.0)
# Calculate maximum distance_upper_bound
self.max_distance_upper_bound = \
np.amin(np.where(boxsize > 0, 0.5 * boxsize, np.inf))
# Set up underlying kd-tree
super().__init__(wrapped_data, leafsize)
# Ideally, KDTree and cKDTree would expose identical query and __query
# interfaces. But they don't, and cKDTree.__query is also inaccessible
# from Python. We do our best here to cope.
def __query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
# This is the internal query method, which guarantees that x
# is a single point, not an array of points
#
# A slight complication: k could be "None", which means "return
# all neighbors within the given distance_upper_bound".
# Cap distance_upper_bound
distance_upper_bound = np.amin([distance_upper_bound,
self.max_distance_upper_bound])
# Run queries over all relevant images of x
hits_list = []
for xi in _generate_images(x, self.boxsize, distance_upper_bound):
d, i = \
super().query(xi, k, eps, p, distance_upper_bound)
if k > 1:
hits_list.append(list(zip(d, i)))
else:
hits_list.append([(d, i)])
# Now merge results
if k > 1:
return heapq.nsmallest(k, itertools.chain(*hits_list))
elif k == 1:
return [min(itertools.chain(*hits_list))]
else:
raise ValueError("Invalid k in periodic_kdtree._KDTree__query")
[docs] def query(self, x, k=1, eps=0, p=2, distance_upper_bound=np.inf):
"""
Query the kd-tree for nearest neighbors
Parameters
----------
x : array_like, last dimension self.m
An array of points to query.
k : integer
The number of nearest neighbors to return.
eps : non-negative float
Return approximate nearest neighbors; the kth returned value
is guaranteed to be no further than (1+eps) times the
distance to the real k-th nearest neighbor.
p : float, 1<=p<=infinity
Which Minkowski p-norm to use.
1 is the sum-of-absolute-values "Manhattan" distance
2 is the usual Euclidean distance
infinity is the maximum-coordinate-difference distance
distance_upper_bound : nonnegative float
Return only neighbors within this distance. This is used to prune
tree searches, so if you are doing a series of nearest-neighbor
queries, it may help to supply the distance to the nearest neighbor
of the most recent point.
Returns
-------
d : array of floats
The distances to the nearest neighbors.
If x has shape tuple+(self.m,), then d has shape tuple+(k,).
Missing neighbors are indicated with infinite distances.
i : ndarray of ints
The locations of the neighbors in self.data.
If `x` has shape tuple+(self.m,), then `i` has shape tuple+(k,).
Missing neighbors are indicated with self.n.
"""
x = np.asarray(x)
if np.shape(x)[-1] != self.m:
raise ValueError("x must consist of vectors of length "
"{:d} but has shape {!s}".format(self.m,
np.shape(x)))
if p < 1:
raise ValueError("Only p-norms with 1<=p<=infinity permitted")
retshape = np.shape(x)[:-1]
if retshape != ():
if k > 1:
dd = np.empty(retshape+(k,), dtype=np.float)
dd.fill(np.inf)
ii = np.empty(retshape+(k,), dtype=np.int)
ii.fill(self.n)
elif k == 1:
dd = np.empty(retshape, dtype=np.float)
dd.fill(np.inf)
ii = np.empty(retshape, dtype=np.int)
ii.fill(self.n)
else:
raise ValueError("k must be >= 1, or None")
for c in np.ndindex(retshape):
hits = self.__query(x[c], k=k, eps=eps, p=p,
distance_upper_bound=distance_upper_bound)
if k > 1:
for j in range(len(hits)):
dd[c+(j,)], ii[c+(j,)] = hits[j]
elif k == 1:
if len(hits) > 0:
dd[c], ii[c] = hits[0]
else:
dd[c] = np.inf
ii[c] = self.n
return dd, ii
else:
hits = self.__query(x, k=k, eps=eps, p=p,
distance_upper_bound=distance_upper_bound)
if k == 1:
if len(hits) > 0:
return hits[0]
else:
return np.inf, self.n
elif k > 1:
dd = np.empty(k, dtype=np.float)
dd.fill(np.inf)
ii = np.empty(k, dtype=np.int)
ii.fill(self.n)
for j in range(len(hits)):
dd[j], ii[j] = hits[j]
return dd, ii
else:
raise ValueError("k must be >= 1, or None")
# Ideally, KDTree and cKDTree would expose identical __query_ball_point
# interfaces. But they don't, and cKDTree.__query_ball_point is also
# inaccessible from Python. We do our best here to cope.
def __query_ball_point(self, x, r, p=2., eps=0):
# This is the internal query method, which guarantees that x
# is a single point, not an array of points
# Cap r
r = min(r, self.max_distance_upper_bound)
# Run queries over all relevant images of x
results = []
for xi in _generate_images(x, self.boxsize, r):
results.extend(super().query_ball_point(xi, r, p, eps))
return results
[docs] def query_ball_point(self, x, r, p=2., eps=0):
"""
Find all points within distance r of point(s) x.
Parameters
----------
x : array_like, shape tuple + (self.m,)
The point or points to search for neighbors of.
r : positive float
The radius of points to return.
p : float, optional
Which Minkowski p-norm to use. Should be in the range [1, inf].
eps : nonnegative float, optional
Approximate search. Branches of the tree are not explored if their
nearest points are further than ``r / (1 + eps)``, and branches are
added in bulk if their furthest points are nearer than
``r * (1 + eps)``.
Returns
-------
results : list or array of lists
If `x` is a single point, returns a list of the indices of the
neighbors of `x`. If `x` is an array of points, returns an object
array of shape tuple containing lists of neighbors.
Notes
-----
If you have many points whose neighbors you want to find, you may
save substantial amounts of time by putting them in a
PeriodicCKDTree and using query_ball_tree.
"""
x = np.asarray(x).astype(np.float)
if x.shape[-1] != self.m:
raise ValueError("Searching for a %d-dimensional point in a "
"%d-dimensional KDTree" % (x.shape[-1], self.m))
if len(x.shape) == 1:
return self.__query_ball_point(x, r, p, eps)
else:
retshape = x.shape[:-1]
result = np.empty(retshape, dtype=np.object)
for c in np.ndindex(retshape):
result[c] = self.__query_ball_point(x[c], r, p, eps)
return result
# def query_ball_tree(self, other, r, p=2., eps=0):
# raise NotImplementedError()
# def query_pairs(self, r, p=2., eps=0):
# raise NotImplementedError()
# def count_neighbors(self, other, r, p=2.):
# raise NotImplementedError()
# def sparse_distance_matrix(self, other, max_distance, p=2.):
# raise NotImplementedError()