Source code for tts.transition_tube_sampling

from typing import Iterable, Sequence
import warnings

import numpy as np
import scipy.constants as cs
from scipy.interpolate import CubicSpline
from scipy.linalg import orth


[docs]class NormalModes:
    """A representation of normal modes.

    Provided a Hessian matrix, calculate normal mode vectors and corresponding frequencies,
    optionally filtering out translational and rotational degrees of freedom or unstable modes.

    Normal modes and frequencies are made available in mass-weighted atomic units.
    """

[docs]    @classmethod
    def from_cp2k(cls, fn_in: str, n_atoms: int, masses: np.ndarray = None):
        """Read a CP2K Hessian file and return a NormalModes object."""
        hessian = load_cp2k_Hessian(fn_in, n_atoms)
        return cls(hessian, masses)

[docs]    @classmethod
    def from_ase(cls, hessian_ase: np.ndarray, masses: np.ndarray):
        """Take an ASE Hessian, convert units, mass-weigh and return a NormalModes object."""
        hessian = convert_ASE_hessian(hessian_ase, masses)
        return cls(hessian, masses)

    def __init__(self, hessian: np.ndarray, masses: np.ndarray = None):
        """Store the Hessian matrix to prepare for normal modes calculation.

        To obtain modes and frequencies, `update_normal_modes()` needs to be called by hand after construction.

        Arguments:
            hessian: 3N x 3N mass-weighted cartesian Hessian matrix in atomic units
            masses: N masses, one for each atom, in atomic mass constant units (where carbon is ~12)
        """

        # store the Hessian
        self.hessian = hessian

        # store masses
        self.masses = masses

        # normal modes haven't been calculated yet
        self._modes = None

    @property
    def modes(self):
        """Normal mode vectors in atomic units."""
        if self._modes is None:
            raise Exception('Normal modes have not been computed yet.')
        return self._modes.copy()

    @property
    def frequencies(self):
        """Normal mode frequencies in atomic units."""
        if self._modes is None:
            raise Exception('Normal modes have not been computed yet.')
        return self._frequencies.copy()

    @property
    def wavenumbers(self):
        """Normal mode wavenumbers in spectroscopic units (cm^-1)."""
        if self._modes is None:
            raise Exception('Normal modes have not been computed yet.')

        # convert a.u. to SI units (Hz)
        omega = self._frequencies * cs.physical_constants['atomic unit of energy'][0] / cs.hbar

        # convert angular frequency to linear
        nu = omega / (2 * np.pi)

        # convert `nu` to wavenumbers in cm^-1
        wavenumbers = nu / cs.c
        wavenumbers *= 1e-2
        return wavenumbers

[docs]    def update_normal_modes(
        self,
        symmetrize: bool = False,
        order_SP: int = 0,
        discard_imag_freq: bool = True,
        eps_freq: float = 5.0e-4,
        n_TR: int = 6
    ) -> None:
        """Diagonalize the Hessian to get normal modes and their frequencies.

        Optionally also filter out translational and rotational degrees of freedom.

        Arguments:
            symmetrize: activate symmetrization of the Hessian

            order_SP: expected saddle point order, number of finite imaginary frequencies at the configuration

            discard_imag_freq: discard unstable modes when `order_SP` > 0

            eps_freq: numerical threshold to check frequencies in a.u., 5e-4 ~ 100 cm^-1.

            n_TR: how many modes to filter to remove rotations and translations
        """

        # use a symmetrized Hessian if requested
        if symmetrize:
            hessian = 0.5 * (self.hessian + self.hessian.T)
        else:
            hessian = self.hessian

        # diagonalize
        ev, modes = np.linalg.eigh(hessian)

        # get frequencies from `ev` (omega^2), possibly imaginary
        frequencies = np.sqrt(ev.astype('complex'))

        # check that the number of finite imaginary frequencies checks out
        n_imag = (np.imag(frequencies) > eps_freq).sum()
        if n_imag != order_SP:
            warnings.warn(f'Number of imaginary frequencies ({n_imag}) differs from the expected SP order ({order_SP})')

        # transpose modes so that they're ordered along the first axis
        modes = modes.T

        # filter modes as requested
        mask = np.ones(frequencies.size, dtype='bool')
        if discard_imag_freq:
            mask[:order_SP] = False
        mask[order_SP:order_SP+n_TR] = False
        frequencies = frequencies[mask]
        modes = modes[mask]

        # lift mass weighing from modes if masses are assigned
        if self.masses is not None:
            amu = cs.physical_constants['atomic mass constant'][0]
            mass_atomic = cs.physical_constants['atomic unit of mass'][0]
            amu_to_me = amu / mass_atomic
            masses = self.masses * amu_to_me
            modes = modes / np.sqrt(np.repeat(masses, 3))

        # recast remaining real frequencies after filtering as real numbers
        frequencies = np.real(frequencies)

        # store the settings and results
        self._symmetrize = symmetrize
        self._n_TR = n_TR
        self._order_SP = order_SP
        self._discard_imag_freq = discard_imag_freq
        self._eps_freq = eps_freq
        self._modes = modes.copy()
        self._frequencies = frequencies.copy()


def load_cp2k_Hessian(fn_in: str, n_atoms: int, correct_CP2K_factor: bool = True) -> np.ndarray:
    """Load the raw Hessian matrix from CP2K output.

    Arguments:
       fn_in: Path to the CP2K log file
       n_atoms: Number of atoms in the studied molecule
       symmetrize: Whether to symmetrize the Hessian after reading
       correct_CP2K_factor: remove CP2K redundant multiplicative 1e6 factor from the Hessian, so that it is in a.u.

    Returns:
       Hessian matrix as a NumPy array in atomic units
    """

    # variables such as stride of data, regular expressions and collectors
    ndof = 3 * n_atoms
    stride = ndof + 2
    begin_regex = 'Hessian in cartesian coordinates'
    end_regex = 'Cartesian Low frequencies'

    # read all the lines of the log file, put them in a list
    with open(fn_in) as f_in:
        lines = f_in.readlines()

    # locate the beginning of the Hessian section
    i_start = None
    for i, line in enumerate(lines):
        if begin_regex in line:
            i_start = i + 3
            break
    if i_start is None:
        raise ValueError('Start of Hessian section not found.')

    # locate the end of the Hessian section
    i_end = None
    for i, line in enumerate(lines):
        if end_regex in line:
            i_end = i - 1
            break
    if i_end is None:
        raise ValueError('End of Hessian section not found.')

    # check that the Hessian section is not empty
    if i_end <= i_start:
        raise ValueError('Hessian section is empty.')

    # read in Hessian data
    hessian = np.empty((ndof, 0))
    for i in range(i_start, i_end, stride):
        data = []
        for j in range(i, i+stride-2):
            data.extend(lines[j].split()[2:])

        data = np.array(data, dtype=float).reshape(ndof, int(len(data) / ndof))

        hessian = np.append(hessian, data, axis=1)

    # check resulting shape is as expected
    assert hessian.shape == (ndof, ndof), 'Hessian matrix does not have the right shape of (3*n_atoms, 3*n_atoms).'

    if correct_CP2K_factor:
        hessian *= 1e-6

    return hessian


def convert_ASE_hessian(hessian_ase: np.ndarray, masses: np.ndarray) -> np.ndarray:
    """Convert the ASE-generated Hessian matrix into mass-weighted atomic units.

    Arguments:
        ase_hessian: 2D symmetric hessian matrix in eV * Angstrom**-2
        masses: masses for each atom ordered as in `pos` in atomic mass constant units

    Returns:
        hessian: 2D symmetric Hessian in E_h * a_0**-2 * m_e**-1
    """

    # constants
    e_h = cs.physical_constants['atomic unit of energy'][0]
    eV = cs.physical_constants['electron volt'][0]
    a_0 = cs.physical_constants['atomic unit of length'][0]
    angstrom = 1e-10
    amu = cs.physical_constants['atomic mass constant'][0]
    m_e = cs.physical_constants['atomic unit of mass'][0]

    # convert unit
    hessian = hessian_ase
    hessian *= (eV / e_h)
    hessian /= (angstrom / a_0)**2

    # define auxiliary mass matrix m_ij = sqrt(m_i)*sqrt(m_j)
    aux_mass_matrix = np.outer(np.repeat(masses, 3), np.repeat(masses, 3))

    # mass-weigh the hessian
    hessian /= (np.sqrt(aux_mass_matrix) * amu / m_e)

    return hessian


def rho_harmonic(q: np.ndarray, frequency: float, beta: float) -> np.ndarray:
    """Calculate the harmonic thermal density.

    For a given harmonic mode with frequency `frequency` and inverse temperature `beta`,
    calculate the thermal probability density along the values of the coordinate `q`.
    This can be used in the classical case or in the quantum case with an effective inverse temperature.
    Be careful about consistent units.

    Arguments:
        q: 1D array of coordinate values
        frequency: frequency of the mode
        beta: inverse temperature

    Returns:
        1D array of the density values
    """

    return np.sqrt(0.5 * beta * frequency**2 / np.pi) * np.exp(-0.5 * beta * frequency**2 * q**2)


[docs]class NormalModeSampling:
    """Sample geometries based on normal modes.

    Generate geometry samples around a single minimum or around a minimum energy path.
    The weight of the sampling is uniform along the MEP between the endpoints.
    Perpendicular to the MEP and beyong the endpoints, the weighting is thermal, based on either
    classical or quantum thermal probability densities.

    Written by Krystof Brezina in the group of Ondrej Marsalek at the Charles University in Prague, 2023.
    If you use this code, please cite Brezina K. et al. JCTC, 19(19), 6589-6604, 2023.
    """

    def __init__(self, positions: np.ndarray, normal_modes: dict):
        """
        Arguments:
            positions: Atomic positions of all images in atomic units (Bohr radii)

            normal_modes: Dictionary of `NormalModes` objects with keys being
            the relevant indices of images that the normal modes correspond to
        """

        # store the positions and normal modes
        self._positions = positions
        self._normal_modes = normal_modes

        # set MEP length
        self._number_of_replicas = len(self._positions)

        # check that we have at least one image and that we have enough modes
        assert self._number_of_replicas >= 1
        if self._number_of_replicas == 1:
            assert len(self._normal_modes) == 1
        else:
            assert len(self._normal_modes) >= 2

        # indices of images that have normal modes available, sorted by increasing index
        idx_nm = np.array(sorted(list(self._normal_modes.keys())), dtype=int)
        self._idx_nm = idx_nm

        # If we are sampling around an MEP, rather than just a single minimum,
        # prepapre MEP parametrization and interpolation.
        if self._number_of_replicas > 1:
            # get the physical length of mep in a0 and parametrize the MEP by dimensionless xi
            d_vec = np.diff(positions, axis=0)
            d_norm = np.linalg.norm(d_vec, axis=(2,1))
            d_norm = np.append(0, d_norm)
            d = np.cumsum(d_norm)
            self._mep_separations = d
            self._mep_length = d[-1]

            xi = d / (d[-1] - d[0])
            self._xi = xi

            # spline the MEP
            self._spline_mep = CubicSpline(x=self._xi, y=self._positions, axis=0)

[docs]    def get_samples(
        self,
        temperature: float,
        sampling_mode: str,
        mep_density: float,
        match_density_at_minimum: bool = False,
        n_points_min: int = None
    ) -> dict:
        """Perform thermal sampling around an MEP or a minimum.

        This is the main interface of this class.

        Arguments:
            temperature: temperature in Kelvin for the thermal sampling

            sampling_mode: 'classical' or 'quantum'; select whether effective quantum temperatures
            should be calculated for each mode

            mep_density: linear density of points per unit length (a0) along the MEP

            match_density_at_minimum: if True, the density of points at the minimum is matched 
            with the MEP density without the need to specify `n_points_min`

            n_points_min: number of points to sample at the minimum

        Returns:
            A dictionary of:

            xis: values of xi on the MEP from which the samples originate

            distortions: generated thermal distortions, array of shape (number of distortions, number of atoms, 3)

            labels: integer labels which denote the origin of each distortion, array of shape (number of distortions)
        """
        # check that (only) one of `n_points_min` and `match_density_at_minimum` is specified
        if match_density_at_minimum is False and n_points_min is None:
            raise ValueError('Either `n_points_min` or `match_density_at_minimum` must be specified.')

        # check that we are passing only one of `n_points_min` and `match_density_at_minimum`
        if match_density_at_minimum is True and n_points_min is not None:
            raise ValueError('Only one of `n_points_min` and `match_density_at_minimum` can be used.')

        # set up relevant objects
        idx_nm = self._idx_nm
        xis = []
        distortions = []
        labels = []
        i_label = 0

        # recalculate the mep density to get the density per dimensionless xi 
        scaled_mep_density = mep_density * self._mep_separations[idx_nm[-1]]

        # start sampling from the first minimum geometry
        beta = self._calculate_beta(idx_nm[0], temperature, sampling_mode)
        if match_density_at_minimum:
            n_points_min = self._calculate_minimum_number_of_points(idx=idx_nm[0], density=mep_density, temperature=temperature, beta=beta)
        d_init = self._generate_distortions(
            0,
            np.repeat(self._positions[idx_nm[0]][np.newaxis, ...], n_points_min, axis=0),
            beta
        )

        # if we only have one image, keep all the thermal samples around the image and finish
        if self._number_of_replicas == 1:
            distortions.extend(d_init)
            labels.extend([i_label] * len(d_init))
            xis = None

        # if we have more than one image, then much more stuff needs to be done
        else:
            xi_mep = self._xi
            # filter the thermally sampled minimum to get only points away from the MEP
            d_init = self._process_thermal_samples(d_init, [xi_mep[idx_nm[0]]], operation='filter')
            xis.extend([0.0] * len(d_init))
            distortions.extend(d_init)
            labels.extend([i_label] * len(d_init))
            i_label += 1

            # main loop over the MEP intervals
            for n in range(len(idx_nm[:-1])):

                # inner loop inside each interval
                for i, distribution in enumerate(('cos^2', 'sin^2')):

                    # set up relevant indices: left, right and from where modes are taken in an iteration
                    i_nm_first = idx_nm[n]
                    i_nm_last = idx_nm[n+1]
                    i_nm_modes = idx_nm[n+i]

                    # perform sampling with the chosen parameters
                    xi = random_square_harmonic(distribution, scaled_mep_density, xi_mep[i_nm_first], xi_mep[i_nm_last])
                    beta = self._calculate_beta(i_nm_modes, temperature, sampling_mode)
                    d_mep = self._generate_distortions(i_nm_modes, self._spline_mep(xi), beta)
                    d_proj = self._process_thermal_samples(d_mep, xi, operation='project')  # type: ignore
                    xis.extend(xi)
                    distortions.extend(d_proj)
                    labels.extend([i_label] * len(d_proj))
                    i_label += 1

            # if MEP has modes assigned to the last point, we have to add thermal sampling there, too
            if idx_nm[-1] == (self._number_of_replicas - 1):
                beta = self._calculate_beta(idx_nm[-1], temperature, sampling_mode)
                if match_density_at_minimum:
                    n_points_min = self._calculate_minimum_number_of_points(idx=idx_nm[-1], density=mep_density, temperature=temperature, beta=beta)
                d_fin = self._generate_distortions(
                    idx_nm[-1],
                    np.repeat(self._positions[-1][np.newaxis, ...], n_points_min, axis=0),
                    beta
                )
                d_fin = self._process_thermal_samples(d_fin, [xi_mep[idx_nm[-1]]], operation='filter')
                xis.extend([xi_mep[idx_nm[-1]]] * len(d_fin))
                distortions.extend(d_fin)
                labels.extend([i_label] * len(d_fin))

        samples = {
            'xis': np.array(xis),
            'distortions': np.array(distortions),
            'labels': np.array(labels)
        }

        return samples

    def _process_thermal_samples(self, distortions: Sequence, xi: Sequence, operation: str) -> list:
        """Perform post-generation modification of thermal distortions, either filtering or projecting

        Arguments:
            distortions: sequence of distorted geometries
            xi: sequence of xi values corresponding to the original configuration before distortion
            operation: 'filter' of 'project'
                'filter': only applied at the MEP edges to keep points from the minimum away from MEP
                'project': used along the MEP to project out the direction along the MEP from the distortions

        Returns:
            d_new: modified distortions
        """

        # Note that the `Sequence` type hints abouve might raise some warnings, but it's the sensible thing to do.
        # An issue has been open for... a few years now:
        # https://github.com/numpy/numpy/issues/2776

        # extract stuff from `self` - get coordinates alon MEP and correpsonding tangent vectors
        references = self._spline_mep(xi)
        tangents = self._spline_mep(xi, nu=1)

        # extend `reference` and `tangents` if we only have one for filtering
        # flip derivative if we're at the end of MEP at xi = 1.0
        if operation == 'filter':
            assert len(xi) == 1
            if xi[0] == 1.0:
                tangents *= -1.0
            references = np.repeat(references, len(distortions), axis=0)
            tangents = np.repeat(tangents, len(distortions), axis=0)

        # loop over all distortions, filter or project
        # TODO: possibly broadcast numpy arrays
        d_new = []
        for r, t, d in zip(references, tangents, distortions):
            disp = d - r
            t /= np.linalg.norm(t)
            proj = (disp * t).sum(axis=1).sum(axis=0)
            if operation == 'filter':
                if proj < 0.0:
                    d_new.append(d)
            elif operation == 'project':
                d_proj = d - (t * proj)
                d_new.append(d_proj)
            else:
                raise ValueError('Unknown mode of operation.')

        return d_new

    def _calculate_beta(self, idx: int, temp: float, sampling: str = 'quantum'):
        """Calculate thermal inverse temperature for each mode, classical or quantum.

        Arguments:
            idx: image index, key for the `self._normal_modes` dictionary to select a set of normal modes
            temp: reference temperature in Kelvin
            sampling: either 'classical' or 'quantum'

        Returns:
            beta: array of calculated effective temperatures
        """
        hartree = cs.physical_constants['atomic unit of energy'][0]
        # derive inverse temperatures for the appropriate sampling mode
        if sampling == 'quantum':
            # calculate effective quantum temperatures for each mode (beta* is frequency-dependent)
            if temp > 0.0:
                beta = 1.0 / (cs.k * temp)
                beta *= hartree
                beta = 2 / self._normal_modes[idx].frequencies * np.tanh(beta * self._normal_modes[idx].frequencies / 2)
            elif temp == 0.0:
                # lim beta -> infinity (or as T -> 0) tanh(beta) = 1
                beta = 2 / self._normal_modes[idx].frequencies
            else:
                raise ValueError('Reference classical temperature for quantum sampling must be non-negative.')
        elif sampling == 'classical':
            # stay with just one classical beta, repeat for each frequency to be consistent with the quantum case
            if temp > 0.0:
                beta = 1.0 / (cs.k * temp)
                beta *= hartree
                beta = np.repeat(beta, self._normal_modes[idx].frequencies.size)
            else:
                raise ValueError('Temperature for classical sampling must be positive.')
        else:
            raise ValueError('Unknown mode of sampling.')

        return beta

    def _generate_distortions(self, idx: int, geometries: Iterable, beta: np.ndarray) -> list:
        """Generate multiple distortions, return as a list."""
        distortions = [self._generate_distortion(idx, g, beta) for g in geometries]
        return distortions

    def _generate_distortion(self, idx: int, ref_pos: np.ndarray, beta: np.ndarray) -> np.ndarray:
        """Generate a random distortion from the normal mode thermal distribution.

        In the quantum case, the distribution is a Gaussian just like in the classical case,
        but an effective frequency-dependent temperature
        T* = hbar * omega / (2 * k_B) coth(hbar * omega / (2 * k_B * T))
        is used.

        Arguments:
            idx: image index, key for the `self._normal_modes` dictionary to select a set of normal modes
            ref_pos: reference atomic positions in stationary geometry, shape = (n_atoms, 3)

        Returns:
            Atomic positions of a random thermally distorted structure in nanometers.
        """

        # check `ref_pos` shape and extract number of atoms
        msg = 'Positions need to be a 2D array of shape (n_atoms, 3).'
        assert (ref_pos.ndim == 2) and (ref_pos.shape[1] == 3), msg
        n_atoms = ref_pos.shape[0]

        # reshape everything nicely
        modes = self._normal_modes[idx].modes.reshape(-1, n_atoms, 3)

        # calculate standard deviations of the thermal distributions
        sigmas = 1.0 / (np.sqrt(beta) * self._normal_modes[idx].frequencies)

        # draw random samples of normal coordinates
        q = np.random.normal(loc=0.0, scale=sigmas)

        # prepare distortion vector
        d = (modes * q[:, np.newaxis, np.newaxis]).sum(axis=0)

        # distort the reference structure
        newpositions = ref_pos + d

        return newpositions

    def _calculate_minimum_number_of_points(self, idx: int, density: float, temperature: float, beta: np.ndarray) -> int:
        """Calculate the number of points required to sample the minimum while keeping the density continuous with the MEP sampling density at the minimum.

        Arguments:
            idx: image index, key for the `self._normal_modes` dictionary to select a set of normal modes
            density: linear density of the sampling points in the MEP (per unit length) which we are matching the density of the minimum to
            temperature: temperature of the sampling in K
            beta: array of inverse temperatures, shape = (n_modes)

        Returns:
            n_points: number of points to sample the thermal minimum Gaussian to match the density of the MEP
        """

        # TODO: check that this is correct
        # warn user that this is sort of beta version - should work, but maybe does not
        warnings.warn("""Note that the density matching is a crude implementation that hasn not been tested thoroughly. 
        Use with caution.""")

        # get constants
        amu = cs.physical_constants['atomic mass constant'][0]
        m_e = cs.physical_constants['atomic unit of mass'][0]
        amu_to_me = amu / m_e

        # get key quantities from the class
        xi_min = self._xi[idx]
        masses = np.repeat(self._normal_modes[idx].masses, 3) * amu_to_me
        frequencies = self._normal_modes[idx].frequencies

        # calculate classical beta
        beta_cl = self._calculate_beta(idx=idx, temp=temperature, sampling='classical')[0]

        # get the normal mode vectors in a convenient form
        modes = self._normal_modes[idx].modes
        modes = modes * np.sqrt(masses)
        modes = modes.T

        # get the tangent vector (tau) at the minimum pointing away from the MEP
        tau = self._spline_mep(xi_min, nu=1).reshape(-1)
        if xi_min == 0.0:
            tau = tau * -1.0

        # mass-weight the tangent vector and normalize
        tau = tau * np.sqrt(masses)
        tau = tau / np.linalg.norm(tau)

        # transform tau into the normal mode basis
        tau_omega = modes.T @ tau
        tau_omega = tau_omega / np.linalg.norm(tau_omega)

        # prepare the rest of the tau-oriented basis
        perp = prepare_perp(tau_omega)
        tau_basis_omega = np.append(tau_omega[:, np.newaxis], perp, axis=1)

        # check that the tau-basis has the full rank (i.e. that `tau_omega` is not collinear with other vectors)
        # raise a non-implicit error if this is the case
        # TODO: build the `perp` preparation on random vectors rather than the fixed unit matrix:
        # like that, we can just create a new set if the rank test fails
        if np.linalg.matrix_rank(tau_basis_omega) < tau_basis_omega.shape[0]:
            raise NotImplementedError("""The tau-basis is not full rank. Please use the `n_points_min` argument
            to set the number of points to sample the minimum
            and talk to Krystof to implement random `perp` initialization.""")

        # get the normal-mode-basis diagonal Hessian
        hessian_omega = np.diag((beta / beta_cl) * frequencies**2)

        # transform the Hessian into the tau-oriented basis
        hessian_tau = tau_basis_omega.T @ hessian_omega @ tau_basis_omega

        # calculate the effective squared frequency in the tau direction
        # this uses the formula with determinants I derived - det of the full hessian divided by the det of the minor over the tau subspace
        # we must use the tau-oriented basis, since the minor is basis dependent and only works nicely in this basis
        omega_eff_sq = np.linalg.det(hessian_tau) / np.linalg.det(hessian_tau[1:, 1:])

        # calculate the effective sigma in the tau direction
        sigma_eff = 1.0 / np.sqrt(omega_eff_sq * beta_cl)

        # calculate effective mass of the tau direction
        mu = (tau**2 * masses).sum() / (tau**2).sum()

        # calculate the required number of points
        # alpha is the scaling: alpha * p(tau=0) = density, where p(tau=0) = 1 / (sqrt(2*pi) * sigma_eff)
        alpha = np.sqrt(2 * np.pi) * sigma_eff * mu**-0.5 * density
        n_points = int(alpha)

        return n_points


def random_square_harmonic(
    distribution: str = 'cos^2',
    density: float = 1000.0,
    a: float = 0.0,
    b: float = 1.0
) -> np.ndarray:
    """Generate random samples from a distribution defined by a
    square of harmonic function, either
        cos^2(pi * x / (2 * |b - a|))
    or
        sin^2(pi * x / (2 * |b - a|))
    over the interval [a, b].

    Arguments:
        distribution: 'cos^2' or 'sin^2' to decide whether to sample cosine or sine distribution
        density: required density of samples at the peak of the distribution
        a: lower bound of the independent variable
        b: upper bound of the independent variable

    Returns:
        samples: array of one-dimensional points sampled from the chosen distribution.
    """

    if b <= a:
        raise ValueError('Upper bound must be larger than lower bound.')

    N_samples = density * (b - a)
    N_samples = int(N_samples)

    # points are sampled not from `a` to `b` but from 0 to `b - a` and shifted later by `a`
    points = np.random.uniform([0, 0], [(b - a), 1.0], size=[N_samples, 2])

    if distribution == 'cos^2':
        expected_y = (np.cos(np.pi * points[:, 0] / (2 * (b - a))))**2
    elif distribution == 'sin^2':
        expected_y = (np.sin(np.pi * points[:, 0] / (2 * (b - a))))**2
    else:
        raise ValueError('Unknown distribution type.')

    is_within = points[:, 1] <= expected_y
    samples = points[:, 0][is_within]

    # shift points by a to get correct absolute positions
    samples += a

    return samples


def prepare_perp(vector):
    """Prepare the perpendicular complement (perp) to `vector`
    """
    # scipy orth works only on matrices, not 1D vectors
    assert vector.size > 2

    # initiate perp as identity matrix with the first column taken out (this will be replaced by `vector`)
    perp = np.identity(vector.shape[0])
    perp = perp[:, 1:]

    # project out the direction of `vector` of all remaining columns of `perp`
    proj = np.dot(vector, perp)
    perp -= vector[:, np.newaxis] * proj

    # orthonormalize perp using Scipy SVD
    perp = orth(perp)

    return perp