Skip to content

base

deepdrivewe.binners.base

Binning module for WESTPA.

Binner

Bases: ABC

Binner for the progress coordinate.

Source code in deepdrivewe/binners/base.py
class Binner(ABC):
    """Binner for the progress coordinate."""

    def __init__(
        self,
        bin_target_counts: int | list[int],
        target_state_inds: int | list[int] | None = None,
    ) -> None:
        """Initialize the binner.

        Parameters
        ----------
        bin_target_counts : int | list[int]
            The target counts for each bin. If an integer is provided,
            the target counts are assumed to be the same for each bin.
        target_state_inds : int | list[int] | None
            The index of the target state. If an integer is provided, then
            there is only one target state. If a list of integers is provided,
            then there are multiple target states. If None is provided, then
            there are no target states. Default is None.
        """
        self.bin_target_counts = bin_target_counts
        self.target_state_inds = target_state_inds

    @property
    @abstractmethod
    def nbins(self) -> int:
        """The number of bins."""
        ...

    @abstractmethod
    def assign_bins(self, pcoords: np.ndarray) -> np.ndarray:
        """Assign the simulation pcoords to bins."""
        ...

    def get_bin_target_counts(self) -> list[int]:
        """Get the target counts for each bin.

        Returns
        -------
        list[int]
            The target counts for each bin.
        """
        # Check if the bin target counts is an integer
        # If so, then set the target counts for each bin to the same value
        # and set the target state bins to 0. Cache the result.
        if isinstance(self.bin_target_counts, int):
            # Create a list of the bin target counts
            bin_target_counts = [self.bin_target_counts] * self.nbins

            # If there are target states, set the target state bins to 0
            if self.target_state_inds is not None:
                # Make sure the target state indices are a list
                if isinstance(self.target_state_inds, int):
                    self.target_state_inds = [self.target_state_inds]

                # Set the target state bins to 0 since they are recycled
                for i in self.target_state_inds:
                    bin_target_counts[i] = 0

            # Cache the result
            self.bin_target_counts = bin_target_counts

        # Otherwise, return the list of bin target counts
        return self.bin_target_counts

    @property
    def labels(self) -> list[str]:
        """The bin labels for WESTPA."""
        return [f'state{i}' for i in range(self.nbins)]

    def assign(
        self,
        coords: np.ndarray,
        mask: np.ndarray | None = None,
        output: np.ndarray | None = None,
    ) -> np.ndarray | None:
        """Assign the simulations to bins.

        This API is compatible with the WESTPA Binner class.

        Parameters
        ----------
        coords : np.ndarray
            The progress coordinates to bin. Shape: (n_simulations, n_dims).
        mask : np.ndarray
            The mask to apply to skip a certain simulation (0 skips and 1
            uses the simulation). By default all simulations are used.
            Shape: (n_simulations,)
        output : np.ndarray
            The output array to store the bin assignments.
            Shape: (n_simulations,)

        Returns
        -------
        np.ndarray
            The bin assignments for each simulation (n_simulations,)
        """
        # Initialize output if not provided
        if output is None:
            output = np.empty(coords.shape[0], dtype=np.uint16)

        # Initialize the mask if not provided (use all simulations).
        # NOTE: the original code checked `mask is not None`, which
        # discarded any caller-supplied mask and left `mask` as None
        # when it was omitted.
        if mask is None:
            mask = np.ones(coords.shape[0], dtype=np.bool_)

        # Assign only the masked simulations to bin indices (in-place).
        # Entries excluded by the mask are left untouched in `output`.
        output[mask] = self.assign_bins(coords[mask])

        return output

    def pickle_and_hash(self) -> tuple[bytes, str]:
        """Pickle this mapper and calculate a hash of the result.

        Pickle this mapper and calculate a hash of the result
        (thus identifying the contents of the pickled data), returning a
        tuple ``(pickled_data, hash)``. This will raise PickleError if this
        mapper cannot be pickled, in which case code that would otherwise
        rely on detecting a topology change must assume a topology change
        happened, even if one did not.
        """
        pkldat = pickle.dumps(self, pickle.HIGHEST_PROTOCOL)
        binhash = hashlib.sha256(pkldat)
        return (pkldat, binhash.hexdigest())

    def _get_bin_assignments(
        self,
        pcoords: np.ndarray,
    ) -> dict[int, list[int]]:
        """Group simulation indices by their assigned bin index."""
        # Find the bin assignment indices
        assignments = self.assign_bins(pcoords)

        # Check that the number of assignments is the same as the simulations
        if len(assignments) != len(pcoords):
            raise ValueError(
                'Number of assignments must match the number of simulations.',
            )

        # Collect a dictionary of the bin assignments
        bin_assignments = defaultdict(list)

        # Assign the simulations to the bins
        for sim_idx, bin_idx in enumerate(assignments):
            bin_assignments[bin_idx].append(sim_idx)

        return bin_assignments

    def _get_bin_probs(
        self,
        bin_assignments: dict[int, list[int]],
        cur_sims: list[SimMetadata],
    ) -> list[float]:
        """Compute the bin statistics.

        Parameters
        ----------
        bin_assignments : dict[int, list[int]]
            A dictionary of the bin assignments. The keys are the bin
            indices and the values are the indices of the simulations
            assigned to that bin.

        cur_sims : list[SimMetadata]
            The list of current simulations.

        Returns
        -------
        list[float]
            The sum of weights in each bin (i.e., bin probabilities).
        """
        # Compute the probability of each bin by summing the weights
        bin_probs = []

        # Iterate over the bin assignments
        for sim_indices in bin_assignments.values():
            # Extract the simulations in the bin
            binned_sims = [cur_sims[i] for i in sim_indices]

            # Compute the probability of the bin
            bin_prob = sum(x.weight for x in binned_sims)

            # Append the bin probability
            bin_probs.append(bin_prob)

        return bin_probs

    def compute_iteration_metadata(
        self,
        cur_sims: list[SimMetadata],
    ) -> IterationMetadata:
        """Compute the iteration metadata using the current simulations.

        Returns
        -------
        IterationMetadata
            The iteration metadata.
        """
        # Extract the pcoords from the last frame of each simulation
        pcoords = np.array([sim.pcoord[-1] for sim in cur_sims])

        # Assign the simulations to bins
        bin_assignments = self._get_bin_assignments(pcoords)

        # Compute the bin probabilities
        bin_probs = self._get_bin_probs(bin_assignments, cur_sims)

        # Add the binner pickle and hash metadata to the iteration
        binner_pickle, binner_hash = self.pickle_and_hash()

        # Create the iteration metadata
        return IterationMetadata(
            iteration_id=cur_sims[0].iteration_id,
            binner_pickle=binner_pickle,
            binner_hash=binner_hash,
            min_bin_prob=min(bin_probs),
            max_bin_prob=max(bin_probs),
            bin_target_counts=self.get_bin_target_counts(),
        )

    def bin_simulations(
        self,
        next_sims: list[SimMetadata],
    ) -> dict[int, list[int]]:
        """Assign the simulations to bins.

        Parameters
        ----------
        next_sims : list[SimMetadata]
            The list of next simulations.

        Returns
        -------
        dict[int, list[int]]
            A dictionary of the bin assignments. The keys are the bin
            indices and the values are the indices of the simulations
            assigned to that bin.
        """
        # Extract the pcoords using the parent pcoords since
        # they have already been recycled.
        pcoords = np.array([sim.parent_pcoord for sim in next_sims])

        # Assign the simulations to bins
        bin_assignments = self._get_bin_assignments(pcoords)

        return bin_assignments

nbins abstractmethod property

nbins: int

The number of bins.

labels property

labels: list[str]

The bin labels for WESTPA.

__init__

__init__(
    bin_target_counts: int | list[int],
    target_state_inds: int | list[int] | None = None,
) -> None

Initialize the binner.

Parameters:

Name Type Description Default
bin_target_counts int | list[int]

The target counts for each bin. If an integer is provided, the target counts are assumed to be the same for each bin.

required
target_state_inds int | list[int] | None

The index of the target state. If an integer is provided, then there is only one target state. If a list of integers is provided, then there are multiple target states. If None is provided, then there are no target states. Default is None.

None
Source code in deepdrivewe/binners/base.py
def __init__(
    self,
    bin_target_counts: int | list[int],
    target_state_inds: int | list[int] | None = None,
) -> None:
    """Initialize the binner.

    Parameters
    ----------
    bin_target_counts : int | list[int]
        The target counts for each bin. If an integer is provided,
        the target counts are assumed to be the same for each bin.
    target_state_inds : int | list[int] | None
        The index of the target state. If an integer is provided, then
        there is only one target state. If a list of integers is provided,
        then there are multiple target states. If None is provided, then
        there are no target states. Default is None.
    """
    self.bin_target_counts = bin_target_counts
    self.target_state_inds = target_state_inds

assign_bins abstractmethod

assign_bins(pcoords: ndarray) -> np.ndarray

Assign the simulation pcoords to bins.

Source code in deepdrivewe/binners/base.py
@abstractmethod
def assign_bins(self, pcoords: np.ndarray) -> np.ndarray:
    """Assign the simulation pcoords to bins."""
    ...

get_bin_target_counts

get_bin_target_counts() -> list[int]

Get the target counts for each bin.

Returns:

Type Description
list[int]

The target counts for each bin.

Source code in deepdrivewe/binners/base.py
def get_bin_target_counts(self) -> list[int]:
    """Get the target counts for each bin.

    Returns
    -------
    list[int]
        The target counts for each bin.
    """
    # Check if the bin target counts is an integer
    # If so, then set the target counts for each bin to the same value
    # and set the target state bins to 0. Cache the result.
    if isinstance(self.bin_target_counts, int):
        # Create a list of the bin target counts
        bin_target_counts = [self.bin_target_counts] * self.nbins

        # If there are target states, set the target state bins to 0
        if self.target_state_inds is not None:
            # Make sure the target state indices are a list
            if isinstance(self.target_state_inds, int):
                self.target_state_inds = [self.target_state_inds]

            # Set the target state bins to 0 since they are recycled
            for i in self.target_state_inds:
                bin_target_counts[i] = 0

        # Cache the result
        self.bin_target_counts = bin_target_counts

    # Otherwise, return the list of bin target counts
    return self.bin_target_counts

assign

assign(
    coords: ndarray,
    mask: ndarray | None = None,
    output: ndarray | None = None,
) -> np.ndarray | None

Assign the simulations to bins.

This API is compatible with the WESTPA Binner class.

Parameters:

Name Type Description Default
coords ndarray

The progress coordinates to bin. Shape: (n_simulations, n_dims).

required
mask ndarray

The mask to apply to skip a certain simulation (0 skips and 1 uses the simulation). By default all simulations are used. Shape: (n_simulations,)

None
output ndarray

The output array to store the bin assignments. Shape: (n_simulations,)

None

Returns:

Type Description
ndarray

The bin assignments for each simulation (n_simulations,)

Source code in deepdrivewe/binners/base.py
def assign(
    self,
    coords: np.ndarray,
    mask: np.ndarray | None = None,
    output: np.ndarray | None = None,
) -> np.ndarray | None:
    """Assign the simulations to bins.

    This API is compatible with the WESTPA Binner class.

    Parameters
    ----------
    coords : np.ndarray
        The progress coordinates to bin. Shape: (n_simulations, n_dims).
    mask : np.ndarray
        The mask to apply to skip a certain simulation (0 skips and 1
        uses the simulation). By default all simulations are used.
        Shape: (n_simulations,)
    output : np.ndarray
        The output array to store the bin assignments.
        Shape: (n_simulations,)

    Returns
    -------
    np.ndarray
        The bin assignments for each simulation (n_simulations,)
    """
    # Initialize output if not provided
    if output is None:
        output = np.empty(coords.shape[0], dtype=np.uint16)

    # Initialize the mask if not provided (use all simulations)
    if mask is None:
        mask = np.ones(coords.shape[0], dtype=np.bool_)

    # Assign the masked simulations to bin indices (in-place)
    output[mask] = self.assign_bins(coords[mask])

    return output

pickle_and_hash

pickle_and_hash() -> tuple[bytes, str]

Pickle this mapper and calculate a hash of the result.

Pickle this mapper and calculate a hash of the result (thus identifying the contents of the pickled data), returning a tuple (pickled_data, hash). This will raise PickleError if this mapper cannot be pickled, in which case code that would otherwise rely on detecting a topology change must assume a topology change happened, even if one did not.

Source code in deepdrivewe/binners/base.py
def pickle_and_hash(self) -> tuple[bytes, str]:
    """Pickle this mapper and calculate a hash of the result.

    Pickle this mapper and calculate a hash of the result
    (thus identifying the contents of the pickled data), returning a
    tuple ``(pickled_data, hash)``. This will raise PickleError if this
    mapper cannot be pickled, in which case code that would otherwise
    rely on detecting a topology change must assume a topology change
    happened, even if one did not.
    """
    pkldat = pickle.dumps(self, pickle.HIGHEST_PROTOCOL)
    binhash = hashlib.sha256(pkldat)
    return (pkldat, binhash.hexdigest())

compute_iteration_metadata

compute_iteration_metadata(
    cur_sims: list[SimMetadata],
) -> IterationMetadata

Compute the iteration metadata using the current simulations.

Returns:

Type Description
IterationMetadata

The iteration metadata.

Source code in deepdrivewe/binners/base.py
def compute_iteration_metadata(
    self,
    cur_sims: list[SimMetadata],
) -> IterationMetadata:
    """Compute the iteration metadata using the current simulations.

    Returns
    -------
    IterationMetadata
        The iteration metadata.
    """
    # Extract the pcoords from the last frame of each simulation
    pcoords = np.array([sim.pcoord[-1] for sim in cur_sims])

    # Assign the simulations to bins
    bin_assignments = self._get_bin_assignments(pcoords)

    # Compute the bin probabilities
    bin_probs = self._get_bin_probs(bin_assignments, cur_sims)

    # Add the binner pickle and hash metadata to the iteration
    binner_pickle, binner_hash = self.pickle_and_hash()

    # Create the iteration metadata
    return IterationMetadata(
        iteration_id=cur_sims[0].iteration_id,
        binner_pickle=binner_pickle,
        binner_hash=binner_hash,
        min_bin_prob=min(bin_probs),
        max_bin_prob=max(bin_probs),
        bin_target_counts=self.get_bin_target_counts(),
    )

bin_simulations

bin_simulations(
    next_sims: list[SimMetadata],
) -> dict[int, list[int]]

Assign the simulations to bins.

Parameters:

Name Type Description Default
next_sims list[SimMetadata]

The list of next simulations.

required

Returns:

Type Description
dict[int, list[int]]

A dictionary of the bin assignments. The keys are the bin indices and the values are the indices of the simulations assigned to that bin.

Source code in deepdrivewe/binners/base.py
def bin_simulations(
    self,
    next_sims: list[SimMetadata],
) -> dict[int, list[int]]:
    """Assign the simulations to bins.

    Parameters
    ----------
    next_sims : list[SimMetadata]
        The list of next simulations.

    Returns
    -------
    dict[int, list[int]]
        A dictionary of the bin assignments. The keys are the bin
        indices and the values are the indices of the simulations
        assigned to that bin.
    """
    # Extract the pcoords using the parent pcoords since
    # they have already been recycled.
    pcoords = np.array([sim.parent_pcoord for sim in next_sims])

    # Assign the simulations to bins
    bin_assignments = self._get_bin_assignments(pcoords)

    return bin_assignments