model_based_util

`Belief`

A class representing a belief in the space of a given model. It is the probability distribution over the states of the model we could be in. For example, in a 2-state POMDP, a belief of (0.5, 0.5) represents complete ignorance of which state we are in, whereas a belief of (1.0, 0.0) is the certainty of being in state 0.

The belief update function has been implemented based on the belief update defined in the paper by J. Pineau, G. Gordon, and S. Thrun, 'Point-based approximations for fast POMDP solving'.

Parameters:

Name	Type	Description	Default
`model`	`Model`	The model to which the belief applies.	required
`values`	`ndarray`	A vector of the probabilities to be in each state of the model. The probabilities must sum to 1. If not specified, it will be set to the start probabilities of the model.	`None`

Attributes:

Name	Type	Description
`model`	`Model`
`values`	`ndarray`
`bytes_repr`	`bytes`	A representation in bytes of the value of the belief

Source code in olfactory_navigation/agents/model_based_util/belief.py

class Belief:
    '''
    A class representing a belief in the space of a given model. It is the probability distribution over the states we could be in:
    eg:
        - In a 2 state POMDP: a belief of (0.5, 0.5) represents the complete ignorance of which state we are in, whereas a (1.0, 0.0) belief is the certainty to be in state 0.

    The belief update function has been implemented based on the belief update defined in the paper of J. Pineau, G. Gordon, and S. Thrun, 'Point-based approximations for fast POMDP solving'

    Belief points are compared and hashed through the byte representation of their values.
    The successors generated by update() are memoized per (action, observation) pair.


    Parameters
    ----------
    model : pomdp.Model
        The model to which the belief applies.
    values : np.ndarray, optional
        A vector of the probabilities to be in each state of the model. The probabilities must sum to 1.
        If not specified, it will be set as the start probabilities of the model.

    Attributes
    ----------
    model : pomdp.Model
    values : np.ndarray
    bytes_repr : bytes
        A representation in bytes of the value of the belief
    '''
    def __init__(self,
                 model: Model,
                 values: np.ndarray | None = None
                 ) -> None:
        # NOTE: the validations below are asserts, so they are skipped when Python runs with -O
        assert model is not None
        self.model = model

        if values is not None:
            assert values.shape[0] == model.state_count, "Belief must contain be of dimension |S|"

            # Use the array module matching where the values live
            xp = np if not gpu_support else cp.get_array_module(values)

            # The sum is only checked up to 3 decimals to tolerate floating point error
            prob_sum = xp.sum(values)
            rounded_sum = xp.round(prob_sum, decimals=3)
            assert rounded_sum == 1.0, f"States probabilities in belief must sum to 1 (found: {prob_sum}; rounded {rounded_sum})"

            self._values = values
        else:
            self._values = model.start_probabilities


    def __new__(cls, *args, **kwargs):
        '''
        Creates the instance and its caches.
        This runs for every instance, including the ones update() builds without going through __init__, so the caches always exist.
        '''
        instance = super().__new__(cls)

        instance._bytes_repr = None # Lazily filled by the bytes_repr property
        instance._successors = {} # Memoized successors, keyed by '<action>_<observation>'

        return instance


    @property
    def bytes_repr(self) -> bytes:
        '''
        A representation as bytes of a belief.
        It is computed on first access and cached afterwards.
        '''
        if self._bytes_repr is None:
            self._bytes_repr = self.values.tobytes()
        return self._bytes_repr


    def __eq__(self, other: object) -> bool:
        '''
        A way to check the equality between two belief points.
        The byte representation of each belief point is compared.
        Comparing with anything that is not a Belief returns NotImplemented, so Python falls back to its default handling instead of raising an AttributeError.
        '''
        if not isinstance(other, Belief):
            return NotImplemented
        return self.bytes_repr == other.bytes_repr


    def __hash__(self) -> int:
        '''
        Hash consistent with __eq__: two beliefs with the same byte representation share the same hash.
        Defining __eq__ alone would make the class unhashable.
        '''
        return hash(self.bytes_repr)


    @property
    def values(self) -> np.ndarray:
        '''
        An array of the probability distribution to be in each state.
        '''
        return self._values


    def update(self,
               a: int,
               o: int,
               throw_error: bool = True
               ) -> 'Belief':
        '''
        Returns a new belief based on this current belief, the most recent action (a) and the most recent observation (o).

        Successors are memoized: asking twice for the same (a, o) pair returns the same Belief object.
        Impossible beliefs (sum of probabilities of 0.0) are never memoized, so that a later call with throw_error=True still raises.

        Parameters
        ----------
        a : int
            The most recent action.
        o : int
            The most recent observation.
        throw_error : bool, default=True
            Whether the creation of an impossible belief (sum of probabilities of 0.0) will throw an error or not.
            If False, the impossible belief (left un-normalized) is returned.

        Returns
        -------
        new_belief : Belief
            An updated belief

        Raises
        ------
        ValueError
            If the updated belief is impossible and throw_error is True.
        '''
        # Check if successor exists (done first so a cache hit does no other work)
        succ_id = f'{a}_{o}'
        succ = self._successors.get(succ_id)
        if succ is not None:
            return succ

        xp = np if not gpu_support else cp.get_array_module(self._values)

        # Computing new probabilities: weight every reachable transition by the probability of its origin state...
        reachable_state_probabilities = self.model.reachable_transitional_observation_table[:,a,o,:] * self.values[:,None]
        # ... and accumulate these weights on the state each transition leads to
        new_state_probabilities = xp.bincount(self.model.reachable_states[:,a,:].flatten(), weights=reachable_state_probabilities.flatten(), minlength=self.model.state_count)

        # Normalization
        probability_sum = xp.sum(new_state_probabilities)
        is_impossible = bool(probability_sum == 0)
        if is_impossible:
            if throw_error:
                raise ValueError("Impossible belief: the sum of probabilities is 0...")
        else:
            new_state_probabilities /= probability_sum

        # Generation of new belief from new state probabilities
        # __init__ is bypassed on purpose: the values were just computed so they are not validated again
        new_belief = self.__new__(self.__class__)
        new_belief.model = self.model
        new_belief._values = new_state_probabilities

        # Remember generated successor (only if it is a valid belief)
        if not is_impossible:
            self._successors[succ_id] = new_belief

        return new_belief


    def generate_successors(self) -> list['Belief']:
        '''
        Function to generate a set of belief that can be reached for each action and observation available in the model.
        Since update() is called with its default throw_error=True, an impossible (action, observation) pair raises a ValueError.

        Returns
        -------
        successor_beliefs : list[Belief]
            The successor beliefs, ordered by action first and observation second.
        '''
        model = self.model
        return [self.update(a, o) for a in model.actions for o in model.observations]


    def random_state(self) -> int:
        '''
        Returns a random state of the model weighted by the belief probability.
        The state is drawn with the random state generator of the model (model.rnd_state).

        Returns
        -------
        rand_s : int
            A random state.
        '''
        rand_s = int(self.model.rnd_state.choice(a=self.model.states, size=1, p=self._values)[0])
        return rand_s


    @property
    def entropy(self) -> float:
        '''
        The entropy of the belief point
        '''
        xp = np if not gpu_support else cp.get_array_module(self._values)

        # entropy() handles numpy-backed values, cupy_entropy() handles the others
        return float(entropy(self._values) if xp is np else cupy_entropy(self._values))


    def plot(self, size: int = 5) -> None:
        '''
        Function to plot a heatmap of the belief distribution if the belief is of a grid model.

        Parameters
        ----------
        size : int, default=5
            The scale of the plot.
        '''
        # Plot setup
        plt.figure(figsize=(size*1.2,size))

        model = self.model.cpu_model

        # Ticks: one per cell on small grids, otherwise roughly 10 along x and 5 along y
        n_rows, n_cols = model.state_grid.shape[0], model.state_grid.shape[1]
        x_ticks = np.arange(0, n_cols, (1 if n_cols < 10 else n_cols // 10))
        y_ticks = np.arange(0, n_rows, (1 if n_rows < 5 else n_rows // 5))

        plt.xticks(x_ticks)
        plt.yticks(y_ticks)

        # Title
        plt.title('Belief (probability distribution over states)')

        # Actual plot (the values are brought back to a numpy array if needed)
        belief_values = self._values if (not gpu_support) or (cp.get_array_module(self._values) == np) else cp.asnumpy(self._values)
        grid_values = belief_values[model.state_grid]
        plt.imshow(grid_values,cmap='Blues')
        plt.colorbar()
        plt.show()

`bytes_repr` `property`

A representation as bytes of a belief.

`entropy` `property`

The entropy of the belief point

`values` `property`

An array of the probability distribution to be in each state.

`__eq__(other)`

A way to check the equality between two belief points. The byte representation of each belief point is compared.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def __eq__(self, other: object) -> bool:
    '''
    A way to check the equality between two belief points.
    The byte representation of each belief point is compared.
    Comparing with an object of an unrelated type returns NotImplemented, so Python falls back to its default handling instead of raising an AttributeError.
    '''
    if not isinstance(other, type(self)):
        return NotImplemented
    return self.bytes_repr == other.bytes_repr

`generate_successors()`

Function to generate the set of beliefs that can be reached for each action and observation available in the model.

Returns:

Name	Type	Description
`successor_beliefs`	`list[Belief]`	The successor beliefs.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def generate_successors(self) -> list['Belief']:
    '''
    Builds the list of beliefs reachable from this one by trying every action and observation available in the model.

    Returns
    -------
    successor_beliefs : list[Belief]
        One updated belief per (action, observation) pair, ordered by action first and observation second.
    '''
    model = self.model
    return [self.update(a, o) for a in model.actions for o in model.observations]

`plot(size=5)`

Function to plot a heatmap of the belief distribution if the belief is of a grid model.

Parameters:

Name	Type	Description	Default
`size`	`int`	The scale of the plot.	`5`

Source code in olfactory_navigation/agents/model_based_util/belief.py

def plot(self, size: int = 5) -> None:
    '''
    Function to plot a heatmap of the belief distribution if the belief is of a grid model.

    Parameters
    ----------
    size : int, default=5
        The scale of the plot.
    '''
    # Plot setup
    plt.figure(figsize=(size*1.2,size))

    model = self.model.cpu_model

    # Ticks: one per cell on small grids, otherwise roughly 10 along x and 5 along y
    n_rows, n_cols = model.state_grid.shape[0], model.state_grid.shape[1]
    x_ticks = np.arange(0, n_cols, (1 if n_cols < 10 else n_cols // 10))
    y_ticks = np.arange(0, n_rows, (1 if n_rows < 5 else n_rows // 5))

    plt.xticks(x_ticks)
    plt.yticks(y_ticks)

    # Title
    plt.title('Belief (probability distribution over states)')

    # Actual plot (the values are brought back to a numpy array if needed)
    belief_values = self._values if (not gpu_support) or (cp.get_array_module(self._values) == np) else cp.asnumpy(self._values)
    grid_values = belief_values[model.state_grid]
    plt.imshow(grid_values,cmap='Blues')
    plt.colorbar()
    plt.show()

`random_state()`

Returns a random state of the model weighted by the belief probability.

Returns:

Name	Type	Description
`rand_s`	`int`	A random state.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def random_state(self) -> int:
    '''
    Returns a random state of the model weighted by the belief probability.
    The state is drawn with the random state generator of the model (model.rnd_state).

    Returns
    -------
    rand_s : int
        A random state.
    '''
    # A single state is drawn, then unwrapped from the size-1 result
    rand_s = int(self.model.rnd_state.choice(a=self.model.states, size=1, p=self._values)[0])
    return rand_s

`update(a, o, throw_error=True)`

Returns a new belief based on this current belief, the most recent action (a) and the most recent observation (o).

Parameters:

Name	Type	Description	Default
`a`	`int`	The most recent action.	required
`o`	`int`	The most recent observation.	required
`throw_error`	`bool`	Whether the creation of an impossible belief (sum of probabilities of 0.0) will throw an error or not.	`True`

Returns:

Name	Type	Description
`new_belief`	`Belief`	An updated belief

Source code in olfactory_navigation/agents/model_based_util/belief.py

def update(self,
           a: int,
           o: int,
           throw_error: bool = True
           ) -> 'Belief':
    '''
    Returns a new belief based on this current belief, the most recent action (a) and the most recent observation (o).

    Successors are memoized: asking twice for the same (a, o) pair returns the same Belief object.
    Impossible beliefs (sum of probabilities of 0.0) are never memoized, so that a later call with throw_error=True still raises.

    Parameters
    ----------
    a : int
        The most recent action.
    o : int
        The most recent observation.
    throw_error : bool, default=True
        Whether the creation of an impossible belief (sum of probabilities of 0.0) will throw an error or not.
        If False, the impossible belief (left un-normalized) is returned.

    Returns
    -------
    new_belief : Belief
        An updated belief

    Raises
    ------
    ValueError
        If the updated belief is impossible and throw_error is True.
    '''
    # Check if successor exists (done first so a cache hit does no other work)
    succ_id = f'{a}_{o}'
    succ = self._successors.get(succ_id)
    if succ is not None:
        return succ

    xp = np if not gpu_support else cp.get_array_module(self._values)

    # Computing new probabilities: weight every reachable transition by the probability of its origin state...
    reachable_state_probabilities = self.model.reachable_transitional_observation_table[:,a,o,:] * self.values[:,None]
    # ... and accumulate these weights on the state each transition leads to
    new_state_probabilities = xp.bincount(self.model.reachable_states[:,a,:].flatten(), weights=reachable_state_probabilities.flatten(), minlength=self.model.state_count)

    # Normalization
    probability_sum = xp.sum(new_state_probabilities)
    is_impossible = bool(probability_sum == 0)
    if is_impossible:
        if throw_error:
            raise ValueError("Impossible belief: the sum of probabilities is 0...")
    else:
        new_state_probabilities /= probability_sum

    # Generation of new belief from new state probabilities
    # __init__ is bypassed on purpose: the values were just computed so they are not validated again
    new_belief = self.__new__(self.__class__)
    new_belief.model = self.model
    new_belief._values = new_state_probabilities

    # Remember generated successor (only if it is a valid belief)
    if not is_impossible:
        self._successors[succ_id] = new_belief

    return new_belief

`BeliefSet`

Class to represent a set of beliefs with regard to a POMDP model. Its purpose is to store the beliefs in a numpy array format and to be able to convert it to a list of Belief class objects.

Parameters:

Name	Type	Description	Default
`model`	`Model`	The model on which the beliefs apply.	required
`beliefs`	`list[Belief] or ndarray`	The actual set of beliefs.	required

Attributes:

Name	Type	Description
`model`	`Model`
`belief_array`	`ndarray`	A 2D array of shape N x S of N belief vectors.
`belief_list`	`list[Belief]`	A list of N Belief objects.

Source code in olfactory_navigation/agents/model_based_util/belief.py

class BeliefSet:
    '''
    Class to represent a set of beliefs with regard to a POMDP model.
    Its purpose is to store the beliefs in a numpy array format and to be able to convert it to a list of Belief class objects.
    Whichever of the two representations was not provided is generated lazily on first access.


    Parameters
    ----------
    model : pomdp.Model
        The model on which the beliefs apply.
    beliefs : list[Belief] or np.ndarray
        The actual set of beliefs.

    Attributes
    ----------
    model : pomdp.Model
    belief_array : np.ndarray
        A 2D array of shape N x S of N belief vectors.
    belief_list : list[Belief]
        A list of N Belief objects.
    is_on_gpu : bool
        Whether the beliefs provided are stored on the GPU.
    '''
    def __init__(self,
                 model: Model,
                 beliefs: list[Belief] | np.ndarray
                 ) -> None:
        self.model = model

        # Only one of the list / array representations is set here; the other is built on demand
        self._belief_list = None
        self._belief_array = None
        self._uniqueness_dict = None

        self.is_on_gpu = False

        if isinstance(beliefs, list):
            assert all(len(b.values) == model.state_count for b in beliefs), f"Beliefs in belief list provided dont all have shape ({model.state_count},)"
            self._belief_list = beliefs

            # Check if on gpu and make sure all beliefs are also on the gpu
            if (len(beliefs) > 0) and gpu_support and cp.get_array_module(beliefs[0].values) == cp:
                assert all(cp.get_array_module(b.values) == cp for b in beliefs), "Either all or none of the beliefs should be on the GPU, not just some."
                self.is_on_gpu = True
        else:
            assert beliefs.shape[1] == model.state_count, f"Belief array provided doesnt have the right shape (expected (-,{model.state_count}), received {beliefs.shape})"

            self._belief_array = beliefs

            # Check if array is on gpu
            if gpu_support and cp.get_array_module(beliefs) == cp:
                self.is_on_gpu = True


    @property
    def belief_array(self) -> np.ndarray:
        '''
        A matrix of size N x S containing N belief vectors. If belief set is stored as a list of Belief objects, the matrix of beliefs will be generated from them.
        '''
        if self._belief_array is None:
            xp = cp if (gpu_support and self.is_on_gpu) else np

            stacked_values = xp.array([b.values for b in self._belief_list])
            # The reshape guarantees a 2D (N, S) matrix even when the list is empty (stacking would otherwise yield shape (0,))
            self._belief_array = stacked_values.reshape((-1, self.model.state_count))
        return self._belief_array


    @property
    def belief_list(self) -> list[Belief]:
        '''
        A list of Belief objects. If the belief set is represented as a matrix of Belief vectors, the list of Belief objects will be generated from it.
        '''
        if self._belief_list is None:
            self._belief_list = [Belief(self.model, belief_vector) for belief_vector in self._belief_array]
        return self._belief_list


    def generate_all_successors(self) -> 'BeliefSet':
        '''
        Function to generate the successor beliefs of all the beliefs in the belief set.

        Returns
        -------
        all_successors : BeliefSet
            All successors of all beliefs in the belief set.
        '''
        all_successors = [successor for belief in self.belief_list for successor in belief.generate_successors()]
        return BeliefSet(self.model, all_successors)


    def update(self,
               actions: list | np.ndarray,
               observations: list | np.ndarray,
               throw_error: bool = True
               ) -> 'BeliefSet':
        '''
        Returns a new belief set where each belief of this set has been updated with its own most recent action and most recent observation.

        Parameters
        ----------
        actions : list or np.ndarray
            The most recent played actions (one per belief).
        observations : list or np.ndarray
            The most recent received observations (one per belief).
        throw_error : bool, default=True
            Whether to throw an error when attempting to generate impossible beliefs.
            If False, the impossible beliefs are left un-normalized.

        Returns
        -------
        new_belief_set : BeliefSet
            A set of updated beliefs.

        Raises
        ------
        ValueError
            If one or more updated beliefs are impossible and throw_error is True.
        '''
        # GPU support check
        xp = cp if (gpu_support and self.is_on_gpu) else np

        # Ensuring we are dealing with ndarrays
        observations = xp.array(observations)
        actions = xp.array(actions)

        # Computing reachable probabilities and states
        reachable_probabilities = (self.model.reachable_transitional_observation_table[:, actions, observations, :] * self.belief_array.T[:,:,None])
        reachable_state_per_actions = self.model.reachable_states[:, actions, :]

        # Computing new probabilities
        # Every belief gets its own range of bins (offset of i * |S| for belief i) so a single bincount handles all beliefs at once
        flatten_offset = xp.arange(len(observations))[:,None] * self.model.state_count
        flat_shape = (len(observations), (self.model.state_count * self.model.reachable_state_count))

        a = reachable_state_per_actions.swapaxes(0,1).reshape(flat_shape)
        w = reachable_probabilities.swapaxes(0,1).reshape(flat_shape)

        a_offs = a + flatten_offset
        new_probabilities = xp.bincount(a_offs.ravel(), weights=w.ravel(), minlength=a.shape[0]*self.model.state_count).reshape((-1,self.model.state_count))

        # Normalization (impossible beliefs, summing to 0, are skipped to avoid dividing by 0)
        probability_sum = xp.sum(new_probabilities, axis=1)
        if throw_error and xp.any(probability_sum == 0.0):
            raise ValueError('One or more belief is impossible, (ie the sum of the probability distribution is 0)')
        non_zero_mask = probability_sum != 0
        new_probabilities[non_zero_mask] /= probability_sum[non_zero_mask,None]

        return BeliefSet(self.model, new_probabilities)


    @property
    def unique_belief_dict(self) -> dict:
        '''
        A dictionary of unique belief points with the keys being the byte representation of these belief points.
        It is built on first access and cached afterwards.
        '''
        if self._uniqueness_dict is None:
            self._uniqueness_dict = {belief.bytes_repr: belief for belief in self.belief_list}
        return self._uniqueness_dict


    def union(self, other_belief_set: 'BeliefSet') -> 'BeliefSet':
        '''
        Function to make the union between two belief set objects.
        Beliefs sharing the same byte representation are kept only once; the new set is attached to the model of this set.

        Parameters
        ----------
        other_belief_set : BeliefSet
            The other belief set to make the union with

        Returns
        -------
        new_belief_set : BeliefSet
            A new, combined, belief set
        '''
        # Deduplication
        combined_uniqueness_dict = self.unique_belief_dict | other_belief_set.unique_belief_dict

        # Generation of new set
        new_belief_set = BeliefSet(self.model, list(combined_uniqueness_dict.values()))
        new_belief_set._uniqueness_dict = combined_uniqueness_dict # Already known, no need to rebuild it later

        return new_belief_set


    def __len__(self) -> int:
        '''
        The amount of beliefs in the set, read from whichever representation is available.
        '''
        return len(self._belief_list) if self._belief_list is not None else self._belief_array.shape[0]


    @property
    def entropies(self) -> np.ndarray:
        '''
        An array of the entropies of the belief points.
        '''
        xp = np if not gpu_support else cp.get_array_module(self.belief_array)

        return entropy(self.belief_array, axis=1) if xp == np else cupy_entropy(self.belief_array, axis=1)


    def to_gpu(self) -> 'BeliefSet':
        '''
        Function returning an equivalent belief set object with the array of values stored on GPU instead of CPU.

        Returns
        -------
        gpu_belief_set : BeliefSet
            A new belief set with array on GPU.
        '''
        assert gpu_support, "GPU support is not enabled, unable to execute this function"

        gpu_model = self.model.gpu_model

        # The representation in use (array or list) is preserved in the new set
        if self._belief_array is not None:
            return BeliefSet(gpu_model, cp.array(self._belief_array))

        gpu_belief_list = [Belief(gpu_model, cp.array(b.values)) for b in self._belief_list]
        return BeliefSet(gpu_model, gpu_belief_list)


    def to_cpu(self) -> 'BeliefSet':
        '''
        Function returning an equivalent belief set object with the array of values stored on CPU instead of GPU.

        Returns
        -------
        cpu_belief_set : BeliefSet
            A new belief set with array on CPU.
        '''
        assert gpu_support, "GPU support is not enabled, unable to execute this function"

        cpu_model = self.model.cpu_model

        # The representation in use (array or list) is preserved in the new set
        if self._belief_array is not None:
            return BeliefSet(cpu_model, cp.asnumpy(self._belief_array))

        cpu_belief_list = [Belief(cpu_model, cp.asnumpy(b.values)) for b in self._belief_list]
        return BeliefSet(cpu_model, cpu_belief_list)

`belief_array` `property`

A matrix of size N x S containing N belief vectors. If belief set is stored as a list of Belief objects, the matrix of beliefs will be generated from them.

`belief_list` `property`

A list of Belief objects. If the belief set is represented as a matrix of Belief vectors, the list of Belief objects will be generated from it.

`entropies` `property`

An array of the entropies of the belief points.

`unique_belief_dict` `property`

A dictionary of unique belief points with the keys being the byte representation of these belief points.

`generate_all_successors()`

Function to generate the successors beliefs of all the beliefs in the belief set.

Returns:

Name	Type	Description
`all_successors`	`BeliefSet`	All successors of all beliefs in the belief set.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def generate_all_successors(self) -> 'BeliefSet':
    '''
    Collects, for every belief of the set, all the successor beliefs it can generate.

    Returns
    -------
    all_successors : BeliefSet
        A belief set made of the successors of all the beliefs in this set, kept in the order they were generated.
    '''
    successors = [succ for belief in self.belief_list for succ in belief.generate_successors()]
    return BeliefSet(self.model, successors)

`to_cpu()`

Function returning an equivalent belief set object with the array of values stored on CPU instead of GPU.

Returns:

Name	Type	Description
`cpu_belief_set`	`BeliefSet`	A new belief set with array on CPU.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def to_cpu(self) -> 'BeliefSet':
    '''
    Builds an equivalent belief set whose values are stored on CPU instead of GPU.
    The new set is attached to the CPU version of the model and keeps the representation in use (array or list).

    Returns
    -------
    cpu_belief_set : BeliefSet
        A new belief set with array on CPU.
    '''
    assert gpu_support, "GPU support is not enabled, unable to execute this function"

    cpu_model = self.model.cpu_model

    # Array-backed set: the whole matrix is converted at once
    if self._belief_array is not None:
        return BeliefSet(cpu_model, cp.asnumpy(self._belief_array))

    # List-backed set: every belief is rebuilt from its converted values
    return BeliefSet(cpu_model, [Belief(cpu_model, cp.asnumpy(b.values)) for b in self._belief_list])

`to_gpu()`

Function returning an equivalent belief set object with the array of values stored on GPU instead of CPU.

Returns:

Name	Type	Description
`gpu_belief_set`	`BeliefSet`	A new belief set with array on GPU.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def to_gpu(self) -> 'BeliefSet':
    '''
    Builds an equivalent belief set whose values are stored on GPU instead of CPU.
    The new set is attached to the GPU version of the model and keeps the representation in use (array or list).

    Returns
    -------
    gpu_belief_set : BeliefSet
        A new belief set with array on GPU.
    '''
    assert gpu_support, "GPU support is not enabled, unable to execute this function"

    gpu_model = self.model.gpu_model

    # Array-backed set: the whole matrix is converted at once
    if self._belief_array is not None:
        return BeliefSet(gpu_model, cp.array(self._belief_array))

    # List-backed set: every belief is rebuilt from its converted values
    return BeliefSet(gpu_model, [Belief(gpu_model, cp.array(b.values)) for b in self._belief_list])

`union(other_belief_set)`

Function to make the union between two belief set objects.

Parameters:

Name	Type	Description	Default
`other_belief_set`	`BeliefSet`	The other belief set to make the union with	required

Returns:

Name	Type	Description
`new_belief_set`	`BeliefSet`	A new, combined, belief set

Source code in olfactory_navigation/agents/model_based_util/belief.py

def union(self, other_belief_set: 'BeliefSet') -> 'BeliefSet':
    '''
    Merges this belief set with another one, keeping a single belief per byte representation.

    Parameters
    ----------
    other_belief_set : BeliefSet
        The other belief set to make the union with

    Returns
    -------
    new_belief_set : BeliefSet
        A new, combined, belief set (attached to the model of this set)
    '''
    # Merging both dictionaries deduplicates the beliefs; on a shared key the belief of the other set is kept
    merged = {**self.unique_belief_dict, **other_belief_set.unique_belief_dict}

    result = BeliefSet(self.model, list(merged.values()))
    # The uniqueness dictionary is already known, so it is handed over instead of being rebuilt later
    result._uniqueness_dict = merged

    return result

`update(actions, observations, throw_error=True)`

Returns a new belief set in which each belief of this set has been updated with its corresponding most recent action and most recent observation.

Parameters:

Name	Type	Description	Default
`actions`	`list or ndarray`	The most recent played actions.	required
`observations`	`list or ndarray`	The most recent received observations.	required
`throw_error`	`bool`	Whether to throw an error when attempting to generate impossible beliefs.	`True`

Returns:

Name	Type	Description
`new_belief_set`	`BeliefSet`	A set of updated beliefs.

Source code in olfactory_navigation/agents/model_based_util/belief.py

def update(self,
           actions: list | np.ndarray,
           observations: list | np.ndarray,
           throw_error: bool = True
           ) -> 'BeliefSet':
    '''
    Updates every belief of the set with its own most recent action and most recent observation, and returns the result as a new belief set.

    Parameters
    ----------
    actions : list or np.ndarray
        The most recent played actions.
    observations : list or np.ndarray
        The most recent received observations.
    throw_error : bool, default=True
        Whether to throw an error when attempting to generate impossible beliefs.

    Returns
    -------
    new_belief_set : BeliefSet
        A set of updated beliefs.
    '''
    # Array module to work with (GPU one only if supported and the set lives there)
    xp = cp if (gpu_support and self.is_on_gpu) else np

    # Inputs are converted to arrays of the chosen module
    observations = xp.array(observations)
    actions = xp.array(actions)

    model = self.model

    # Reachable transitions weighted by the probability of their origin state, along with the states they lead to
    weights = (model.reachable_transitional_observation_table[:, actions, observations, :] * self.belief_array.T[:,:,None])
    targets = model.reachable_states[:, actions, :]

    n_states = model.state_count
    n_beliefs = len(observations)

    # Both are flattened to one row per belief
    per_belief_shape = (n_beliefs, (n_states * model.reachable_state_count))
    flat_targets = targets.swapaxes(0,1).reshape(per_belief_shape)
    flat_weights = weights.swapaxes(0,1).reshape(per_belief_shape)

    # Belief i accumulates in its own range of bins, starting at i * |S|, so one bincount covers all beliefs
    row_offsets = xp.arange(n_beliefs)[:,None] * n_states
    shifted_targets = flat_targets + row_offsets
    new_probabilities = xp.bincount(shifted_targets.ravel(), weights=flat_weights.ravel(), minlength=flat_targets.shape[0]*n_states).reshape((-1,n_states))

    # Normalization of the rows that do not sum to 0
    row_sums = xp.sum(new_probabilities, axis=1)
    if throw_error and xp.any(row_sums == 0.0):
        raise ValueError('One or more belief is impossible, (ie the sum of the probability distribution is 0)')
    valid_rows = row_sums != 0
    new_probabilities[valid_rows] /= row_sums[valid_rows,None]

    return BeliefSet(model, new_probabilities)

`ValueFunction`

Class representing a set of AlphaVectors. One such set approximates the value function of the MDP model.

Parameters:

Name	Type	Description	Default
`model`	`Model`	The model the value function is associated with.	required
`alpha_vectors`	`list[AlphaVector] or ndarray`	The alpha vectors composing the value function, if none are provided, it will be empty to start with and AlphaVectors can be appended.	`[]`
`action_list`	`list[int] or ndarray`	The actions associated with the alpha vectors in the case the alpha vectors are provided as a numpy array.	`[]`

Attributes:

Name	Type	Description
`model`	`Model`	The model the value function is associated with.
`alpha_vector_list`	`list[AlphaVector]`
`alpha_vector_array`	`ndarray`
`actions`	`ndarray`

Source code in olfactory_navigation/agents/model_based_util/value_function.py

class ValueFunction:
    '''
    Class representing a set of AlphaVectors. One such set approximates the value function of the MDP model.


    Parameters
    ----------
    model : mdp.Model
        The model the value function is associated with.
    alpha_vectors : list[AlphaVector] or np.ndarray, optional
        The alpha vectors composing the value function, if none are provided, it will be empty to start with and AlphaVectors can be appended.
    action_list : list[int] or np.ndarray, optional
        The actions associated with alpha vectors in the case the alpha vectors are provided as an numpy array.

    Attributes
    ----------
    model : mdp.Model
        The model the value function is associated with.
    alpha_vector_list : list[AlphaVector]
    alpha_vector_array : np.ndarray
    actions : np.ndarray
    '''
    def __init__(self,
                 model: Model,
                 alpha_vectors: list[AlphaVector] | np.ndarray = [],
                 action_list: list[int] | np.ndarray = []
                 ) -> None:
        # Build the value function either from a list of AlphaVector objects or from a matrix of vectors paired with 'action_list'.
        self.model = model
        self.is_on_gpu = False

        # The array representations are only built on demand (see the properties)
        self._vector_array = None
        self._actions = None
        self._vector_list = None

        if not isinstance(alpha_vectors, list):
            # Matrix input: one row per alpha vector, one action per row
            av_shape = alpha_vectors.shape
            exp_shape = (len(action_list), model.state_count)
            assert av_shape == exp_shape, f"Alpha vector array does not have the right shape (received: {av_shape}; expected: {exp_shape})"

            self._vector_list = []
            for alpha_vect, action in zip(alpha_vectors, action_list):
                self._vector_list.append(AlphaVector(alpha_vect, action))

            # The whole matrix lives in a single module, so one check is enough
            if gpu_support and cp.get_array_module(alpha_vectors) == cp:
                self.is_on_gpu = True

        else:
            # List input: every vector must span the state space of the model
            assert all(v.values.shape[0] == model.state_count for v in alpha_vectors), f"Some or all alpha vectors in the list provided dont have the right size, they should be of shape: {model.state_count}"
            self._vector_list = alpha_vectors

            # The first vector decides whether we are on GPU; the others must then agree
            if (len(alpha_vectors) > 0) and gpu_support and cp.get_array_module(alpha_vectors[0].values) == cp:
                assert all(cp.get_array_module(v.values) == cp for v in alpha_vectors), "Either all or none of the alpha vectors should be on the GPU, not just some."
                self.is_on_gpu = True

        # Remove duplicates: vectors are keyed on the raw bytes of their values
        unique_vectors = {}
        for alpha_vector in self._vector_list:
            unique_vectors[alpha_vector.values.tobytes()] = alpha_vector
        self._uniqueness_dict = unique_vectors
        self._vector_list = list(unique_vectors.values())

        # Deduplication corresponds to pruning level 1
        self._pruning_level = 1


    @property
    def alpha_vector_list(self) -> list[AlphaVector]:
        '''
        The alpha vectors of the value function as a list of AlphaVector objects.
        When only the matrix of vectors and the array of actions are available, the list is built from them and stored for later calls.
        '''
        if self._vector_list is not None:
            return self._vector_list

        # Pair each row of the matrix with its action
        self._vector_list = built_list = []
        built_list.extend(AlphaVector(row, row_action) for row, row_action in zip(self._vector_array, self._actions))
        return self._vector_list


    @property
    def alpha_vector_array(self) -> np.ndarray:
        '''
        The alpha vectors stacked in a matrix of size N x S (N being the number of alpha vectors and S the number of states of the model).
        When the matrix has not been built yet, it is generated from the list of AlphaVector objects, along with the array of actions.
        '''
        xp = cp if (gpu_support and self.is_on_gpu) else np

        if self._vector_array is not None:
            return self._vector_array

        # Values and actions are always generated together so they stay aligned
        stacked_values = xp.array([vector.values for vector in self._vector_list])
        self._vector_array = stacked_values
        self._actions = xp.array([vector.action for vector in self._vector_list])
        return self._vector_array


    @property
    def actions(self) -> np.ndarray:
        '''
        An array of N actions corresponding to the N alpha vectors making up the value function.
        If the value function is defined as a list of AlphaVector objects, the array will be generated from the actions of those alpha vector objects.
        '''
        xp = cp if (gpu_support and self.is_on_gpu) else np

        if self._actions is None:
            # The matrix is built from the values of the vectors (not from the AlphaVector objects themselves),
            # exactly as in 'alpha_vector_array', so both cached arrays stay aligned.
            self._vector_array = xp.array([v.values for v in self._vector_list])
            self._actions = xp.array([v.action for v in self._vector_list])
        return self._actions


    def __len__(self) -> int:
        # Number of alpha vectors; the matrix is only consulted when no list is available
        if self._vector_list is None:
            return self._vector_array.shape[0]
        return len(self._vector_list)


    def __add__(self, other_value_function: 'ValueFunction') -> 'ValueFunction':
        '''
        Union of two value functions, returned as a new value function.
        Vectors are keyed on the bytes of their values, so a vector present in both operands appears only once (the one of the right operand is kept).

        Parameters
        ----------
        other_value_function : ValueFunction
            The right operand of the union.

        Returns
        -------
        new_value_function : ValueFunction
            A new value function using the model and the GPU flag of the left operand.
        '''
        combined_dict = {}
        combined_dict.update(self._uniqueness_dict)
        combined_dict.update(other_value_function._uniqueness_dict)

        # Instantiation of the new value function (bypassing __init__ as the vectors are already validated and deduplicated)
        new_value_function = super().__new__(self.__class__)
        new_value_function.model = self.model
        new_value_function.is_on_gpu = self.is_on_gpu

        new_value_function._vector_list = list(combined_dict.values())
        new_value_function._uniqueness_dict = combined_dict
        new_value_function._pruning_level = 1

        # The array representations will be rebuilt on demand
        new_value_function._vector_array = None
        new_value_function._actions = None

        return new_value_function


    def append(self,
               alpha_vector: AlphaVector
               ) -> None:
        '''
        Function to add an alpha vector to the value function.

        Parameters
        ----------
        alpha_vector : AlphaVector
            The alpha vector to be added to the value function.
        '''
        # Make sure size is correct
        assert alpha_vector.values.shape[0] == self.model.state_count, f"Vector to add to value function doesn't have the right size (received: {alpha_vector.values.shape[0]}, expected: {self.model.state_count})"

        # GPU support check: without GPU support everything is on CPU by construction, so the check must not fail
        xp = cp if (gpu_support and self.is_on_gpu) else np
        vector_module = cp.get_array_module(alpha_vector.values) if gpu_support else np
        assert vector_module == xp, f"Vector is{' not' if self.is_on_gpu else ''} on GPU while value function is{'' if self.is_on_gpu else ' not'}."

        # Keep the array representations (if already built) in sync with the list
        if self._vector_array is not None:
            self._vector_array = xp.append(self._vector_array, alpha_vector.values[None,:], axis=0)
            self._actions = xp.append(self._actions, alpha_vector.action)

        if self._vector_list is not None:
            self._vector_list.append(alpha_vector)

        # Register the vector so that 'extend' and '__add__', which rebuild the set from this dictionary, do not lose it
        self._uniqueness_dict[alpha_vector.values.tobytes()] = alpha_vector

        # The set changed, any pruning done previously does not hold anymore
        self._pruning_level = 1


    def extend(self,
               other_value_function: 'ValueFunction'
               ) -> None:
        '''
        Function to add another value function in place.
        Effectively, it performs the union of the two sets of alpha vectors.

        Parameters
        ----------
        other_value_function : ValueFunction
            The other side of the union.
        '''
        # Vectors are keyed on the bytes of their values, so duplicates collapse into a single entry
        self._uniqueness_dict.update(other_value_function._uniqueness_dict)
        self._vector_list = list(self._uniqueness_dict.values())

        # The array representations are outdated, they will be rebuilt on demand
        self._vector_array = None
        self._actions = None

        # The set changed, any pruning done previously does not hold anymore
        self._pruning_level = 1


    def to_gpu(self) -> 'ValueFunction':
        '''
        Builds a copy of this value function whose arrays are stored on the GPU.

        Returns
        -------
        gpu_value_function : ValueFunction
            A new value function with arrays on GPU.
        '''
        assert gpu_support, "GPU support is not enabled, unable to execute this function"

        target_model = self.model.gpu_model

        # Array-only value function: transfer the matrix and, unless they are a plain list, the actions
        if self._vector_list is None:
            device_vectors = cp.array(self._vector_array)
            if isinstance(self._actions, list):
                device_actions = self._actions
            else:
                device_actions = cp.array(self._actions)
            return ValueFunction(target_model, device_vectors, device_actions)

        # List-backed value function: transfer the vectors one by one
        device_alpha_vectors = []
        for av in self._vector_list:
            device_alpha_vectors.append(AlphaVector(cp.array(av.values), av.action))
        return ValueFunction(target_model, device_alpha_vectors)


    def to_cpu(self) -> 'ValueFunction':
        '''
        Builds a copy of this value function whose arrays are stored on the CPU.

        Returns
        -------
        cpu_value_function : ValueFunction
            A new value function with arrays on CPU.
        '''
        assert gpu_support, "GPU support is not enabled, unable to execute this function"

        target_model = self.model.cpu_model

        # Array-only value function: bring back the matrix and, unless they are a plain list, the actions
        if self._vector_list is None:
            host_vectors = cp.asnumpy(self._vector_array)
            if isinstance(self._actions, list):
                host_actions = self._actions
            else:
                host_actions = cp.asnumpy(self._actions)
            return ValueFunction(target_model, host_vectors, host_actions)

        # List-backed value function: bring back the vectors one by one
        host_alpha_vectors = []
        for av in self._vector_list:
            host_alpha_vectors.append(AlphaVector(cp.asnumpy(av.values), av.action))
        return ValueFunction(target_model, host_alpha_vectors)


    def prune(self,
              level: int = 1
              ) -> None:
        '''
        Function pruning, in place, the set of alpha vectors composing the value function.
        The pruning is as thorough as the level:
            - 1: Duplicates only (this is the level the set is at after construction, union or append).
            - 2: 1+ Check of absolute domination (a vector is removed if another vector is at least as good in every state).
            - 3: 2+ Solves a Linear Programming problem for each alpha vector to see if it is dominated by combinations of other vectors.

        Note that the higher the level, the heavier the time impact will be.

        Parameters
        ----------
        level : int, default=1
            Between 0 and 3, how thorough the alpha vector pruning should be.
        '''
        # GPU support check
        xp = cp if (gpu_support and self.is_on_gpu) else np

        # Level under the one already reached (or invalid level): nothing to do
        if level < self._pruning_level or level > 3:
            log('Attempting to prune a value function to a level already reached. Returning \'self\'')
            return

        # Level 2 pruning: Check for absolute domination
        if level >= 2 and self._pruning_level < 2:
            vectors = self.alpha_vector_array
            non_dominated_vector_indices = []

            for i, v in enumerate(vectors):
                weakly_better = xp.all(vectors >= v, axis=1)
                strictly_better = weakly_better & xp.any(vectors > v, axis=1)

                # Identical vectors are weakly but not strictly better: only the first occurrence is kept
                earlier_duplicate = (weakly_better & ~strictly_better)[:i]

                if not (bool(xp.any(strictly_better)) or bool(xp.any(earlier_duplicate))):
                    non_dominated_vector_indices.append(i)

            self._keep_vectors(non_dominated_vector_indices)

        # Level 3 pruning: LP to check for more complex domination
        if level >= 3 and self._pruning_level < 3:
            assert ilp_support, "ILP support not enabled..."
            from scipy.optimize import Bounds

            # The LP solver works on numpy arrays only
            vectors = self.alpha_vector_array
            alpha_set = cp.asnumpy(vectors) if (gpu_support and self.is_on_gpu) else np.asarray(vectors)

            if alpha_set.ndim == 2 and alpha_set.shape[0] > 1:
                vector_count, state_count = alpha_set.shape

                # Decision variables are [v, b_1, ..., b_S]: v is a free value, b is a belief.
                # (milp constrains all variables to be non-negative by default, which would be wrong for negative values)
                variable_bounds = Bounds(np.concatenate([[-np.inf], np.zeros(state_count)]),
                                         np.concatenate([[np.inf], np.ones(state_count)]))

                # Constraint that the sum of the belief is 1
                belief_constraint = LinearConstraint(np.concatenate([[0.0], np.ones(state_count)])[None,:], 1, 1)

                # Vectors are only compared to the ones still kept, so the upper envelope is never degraded
                kept_mask = np.ones(vector_count, dtype=bool)

                for i, alpha_vect in enumerate(alpha_set):
                    others_mask = kept_mask.copy()
                    others_mask[i] = False
                    other_alphas = alpha_set[others_mask]

                    if other_alphas.shape[0] == 0:
                        continue

                    # Objective function: minimize v - b.alpha
                    c = np.concatenate([[1.0], -alpha_vect])

                    # Alpha vector constraints: v >= b.alpha' for every other alpha'
                    A = np.c_[np.ones(other_alphas.shape[0]), -other_alphas]
                    alpha_constraints = LinearConstraint(A, 0, np.inf)

                    # Solve problem
                    res = milp(c=c, constraints=[alpha_constraints, belief_constraint], bounds=variable_bounds)

                    # If the solver failed, the vector is conservatively kept
                    if res.x is None:
                        continue

                    # Dominated if, even at the most favorable belief, the other vectors are at least as good
                    if (res.x[0] - np.dot(res.x[1:], alpha_vect)) >= 0:
                        kept_mask[i] = False

                self._keep_vectors(np.flatnonzero(kept_mask).tolist())

        # Update the tracked pruned level so far
        self._pruning_level = level


    def _keep_vectors(self,
                      kept_indices: list[int]
                      ) -> None:
        '''
        Restricts every representation of the value function (matrix, actions, list and uniqueness dictionary) to the vectors at the given indices.
        It expects the matrix and the actions to be built already.
        '''
        self._vector_array = self._vector_array[kept_indices]
        self._actions = self._actions[kept_indices]

        if self._vector_list is not None:
            self._vector_list = [self._vector_list[i] for i in kept_indices]
            self._uniqueness_dict = {alpha_vector.values.tobytes(): alpha_vector for alpha_vector in self._vector_list}


    def evaluate_at(self,
                    belief: Belief | BeliefSet
                    ) -> tuple[float | np.ndarray, int | np.ndarray]:
        '''
        Evaluates the value function at one belief point or at every point of a belief set.
        For each belief point, the alpha vector with the highest value is selected; its value and its action are returned.

        Parameters
        ----------
        belief : Belief or BeliefSet
            The belief point, or the set of belief points, to evaluate the value function at.

        Returns
        -------
        value : float or np.ndarray
            The largest value associated with the belief point(s)
        action : int or np.ndarray
            The action(s) associated with the vector having the highest values at the belief point(s).
        '''
        # GPU support check
        xp = cp if (gpu_support and self.is_on_gpu) else np

        # Single belief point: scalar outputs
        if isinstance(belief, Belief):
            values_per_vector = xp.dot(self.alpha_vector_array, belief.values)
            top_vector = xp.argmax(values_per_vector)

            top_value = float(values_per_vector[top_vector])
            top_action = int(self.actions[top_vector])
            return (top_value, top_action)

        # Set of belief points: one row per belief point, one column per alpha vector
        value_matrix = xp.matmul(belief.belief_array, self.alpha_vector_array.T)
        top_vectors = xp.argmax(value_matrix, axis=1)

        row_ids = xp.arange(value_matrix.shape[0])
        top_values = value_matrix[row_ids, top_vectors]
        top_actions = self.actions[top_vectors]

        return (top_values, top_actions)


    def save(self,
             folder: str = './ValueFunctions',
             file_name: str | None = None
             ) -> None:
        '''
        Function to save the value function in a npy file at a given path. If no path is provided, it will be saved in a subfolder (ValueFunctions) inside the current working directory.
        If no file_name is provided, it be saved as '<current_timestamp>_value_function.npy'.
        The array saved has one row per alpha vector: the first column holds the action and the remaining columns hold the values of the vector.

        Parameters
        ----------
        folder : str, default='./ValueFunctions'
            The path at which the npy file will be saved. It is created if it does not exist.
        file_name : str, default='<current_timestamp>_value_function.npy'
            The file name used to save in.
        '''
        import os

        # Saving always happens from the CPU version of the value function
        if self.is_on_gpu:
            self.to_cpu().save(folder=folder, file_name=file_name)
            return

        # Handle file_name
        if file_name is None:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            file_name = timestamp + '_value_function.npy'

        # Make sure that the file name ends with .npy
        if not file_name.endswith('.npy'):
            file_name += '.npy'

        # Make sure the destination folder exists
        if folder:
            os.makedirs(folder, exist_ok=True)

        # Getting array
        av_array = np.hstack([self.actions[:,None], self.alpha_vector_array])

        np.save(os.path.join(folder, file_name), av_array)


    @classmethod
    def load(cls,
             file: str,
             model: Model
             ) -> 'ValueFunction':
        '''
        Builds a value function from an array file readable by np.load.
        The first column of the stored array is read as the actions and the remaining columns as the alpha vectors.

        Parameters
        ----------
        file : str
            The path and file_name of the value function to be loaded.
        model : mdp.Model
            The model the value function is linked to.

        Returns
        -------
        loaded_value_function : ValueFunction
            The loaded value function.
        '''
        stored_array = np.load(file)

        # Split the stored array back into vectors and (integer) actions
        stored_vectors = stored_array[:,1:]
        stored_actions = stored_array[:,0].astype(int)

        return ValueFunction(model=model,
                             alpha_vectors=stored_vectors,
                             action_list=stored_actions)


    def plot(self,
             as_grid: bool = False,
             size: int = 5,
             belief_set: np.ndarray = None
             ) -> None:
        '''
        Plots the value function. Models with 2 or 3 states get a dedicated 2D or 3D plot, unless as_grid is set; in every other case the value function is plotted as a grid.
        The belief set, if provided, is handed over to the plotting function selected.

        Parameters
        ----------
        as_grid : bool, default=False
            Forces the plot to be plot as a grid.
        size : int, default=5
            The actual plot scale.
        belief_set : np.ndarray, optional
            A set of belief to plot the belief points that were explored.
        '''
        assert len(self) > 0, "Value function is empty, plotting is impossible..."

        # Plotting is done from the CPU version of the value function
        if self.is_on_gpu:
            print('[Warning] Value function on GPU, converting to numpy before plotting...')
            self.to_cpu().plot(as_grid, size, belief_set)
            return

        # Selection of the plotting function
        if as_grid:
            plotter = self._plot_grid
        else:
            plotter = {2: self._plot_2D, 3: self._plot_3D}.get(self.model.state_count)
            if plotter is None:
                print('[Warning] \'as_grid\' parameter set to False but state count is >3 so it will be plotted as a grid')
                plotter = self._plot_grid

        plotter(size, belief_set)


    def _plot_2D(self, size, belief_set=None):
        '''
        Plots the value function of a 2-state model: each alpha vector is a line over the belief space.
        The x axis goes from state 0 (x=0) to state 1 (x=1) and each line is colored after the action of its vector.
        If a belief set is provided, its belief points are scattered on a thin second axis under the main plot.

        Parameters
        ----------
        size : int
            The plot scale, the figure is (1.5 * size) wide and size high.
        belief_set : BeliefSet, optional
            The belief points to show under the value function (its 'belief_array' attribute is read).
        '''
        x = np.linspace(0, 1, 100)

        # The figure size is given to subplots directly; a separate plt.figure() call would only open an extra empty figure
        has_beliefs = belief_set is not None
        grid_spec = {'height_ratios': ([19,1] if has_beliefs else [1])}
        _, ax = plt.subplots((2 if has_beliefs else 1), 1, sharex=True, figsize=(int(size*1.5),size), gridspec_kw=grid_spec)

        # Vector plotting: the line of a vector goes from its value in state 0 to its value in state 1
        alpha_vects = self.alpha_vector_array

        m = alpha_vects[:,1] - alpha_vects[:,0] # type: ignore
        m = m.reshape(m.shape[0],1)

        x = x.reshape((1,x.shape[0])).repeat(m.shape[0],axis=0)
        y = (m*x) + alpha_vects[:,0].reshape(m.shape[0],1)

        # With a single subplot, 'ax' is the axis itself rather than an array of axes
        ax0 = ax[0] if has_beliefs else ax
        for i, alpha in enumerate(self.alpha_vector_list):
            ax0.plot(x[i,:], y[i,:], color=COLOR_LIST[alpha.action]['id']) # type: ignore

        # Set title
        title = 'Value function' + (' and explored belief points' if has_beliefs else '')
        ax0.set_title(title)

        # X-axis setting: the two ends of the axis are labelled with the states
        ticks = [0,0.25,0.5,0.75,1]
        x_ticks = [str(t) for t in ticks]
        x_ticks[0] = self.model.state_labels[0]
        x_ticks[-1] = self.model.state_labels[1]

        ax0.set_xticks(ticks, x_ticks) # type: ignore

        # Action legend
        proxy = [patches.Rectangle((0,0),1,1,fc = COLOR_LIST[a]['id']) for a in self.model.actions]
        ax0.legend(proxy, self.model.action_labels, title='Actions') # type: ignore

        # Belief plotting
        if has_beliefs:
            beliefs_x = belief_set.belief_array[:,1]
            ax[1].scatter(beliefs_x, np.zeros(beliefs_x.shape[0]), c='red')
            ax[1].get_yaxis().set_visible(False)
            ax[1].axhline(0, color='black')
            ax[1].set_xlabel('Belief space')
        else:
            ax0.set_xlabel('Belief space')

        # Axis labels
        ax0.set_ylabel('V(b)')


    def _plot_3D(self, size, belief_set=None):
        '''
        Plots the value function of a 3-state model over the belief simplex, as four contour plots:
        the value function itself, the alpha plane that is the best at each point, the gradient of that plane and the action of that plane.
        The x axis is the belief to be in state 1 and the y axis the belief to be in state 2.

        Parameters
        ----------
        size : int
            The plot scale, the figure is (4 * size) wide and (3.5 * size) high.
        belief_set : BeliefSet, optional
            The belief points to scatter over the four plots (its 'belief_array' attribute is read).
        '''
        def get_alpha_vect_z(xx, yy, alpha_vect):
            # Plane going through (0,0,a0), (1,0,a1) and (0,1,a2): linear interpolation of the vector over the simplex
            return alpha_vect[0] + ((alpha_vect[1] - alpha_vect[0]) * xx) + ((alpha_vect[2] - alpha_vect[0]) * yy)

        def get_plane_gradient(alpha_vect):
            # The normal of the plane is (-(a1-a0), -(a2-a0), 1).
            # The value returned is the norm of the horizontal part of the unit normal: 0 for a flat plane, tending to 1 for a steep one.
            slope_x = alpha_vect[1] - alpha_vect[0]
            slope_y = alpha_vect[2] - alpha_vect[0]
            horizontal_norm = np.hypot(slope_x, slope_y)
            return float(horizontal_norm / np.sqrt((horizontal_norm ** 2) + 1.0))

        # Actual plotting
        x = np.linspace(0, 1, 1000)
        y = np.linspace(0, 1, 1000)

        xx, yy = np.meshgrid(x, y)

        # Starting from -inf so that vectors with negative values are not hidden by the initial surface
        max_z = np.full(xx.shape, -np.inf)
        best_a = np.zeros(xx.shape)
        plane = np.zeros(xx.shape)
        gradients = np.zeros(xx.shape)

        for alpha in self.alpha_vector_list:

            z = get_alpha_vect_z(xx, yy, alpha.values)

            # Points where this vector is strictly better than all the previous ones
            improved = z > max_z

            best_a[improved] = alpha.action

            # Arbitrary number, only used to tell the planes apart on the plot
            plane[improved] = random.randrange(100)

            gradients[improved] = get_plane_gradient(alpha.values)

            # Max z update
            max_z = np.maximum(max_z, z)

        # Points outside of the simplex (belief summing to more than 1) are not plotted
        outside_simplex = (xx + yy) > 1
        for table in (max_z, plane, gradients, best_a):
            table[outside_simplex] = np.nan

        belief_points = None
        if belief_set is not None:
            belief_points = belief_set.belief_array[:,1:]

        _, ((ax1, ax2),(ax3,ax4)) = plt.subplots(2, 2, figsize=(size*4,size*3.5), sharex=True, sharey=True)

        # Set ticks: the origin is state 0, the end of the x axis is state 1 and the end of the y axis is state 2
        ticks = [0,0.25,0.5,0.75,1]
        x_ticks = [str(t) for t in ticks]
        x_ticks[0] = self.model.state_labels[0]
        x_ticks[-1] = self.model.state_labels[1]

        y_ticks = [str(t) for t in ticks]
        y_ticks[0] = self.model.state_labels[0]
        y_ticks[-1] = self.model.state_labels[2]

        plt.setp([ax1,ax2,ax3,ax4], xticks=ticks, xticklabels=x_ticks, yticks=ticks, yticklabels=y_ticks)

        # Value function ax
        ax1.set_title("Value function")
        ax1_plot = ax1.contourf(x, y, max_z, 100, cmap="viridis")
        plt.colorbar(ax1_plot, ax=ax1)

        # Alpha planes ax
        ax2.set_title("Alpha planes")
        ax2_plot = ax2.contourf(x, y, plane, 100, cmap="viridis")
        plt.colorbar(ax2_plot, ax=ax2)

        # Gradient of planes ax
        ax3.set_title("Gradients of planes")
        ax3_plot = ax3.contourf(x, y, gradients, 100, cmap="Blues")
        plt.colorbar(ax3_plot, ax=ax3)

        # Action policy ax
        ax4.set_title("Action policy")
        ax4.contourf(x, y, best_a, 1, colors=[c['id'] for c in COLOR_LIST])
        proxy = [patches.Rectangle((0,0),1,1,fc = COLOR_LIST[int(a)]['id']) for a in self.model.actions]
        ax4.legend(proxy, self.model.action_labels, title='Actions')

        if belief_points is not None:
            for ax in [ax1,ax2,ax3,ax4]:
                ax.scatter(belief_points[:,0], belief_points[:,1], s=1, c='black')


    def _plot_grid(self, size=5, belief_set=None):
        # Plots, side by side, the best value and the best action of each state, laid out following the state grid of the model.
        # 'belief_set' is accepted to share the signature of the other plotting functions but is not used here.
        def tick_positions(length, target_count):
            # One tick per cell for short axes, otherwise about 'target_count' ticks
            step = 1 if length < target_count else int(length / target_count)
            return np.arange(0, length, step)

        # Best value and best action per state
        value_table = np.max(self.alpha_vector_array, axis=0)[self.model.state_grid]
        best_vector_per_state = np.argmax(self.alpha_vector_array, axis=0)
        best_action_table = np.array(self.actions)[best_vector_per_state][self.model.state_grid]
        best_action_colors = COLOR_ARRAY[best_action_table]

        dimensions = self.model.state_grid.shape
        grid_height = dimensions[0]
        grid_width = dimensions[1]

        fig, (ax1,ax2) = plt.subplots(1,2, figsize=(size*2, size), width_ratios=(0.55,0.45))

        # Ticks
        x_ticks = tick_positions(grid_width, 10)
        y_ticks = tick_positions(grid_height, 5)

        # Value function ax
        ax1.set_title('Value function')
        ax1_plot = ax1.imshow(value_table)

        # The colorbar goes under the plot when the grid is wider than high
        if grid_height < grid_width:
            plt.colorbar(ax1_plot, ax=ax1, location='bottom', orientation='horizontal')
        else:
            plt.colorbar(ax1_plot, ax=ax1)

        ax1.set_xticks(x_ticks)
        ax1.set_yticks(y_ticks)

        # Action policy ax
        ax2.set_title('Action policy')
        ax2.imshow(best_action_colors)

        legend_handles = []
        for i in self.model.actions:
            legend_handles.append(patches.Patch(color=COLOR_LIST[int(i)]['id'], label=str(self.model.action_labels[int(i)])))
        ax2.legend(handles=legend_handles, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., title='Actions')

        ax2.set_xticks(x_ticks)
        ax2.set_yticks(y_ticks)

`actions` `property`

A list of N actions corresponding to the N alpha vectors making up the value function. If the value function is defined as a list of AlphaVector objects, the list will be generated from the actions of those alpha vector objects.

`alpha_vector_array` `property`

A matrix of size N x S, containing all the alpha vectors making up the value function (N is the number of alpha vectors and S the number of states in the model). If the value function is defined as a list of AlphaVector objects, the matrix will be generated from them.

`alpha_vector_list` `property`

A list of AlphaVector objects. If the value function is defined as a matrix of vectors and a list of actions, the list of AlphaVectors will be generated from them.

`append(alpha_vector)`

Function to add an alpha vector to the value function.

Parameters:

Name	Type	Description	Default
`alpha_vector`	`AlphaVector`	The alpha vector to be added to the value function.	required

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def append(self,
           alpha_vector: AlphaVector
           ) -> None:
    '''
    Function to add an alpha vector to the value function.

    Parameters
    ----------
    alpha_vector : AlphaVector
        The alpha vector to be added to the value function.
    '''
    # Make sure size is correct
    assert alpha_vector.values.shape[0] == self.model.state_count, f"Vector to add to value function doesn't have the right size (received: {alpha_vector.values.shape[0]}, expected: {self.model.state_count})"

    # GPU support check: without GPU support everything is on CPU by construction, so the check must not fail
    xp = cp if (gpu_support and self.is_on_gpu) else np
    vector_module = cp.get_array_module(alpha_vector.values) if gpu_support else np
    assert vector_module == xp, f"Vector is{' not' if self.is_on_gpu else ''} on GPU while value function is{'' if self.is_on_gpu else ' not'}."

    # Keep the array representations (if already built) in sync with the list
    if self._vector_array is not None:
        self._vector_array = xp.append(self._vector_array, alpha_vector.values[None,:], axis=0)
        self._actions = xp.append(self._actions, alpha_vector.action)

    if self._vector_list is not None:
        self._vector_list.append(alpha_vector)

    # Register the vector so that 'extend' and '__add__', which rebuild the set from this dictionary, do not lose it
    self._uniqueness_dict[alpha_vector.values.tobytes()] = alpha_vector

    # The set changed, any pruning done previously does not hold anymore
    self._pruning_level = 1

`evaluate_at(belief)`

Function to evaluate the value function at a belief point or at a set of belief points. It returns a value and the associated action.

Parameters:

Name	Type	Description	Default
`belief`	`Belief or BeliefSet`		required

Returns:

Name	Type	Description
`value`	`float or ndarray`	The largest value associated with the belief point(s)
`action`	`int or ndarray`	The action(s) associated with the vector having the highest values at the belief point(s).

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def evaluate_at(self,
                belief: Belief | BeliefSet
                ) -> tuple[float | np.ndarray, int | np.ndarray]:
    '''
    Evaluates the value function at one belief point or at every point of a belief set.
    For each belief point, the alpha vector with the highest value is selected; its value and its action are returned.

    Parameters
    ----------
    belief : Belief or BeliefSet
        The belief point, or the set of belief points, to evaluate the value function at.

    Returns
    -------
    value : float or np.ndarray
        The largest value associated with the belief point(s)
    action : int or np.ndarray
        The action(s) associated with the vector having the highest values at the belief point(s).
    '''
    # GPU support check
    xp = cp if (gpu_support and self.is_on_gpu) else np

    # Single belief point: scalar outputs
    if isinstance(belief, Belief):
        values_per_vector = xp.dot(self.alpha_vector_array, belief.values)
        top_vector = xp.argmax(values_per_vector)

        top_value = float(values_per_vector[top_vector])
        top_action = int(self.actions[top_vector])
        return (top_value, top_action)

    # Set of belief points: one row per belief point, one column per alpha vector
    value_matrix = xp.matmul(belief.belief_array, self.alpha_vector_array.T)
    top_vectors = xp.argmax(value_matrix, axis=1)

    row_ids = xp.arange(value_matrix.shape[0])
    top_values = value_matrix[row_ids, top_vectors]
    top_actions = self.actions[top_vectors]

    return (top_values, top_actions)

`extend(other_value_function)`

Function to add another value function in place. Effectively, it performs the union of the two sets of alpha vectors.

Parameters:

Name	Type	Description	Default
`other_value_function`	`ValueFunction`	The other side of the union.	required

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def extend(self,
           other_value_function: 'ValueFunction'
           ) -> None:
    '''
    Function to add another value function in place.
    Effectively, it performs the union of the two sets of alpha vectors.

    Parameters
    ----------
    other_value_function : ValueFunction
        The other side of the union.
    '''
    # Vectors are keyed on the bytes of their values, so duplicates collapse into a single entry
    self._uniqueness_dict.update(other_value_function._uniqueness_dict)
    self._vector_list = list(self._uniqueness_dict.values())

    # The array representations are outdated, they will be rebuilt on demand
    self._vector_array = None
    self._actions = None

    # The set changed, any pruning done previously does not hold anymore
    self._pruning_level = 1

`load(file, model)` `classmethod`

Function to load the value function from a `.npy` file (as written by `save`).

Parameters:

Name	Type	Description	Default
`file`	`str`	The path and file_name of the value function to be loaded.	required
`model`	`Model`	The model the value function is linked to.	required

Returns:

Name	Type	Description
`loaded_value_function`	`ValueFunction`	The loaded value function.

Source code in olfactory_navigation/agents/model_based_util/value_function.py

@classmethod
def load(cls,
         file: str,
         model: Model
         ) -> 'ValueFunction':
    '''
    Builds a value function from an array file readable by np.load.
    The first column of the stored array is read as the actions and the remaining columns as the alpha vectors.

    Parameters
    ----------
    file : str
        The path and file_name of the value function to be loaded.
    model : mdp.Model
        The model the value function is linked to.

    Returns
    -------
    loaded_value_function : ValueFunction
        The loaded value function.
    '''
    stored_array = np.load(file)

    # Split the stored array back into vectors and (integer) actions
    stored_vectors = stored_array[:,1:]
    stored_actions = stored_array[:,0].astype(int)

    return ValueFunction(model=model,
                         alpha_vectors=stored_vectors,
                         action_list=stored_actions)

`plot(as_grid=False, size=5, belief_set=None)`

Function to plot the value function in 2 or 3 dimensions when the model has 2 or 3 states and the as_grid parameter is left to False. Otherwise, the value function is plotted as a grid. If a belief set is provided and the model has 2 or 3 states, it will be plotted alongside the value function.

Parameters:

Name	Type	Description	Default
`as_grid`	`bool`	Forces the value function to be plotted as a grid.	`False`
`size`	`int`	The actual plot scale.	`5`
`belief_set`	`ndarray`	A set of belief to plot the belief points that were explored.	`None`

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def plot(self,
         as_grid: bool = False,
         size: int = 5,
         belief_set: np.ndarray = None
         ) -> None:
    '''
    Function to draw the value function.
    Models with 2 or 3 states are drawn with the dedicated 2D or 3D plotting function, unless as_grid is set to True.
    In every other case, the value function is drawn as a grid.
    The belief set, if provided, is forwarded as is to the plotting function that ends up being used.

    Parameters
    ----------
    as_grid : bool, default=False
        Forces the plot to be plot as a grid.
    size : int, default=5
        The actual plot scale.
    belief_set : np.ndarray, optional
        A set of belief to plot the belief points that were explored.
    '''
    assert len(self) > 0, "Value function is empty, plotting is impossible..."

    # A value function living on the GPU delegates the whole plotting to its CPU equivalent
    if self.is_on_gpu:
        print('[Warning] Value function on GPU, converting to numpy before plotting...')
        self.to_cpu().plot(as_grid, size, belief_set)
        return

    if as_grid:
        plotter = self._plot_grid
    else:
        # Only 2 and 3 state models have a dedicated plotting function
        state_count = self.model.state_count
        dedicated_plotters = {2: self._plot_2D, 3: self._plot_3D}

        if state_count in dedicated_plotters:
            plotter = dedicated_plotters[state_count]
        else:
            print('[Warning] \'as_grid\' parameter set to False but state count is >3 so it will be plotted as a grid')
            plotter = self._plot_grid

    plotter(size, belief_set)

`prune(level=1)`

Function pruning the set of alpha vectors composing the value function. The pruning is as thorough as the level:

- 2: level 1 + check of absolute domination (check if dominated at each state).
- 3: level 2 + solves a Linear Programming problem for each alpha vector to see if it is dominated by combinations of other vectors.

Note that the higher the level, the heavier the time impact will be.

Parameters:

Name	Type	Description	Default
`level`	`int`	Between 0 and 3, how thorough the alpha vector pruning should be.	`1`

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def prune(self,
          level: int = 1
          ) -> None:
    '''
    Function pruning, in place, the set of alpha vectors composing the value function.
    The pruning is as thorough as the level:
        - 2: 1+ Check of absolute domination (check if dominated at each state).
        - 3: 2+ Solves Linear Programming problem for each alpha vector to see if it is dominated by combinations of other vectors.

    Note that the higher the level, the heavier the time impact will be.

    If the requested level is lower than the level already reached, or higher than 3, a message is logged and nothing is pruned.
    At level 3, an alpha vector for which the solver returns no solution is kept.

    Parameters
    ----------
    level : int, default=1
        Between 0 and 3, how thorough the alpha vector pruning should be.
    '''
    # GPU support check
    on_gpu = bool(gpu_support and self.is_on_gpu)
    xp = cp if on_gpu else np

    # Requested level is invalid or lower than the level already reached: nothing to do
    if level < self._pruning_level or level > 3:
        log('Attempting to prune a value function to a level already reached. Returning \'self\'')
        return

    # NOTE(review): only _vector_array and _actions are updated by the pruning below.
    # _vector_list and _uniqueness_dict are left untouched while to_cpu() and to_gpu() rebuild from _vector_list when it is set - confirm they cannot resurrect pruned vectors.

    # Level 2 pruning: Check for absolute domination
    if level >= 2 and self._pruning_level < 2:
        # Read through the properties rather than the private fields, _actions can still be None at this point (eg: right after an extend)
        # NOTE(review): assumes the 'actions' property returns an array aligned with 'alpha_vector_array', as save() relies on - confirm
        alpha_vectors = self.alpha_vector_array
        actions = self.actions

        non_dominated_vector_indices = []
        for i, v in enumerate(alpha_vectors):
            # A vector always 'dominates' itself, so a single match means no other vector dominates it
            is_dom_by = xp.all(alpha_vectors >= v, axis=1)
            if len(xp.where(is_dom_by)[0]) == 1:
                non_dominated_vector_indices.append(i)

        self._vector_array = alpha_vectors[non_dominated_vector_indices]
        self._actions = actions[non_dominated_vector_indices]

    # Level 3 pruning: LP to check for more complex domination
    if level >= 3 and self._pruning_level < 3:
        assert ilp_support, "ILP support not enabled..."

        alpha_vectors = self.alpha_vector_array
        actions = self.actions

        # The solver works on numpy arrays
        alpha_set = cp.asnumpy(alpha_vectors) if on_gpu else np.asarray(alpha_vectors)

        # With a single vector (or none) there is nothing it could be dominated by
        if len(alpha_set) > 1:
            # The solver variables are lower bounded by 0 by default, which is wrong for the value bound variable if alpha values are negative.
            # As beliefs sum to 1, shifting every vector by the same constant does not change which vectors are dominated, so the set is shifted to be non-negative.
            shifted_alpha_set = alpha_set - alpha_set.min()

            # Constraint that sum of beliefs is 1 (the first variable is the value bound, not a belief component)
            belief_constraint = LinearConstraint(np.array([[0] + ([1]*self.model.state_count)]), 1, 1)

            non_dominated_vector_indices = []
            for i, alpha_vect in enumerate(shifted_alpha_set):
                # Every vector but the i-th one ('+' on arrays would add them elementwise instead of concatenating)
                other_alphas = np.delete(shifted_alpha_set, i, axis=0)

                # Objective function: minimize (value bound - belief . alpha_vect)
                c = np.concatenate([np.array([1]), -1*alpha_vect])

                # Alpha vector constraints: value bound >= belief . other_alpha, for each other alpha vector
                A = np.c_[np.ones(len(other_alphas)), -1*other_alphas]
                alpha_constraints = LinearConstraint(A, 0, np.inf)

                # Solve problem
                res = milp(c=c, constraints=[alpha_constraints, belief_constraint])

                # No solution found: keep the vector rather than pruning it on a failed solve
                if res.x is None:
                    non_dominated_vector_indices.append(i)
                    continue

                # Check if dominated: even at its most favorable belief, the vector does not exceed the other vectors
                is_dominated = (res.x[0] - np.dot(res.x[1:], alpha_vect)) >= 0
                if is_dominated:
                    print(alpha_set[i])
                    print(' -> Dominated\n')
                else:
                    non_dominated_vector_indices.append(i)

            self._vector_array = alpha_vectors[non_dominated_vector_indices]
            self._actions = actions[non_dominated_vector_indices]

    # Update the tracked pruned level so far
    self._pruning_level = level

`save(folder='./ValueFunctions', file_name=None)`

Function to save the value function in a file at a given path. If no path is provided, it will be saved in a subfolder (ValueFunctions) inside the current working directory. If no file_name is provided, it will be saved as '<current_timestamp>_value_function.npy'.

Parameters:

Name	Type	Description	Default
`folder`	`str`	The path at which the npy file will be saved.	`'./ValueFunctions'`
`file_name`	`str`	The file name used to save in.	`'<current_timestamp>_value_function.npy'`

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def save(self,
         folder: str = './ValueFunctions',
         file_name: str | None = None
         ) -> None:
    '''
    Function to save the value function in a file at a given path. If no path is provided, it will be saved in a subfolder (ValueFunctions) inside the current working directory.
    If no file_name is provided, it be saved as '<current_timestamp>_value_function.csv'.

    Parameters
    ----------
    folder : str, default='./ValueFunctions'
        The path at which the npy file will be saved.
    file_name : str, default='<current_timestamp>_value_function.npy'
        The file name used to save in.
    '''
    if self.is_on_gpu:
        self.to_cpu().save(path=folder, file_name=file_name)
        return

    # Handle file_name
    if file_name is None:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        file_name = timestamp + '_value_function.npy'

    # Make sure that .csv is in the file name
    if '.npy' not in file_name:
        file_name += '.npy'

    # Getting array
    av_array = np.hstack([self.actions[:,None], self.alpha_vector_array])

    np.save(folder + '/' + file_name, av_array)

`to_cpu()`

Function returning an equivalent value function object with the arrays stored on CPU instead of GPU.

Returns:

Name	Type	Description
`cpu_value_function`	`ValueFunction`	A new value function with arrays on CPU.

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def to_cpu(self) -> 'ValueFunction':
    '''
    Function building a new value function, equivalent to this one, linked to the CPU version of the model and with its arrays converted with cp.asnumpy.
    The current value function is left as it is.

    Returns
    -------
    cpu_value_function : ValueFunction
        A new value function with arrays on CPU.
    '''
    assert gpu_support, "GPU support is not enabled, unable to execute this function"

    cpu_model = self.model.cpu_model

    # No list of alpha vectors available: convert the raw arrays instead
    if self._vector_list is None:
        host_vector_array = cp.asnumpy(self._vector_array)

        # Actions stored as a python list are passed along untouched
        host_actions = self._actions
        if not isinstance(host_actions, list):
            host_actions = cp.asnumpy(host_actions)

        return ValueFunction(cpu_model, host_vector_array, host_actions)

    # Otherwise, convert the alpha vectors one by one
    host_alpha_vectors = []
    for alpha_vector in self._vector_list:
        host_alpha_vectors.append(AlphaVector(cp.asnumpy(alpha_vector.values), alpha_vector.action))

    return ValueFunction(cpu_model, host_alpha_vectors)

`to_gpu()`

Function returning an equivalent value function object with the arrays stored on GPU instead of CPU.

Returns:

Name	Type	Description
`gpu_value_function`	`ValueFunction`	A new value function with arrays on GPU.

Source code in olfactory_navigation/agents/model_based_util/value_function.py

def to_gpu(self) -> 'ValueFunction':
    '''
    Function building a new value function, equivalent to this one, linked to the GPU version of the model and with its arrays converted with cp.array.
    The current value function is left as it is.

    Returns
    -------
    gpu_value_function : ValueFunction
        A new value function with arrays on GPU.
    '''
    assert gpu_support, "GPU support is not enabled, unable to execute this function"

    gpu_model = self.model.gpu_model

    # No list of alpha vectors available: convert the raw arrays instead
    if self._vector_list is None:
        device_vector_array = cp.array(self._vector_array)

        # Actions stored as a python list are passed along untouched
        device_actions = self._actions
        if not isinstance(device_actions, list):
            device_actions = cp.array(device_actions)

        return ValueFunction(gpu_model, device_vector_array, device_actions)

    # Otherwise, convert the alpha vectors one by one
    device_alpha_vectors = []
    for alpha_vector in self._vector_list:
        device_alpha_vectors.append(AlphaVector(cp.array(alpha_vector.values), alpha_vector.action))

    return ValueFunction(gpu_model, device_alpha_vectors)

model_based_util

Belief

bytes_repr property

entropy property

values property

__eq__(other)

generate_successors()

plot(size=5)

random_state()

update(a, o, throw_error=True)

BeliefSet

belief_array property

belief_list property

entropies property

unique_belief_dict property

generate_all_successors()

to_cpu()

to_gpu()

union(other_belief_set)

update(actions, observations, throw_error=True)

ValueFunction

actions property

alpha_vector_array property

alpha_vector_list property

append(alpha_vector)

evaluate_at(belief)

extend(other_value_function)

load(file, model) classmethod

plot(as_grid=False, size=5, belief_set=None)

prune(level=1)

save(folder='./ValueFunctions', file_name=None)

to_cpu()

to_gpu()

`Belief`

`bytes_repr` `property`

`entropy` `property`

`values` `property`

`__eq__(other)`

`generate_successors()`

`plot(size=5)`

`random_state()`

`update(a, o, throw_error=True)`

`BeliefSet`

`belief_array` `property`

`belief_list` `property`

`entropies` `property`

`unique_belief_dict` `property`

`generate_all_successors()`

`to_cpu()`

`to_gpu()`

`union(other_belief_set)`

`update(actions, observations, throw_error=True)`

`ValueFunction`

`actions` `property`

`alpha_vector_array` `property`

`alpha_vector_list` `property`

`append(alpha_vector)`

`evaluate_at(belief)`

`extend(other_value_function)`

`load(file, model)` `classmethod`

`plot(as_grid=False, size=5, belief_set=None)`

`prune(level=1)`

`save(folder='./ValueFunctions', file_name=None)`

`to_cpu()`

`to_gpu()`