from abc import ABC, abstractmethod
from copy import deepcopy as dcopy
from typing import Optional, List, Union, Dict, Tuple
from banditpylib.data_pb2 import Context, Arm, Actions, Feedback
class Goal(ABC):
  """Abstract class for the goal of a learner

  Concrete goals must implement the :attr:`name` property.
  """

  @property
  @abstractmethod
  def name(self) -> str:
    """Name of the goal"""
class IdentifyBestArm(Goal):
  """Best arm identification

  :param Arm best_arm: best arm identified by the learner
  """

  def __init__(self, best_arm: Arm):
    self.__best_arm = best_arm

  @property
  def name(self) -> str:
    """Name of the goal, always ``'best_arm_id'``"""
    return 'best_arm_id'

  @property
  def best_arm(self) -> Arm:
    """Best arm that was passed in at construction"""
    return self.__best_arm
class MaximizeTotalRewards(Goal):
  """Reward maximization"""

  @property
  def name(self) -> str:
    """Name of the goal, always ``'reward_maximization'``"""
    return 'reward_maximization'
class MaximizeCorrectAnswers(Goal):
  """Maximize correct answers

  This is used by thresholding bandit learners.

  :param List[int] answers: answers obtained by the learner
  """

  def __init__(self, answers: List[int]):
    # The list is stored by reference; no copy is made.
    self.__answers = answers

  @property
  def name(self) -> str:
    """Name of the goal, always ``'max_correct_answers'``"""
    return 'max_correct_answers'

  @property
  def answers(self) -> List[int]:
    """Answers that were passed in at construction"""
    return self.__answers
class MakeAllAnswersCorrect(Goal):
  """Make all answers correct

  This is used by thresholding bandit learners.

  :param List[int] answers: answers obtained by the learner
  """

  def __init__(self, answers: List[int]):
    # The list is stored by reference; no copy is made.
    self.__answers = answers

  @property
  def name(self) -> str:
    """Name of the goal, always ``'make_all_correct'``"""
    return 'make_all_correct'

  @property
  def answers(self) -> List[int]:
    """Answers that were passed in at construction"""
    return self.__answers
class Learner(ABC):
  """Abstract class for learners

  :param Optional[str] name: alias name. When ``None``, the default name
    returned by :func:`_name` is used instead.
  """

  def __init__(self, name: Optional[str]):
    # `_name()` is invoked from within this constructor, so subclass
    # implementations must not depend on attributes that are only assigned
    # after `super().__init__()` returns.
    self.__name = self._name() if name is None else name

  @property
  def name(self) -> str:
    """Name of the learner"""
    return self.__name

  @abstractmethod
  def _name(self) -> str:
    """
    Returns:
      default learner name
    """

  @abstractmethod
  def reset(self):
    """Reset the learner

    .. warning::
      This function should be called before the start of the game.
    """

  @property
  @abstractmethod
  def running_environment(self) -> Union[type, List[type]]:
    """Type of bandit environment the learner plays with"""

  @property
  @abstractmethod
  def goal(self) -> Goal:
    """Goal of the learner"""
class SinglePlayerLearner(Learner):
  """Abstract class for single player learners

  :param Optional[str] name: alias name
  """

  def __init__(self, name: Optional[str]):
    super().__init__(name)

  @abstractmethod
  def actions(self, context: Context) -> Actions:
    """Actions of the learner

    Args:
      context: contextual information about the bandit environment

    Returns:
      actions to take
    """

  @abstractmethod
  def update(self, feedback: Feedback):
    """Update the learner

    Args:
      feedback: feedback returned by the bandit environment after
        :func:`actions` is executed
    """
class CollaborativeAgent(ABC):
  """Abstract class for collaborative agents

  :param Optional[str] name: alias name. When ``None``, the default name
    returned by :func:`_name` is used instead.
  """

  def __init__(self, name: Optional[str]):
    # `_name()` is invoked from within this constructor, so subclass
    # implementations must not depend on attributes that are only assigned
    # after `super().__init__()` returns.
    self.__name = self._name() if name is None else name

  @property
  def name(self) -> str:
    """Name of the agent"""
    return self.__name

  @abstractmethod
  def _name(self) -> str:
    """
    Returns:
      default agent name
    """

  @abstractmethod
  def reset(self):
    """Reset the agent

    .. warning::
      This function should be called before the start of each game.
    """

  @abstractmethod
  def actions(self, context: Context) -> Actions:
    """Actions of the agent

    Args:
      context: contextual information about the bandit environment

    Returns:
      actions to take
    """

  @abstractmethod
  def update(self, feedback: Feedback):
    """Update the agent

    Args:
      feedback: feedback returned by the bandit environment after
        :func:`actions` is executed
    """

  @abstractmethod
  def broadcast(self) -> Dict[int, Tuple[float, int]]:
    """Broadcast information learnt in the current round

    Returns:
      arm ids, corresponding average rewards seen, and numbers of pulls used to
      deduce average rewards
    """
class CollaborativeMaster(ABC):
  """Abstract class for collaborative masters that handle arm assignment and
  elimination

  :param Optional[str] name: alias name. When ``None``, the default name
    returned by :func:`_name` is used instead.
  """

  def __init__(self, name: Optional[str]):
    # `_name()` is invoked from within this constructor, so subclass
    # implementations must not depend on attributes that are only assigned
    # after `super().__init__()` returns.
    self.__name = self._name() if name is None else name

  @property
  def name(self) -> str:
    """Name of the master"""
    return self.__name

  @abstractmethod
  def _name(self) -> str:
    """
    Returns:
      default master name
    """

  @abstractmethod
  def reset(self):
    """Reset the master

    .. warning::
      This function should be called before the start of each game.
    """

  @abstractmethod
  def initial_arm_assignment(self) -> Dict[int, List[int]]:
    """The arm assignment before the first round

    Returns:
      arm assignment per agent for all agents
    """

  @abstractmethod
  def elimination(
      self, messages: Dict[int, Dict[int, Tuple[float, int]]]
  ) -> Dict[int, List[int]]:
    """Update the set of active arms based on some criteria and return arm
    assignment

    Args:
      messages: dict of messages broadcasted from agents, where key is agent_id

    Returns:
      arm assignment per agent
    """
class CollaborativeLearner(Learner):
  """Abstract class for collaborative learners

  :param CollaborativeAgent agent: one instance of a collaborative agent
  :param CollaborativeMaster master: instance of a collaborative master
  :param int num_agents: total number of agents involved
  :param Optional[str] name: alias name
  """

  def __init__(self,
               agent: CollaborativeAgent,
               master: CollaborativeMaster,
               num_agents: int,
               name: Optional[str] = None):
    super().__init__(name=name)
    # The given agent serves as a template: each involved agent is an
    # independent deep copy of it, so the agents do not share state.
    self.__agents = [dcopy(agent) for _ in range(num_agents)]
    # The master is stored by reference (it is not copied).
    self.__master = master

  def reset(self):
    """Reset every agent, then reset the master"""
    for agent in self.__agents:
      agent.reset()
    self.__master.reset()

  @property
  def agents(self) -> List[CollaborativeAgent]:
    """Involved agents"""
    return self.__agents

  @property
  def master(self) -> CollaborativeMaster:
    """Controlling master"""
    return self.__master