Start including standard tool calling agent

2024-12-22 22:19:24 +01:00 · 2024-12-22 22:19:24 +01:00 · 4d4bf13152
parent 3a28bda490
commit 4d4bf13152
2 changed files with 221 additions and 0 deletions
--- a/src/agents/agents.py
+++ b/src/agents/agents.py
@ -33,6 +33,7 @@ from .monitoring import Monitor
 from .prompts import (
    CODE_SYSTEM_PROMPT,
    JSON_SYSTEM_PROMPT,
    TOOL_CALLING_SYSTEM_PROMPT,
    PLAN_UPDATE_FINAL_PLAN_REDACTION,
    SYSTEM_PROMPT_FACTS,
    SYSTEM_PROMPT_FACTS_UPDATE,
@ -870,6 +871,119 @@ class JsonAgent(ReactAgent):
            log_entry.observations = updated_information
            return None
 class ToolCallingAgent(ReactAgent):
    """
    ReAct agent whose every step is a single tool call.

    The stated goal of this agent is for tool calls to be formulated and parsed by the
    underlying library before execution. Until the engine hands back structured tool
    calls, the raw LLM text is parsed here, following the 'Action:' + JSON blob format
    that TOOL_CALLING_SYSTEM_PROMPT asks the model to produce.
    """

    def __init__(
        self,
        tools: List[Tool],
        llm_engine: Optional[Callable] = None,
        system_prompt: Optional[str] = None,
        tool_description_template: Optional[str] = None,
        planning_interval: Optional[int] = None,
        **kwargs,
    ):
        """
        Build the agent, filling in defaults for every argument left as None.

        Args:
            tools: Tools the agent may call.
            llm_engine: Callable invoked with the prompt messages; defaults to `HfApiEngine()`.
            system_prompt: Defaults to `TOOL_CALLING_SYSTEM_PROMPT`.
            tool_description_template: Defaults to `DEFAULT_TOOL_DESCRIPTION_TEMPLATE`.
            planning_interval: Forwarded unchanged to the parent class.
            **kwargs: Forwarded unchanged to the parent class.
        """
        if llm_engine is None:
            llm_engine = HfApiEngine()
        if system_prompt is None:
            system_prompt = TOOL_CALLING_SYSTEM_PROMPT
        if tool_description_template is None:
            tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
        super().__init__(
            tools=tools,
            llm_engine=llm_engine,
            system_prompt=system_prompt,
            tool_description_template=tool_description_template,
            planning_interval=planning_interval,
            **kwargs,
        )

    @staticmethod
    def _parse_tool_call(llm_output):
        """
        Split raw LLM text into `(rationale, tool_name, arguments)`.

        Everything before the last 'Action:' marker is the rationale; the JSON object
        after it must hold an "action" key (the tool name) and may hold "action_input"
        (the arguments, `None` when absent). When no marker is present the whole output
        is treated as the JSON blob and the rationale is empty.

        Raises:
            ValueError: if no JSON object with an "action" key can be decoded
                (`json.JSONDecodeError` is a `ValueError` subclass).
        """
        import json  # local import: keeps this block independent of the file's import header

        rationale, _, blob = str(llm_output).rpartition("Action:")
        start, end = blob.find("{"), blob.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON blob found in the output")
        call = json.loads(blob[start : end + 1])
        if not isinstance(call, dict) or "action" not in call:
            raise ValueError("the JSON blob is missing the 'action' key")
        return rationale.strip(), call["action"], call.get("action_input")

    def step(self, log_entry: ActionStep) -> Union[None, Any]:
        """
        Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.

        Args:
            log_entry: Step record filled in place (`agent_memory`, `llm_output`,
                `tool_call`, then `action_output` or `observations`).

        Returns:
            The final answer when the model calls the `final_answer` tool, None otherwise.

        Raises:
            AgentGenerationError: if the engine call fails or its output holds no
                parseable tool call.
        """
        agent_memory = self.write_inner_memory_from_logs()

        self.prompt_messages = agent_memory

        # Add new step in logs
        log_entry.agent_memory = agent_memory.copy()

        if self.verbose:
            console.print(
                Group(
                    Rule(
                        "[italic]Calling LLM engine with this last message:",
                        align="left",
                        style="orange",
                    ),
                    Text(str(self.prompt_messages[-1])),
                )
            )

        try:
            llm_output = self.llm_engine(
                self.prompt_messages,
            )
            log_entry.llm_output = llm_output
        except Exception as e:
            raise AgentGenerationError(
                f"Error in generating llm_engine output: {e}."
            ) from e

        if self.verbose:
            console.print(
                Group(
                    Rule(
                        "[italic]Output message of the LLM:",
                        align="left",
                        style="orange",
                    ),
                    Text(llm_output),
                )
            )

        # These three names were previously read without ever being assigned,
        # so every step crashed with NameError right after the LLM call.
        # NOTE(review): if the module defines a parsing-specific agent error,
        # raising it here would be more precise than AgentGenerationError.
        try:
            rationale, tool_name, arguments = self._parse_tool_call(llm_output)
        except ValueError as e:
            raise AgentGenerationError(
                f"Could not parse a tool call from the llm_engine output: {e}."
            ) from e

        log_entry.tool_call = ToolCall(tool_name=tool_name, tool_arguments=arguments)

        # Execute
        console.print(Rule("Agent thoughts:", align="left"), Text(rationale))
        console.print(
            Panel(Text(f"Calling tool: '{tool_name}' with arguments: {arguments}"))
        )

        if tool_name == "final_answer":
            # The prompt allows both {"answer": ...} and a bare value as action_input.
            answer = arguments
            if isinstance(arguments, dict) and "answer" in arguments:
                answer = arguments["answer"]
            if (
                isinstance(answer, str) and answer in self.state
            ):  # if the answer is a state variable, return the value
                answer = self.state[answer]
            log_entry.action_output = answer
            return answer

        if arguments is None:
            arguments = {}
        observation = self.execute_tool_call(tool_name, arguments)

        # Exact type match (not isinstance), as before: only these two types are
        # stored in state instead of being stringified.
        observation_type = type(observation)
        if observation_type == AgentImage:
            observation_name = "image.png"
        elif observation_type == AgentAudio:
            observation_name = "audio.mp3"
        else:
            observation_name = None

        if observation_name is not None:
            # TODO: observation naming could allow for different names of same type
            self.state[observation_name] = observation
            updated_information = f"Stored '{observation_name}' in memory."
        else:
            updated_information = str(observation).strip()

        log_entry.observations = updated_information
        return None
 class CodeAgent(ReactAgent):
    """
--- a/src/agents/prompts.py
+++ b/src/agents/prompts.py
@ -277,6 +277,113 @@ Now Begin! If you solve the task correctly, you will receive a reward of $1,000,
 """
 # Default system prompt of ToolCallingAgent (used when no `system_prompt` is passed).
 # It tells the model to emit one tool call per step as an "Action:" marker followed by
 # a JSON blob with an "action" key (tool name) and an "action_input" key (arguments),
 # and to finish by calling the "final_answer" tool.
 # The text contains three `{{...}}` placeholders: tool_names, tool_descriptions and
 # managed_agents_descriptions (presumably filled in by the agent before the prompt is
 # sent; confirm against the prompt-formatting code).
 # NOTE(review): the examples show "action_input" both as a dict and as a bare string,
 # so whatever consumes the model output has to accept both shapes.
 TOOL_CALLING_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using  tool calls. You will be given a task to solve as best you can.
 To do so, you have been given access to the following tools: {{tool_names}}
 The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
 This Action/Observation can repeat N times, you should take several steps when needed.
 You can use the result of the previous action as input for the next action.
 The observation will always be a string: it can represent a file, like "image_1.jpg".
 Then you can use it as input for the next action. You can do it for instance as follows:
 Observation: "image_1.jpg"
 Action:
 {
  "action": "image_transformer",
  "action_input": {"image": "image_1.jpg"}
 }
 To provide the final answer to the task, use an action blob with "action": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
 Action:
 {
  "action": "final_answer",
  "action_input": {"answer": "insert your final answer here"}
 }
 Here are a few examples using notional tools:
 ---
 Task: "Generate an image of the oldest person in this document."
 Action:
 {
  "action": "document_qa",
  "action_input": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
 }
 Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
 Action:
 {
  "action": "image_generator",
  "action_input": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
 }
 Observation: "image.png"
 Action:
 {
  "action": "final_answer",
  "action_input": "image.png"
 }
 ---
 Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
 Action:
 {
    "action": "python_interpreter",
    "action_input": {"code": "5 + 3 + 1294.678"}
 }
 Observation: 1302.678
 Action:
 {
  "action": "final_answer",
  "action_input": "1302.678"
 }
 ---
 Task: "Which city has the highest population , Guangzhou or Shanghai?"
 Action:
 {
    "action": "search",
    "action_input": "Population Guangzhou"
 }
 Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
 Action:
 {
    "action": "search",
    "action_input": "Population Shanghai"
 }
 Observation: '26 million (2019)'
 Action:
 {
  "action": "final_answer",
  "action_input": "Shanghai"
 }
 Above example were using notional tools that might not exist for you. You only have access to these tools:
 {{tool_descriptions}}
 {{managed_agents_descriptions}}
 Here are the rules you should always follow to solve your task:
 1. ALWAYS provide a tool call, else you will fail.
 2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
 3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
 If no tool call is needed, use final_answer tool to return your answer.
 4. Never re-do a tool call that you previously did with the exact same parameters.
 Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
 """
 CODE_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
 To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
 To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.