Start including standard tool calling agent

2024-12-22 22:19:24 +01:00 · 2024-12-22 22:19:24 +01:00 · 4d4bf13152
parent 3a28bda490
commit 4d4bf13152
2 changed files with 221 additions and 0 deletions
--- a/src/agents/agents.py
+++ b/src/agents/agents.py
@ -33,6 +33,7 @@ from .monitoring import Monitor
 from .prompts import (
    CODE_SYSTEM_PROMPT,
    JSON_SYSTEM_PROMPT,
+    TOOL_CALLING_SYSTEM_PROMPT,
    PLAN_UPDATE_FINAL_PLAN_REDACTION,
    SYSTEM_PROMPT_FACTS,
    SYSTEM_PROMPT_FACTS_UPDATE,
@ -870,6 +871,119 @@ class JsonAgent(ReactAgent):
            log_entry.observations = updated_information
            return None

+class ToolCallingAgent(ReactAgent):
+    """
+    ReAct-style agent in which the tool calls are meant to be formulated and parsed
+    by the underlying library before execution.
+
+    NOTE(review): `step` does not yet extract a tool call from the LLM output (see the
+    note inside `step`), so this class appears to be work in progress.
+    """
+
+    def __init__(
+        self,
+        tools: List[Tool],
+        llm_engine: Optional[Callable] = None,
+        system_prompt: Optional[str] = None,
+        tool_description_template: Optional[str] = None,
+        planning_interval: Optional[int] = None,
+        **kwargs,
+    ):
+        """
+        Build the agent, substituting a default for every argument left as None.
+
+        Args:
+            tools: Tools forwarded to the parent constructor.
+            llm_engine: Callable invoked with the prompt messages in `step`;
+                defaults to `HfApiEngine()`.
+            system_prompt: Defaults to `TOOL_CALLING_SYSTEM_PROMPT`.
+            tool_description_template: Defaults to `DEFAULT_TOOL_DESCRIPTION_TEMPLATE`.
+            planning_interval: Forwarded unchanged to the parent constructor.
+            **kwargs: Extra keyword arguments forwarded to the parent constructor.
+        """
+        if llm_engine is None:
+            llm_engine = HfApiEngine()
+        if system_prompt is None:
+            system_prompt = TOOL_CALLING_SYSTEM_PROMPT
+        if tool_description_template is None:
+            tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
+        super().__init__(
+            tools=tools,
+            llm_engine=llm_engine,
+            system_prompt=system_prompt,
+            tool_description_template=tool_description_template,
+            planning_interval=planning_interval,
+            **kwargs,
+        )
+
+    def step(self, log_entry: ActionStep) -> Union[None, Any]:
+        """
+        Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.
+
+        Args:
+            log_entry: Record of the current step. This method sets its `agent_memory`,
+                `llm_output` and `tool_call` attributes, then either `action_output`
+                (final answer) or `observations` (any other tool).
+
+        Returns:
+            The final answer when the called tool is "final_answer"; None if the step
+            is not final.
+
+        Raises:
+            AgentGenerationError: If calling `self.llm_engine` raises any exception.
+        """
+        agent_memory = self.write_inner_memory_from_logs()
+
+        self.prompt_messages = agent_memory
+
+        # Add new step in logs
+        log_entry.agent_memory = agent_memory.copy()
+
+        if self.verbose:
+            console.print(
+                Group(
+                    Rule(
+                        "[italic]Calling LLM engine with this last message:",
+                        align="left",
+                        style="orange",
+                    ),
+                    Text(str(self.prompt_messages[-1])),
+                )
+            )
+
+        # Any failure inside the engine call is re-raised as AgentGenerationError.
+        try:
+            llm_output = self.llm_engine(
+                self.prompt_messages,
+            )
+            log_entry.llm_output = llm_output
+        except Exception as e:
+            raise AgentGenerationError(f"Error in generating llm_engine output: {e}.")
+
+        if self.verbose:
+            console.print(
+                Group(
+                    Rule(
+                        "[italic]Output message of the LLM:",
+                        align="left",
+                        style="orange",
+                    ),
+                    # NOTE(review): llm_output is handed to Text unconverted, so this
+                    # presumably expects the engine to return a string — confirm.
+                    Text(llm_output),
+                )
+            )
+
+        # NOTE(review): `tool_name`, `arguments` and `rationale` (used from here on) are
+        # never assigned in this method. Unless they exist as module-level names, the
+        # next line raises NameError. The step that extracts the tool call (and the
+        # rationale) from `llm_output` appears to be missing — TODO confirm the
+        # intended parsing mechanism and add it here.
+        log_entry.tool_call = ToolCall(tool_name=tool_name, tool_arguments=arguments)
+
+        # Execute
+        # NOTE(review): unlike the prints above, the two prints below are not guarded
+        # by `self.verbose` — confirm this is intended.
+        console.print(Rule("Agent thoughts:", align="left"), Text(rationale))
+        console.print(
+            Panel(Text(f"Calling tool: '{tool_name}' with arguments: {arguments}"))
+        )
+        if tool_name == "final_answer":
+            # Unwrap {"answer": ...}; any other argument shape is used as the answer as-is.
+            if isinstance(arguments, dict):
+                if "answer" in arguments:
+                    answer = arguments["answer"]
+                else:
+                    answer = arguments
+            else:
+                answer = arguments
+            if (
+                isinstance(answer, str) and answer in self.state.keys()
+            ):  # if the answer is a state variable, return the value
+                answer = self.state[answer]
+            log_entry.action_output = answer
+            return answer
+        else:
+            if arguments is None:
+                arguments = {}
+            observation = self.execute_tool_call(tool_name, arguments)
+            # Exact type comparison: subclasses of AgentImage / AgentAudio do not match
+            # and fall through to the string branch below.
+            observation_type = type(observation)
+            if observation_type in [AgentImage, AgentAudio]:
+                # The state key is fixed per type, so a later image/audio observation
+                # replaces the earlier one stored under the same key.
+                if observation_type == AgentImage:
+                    observation_name = "image.png"
+                elif observation_type == AgentAudio:
+                    observation_name = "audio.mp3"
+                # TODO: observation naming could allow for different names of same type
+
+                self.state[observation_name] = observation
+                updated_information = f"Stored '{observation_name}' in memory."
+            else:
+                updated_information = str(observation).strip()
+            log_entry.observations = updated_information
+            return None
+

 class CodeAgent(ReactAgent):
    """
--- a/src/agents/prompts.py
+++ b/src/agents/prompts.py
@ -277,6 +277,113 @@ Now Begin! If you solve the task correctly, you will receive a reward of $1,000,
 """


+TOOL_CALLING_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
+To do so, you have been given access to the following tools: {{tool_names}}
+
+The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
+This Action/Observation can repeat N times, you should take several steps when needed.
+
+You can use the result of the previous action as input for the next action.
+The observation will always be a string: it can represent a file, like "image_1.jpg".
+Then you can use it as input for the next action. You can do it for instance as follows:
+
+Observation: "image_1.jpg"
+
+Action:
+{
+  "action": "image_transformer",
+  "action_input": {"image": "image_1.jpg"}
+}
+
+To provide the final answer to the task, use an action blob with "action": "final_answer". It is the only way to complete the task; otherwise you will be stuck in a loop. So your final output should look like this:
+Action:
+{
+  "action": "final_answer",
+  "action_input": {"answer": "insert your final answer here"}
+}
+
+
+Here are a few examples using notional tools:
+---
+Task: "Generate an image of the oldest person in this document."
+
+Action:
+{
+  "action": "document_qa",
+  "action_input": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
+}
+Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
+
+Action:
+{
+  "action": "image_generator",
+  "action_input": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
+}
+Observation: "image.png"
+
+Action:
+{
+  "action": "final_answer",
+  "action_input": "image.png"
+}
+
+---
+Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
+
+Action:
+{
+    "action": "python_interpreter",
+    "action_input": {"code": "5 + 3 + 1294.678"}
+}
+Observation: 1302.678
+
+Action:
+{
+  "action": "final_answer",
+  "action_input": "1302.678"
+}
+
+---
+Task: "Which city has the highest population, Guangzhou or Shanghai?"
+
+Action:
+{
+    "action": "search",
+    "action_input": "Population Guangzhou"
+}
+Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
+
+
+Action:
+{
+    "action": "search",
+    "action_input": "Population Shanghai"
+}
+Observation: '26 million (2019)'
+
+Action:
+{
+  "action": "final_answer",
+  "action_input": "Shanghai"
+}
+
+
+The above examples used notional tools that might not exist for you. You only have access to these tools:
+
+{{tool_descriptions}}
+
+{{managed_agents_descriptions}}
+
+Here are the rules you should always follow to solve your task:
+1. ALWAYS provide a tool call, else you will fail.
+2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
+3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
+If no tool call is needed, use final_answer tool to return your answer.
+4. Never re-do a tool call that you previously did with the exact same parameters.
+
+Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
+"""
+
 CODE_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
 To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
 To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.