From 30cb6111b3ee86550bcac3ad28a18dca17123ab9 Mon Sep 17 00:00:00 2001
From: Aymeric
Date: Mon, 23 Dec 2024 17:10:07 +0100
Subject: [PATCH] Enable support for tool calling agents

---
 README.md                              |   3 +-
 docs/source/_toctree.yml               |   2 +-
 docs/source/conceptual_guides/react.md |   6 +-
 docs/source/examples/text_to_sql.md    |   8 +-
 docs/source/reference/agents.md        |   4 +-
 src/agents/agents.py                   | 165 +++++++++++--------------
 src/agents/default_tools/base.py       |   4 +-
 src/agents/gradio_ui.py                |   6 +-
 src/agents/llm_engines.py              | 148 +++++++++++++++++++---
 src/agents/prompts.py                  |   4 +-
 src/agents/tools.py                    |   1 +
 src/agents/utils.py                    |   4 -
 tests/test_agents.py                   |   8 +-
 13 files changed, 224 insertions(+), 139 deletions(-)

diff --git a/README.md b/README.md
index 7d1d4c1..b253f82 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,8 @@ This library offers:
 
 🤗 **Hub integrations**: you can share and load tools to/from the Hub, and more is to come!
 
-Quick demo:
+## Quick demo
+
 First install the package.
 ```bash
 pip install agents
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index d6b0544..c87bb6e 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -17,7 +17,7 @@
   - local: conceptual_guides/intro_agents
     title: 🤖 An introduction to agentic systems
   - local: conceptual_guides/react
-    title: 🤔 ReAct agents
+    title: 🤔 How do multi-step agents work?
 - title: Examples
   sections:
   - local: examples/text_to_sql
diff --git a/docs/source/conceptual_guides/react.md b/docs/source/conceptual_guides/react.md
index 910123f..85ac66f 100644
--- a/docs/source/conceptual_guides/react.md
+++ b/docs/source/conceptual_guides/react.md
@@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License.
 rendered properly in your Markdown viewer.
 -->
 
-# ReAct agents
+# How do multi-step agents work?
 
 The ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) is currently the main approach to building agents.
 
@@ -22,7 +22,7 @@ The name is based on the concatenation of two words, "Reason" and "Act." Indeed, the ReAct process involves keeping a memory of past steps.
 
 > [!TIP]
-> Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about ReAct agents.
+> Read the [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about multi-step agents.
 
 Here is a video overview of how that works:
 
@@ -44,4 +44,4 @@ We implement two versions of JsonAgent:
 - [`CodeAgent`] is a new type of JsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance.
 
 > [!TIP]
-> We also provide an option to run agents in one-shot: just pass `oneshot=True` when launching the agent, like `agent.run(your_task, oneshot=True)`
\ No newline at end of file
+> We also provide an option to run agents in one-shot: just pass `single_step=True` when launching the agent, like `agent.run(your_task, single_step=True)`
\ No newline at end of file
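The one-shot option in the tip above changes only the `run` call, not agent construction. A minimal sketch, assuming the package root re-exports the agent and engine classes (the import path is an assumption):

```py
from agents import CodeAgent, HfApiEngine

agent = CodeAgent(tools=[], llm_engine=HfApiEngine("meta-llama/Meta-Llama-3-8B-Instruct"))

# single_step=True performs exactly one thought/action cycle
# instead of looping until a final answer is produced.
result = agent.run("What is 2 multiplied by 3.6452?", single_step=True)
print(result)
```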
diff --git a/docs/source/examples/text_to_sql.md b/docs/source/examples/text_to_sql.md
index 4d31475..44b56ea 100644
--- a/docs/source/examples/text_to_sql.md
+++ b/docs/source/examples/text_to_sql.md
@@ -122,14 +122,14 @@ def sql_engine(query: str) -> str:
 
 Now let us create an agent that leverages this tool.
 
-We use the ReactCodeAgent, which is transformers.agents’ main agent class: an agent that writes actions in code and can iterate on previous output according to the ReAct framework.
+We use the CodeAgent, which is transformers.agents’ main agent class: an agent that writes actions in code and can iterate on previous output according to the ReAct framework.
 
 The llm_engine is the LLM that powers the agent system. HfEngine allows you to call LLMs using HF’s Inference API, either via Serverless or Dedicated endpoint, but you could also use any proprietary API.
 
 ```py
-from transformers.agents import ReactCodeAgent, HfApiEngine
+from transformers.agents import CodeAgent, HfApiEngine
 
-agent = ReactCodeAgent(
+agent = CodeAgent(
     tools=[sql_engine],
     llm_engine=HfApiEngine("meta-llama/Meta-Llama-3-8B-Instruct"),
 )
@@ -185,7 +185,7 @@ Since this request is a bit harder than the previous one, we’ll switch the LLM
 
 ```py
 sql_engine.description = updated_description
 
-agent = ReactCodeAgent(
+agent = CodeAgent(
     tools=[sql_engine],
     llm_engine=HfApiEngine("Qwen/Qwen2.5-72B-Instruct"),
 )
diff --git a/docs/source/reference/agents.md b/docs/source/reference/agents.md
index 10daf17..4e82ad8 100644
--- a/docs/source/reference/agents.md
+++ b/docs/source/reference/agents.md
@@ -27,7 +27,7 @@ contains the API docs for the underlying classes.
 
 ## Agents
 
-Our agents inherit from [`ReactAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react).
+Our agents inherit from [`MultiStepAgent`], which means they can act in multiple steps, each step consisting of one thought, then one tool call and execution. Read more in [this conceptual guide](../conceptual_guides/react).
 
 We provide two types of agents, based on the main [`Agent`] class.
 - [`JsonAgent`] writes its tool calls in JSON.
@@ -40,7 +40,7 @@ We provide two types of agents, based on the main [`Agent`] class.
 
 ### React agents
 
-[[autodoc]] ReactAgent
+[[autodoc]] MultiStepAgent
 
 [[autodoc]] JsonAgent
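The two documented flavors differ only in how the model expresses actions, so they are constructed identically. A hedged sketch (root-level imports assumed, tools omitted for brevity):

```py
from agents import JsonAgent, CodeAgent, HfApiEngine

llm_engine = HfApiEngine("Qwen/Qwen2.5-72B-Instruct")

# JsonAgent asks the LLM for tool calls as JSON blobs;
# CodeAgent asks for them as Python code snippets.
json_agent = JsonAgent(tools=[], llm_engine=llm_engine)
code_agent = CodeAgent(tools=[], llm_engine=llm_engine)
```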
diff --git a/src/agents/agents.py b/src/agents/agents.py
index 2742a5e..63bf7d8 100644
--- a/src/agents/agents.py
+++ b/src/agents/agents.py
@@ -89,8 +89,9 @@ class AgentGenerationError(AgentError):
 
 @dataclass
 class ToolCall:
-    tool_name: str
-    tool_arguments: Any
+    name: str
+    arguments: Any
+    id: str
 
 
 class AgentStep:
@@ -306,26 +307,49 @@ class BaseAgent:
             }
             memory.append(thought_message)
 
-        if step_log.tool_call is not None and summary_mode:
+        if step_log.tool_call is not None:
             tool_call_message = {
                 "role": MessageRole.ASSISTANT,
-                "content": f"[STEP {i} TOOL CALL]: "
-                + str(step_log.tool_call).strip(),
+                "content": str(
+                    [
+                        {
+                            "id": step_log.tool_call.id,
+                            "type": "function",
+                            "function": {
+                                "name": step_log.tool_call.name,
+                                "arguments": step_log.tool_call.arguments,
+                            },
+                        }
+                    ]
+                ),
             }
             memory.append(tool_call_message)
 
-        if step_log.error is not None or step_log.observations is not None:
+        if step_log.tool_call is None and step_log.error is not None:
+            message_content = (
+                "Error:\n"
+                + str(step_log.error)
+                + "\nNow let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.\n"
+            )
+            tool_response_message = {
+                "role": MessageRole.ASSISTANT,
+                "content": message_content,
+            }
+            memory.append(tool_response_message)
+        if step_log.tool_call is not None and (
+            step_log.error is not None or step_log.observations is not None
+        ):
             if step_log.error is not None:
                 message_content = (
-                    f"[OUTPUT OF STEP {i}] -> Error:\n"
+                    "Error:\n"
                     + str(step_log.error)
                     + "\nNow let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.\n"
                 )
             elif step_log.observations is not None:
-                message_content = f"[OUTPUT OF STEP {i}] -> Observation:\n{step_log.observations}"
+                message_content = f"Observation:\n{step_log.observations}"
             tool_response_message = {
                 "role": MessageRole.TOOL_RESPONSE,
-                "content": message_content,
+                "content": f"Call id: {getattr(step_log.tool_call, 'id', None) or 'call_0'}\n"
+                + message_content,
             }
             memory.append(tool_response_message)
 
@@ -362,7 +386,7 @@ class BaseAgent:
         raise NotImplementedError
 
 
-class ReactAgent(BaseAgent):
+class MultiStepAgent(BaseAgent):
     """
     This agent solves the given task step by step, using the ReAct framework:
     While the objective is not reached, the agent will perform a cycle of thinking and acting.
@@ -474,7 +498,7 @@ class MultiStepAgent(BaseAgent):
         task: str,
         stream: bool = False,
         reset: bool = True,
-        oneshot: bool = False,
+        single_step: bool = False,
         **kwargs,
     ):
         """
@@ -484,7 +508,7 @@ class MultiStepAgent(BaseAgent):
             task (`str`): The task to perform.
             stream (`bool`): Whether to run in a streaming way.
             reset (`bool`): Whether to reset the conversation or keep it going from previous run.
-            oneshot (`bool`): Should the agent run in one shot or multi-step fashion?
+            single_step (`bool`): Should the agent run in one shot or multi-step fashion?
 
         Example:
         ```py
@@ -516,7 +540,7 @@ class MultiStepAgent(BaseAgent):
             console.print(Group(Rule("[bold]New task", characters="="), Text(self.task)))
         self.logs.append(TaskStep(task=self.task))
 
-        if oneshot:
+        if single_step:
             step_start_time = time.time()
             step_log = ActionStep(start_time=step_start_time)
             step_log.end_time = time.time()
@@ -548,7 +572,7 @@ class MultiStepAgent(BaseAgent):
                 self.planning_step(
                     task, is_first_step=(iteration == 0), iteration=iteration
                 )
-            console.rule("[bold]New step")
+            console.rule(f"[bold]Step {iteration}")
 
             # Run one step!
             final_answer = self.step(step_log)
@@ -594,7 +618,7 @@ class MultiStepAgent(BaseAgent):
                 self.planning_step(
                     task, is_first_step=(iteration == 0), iteration=iteration
                 )
-            console.rule("[bold]New step")
+            console.rule(f"[bold]Step {iteration}")
 
             # Run one step!
             final_answer = self.step(step_log)
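With `stream=True`, `run` returns a generator that yields each step log as it completes, which is what the first of the two loops above implements. A hedged usage sketch, assuming an agent constructed as in the earlier examples (the attributes read on each log entry are an assumption based on `ActionStep`):

```py
# Consume step logs as they are produced instead of waiting for the final answer.
for step_log in agent.run("What is 2 multiplied by 3.6452?", stream=True):
    print(type(step_log).__name__, getattr(step_log, "observations", None))
```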
""" @@ -784,18 +808,6 @@ class JsonAgent(ReactAgent): # Add new step in logs log_entry.agent_memory = agent_memory.copy() - if self.verbose: - console.print( - Group( - Rule( - "[italic]Calling LLM engine with this last message:", - align="left", - style="orange", - ), - Text(str(self.prompt_messages[-1])), - ) - ) - try: additional_args = ( {"grammar": self.grammar} if self.grammar is not None else {} @@ -827,25 +839,27 @@ class JsonAgent(ReactAgent): ) try: - tool_name, arguments = self.tool_parser(action) + tool_name, tool_arguments = self.tool_parser(action) except Exception as e: raise AgentParsingError(f"Could not parse the given action: {e}.") - log_entry.tool_call = ToolCall(tool_name=tool_name, tool_arguments=arguments) + log_entry.tool_call = ToolCall( + tool_name=tool_name, tool_arguments=tool_arguments + ) # Execute console.print(Rule("Agent thoughts:", align="left"), Text(rationale)) console.print( - Panel(Text(f"Calling tool: '{tool_name}' with arguments: {arguments}")) + Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")) ) if tool_name == "final_answer": - if isinstance(arguments, dict): - if "answer" in arguments: - answer = arguments["answer"] + if isinstance(tool_arguments, dict): + if "answer" in tool_arguments: + answer = tool_arguments["answer"] else: - answer = arguments + answer = tool_arguments else: - answer = arguments + answer = tool_arguments if ( isinstance(answer, str) and answer in self.state.keys() ): # if the answer is a state variable, return the value @@ -853,9 +867,9 @@ class JsonAgent(ReactAgent): log_entry.action_output = answer return answer else: - if arguments is None: - arguments = {} - observation = self.execute_tool_call(tool_name, arguments) + if tool_arguments is None: + tool_arguments = {} + observation = self.execute_tool_call(tool_name, tool_arguments) observation_type = type(observation) if observation_type in [AgentImage, AgentAudio]: if observation_type == AgentImage: @@ -871,9 +885,10 @@ class JsonAgent(ReactAgent): log_entry.observations = updated_information return None -class ToolCallingAgent(ReactAgent): + +class ToolCallingAgent(MultiStepAgent): """ - In this agent, the tool calls will be formulated and parsed using the underlying library, before execution. + This agent uses JSON-like tool calls, but to the difference of JsonAgents, it makes use of the underlying librarie's tool calling facilities. 
""" def __init__( @@ -912,53 +927,29 @@ class ToolCallingAgent(ReactAgent): # Add new step in logs log_entry.agent_memory = agent_memory.copy() - if self.verbose: - console.print( - Group( - Rule( - "[italic]Calling LLM engine with this last message:", - align="left", - style="orange", - ), - Text(str(self.prompt_messages[-1])), - ) - ) - try: - llm_output = self.llm_engine( - self.prompt_messages, + tool_name, tool_arguments, tool_call_id = self.llm_engine.get_tool_call( + self.prompt_messages, available_tools=list(self.toolbox._tools.values()) ) - log_entry.llm_output = llm_output except Exception as e: raise AgentGenerationError(f"Error in generating llm_engine output: {e}.") - if self.verbose: - console.print( - Group( - Rule( - "[italic]Output message of the LLM:", - align="left", - style="orange", - ), - Text(llm_output), - ) - ) - - log_entry.tool_call = ToolCall(tool_name=tool_name, tool_arguments=arguments) + log_entry.tool_call = ToolCall( + name=tool_name, arguments=tool_arguments, id=tool_call_id + ) # Execute - console.print(Rule("Agent thoughts:", align="left"), Text(rationale)) console.print( - Panel(Text(f"Calling tool: '{tool_name}' with arguments: {arguments}")) + Panel(Text(f"Calling tool: '{tool_name}' with arguments: {tool_arguments}")) ) if tool_name == "final_answer": - if isinstance(arguments, dict): - if "answer" in arguments: - answer = arguments["answer"] + if isinstance(tool_arguments, dict): + if "answer" in tool_arguments: + answer = tool_arguments["answer"] else: - answer = arguments + answer = tool_arguments else: - answer = arguments + answer = tool_arguments if ( isinstance(answer, str) and answer in self.state.keys() ): # if the answer is a state variable, return the value @@ -966,9 +957,9 @@ class ToolCallingAgent(ReactAgent): log_entry.action_output = answer return answer else: - if arguments is None: - arguments = {} - observation = self.execute_tool_call(tool_name, arguments) + if tool_arguments is None: + tool_arguments = {} + observation = self.execute_tool_call(tool_name, tool_arguments) observation_type = type(observation) if observation_type in [AgentImage, AgentAudio]: if observation_type == AgentImage: @@ -985,7 +976,7 @@ class ToolCallingAgent(ReactAgent): return None -class CodeAgent(ReactAgent): +class CodeAgent(MultiStepAgent): """ In this agent, the tool calls will be formulated by the LLM in code format, then parsed and executed. """ @@ -1058,18 +1049,6 @@ class CodeAgent(ReactAgent): # Add new step in logs log_entry.agent_memory = agent_memory.copy() - if self.verbose: - console.print( - Group( - Rule( - "[italic]Calling LLM engine with these last messages:", - align="left", - style="orange", - ), - Text(str(self.prompt_messages[-2:])), - ) - ) - try: additional_args = ( {"grammar": self.grammar} if self.grammar is not None else {} @@ -1220,7 +1199,7 @@ __all__ = [ "AgentError", "BaseAgent", "ManagedAgent", - "ReactAgent", + "MultiStepAgent", "CodeAgent", "JsonAgent", "Toolbox", diff --git a/src/agents/default_tools/base.py b/src/agents/default_tools/base.py index ceedd3b..18e4243 100644 --- a/src/agents/default_tools/base.py +++ b/src/agents/default_tools/base.py @@ -112,9 +112,9 @@ class FinalAnswerTool(Tool): name = "final_answer" description = "Provides a final answer to the given problem." 
diff --git a/src/agents/default_tools/base.py b/src/agents/default_tools/base.py
index ceedd3b..18e4243 100644
--- a/src/agents/default_tools/base.py
+++ b/src/agents/default_tools/base.py
@@ -112,9 +112,9 @@ class FinalAnswerTool(Tool):
     name = "final_answer"
     description = "Provides a final answer to the given problem."
     inputs = {
-        "answer": {"type": "any", "description": "The final answer to the problem"}
+        "answer": {"type": "object", "description": "The final answer to the problem"}
     }
-    output_type = "any"
+    output_type = "object"
 
     def forward(self, answer):
         return answer
diff --git a/src/agents/gradio_ui.py b/src/agents/gradio_ui.py
index 8b2f6e2..8d99194 100644
--- a/src/agents/gradio_ui.py
+++ b/src/agents/gradio_ui.py
@@ -24,13 +24,13 @@ def pull_messages_from_step(step_log: AgentStep, test_mode: bool = True):
     if isinstance(step_log, ActionStep):
         yield gr.ChatMessage(role="assistant", content=step_log.llm_output)
         if step_log.tool_call is not None:
-            used_code = step_log.tool_call.tool_name == "code interpreter"
-            content = step_log.tool_call.tool_arguments
+            used_code = step_log.tool_call.name == "code interpreter"
+            content = step_log.tool_call.arguments
             if used_code:
                 content = f"```py\n{content}\n```"
             yield gr.ChatMessage(
                 role="assistant",
-                metadata={"title": f"🛠️ Used tool {step_log.tool_call.tool_name}"},
+                metadata={"title": f"🛠️ Used tool {step_log.tool_call.name}"},
                 content=str(content),
             )
         if step_log.observations is not None:
diff --git a/src/agents/llm_engines.py b/src/agents/llm_engines.py
index 7e24f75..793ee4a 100644
--- a/src/agents/llm_engines.py
+++ b/src/agents/llm_engines.py
@@ -16,14 +16,14 @@
 # limitations under the License.
 from copy import deepcopy
 from enum import Enum
-from typing import Dict, List, Optional
-
-from huggingface_hub import InferenceClient
-
+from typing import Dict, List, Optional, Tuple
 from transformers import AutoTokenizer, Pipeline
 import logging
 import os
 from openai import OpenAI
+from huggingface_hub import InferenceClient
+
+from .tools import Tool
 
 logger = logging.getLogger(__name__)
 
@@ -50,13 +50,37 @@ class MessageRole(str, Enum):
         return [r.value for r in cls]
 
 
-openai_role_conversions = {
+llama_role_conversions = {
+    MessageRole.TOOL_CALL: MessageRole.ASSISTANT,
     MessageRole.TOOL_RESPONSE: MessageRole.USER,
 }
 
-llama_role_conversions = {
-    MessageRole.TOOL_RESPONSE: MessageRole.USER,
-}
+
+def get_json_schema(tool: Tool) -> Dict:
+    return {
+        "type": "function",
+        "function": {
+            "name": tool.name,
+            "description": tool.description,
+            "parameters": {
+                "type": "object",
+                "properties": tool.inputs,
+                "required": list(tool.inputs.keys()),
+            },
+        },
+    }
+
+
+def get_json_schema_anthropic(tool: Tool) -> Dict:
+    return {
+        "name": tool.name,
+        "description": tool.description,
+        "input_schema": {
+            "type": "object",
+            "properties": tool.inputs,
+            "required": list(tool.inputs.keys()),
+        },
+    }
 
 
 def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str:
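For the `FinalAnswerTool` updated above, `get_json_schema` produces an OpenAI-style function schema built directly from `tool.inputs`; an illustrative print (import paths assumed):

```py
from agents.llm_engines import get_json_schema
from agents.default_tools.base import FinalAnswerTool

print(get_json_schema(FinalAnswerTool()))
# {'type': 'function',
#  'function': {'name': 'final_answer',
#               'description': 'Provides a final answer to the given problem.',
#               'parameters': {'type': 'object',
#                              'properties': {'answer': {'type': 'object', 'description': 'The final answer to the problem'}},
#                              'required': ['answer']}}}
```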
@@ -78,8 +102,8 @@ def get_clean_message_list(
     final_message_list = []
     message_list = deepcopy(message_list)  # Avoid modifying the original list
     for message in message_list:
-        if not set(message.keys()) == {"role", "content"}:
-            raise ValueError("Message should contain only 'role' and 'content' keys!")
+        # No strict key validation anymore: tool-call messages can carry
+        # extra keys besides 'role' and 'content'.
 
         role = message["role"]
         if role not in MessageRole.roles():
@@ -206,6 +230,7 @@ class HfApiEngine(HfEngine):
         grammar: Optional[str] = None,
         max_tokens: int = 1500,
     ) -> str:
+        """Generates a text completion for the given message list"""
         messages = get_clean_message_list(
             messages, role_conversions=llama_role_conversions
         )
         if grammar is not None:
             response = self.client.chat_completion(
                 messages,
                 stop=stop_sequences,
                 response_format=grammar,
                 max_tokens=max_tokens,
             )
         else:
-            response = self.client.chat_completion(
+            response = self.client.chat.completions.create(
                 messages, stop=stop_sequences, max_tokens=max_tokens
             )
 
         self.last_input_token_count = response.usage.prompt_tokens
         self.last_output_token_count = response.usage.completion_tokens
         return response
 
+    def get_tool_call(
+        self,
+        messages: List[Dict[str, str]],
+        available_tools: List[Tool],
+    ):
+        """Generates a tool call for the given message list"""
+        messages = get_clean_message_list(
+            messages, role_conversions=llama_role_conversions
+        )
+        response = self.client.chat.completions.create(
+            messages=messages,
+            tools=[get_json_schema(tool) for tool in available_tools],
+            tool_choice="auto",
+        )
+        tool_call = response.choices[0].message.tool_calls[0]
+        self.last_input_token_count = response.usage.prompt_tokens
+        self.last_output_token_count = response.usage.completion_tokens
+        return tool_call.function.name, tool_call.function.arguments, tool_call.id
+
 
 class TransformersEngine(HfEngine):
     """This engine uses a pre-initialized local text-generation pipeline."""
@@ -305,6 +349,8 @@ class OpenAIEngine:
             base_url=base_url,
             api_key=api_key,
         )
+        self.last_input_token_count = 0
+        self.last_output_token_count = 0
 
     def __call__(
         self,
@@ -328,6 +374,26 @@ class OpenAIEngine:
         self.last_output_token_count = response.usage.completion_tokens
         return response.choices[0].message.content
 
+    def get_tool_call(
+        self,
+        messages: List[Dict[str, str]],
+        available_tools: List[Tool],
+    ):
+        """Generates a tool call for the given message list"""
+        messages = get_clean_message_list(
+            messages, role_conversions=llama_role_conversions
+        )
+        response = self.client.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            tools=[get_json_schema(tool) for tool in available_tools],
+            tool_choice="required",
+        )
+        tool_call = response.choices[0].message.tool_calls[0]
+        self.last_input_token_count = response.usage.prompt_tokens
+        self.last_output_token_count = response.usage.completion_tokens
+        return tool_call.function.name, tool_call.function.arguments, tool_call.id
+
 
 class AnthropicEngine:
     def __init__(self, model_name="claude-3-5-sonnet-20240620", use_bedrock=False):
@@ -345,17 +411,19 @@ class AnthropicEngine:
             self.client = Anthropic(
                 api_key=os.getenv("ANTHROPIC_API_KEY"),
             )
+        self.last_input_token_count = 0
+        self.last_output_token_count = 0
 
-    def __call__(
+    def separate_messages_system_prompt(
         self,
-        messages: List[Dict[str, str]],
-        stop_sequences: Optional[List[str]] = None,
-        grammar: Optional[str] = None,
-        max_tokens: int = 1500,
-    ) -> str:
-        messages = get_clean_message_list(
-            messages, role_conversions=openai_role_conversions
-        )
+        messages: List[Dict[str, str]],
+    ) -> Tuple[List[Dict[str, str]], str]:
+        """Gets the system prompt and the rest of messages as separate elements."""
         index_system_message, system_prompt = None, None
         for index, message in enumerate(messages):
             if message["role"] == MessageRole.SYSTEM:
@@ -370,7 +438,21 @@ class AnthropicEngine:
         if len(filtered_messages) == 0:
             print("Error, no user message:", messages)
             assert False
+        return filtered_messages, system_prompt
 
+    def __call__(
+        self,
+        messages: List[Dict[str, str]],
+        stop_sequences: Optional[List[str]] = None,
+        grammar: Optional[str] = None,
+        max_tokens: int = 1500,
+    ) -> str:
+        messages = get_clean_message_list(
+            messages, role_conversions=llama_role_conversions
+        )
+        filtered_messages, system_prompt = self.separate_messages_system_prompt(
+            messages
+        )
         response = self.client.messages.create(
             model=self.model_name,
             system=system_prompt,
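All the engines now return the same `(name, arguments, id)` triple from `get_tool_call`, so agents can swap providers freely. A hedged sketch against the OpenAI-backed engine (`OPENAI_API_KEY` in the environment and the import paths are assumptions):

```py
from agents.llm_engines import OpenAIEngine
from agents.default_tools.base import FinalAnswerTool

engine = OpenAIEngine()
name, arguments, call_id = engine.get_tool_call(
    [{"role": "user", "content": "Answer with the number 42."}],
    available_tools=[FinalAnswerTool()],
)
# Note: OpenAI-compatible APIs return `arguments` as a JSON string,
# e.g. '{"answer": "42"}', which may need json.loads() before execution.
print(name, arguments, call_id)
```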
@@ -385,6 +467,32 @@ class AnthropicEngine:
             full_response_text += content_block.text
         return full_response_text
 
+    def get_tool_call(
+        self,
+        messages: List[Dict[str, str]],
+        available_tools: List[Tool],
+        max_tokens: int = 1500,
+    ):
+        """Generates a tool call for the given message list"""
+        messages = get_clean_message_list(
+            messages, role_conversions=llama_role_conversions
+        )
+        filtered_messages, system_prompt = self.separate_messages_system_prompt(
+            messages
+        )
+        response = self.client.messages.create(
+            model=self.model_name,
+            system=system_prompt,
+            messages=filtered_messages,
+            tools=[get_json_schema_anthropic(tool) for tool in available_tools],
+            tool_choice={"type": "any"},
+            max_tokens=max_tokens,
+        )
+        # The response can contain text blocks before the tool call,
+        # so select the tool_use block instead of assuming it comes first.
+        tool_call = next(
+            block for block in response.content if block.type == "tool_use"
+        )
+        self.last_input_token_count = response.usage.input_tokens
+        self.last_output_token_count = response.usage.output_tokens
+        return tool_call.name, tool_call.input, tool_call.id
+
 
 __all__ = [
     "MessageRole",
diff --git a/src/agents/prompts.py b/src/agents/prompts.py
index 4c21b50..5f30bae 100644
--- a/src/agents/prompts.py
+++ b/src/agents/prompts.py
@@ -51,7 +51,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"):
             return f.read()
 
 
-ONESHOT_CODE_SYSTEM_PROMPT = """You will be given a task to solve, your job is to come up with a series of simple commands in Python that will perform the task.
+SINGLE_STEP_CODE_SYSTEM_PROMPT = """You will be given a task to solve, your job is to come up with a series of simple commands in Python that will perform the task.
 To help you, I will give you access to a set of tools that you can use. Each tool is a Python function and has a description explaining the task it performs, the inputs it expects and the outputs it returns.
 You should first explain which tool you will use to perform the task and for what reason, then write the code in Python.
 Each instruction in Python should be a simple assignment. You can print intermediate results if it makes sense to do so.
@@ -618,7 +618,7 @@ And even if your task resolution is not successful, please return as much contex
 
 __all__ = [
     "USER_PROMPT_PLAN_UPDATE",
     "PLAN_UPDATE_FINAL_PLAN_REDACTION",
-    "ONESHOT_CODE_SYSTEM_PROMPT",
+    "SINGLE_STEP_CODE_SYSTEM_PROMPT",
     "CODE_SYSTEM_PROMPT",
     "JSON_SYSTEM_PROMPT",
     "MANAGED_AGENT_PROMPT",
diff --git a/src/agents/tools.py b/src/agents/tools.py
index 3acf9f2..e93d0de 100644
--- a/src/agents/tools.py
+++ b/src/agents/tools.py
@@ -114,6 +114,7 @@ AUTHORIZED_TYPES = [
     "image",
     "audio",
     "any",
+    "object",
 ]
 
 CONVERSION_DICT = {"str": "string", "int": "integer", "float": "number"}
diff --git a/src/agents/utils.py b/src/agents/utils.py
index 1462c88..d7e5946 100644
--- a/src/agents/utils.py
+++ b/src/agents/utils.py
@@ -143,10 +143,6 @@ class ImportFinder(ast.NodeVisitor):
             self.packages.add(base_package)
 
 
-import ast
-from typing import Dict
-
-
 def get_method_source(method):
     """Get source code for a method, including bound methods."""
     if isinstance(method, types.MethodType):
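Anthropic returns tool calls as `tool_use` content blocks, which is why `AnthropicEngine.get_tool_call` above scans `response.content` rather than indexing a fixed position. A standalone sketch against the `anthropic` SDK (the tool definition is illustrative):

```py
import os
from anthropic import Anthropic

client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
response = client.messages.create(
    model="claude-3-5-sonnet-20240620",
    max_tokens=256,
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=[{
        "name": "get_weather",
        "description": "Gets the current weather for a city.",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string", "description": "City name"}},
            "required": ["city"],
        },
    }],
    tool_choice={"type": "any"},
)
# Pick the tool_use block; `input` is already a parsed dict here.
tool_use = next(block for block in response.content if block.type == "tool_use")
print(tool_use.name, tool_use.input, tool_use.id)
```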
diff --git a/tests/test_agents.py b/tests/test_agents.py
index 41689dd..f1e8332 100644
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -150,7 +150,7 @@ final_answer(res)
 """
 
 
-def fake_code_llm_oneshot(messages, stop_sequences=None, grammar=None) -> str:
+def fake_code_llm_single_step(messages, stop_sequences=None, grammar=None) -> str:
     return """
 Thought: I should multiply 2 by 3.6452. special_marker
 Code:
 ```py
 result = 2*3.6452
 ```
@@ -173,11 +173,11 @@ print(result)
 """
 
 
 class AgentTests(unittest.TestCase):
-    def test_fake_oneshot_code_agent(self):
+    def test_fake_single_step_code_agent(self):
         agent = CodeAgent(
-            tools=[PythonInterpreterTool()], llm_engine=fake_code_llm_oneshot
+            tools=[PythonInterpreterTool()], llm_engine=fake_code_llm_single_step
         )
-        output = agent.run("What is 2 multiplied by 3.6452?", oneshot=True)
+        output = agent.run("What is 2 multiplied by 3.6452?", single_step=True)
         assert isinstance(output, str)
         assert output == "7.2904"
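A tool-calling counterpart of this test can stub `get_tool_call` directly, since `ToolCallingAgent` never parses raw model text. A sketch only: the fake engine below and the auto-added `final_answer` tool are assumptions about the test harness, not part of this patch:

```py
class FakeToolCallEngine:
    """Stub engine that immediately calls final_answer with the result."""

    def get_tool_call(self, messages, available_tools):
        return "final_answer", {"answer": "7.2904"}, "call_0"


def test_fake_tool_calling_agent():
    agent = ToolCallingAgent(tools=[], llm_engine=FakeToolCallEngine())
    output = agent.run("What is 2 multiplied by 3.6452?")
    assert output == "7.2904"
```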