Remove direct support for the OpenAI and Anthropic engines; one can use LiteLLM instead
This commit is contained in:
parent 162d4dc362
commit 3a70e17cae

@@ -52,7 +52,7 @@ Additionally, `llm_engine` can also take a `grammar` argument. In the case where
For convenience, we provide pre-built classes for your llm engine:
- [`TransformersEngine`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`.
- [`HfApiEngine`] leverages a `huggingface_hub.InferenceClient` under the hood.
- We also provide [`OpenAIEngine`] and [`AnthropicEngine`] but you could use anything!
- We also provide [`LiteLLMEngine`], which lets you call 100+ different models through [LiteLLM](https://docs.litellm.ai/)!

You will also need a `tools` argument which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`.
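
For context, a minimal sketch of how these documented pieces fit together (not part of the diff; the task string is illustrative, and the class and argument names are taken from elsewhere in this commit):

```py
from smolagents import CodeAgent, LiteLLMEngine

# LiteLLM resolves "gpt-4o" to OpenAI; any model string it supports works here.
llm_engine = LiteLLMEngine("gpt-4o")

# `tools` may be an empty list; `add_base_tools=True` layers the default toolbox on top.
agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
agent.run("What is 2 multiplied by 3.6452?")
```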

@@ -1,11 +1,9 @@
from smolagents.agents import ToolCallingAgent
from smolagents import tool, HfApiEngine, OpenAIEngine, AnthropicEngine, TransformersEngine, LiteLLMEngine
from smolagents import tool, HfApiEngine, TransformersEngine, LiteLLMEngine

# Choose which LLM engine to use!
# llm_engine = OpenAIEngine("gpt-4o")
# llm_engine = AnthropicEngine("claude-3-5-sonnet-20240620")
# llm_engine = HfApiEngine("meta-llama/Llama-3.3-70B-Instruct")
# llm_engine = TransformersEngine("meta-llama/Llama-3.2-2B-Instruct")
llm_engine = HfApiEngine("meta-llama/Llama-3.3-70B-Instruct")
llm_engine = TransformersEngine("meta-llama/Llama-3.2-2B-Instruct")
llm_engine = LiteLLMEngine("gpt-4o")

@tool

@@ -23,6 +23,7 @@ dependencies = [
    "python-dotenv>=1.0.1",
    "e2b-code-interpreter>=1.0.3",
    "torch>=2.5.1",
    "litellm>=1.55.10",
]

[project.optional-dependencies]

@@ -40,7 +40,6 @@ from .llm_engines import MessageRole
from .monitoring import Monitor
from .prompts import (
    CODE_SYSTEM_PROMPT,
    JSON_SYSTEM_PROMPT,
    TOOL_CALLING_SYSTEM_PROMPT,
    PLAN_UPDATE_FINAL_PLAN_REDACTION,
    SYSTEM_PROMPT_FACTS,

@@ -171,7 +170,6 @@ class MultiStepAgent:
        grammar: Optional[Dict[str, str]] = None,
        managed_agents: Optional[Dict] = None,
        step_callbacks: Optional[List[Callable]] = None,
        monitor_metrics: bool = True,
        planning_interval: Optional[int] = None,
    ):
        if system_prompt is None:

@@ -210,14 +208,8 @@ class MultiStepAgent:
        self.logs = []
        self.task = None
        self.verbose = verbose

        # Initialize step callbacks
        self.step_callbacks = step_callbacks if step_callbacks is not None else []

        # Initialize Monitor if monitor_metrics is True
        self.monitor = None
        if monitor_metrics:
            self.monitor = Monitor(self.llm_engine)
        self.step_callbacks = step_callbacks if step_callbacks is not None else []
        self.step_callbacks.append(self.monitor.update_metrics)

    @property

@@ -712,7 +704,6 @@ Now begin!""",
        )



class ToolCallingAgent(MultiStepAgent):
    """
    This agent uses JSON-like tool calls, using method `llm_engine.get_tool_call` to leverage the LLM engine's tool calling capabilities.

@@ -755,7 +746,9 @@ class ToolCallingAgent(MultiStepAgent):
                stop_sequences=["Observation:"],
            )
        except Exception as e:
            raise AgentGenerationError(f"Error in generating tool call with llm_engine:\n{e}")
            raise AgentGenerationError(
                f"Error in generating tool call with llm_engine:\n{e}"
            )

        log_entry.tool_call = ToolCall(
            name=tool_name, arguments=tool_arguments, id=tool_call_id

@@ -16,15 +16,19 @@
# limitations under the License.
from copy import deepcopy
from enum import Enum
from typing import Dict, List, Optional, Tuple
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, StoppingCriteriaList
from typing import Dict, List, Optional
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    StoppingCriteria,
    StoppingCriteriaList,
)

import litellm
import logging
import os
import random

from openai import OpenAI
from huggingface_hub import InferenceClient

from .tools import Tool

@@ -69,25 +73,16 @@ def get_json_schema(tool: Tool) -> Dict:
        "description": tool.description,
        "parameters": {
            "type": "object",
            "properties": {k: {k2: v2.replace("any", "object") for k2, v2 in v.items()} for k, v in tool.inputs.items()},
            "properties": {
                k: {k2: v2.replace("any", "object") for k2, v2 in v.items()}
                for k, v in tool.inputs.items()
            },
            "required": list(tool.inputs.keys()),
        },
    },
    }


def get_json_schema_anthropic(tool: Tool) -> Dict:
    return {
        "name": tool.name,
        "description": tool.description,
        "input_schema": {
            "type": "object",
            "properties": tool.inputs,
            "required": list(tool.inputs.keys()),
        },
    }


def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str:
    for stop_seq in stop_sequences:
        if content[-len(stop_seq) :] == stop_seq:
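
An aside on the comprehension above (not from the diff): it rewrites every occurrence of the substring "any" to "object" in each input spec's values, since OpenAI-style JSON schemas do not accept "any" as a type. A self-contained sketch with a hypothetical tool input:

```py
# Hypothetical tool inputs, for illustration only.
inputs = {"query": {"type": "any", "description": "the web search query"}}

parameters = {
    "type": "object",
    "properties": {
        k: {k2: v2.replace("any", "object") for k2, v2 in v.items()}
        for k, v in inputs.items()
    },
    "required": list(inputs.keys()),
}

# The "any" type has been rewritten to "object":
assert parameters["properties"]["query"]["type"] == "object"
```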

@@ -395,176 +390,7 @@ class TransformersEngine(HfEngine):
        return tool_name, tool_input, call_id


class OpenAIEngine:
    def __init__(
        self,
        model_id: Optional[str] = None,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
    ):
        """Creates a LLM Engine that follows OpenAI format.

        Args:
            model_id (`str`, *optional*): the model name to use.
            api_key (`str`, *optional*): your API key.
            base_url (`str`, *optional*): the URL to use if using a different inference service than OpenAI, for instance "https://api-inference.huggingface.co/v1/".
        """
        if model_id is None:
            model_id = "gpt-4o"
        if api_key is None:
            api_key = os.getenv("OPENAI_API_KEY")
        self.model_id = model_id
        self.client = OpenAI(
            base_url=base_url,
            api_key=api_key,
        )
        self.last_input_token_count = 0
        self.last_output_token_count = 0

    def __call__(
        self,
        messages: List[Dict[str, str]],
        stop_sequences: Optional[List[str]] = None,
        grammar: Optional[str] = None,
        max_tokens: int = 1500,
    ) -> str:
        messages = get_clean_message_list(
            messages, role_conversions=tool_role_conversions
        )

        response = self.client.chat.completions.create(
            model=self.model_id,
            messages=messages,
            stop=stop_sequences,
            temperature=0.5,
            max_tokens=max_tokens,
        )
        self.last_input_token_count = response.usage.prompt_tokens
        self.last_output_token_count = response.usage.completion_tokens
        return response.choices[0].message.content

    def get_tool_call(
        self,
        messages: List[Dict[str, str]],
        available_tools: List[Tool],
        stop_sequences: Optional[List[str]] = None,
    ):
        """Generates a tool call for the given message list. This method is used only by `ToolCallingAgent`."""
        messages = get_clean_message_list(
            messages, role_conversions=tool_role_conversions
        )
        response = self.client.chat.completions.create(
            model=self.model_id,
            messages=messages,
            tools=[get_json_schema(tool) for tool in available_tools],
            tool_choice="required",
        )
        tool_call = response.choices[0].message.tool_calls[0]
        self.last_input_token_count = response.usage.prompt_tokens
        self.last_output_token_count = response.usage.completion_tokens
        return tool_call.function.name, tool_call.function.arguments, tool_call.id


class AnthropicEngine:
    def __init__(self, model_id="claude-3-5-sonnet-20240620", use_bedrock=False):
        from anthropic import Anthropic, AnthropicBedrock

        self.model_id = model_id
        if use_bedrock:
            self.model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
            self.client = AnthropicBedrock(
                aws_access_key=os.getenv("AWS_BEDROCK_ID"),
                aws_secret_key=os.getenv("AWS_BEDROCK_KEY"),
                aws_region="us-east-1",
            )
        else:
            self.client = Anthropic(
                api_key=os.getenv("ANTHROPIC_API_KEY"),
            )
        self.last_input_token_count = 0
        self.last_output_token_count = 0

    def separate_messages_system_prompt(
        self,
        messages: List[
            Dict[
                str,
                str,
            ]
        ],
    ) -> Tuple[List[Dict[str, str]], str]:
        """Gets the system prompt and the rest of messages as separate elements."""
        index_system_message, system_prompt = None, None
        for index, message in enumerate(messages):
            if message["role"] == MessageRole.SYSTEM:
                index_system_message = index
                system_prompt = message["content"]
        if system_prompt is None:
            raise Exception("No system prompt found!")

        filtered_messages = [
            message for i, message in enumerate(messages) if i != index_system_message
        ]
        if len(filtered_messages) == 0:
            print("Error, no user message:", messages)
            assert False
        return filtered_messages, system_prompt

    def __call__(
        self,
        messages: List[Dict[str, str]],
        stop_sequences: Optional[List[str]] = None,
        grammar: Optional[str] = None,
        max_tokens: int = 1500,
    ) -> str:
        messages = get_clean_message_list(
            messages, role_conversions=tool_role_conversions
        )
        filtered_messages, system_prompt = self.separate_messages_system_prompt(
            messages
        )
        response = self.client.messages.create(
            model=self.model_id,
            system=system_prompt,
            messages=filtered_messages,
            stop_sequences=stop_sequences,
            temperature=0.5,
            max_tokens=max_tokens,
        )
        full_response_text = ""
        for content_block in response.content:
            if content_block.type == "text":
                full_response_text += content_block.text
        return full_response_text

    def get_tool_call(
        self,
        messages: List[Dict[str, str]],
        available_tools: List[Tool],
        stop_sequences: Optional[List[str]] = None,
        max_tokens: int = 1500,
    ):
        """Generates a tool call for the given message list. This method is used only by `ToolCallingAgent`."""
        messages = get_clean_message_list(
            messages, role_conversions=tool_role_conversions
        )
        filtered_messages, system_prompt = self.separate_messages_system_prompt(
            messages
        )
        response = self.client.messages.create(
            model=self.model_id,
            system=system_prompt,
            messages=filtered_messages,
            tools=[get_json_schema_anthropic(tool) for tool in available_tools],
            tool_choice={"type": "any"},
            max_tokens=max_tokens,
        )
        tool_call = response.content[0]
        self.last_input_token_count = response.usage.input_tokens
        self.last_output_token_count = response.usage.output_tokens
        return tool_call.name, tool_call.input, tool_call.id


class LiteLLMEngine():
class LiteLLMEngine:
    def __init__(self, model_id="anthropic/claude-3-5-sonnet-20240620"):
        self.model_id = model_id
        # IMPORTANT - Set this to TRUE to add the function to the prompt for Non OpenAI LLMs

@@ -624,7 +450,5 @@ __all__ = [
    "HfEngine",
    "TransformersEngine",
    "HfApiEngine",
    "OpenAIEngine",
    "AnthropicEngine",
    "LiteLLMEngine",
]
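
For background (not in the diff): `LiteLLMEngine` delegates to litellm's OpenAI-compatible `completion` call, and the truncated `# IMPORTANT` comment in the hunk above plausibly precedes setting `litellm.add_function_to_prompt`, which inlines tool schemas into the prompt for providers without native tool calling. A minimal standalone sketch; the model string and message are illustrative:

```py
import litellm

# Assumption based on the truncated comment: inline tool schemas into the
# prompt for non-OpenAI models that lack native function calling.
litellm.add_function_to_prompt = True

# litellm.completion mirrors the OpenAI chat-completions API but accepts
# "provider/model" strings routing to 100+ providers.
response = litellm.completion(
    model="anthropic/claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "Say hello in one word."}],
    max_tokens=1500,
)
print(response.choices[0].message.content)
```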

@@ -14,43 +14,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from transformers.utils import cached_file


# docstyle-ignore
CHAT_MESSAGE_PROMPT = """
Human: <<task>>

Assistant: """


DEFAULT_PROMPTS_REPO = "huggingface-tools/default-prompts"
PROMPT_FILES = {"chat": "chat_prompt_template.txt", "run": "run_prompt_template.txt"}


def download_prompt(prompt_or_repo_id, agent_name, mode="run"):
    """
    Downloads and caches the prompt from a repo and returns it contents (if necessary).
    """
    if prompt_or_repo_id is None:
        prompt_or_repo_id = DEFAULT_PROMPTS_REPO

    # prompt is considered a repo ID when it does not contain any kind of space
    if re.search("\\s", prompt_or_repo_id) is not None:
        return prompt_or_repo_id

    prompt_file = cached_file(
        prompt_or_repo_id,
        PROMPT_FILES[mode],
        repo_type="dataset",
        user_agent={"agent": agent_name},
    )
    with open(prompt_file, "r", encoding="utf-8") as f:
        return f.read()


SINGLE_STEP_CODE_SYSTEM_PROMPT = """You will be given a task to solve, your job is to come up with a series of simple commands in Python that will perform the task.
To help you, I will give you access to a set of tools that you can use. Each tool is a Python function and has a description explaining the task it performs, the inputs it expects and the outputs it returns.
You should first explain which tool you will use to perform the task and for what reason, then write the code in Python.

@@ -73,7 +36,7 @@ translated_question = translator(question=question, src_lang="French", tgt_lang=
print(f"The translated question is {translated_question}.")
answer = image_qa(image=image, question=translated_question)
final_answer(f"The answer is {answer}")
```<end_action>
```<end_code>

---
Task: "Identify the oldest person in the `document` and create an image showcasing the result."

@@ -85,7 +48,7 @@ answer = document_qa(document, question="What is the oldest person?")
print(f"The answer is {answer}.")
image = image_generator(answer)
final_answer(image)
```<end_action>
```<end_code>

---
Task: "Generate an image using the text given in the variable `caption`."

@@ -95,7 +58,7 @@ Code:
```py
image = image_generator(prompt=caption)
final_answer(image)
```<end_action>
```<end_code>

---
Task: "Summarize the text given in the variable `text` and read it out loud."

@@ -107,7 +70,7 @@ summarized_text = summarizer(text)
print(f"Summary: {summarized_text}")
audio_summary = text_reader(summarized_text)
final_answer(audio_summary)
```<end_action>
```<end_code>

---
Task: "Answer the question in the variable `question` about the text in the variable `text`. Use the answer to generate an image."

@@ -119,7 +82,7 @@ answer = text_qa(text=text, question=question)
print(f"The answer is {answer}.")
image = image_generator(answer)
final_answer(image)
```<end_action>
```<end_code>

---
Task: "Caption the following `image`."

@@ -129,7 +92,7 @@ Code:
```py
caption = image_captioner(image)
final_answer(caption)
```<end_action>
```<end_code>

---
Above example were using tools that might not exist for you. You only have access to these tools:

@@ -138,145 +101,13 @@ Above example were using tools that might not exist for you. You only have acces
{{managed_agents_descriptions}}

Remember to make sure that variables you use are all defined. In particular don't import packages!
Be sure to provide a 'Code:\n```' sequence before the code and '```<end_action>' after, else you will get an error.
Be sure to provide a 'Code:\n```' sequence before the code and '```<end_code>' after, else you will get an error.
DO NOT pass the arguments as a dict as in 'answer = ask_search_agent({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = ask_search_agent(query="What is the place where James Bond lives?")'.

Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
"""


JSON_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using JSON tool calls. You will be given a task to solve as best you can.
To do so, you have been given access to the following tools: {{tool_names}}
The way you use the tools is by specifying a json blob, ending with '<end_action>'.
Specifically, this json should have an `action` key (name of the tool to use) and an `action_input` key (input to the tool).

The $ACTION_JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. It should be formatted in json. Do not try to escape special characters. Here is the template of a valid $ACTION_JSON_BLOB:
{
  "tool_name": $TOOL_NAME,
  "tool_arguments": $INPUT
}<end_action>

Make sure to have the $INPUT as a dictionary in the right format for the tool you are using, and do not put variable names as input if you can find the right values.

You should ALWAYS use the following format:

Thought: you should always think about one action to take. Then use the action as follows:
Action:
$ACTION_JSON_BLOB
Observation: the result of the action
... (this Thought/Action/Observation can repeat N times, you should take several steps when needed. The $ACTION_JSON_BLOB must only use a SINGLE action at a time.)

You can use the result of the previous action as input for the next action.
The observation will always be a string: it can represent a file, like "image_1.jpg".
Then you can use it as input for the next action. You can do it for instance as follows:

Observation: "image_1.jpg"

Thought: I need to transform the image that I received in the previous observation to make it green.
Action:
{
  "tool_name": "image_transformer",
  "tool_arguments": {"image": "image_1.jpg"}
}<end_action>

To provide the final answer to the task, use an action blob with "tool_name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
Action:
{
  "tool_name": "final_answer",
  "tool_arguments": {"answer": "insert your final answer here"}
}<end_action>


Here are a few examples using notional tools:
---
Task: "Generate an image of the oldest person in this document."

Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
Action:
{
  "tool_name": "document_qa",
  "tool_arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
}<end_action>
Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."


Thought: I will now generate an image showcasing the oldest person.
Action:
{
  "tool_name": "image_generator",
  "tool_arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
}<end_action>
Observation: "image.png"

Thought: I will now return the generated image.
Action:
{
  "tool_name": "final_answer",
  "tool_arguments": "image.png"
}<end_action>

---
Task: "What is the result of the following operation: 5 + 3 + 1294.678?"

Thought: I will use python code evaluator to compute the result of the operation and then return the final answer using the `final_answer` tool
Action:
{
  "tool_name": "python_interpreter",
  "tool_arguments": {"code": "5 + 3 + 1294.678"}
}<end_action>
Observation: 1302.678

Thought: Now that I know the result, I will now return it.
Action:
{
  "tool_name": "final_answer",
  "tool_arguments": "1302.678"
}<end_action>

---
Task: "Which city has the highest population , Guangzhou or Shanghai?"

Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
Action:
{
  "tool_name": "search",
  "tool_arguments": "Population Guangzhou"
}<end_action>
Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']


Thought: Now let's get the population of Shanghai using the tool 'search'.
Action:
{
  "tool_name": "search",
  "tool_arguments": "Population Shanghai"
}
Observation: '26 million (2019)'

Thought: Now I know that Shanghai has a larger population. Let's return the result.
Action:
{
  "tool_name": "final_answer",
  "tool_arguments": "Shanghai"
}<end_action>


Above example were using notional tools that might not exist for you. You only have access to these tools:

{{tool_descriptions}}

{{managed_agents_descriptions}}

Here are the rules you should always follow to solve your task:
1. ALWAYS provide a 'Thought:' sequence, and an 'Action:' sequence that ends with <end_action>, else you will fail.
2. Always use the right arguments for the tools. Never use variable names in the 'action_input' field, use the value instead.
3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
4. Never re-do a tool call that you previously did with the exact same parameters.

Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
"""


TOOL_CALLING_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
To do so, you have been given access to the following tools: {{tool_names}}

@@ -389,7 +220,7 @@ To do so, you have been given access to a list of tools: these tools are basical
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.

At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_action>' sequence.
Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_code>' sequence.
During each intermediate step, you can use 'print()' to save whatever important information you will then need.
These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
In the end you have to return a final answer using the `final_answer` tool.

@@ -403,7 +234,7 @@ Code:
```py
answer = document_qa(document=document, question="Who is the oldest person mentioned?")
print(answer)
```<end_action>
```<end_code>
Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."

Thought: I will now generate an image showcasing the oldest person.

@@ -411,7 +242,7 @@ Code:
```py
image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
final_answer(image)
```<end_action>
```<end_code>

---
Task: "What is the result of the following operation: 5 + 3 + 1294.678?"

@@ -421,7 +252,7 @@ Code:
```py
result = 5 + 3 + 1294.678
final_answer(result)
```<end_action>
```<end_code>

---
Task: "Which city has the highest population: Guangzhou or Shanghai?"

@@ -433,7 +264,7 @@ population_guangzhou = search("Guangzhou population")
print("Population Guangzhou:", population_guangzhou)
population_shanghai = search("Shanghai population")
print("Population Shanghai:", population_shanghai)
```<end_action>
```<end_code>
Observation:
Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
Population Shanghai: '26 million (2019)'

@@ -442,7 +273,7 @@ Thought: Now I know that Shanghai has the highest population.
Code:
```py
final_answer("Shanghai")
```<end_action>
```<end_code>

---
Task: "What is the current age of the pope, raised to the power 0.36?"

@@ -452,7 +283,7 @@ Code:
```py
pope_age = wiki(query="current pope age")
print("Pope age:", pope_age)
```<end_action>
```<end_code>
Observation:
Pope age: "The pope Francis is currently 85 years old."

@@ -461,7 +292,7 @@ Code:
```py
pope_current_age = 85 ** 0.36
final_answer(pope_current_age)
```<end_action>
```<end_code>

Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:

@@ -470,7 +301,7 @@ Above example were using notional tools that might not exist for you. On top of
{{managed_agents_descriptions}}

Here are the rules you should always follow to solve your task:
1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_action>' sequence, else you will fail.
1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
2. Use only variables that you have defined!
3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'.
4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
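
A note on the `<end_action>` → `<end_code>` rename running through these prompt hunks (not part of the diff): the marker serves as a stop sequence and parsing delimiter for the model's code block. An illustrative sketch of extracting the code, not smolagents' actual parser:

```py
import re

def extract_code(llm_output: str) -> str:
    # The prompt instructs the model to emit: Code:\n```py ... ```<end_code>
    match = re.search(r"```(?:py|python)?\n(.*?)\n```", llm_output, re.DOTALL)
    if match is None:
        raise ValueError("No code block found in model output")
    return match.group(1)

sample = 'Thought: simple math.\nCode:\n```py\nfinal_answer(5 + 3)\n```<end_code>'
print(extract_code(sample))  # -> final_answer(5 + 3)
```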

@@ -620,6 +451,5 @@ __all__ = [
    "PLAN_UPDATE_FINAL_PLAN_REDACTION",
    "SINGLE_STEP_CODE_SYSTEM_PROMPT",
    "CODE_SYSTEM_PROMPT",
    "JSON_SYSTEM_PROMPT",
    "MANAGED_AGENT_PROMPT",
]

@@ -182,7 +182,9 @@ class AgentTests(unittest.TestCase):
        assert output == "7.2904"

    def test_fake_json_agent(self):
        agent = ToolCallingAgent(tools=[PythonInterpreterTool()], llm_engine=fake_json_llm)
        agent = ToolCallingAgent(
            tools=[PythonInterpreterTool()], llm_engine=fake_json_llm
        )
        output = agent.run("What is 2 multiplied by 3.6452?")
        assert isinstance(output, str)
        assert output == "7.2904"

@@ -15,7 +15,13 @@

import unittest

from smolagents import AgentImage, AgentError, CodeAgent, ToolCallingAgent, stream_to_gradio
from smolagents import (
    AgentImage,
    AgentError,
    CodeAgent,
    ToolCallingAgent,
    stream_to_gradio,
)


class MonitoringTester(unittest.TestCase):