Remove direct support for OpenAI and Anthropic engines; one can use LiteLLM instead

This commit is contained in:
Aymeric 2024-12-24 23:36:46 +01:00
parent 162d4dc362
commit 3a70e17cae
9 changed files with 59 additions and 405 deletions

View File

@ -52,7 +52,7 @@ Additionally, `llm_engine` can also take a `grammar` argument. In the case where
For convenience, we provide pre-built classes for your llm engine: For convenience, we provide pre-built classes for your llm engine:
- [`TransformersEngine`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`. - [`TransformersEngine`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`.
- [`HfApiEngine`] leverages a `huggingface_hub.InferenceClient` under the hood. - [`HfApiEngine`] leverages a `huggingface_hub.InferenceClient` under the hood.
- We also provide [`OpenAIEngine`] and [`AnthropicEngine`] but you could use anything! - We also provide [`LiteLLMEngine`], which lets you call 100+ different models through [LiteLLM](https://docs.litellm.ai/)!
You will also need a `tools` argument which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`. You will also need a `tools` argument which accepts a list of `Tools` - it can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`.

View File

@ -1,11 +1,9 @@
from smolagents.agents import ToolCallingAgent from smolagents.agents import ToolCallingAgent
from smolagents import tool, HfApiEngine, OpenAIEngine, AnthropicEngine, TransformersEngine, LiteLLMEngine from smolagents import tool, HfApiEngine, TransformersEngine, LiteLLMEngine
# Choose which LLM engine to use! # Choose which LLM engine to use!
# llm_engine = OpenAIEngine("gpt-4o") llm_engine = HfApiEngine("meta-llama/Llama-3.3-70B-Instruct")
# llm_engine = AnthropicEngine("claude-3-5-sonnet-20240620") llm_engine = TransformersEngine("meta-llama/Llama-3.2-2B-Instruct")
# llm_engine = HfApiEngine("meta-llama/Llama-3.3-70B-Instruct")
# llm_engine = TransformersEngine("meta-llama/Llama-3.2-2B-Instruct")
llm_engine = LiteLLMEngine("gpt-4o") llm_engine = LiteLLMEngine("gpt-4o")
@tool @tool

View File

@ -23,6 +23,7 @@ dependencies = [
"python-dotenv>=1.0.1", "python-dotenv>=1.0.1",
"e2b-code-interpreter>=1.0.3", "e2b-code-interpreter>=1.0.3",
"torch>=2.5.1", "torch>=2.5.1",
"litellm>=1.55.10",
] ]
[project.optional-dependencies] [project.optional-dependencies]

View File

@ -40,7 +40,6 @@ from .llm_engines import MessageRole
from .monitoring import Monitor from .monitoring import Monitor
from .prompts import ( from .prompts import (
CODE_SYSTEM_PROMPT, CODE_SYSTEM_PROMPT,
JSON_SYSTEM_PROMPT,
TOOL_CALLING_SYSTEM_PROMPT, TOOL_CALLING_SYSTEM_PROMPT,
PLAN_UPDATE_FINAL_PLAN_REDACTION, PLAN_UPDATE_FINAL_PLAN_REDACTION,
SYSTEM_PROMPT_FACTS, SYSTEM_PROMPT_FACTS,
@ -171,7 +170,6 @@ class MultiStepAgent:
grammar: Optional[Dict[str, str]] = None, grammar: Optional[Dict[str, str]] = None,
managed_agents: Optional[Dict] = None, managed_agents: Optional[Dict] = None,
step_callbacks: Optional[List[Callable]] = None, step_callbacks: Optional[List[Callable]] = None,
monitor_metrics: bool = True,
planning_interval: Optional[int] = None, planning_interval: Optional[int] = None,
): ):
if system_prompt is None: if system_prompt is None:
@ -210,14 +208,8 @@ class MultiStepAgent:
self.logs = [] self.logs = []
self.task = None self.task = None
self.verbose = verbose self.verbose = verbose
# Initialize step callbacks
self.step_callbacks = step_callbacks if step_callbacks is not None else []
# Initialize Monitor if monitor_metrics is True
self.monitor = None
if monitor_metrics:
self.monitor = Monitor(self.llm_engine) self.monitor = Monitor(self.llm_engine)
self.step_callbacks = step_callbacks if step_callbacks is not None else []
self.step_callbacks.append(self.monitor.update_metrics) self.step_callbacks.append(self.monitor.update_metrics)
@property @property
@ -712,7 +704,6 @@ Now begin!""",
) )
class ToolCallingAgent(MultiStepAgent): class ToolCallingAgent(MultiStepAgent):
""" """
This agent uses JSON-like tool calls, using method `llm_engine.get_tool_call` to leverage the LLM engine's tool calling capabilities. This agent uses JSON-like tool calls, using method `llm_engine.get_tool_call` to leverage the LLM engine's tool calling capabilities.
@ -755,7 +746,9 @@ class ToolCallingAgent(MultiStepAgent):
stop_sequences=["Observation:"], stop_sequences=["Observation:"],
) )
except Exception as e: except Exception as e:
raise AgentGenerationError(f"Error in generating tool call with llm_engine:\n{e}") raise AgentGenerationError(
f"Error in generating tool call with llm_engine:\n{e}"
)
log_entry.tool_call = ToolCall( log_entry.tool_call = ToolCall(
name=tool_name, arguments=tool_arguments, id=tool_call_id name=tool_name, arguments=tool_arguments, id=tool_call_id

View File

@ -61,7 +61,7 @@ class E2BExecutor:
[f"import {module}" for module in BASE_BUILTIN_MODULES] [f"import {module}" for module in BASE_BUILTIN_MODULES]
) )
tool_definition_code += textwrap.dedent(""" tool_definition_code += textwrap.dedent("""
class Tool: class Tool:
def __call__(self, *args, **kwargs): def __call__(self, *args, **kwargs):
return self.forward(*args, **kwargs) return self.forward(*args, **kwargs)

View File

@ -16,15 +16,19 @@
# limitations under the License. # limitations under the License.
from copy import deepcopy from copy import deepcopy
from enum import Enum from enum import Enum
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional
from transformers import AutoTokenizer, AutoModelForCausalLM, StoppingCriteria, StoppingCriteriaList from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
StoppingCriteria,
StoppingCriteriaList,
)
import litellm import litellm
import logging import logging
import os import os
import random import random
from openai import OpenAI
from huggingface_hub import InferenceClient from huggingface_hub import InferenceClient
from .tools import Tool from .tools import Tool
@ -69,25 +73,16 @@ def get_json_schema(tool: Tool) -> Dict:
"description": tool.description, "description": tool.description,
"parameters": { "parameters": {
"type": "object", "type": "object",
"properties": {k: {k2: v2.replace("any", "object") for k2, v2 in v.items()} for k, v in tool.inputs.items()}, "properties": {
k: {k2: v2.replace("any", "object") for k2, v2 in v.items()}
for k, v in tool.inputs.items()
},
"required": list(tool.inputs.keys()), "required": list(tool.inputs.keys()),
}, },
}, },
} }
def get_json_schema_anthropic(tool: Tool) -> Dict:
return {
"name": tool.name,
"description": tool.description,
"input_schema": {
"type": "object",
"properties": tool.inputs,
"required": list(tool.inputs.keys()),
},
}
def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str: def remove_stop_sequences(content: str, stop_sequences: List[str]) -> str:
for stop_seq in stop_sequences: for stop_seq in stop_sequences:
if content[-len(stop_seq) :] == stop_seq: if content[-len(stop_seq) :] == stop_seq:
@ -395,176 +390,7 @@ class TransformersEngine(HfEngine):
return tool_name, tool_input, call_id return tool_name, tool_input, call_id
class OpenAIEngine: class LiteLLMEngine:
def __init__(
self,
model_id: Optional[str] = None,
api_key: Optional[str] = None,
base_url: Optional[str] = None,
):
"""Creates a LLM Engine that follows OpenAI format.
Args:
model_id (`str`, *optional*): the model name to use.
api_key (`str`, *optional*): your API key.
base_url (`str`, *optional*): the URL to use if using a different inference service than OpenAI, for instance "https://api-inference.huggingface.co/v1/".
"""
if model_id is None:
model_id = "gpt-4o"
if api_key is None:
api_key = os.getenv("OPENAI_API_KEY")
self.model_id = model_id
self.client = OpenAI(
base_url=base_url,
api_key=api_key,
)
self.last_input_token_count = 0
self.last_output_token_count = 0
def __call__(
self,
messages: List[Dict[str, str]],
stop_sequences: Optional[List[str]] = None,
grammar: Optional[str] = None,
max_tokens: int = 1500,
) -> str:
messages = get_clean_message_list(
messages, role_conversions=tool_role_conversions
)
response = self.client.chat.completions.create(
model=self.model_id,
messages=messages,
stop=stop_sequences,
temperature=0.5,
max_tokens=max_tokens,
)
self.last_input_token_count = response.usage.prompt_tokens
self.last_output_token_count = response.usage.completion_tokens
return response.choices[0].message.content
def get_tool_call(
self,
messages: List[Dict[str, str]],
available_tools: List[Tool],
stop_sequences: Optional[List[str]] = None,
):
"""Generates a tool call for the given message list. This method is used only by `ToolCallingAgent`."""
messages = get_clean_message_list(
messages, role_conversions=tool_role_conversions
)
response = self.client.chat.completions.create(
model=self.model_id,
messages=messages,
tools=[get_json_schema(tool) for tool in available_tools],
tool_choice="required",
)
tool_call = response.choices[0].message.tool_calls[0]
self.last_input_token_count = response.usage.prompt_tokens
self.last_output_token_count = response.usage.completion_tokens
return tool_call.function.name, tool_call.function.arguments, tool_call.id
class AnthropicEngine:
def __init__(self, model_id="claude-3-5-sonnet-20240620", use_bedrock=False):
from anthropic import Anthropic, AnthropicBedrock
self.model_id = model_id
if use_bedrock:
self.model_id = "anthropic.claude-3-5-sonnet-20240620-v1:0"
self.client = AnthropicBedrock(
aws_access_key=os.getenv("AWS_BEDROCK_ID"),
aws_secret_key=os.getenv("AWS_BEDROCK_KEY"),
aws_region="us-east-1",
)
else:
self.client = Anthropic(
api_key=os.getenv("ANTHROPIC_API_KEY"),
)
self.last_input_token_count = 0
self.last_output_token_count = 0
def separate_messages_system_prompt(
self,
messages: List[
Dict[
str,
str,
]
],
) -> Tuple[List[Dict[str, str]], str]:
"""Gets the system prompt and the rest of messages as separate elements."""
index_system_message, system_prompt = None, None
for index, message in enumerate(messages):
if message["role"] == MessageRole.SYSTEM:
index_system_message = index
system_prompt = message["content"]
if system_prompt is None:
raise Exception("No system prompt found!")
filtered_messages = [
message for i, message in enumerate(messages) if i != index_system_message
]
if len(filtered_messages) == 0:
print("Error, no user message:", messages)
assert False
return filtered_messages, system_prompt
def __call__(
self,
messages: List[Dict[str, str]],
stop_sequences: Optional[List[str]] = None,
grammar: Optional[str] = None,
max_tokens: int = 1500,
) -> str:
messages = get_clean_message_list(
messages, role_conversions=tool_role_conversions
)
filtered_messages, system_prompt = self.separate_messages_system_prompt(
messages
)
response = self.client.messages.create(
model=self.model_id,
system=system_prompt,
messages=filtered_messages,
stop_sequences=stop_sequences,
temperature=0.5,
max_tokens=max_tokens,
)
full_response_text = ""
for content_block in response.content:
if content_block.type == "text":
full_response_text += content_block.text
return full_response_text
def get_tool_call(
self,
messages: List[Dict[str, str]],
available_tools: List[Tool],
stop_sequences: Optional[List[str]] = None,
max_tokens: int = 1500,
):
"""Generates a tool call for the given message list. This method is used only by `ToolCallingAgent`."""
messages = get_clean_message_list(
messages, role_conversions=tool_role_conversions
)
filtered_messages, system_prompt = self.separate_messages_system_prompt(
messages
)
response = self.client.messages.create(
model=self.model_id,
system=system_prompt,
messages=filtered_messages,
tools=[get_json_schema_anthropic(tool) for tool in available_tools],
tool_choice={"type": "any"},
max_tokens=max_tokens,
)
tool_call = response.content[0]
self.last_input_token_count = response.usage.input_tokens
self.last_output_token_count = response.usage.output_tokens
return tool_call.name, tool_call.input, tool_call.id
class LiteLLMEngine():
def __init__(self, model_id="anthropic/claude-3-5-sonnet-20240620"): def __init__(self, model_id="anthropic/claude-3-5-sonnet-20240620"):
self.model_id = model_id self.model_id = model_id
# IMPORTANT - Set this to TRUE to add the function to the prompt for Non OpenAI LLMs # IMPORTANT - Set this to TRUE to add the function to the prompt for Non OpenAI LLMs
@ -624,7 +450,5 @@ __all__ = [
"HfEngine", "HfEngine",
"TransformersEngine", "TransformersEngine",
"HfApiEngine", "HfApiEngine",
"OpenAIEngine",
"AnthropicEngine",
"LiteLLMEngine", "LiteLLMEngine",
] ]

View File

@ -14,43 +14,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re
from transformers.utils import cached_file
# docstyle-ignore
CHAT_MESSAGE_PROMPT = """
Human: <<task>>
Assistant: """
DEFAULT_PROMPTS_REPO = "huggingface-tools/default-prompts"
PROMPT_FILES = {"chat": "chat_prompt_template.txt", "run": "run_prompt_template.txt"}
def download_prompt(prompt_or_repo_id, agent_name, mode="run"):
"""
Downloads and caches the prompt from a repo and returns it contents (if necessary).
"""
if prompt_or_repo_id is None:
prompt_or_repo_id = DEFAULT_PROMPTS_REPO
# prompt is considered a repo ID when it does not contain any kind of space
if re.search("\\s", prompt_or_repo_id) is not None:
return prompt_or_repo_id
prompt_file = cached_file(
prompt_or_repo_id,
PROMPT_FILES[mode],
repo_type="dataset",
user_agent={"agent": agent_name},
)
with open(prompt_file, "r", encoding="utf-8") as f:
return f.read()
SINGLE_STEP_CODE_SYSTEM_PROMPT = """You will be given a task to solve, your job is to come up with a series of simple commands in Python that will perform the task. SINGLE_STEP_CODE_SYSTEM_PROMPT = """You will be given a task to solve, your job is to come up with a series of simple commands in Python that will perform the task.
To help you, I will give you access to a set of tools that you can use. Each tool is a Python function and has a description explaining the task it performs, the inputs it expects and the outputs it returns. To help you, I will give you access to a set of tools that you can use. Each tool is a Python function and has a description explaining the task it performs, the inputs it expects and the outputs it returns.
You should first explain which tool you will use to perform the task and for what reason, then write the code in Python. You should first explain which tool you will use to perform the task and for what reason, then write the code in Python.
@ -73,7 +36,7 @@ translated_question = translator(question=question, src_lang="French", tgt_lang=
print(f"The translated question is {translated_question}.") print(f"The translated question is {translated_question}.")
answer = image_qa(image=image, question=translated_question) answer = image_qa(image=image, question=translated_question)
final_answer(f"The answer is {answer}") final_answer(f"The answer is {answer}")
```<end_action> ```<end_code>
--- ---
Task: "Identify the oldest person in the `document` and create an image showcasing the result." Task: "Identify the oldest person in the `document` and create an image showcasing the result."
@ -85,7 +48,7 @@ answer = document_qa(document, question="What is the oldest person?")
print(f"The answer is {answer}.") print(f"The answer is {answer}.")
image = image_generator(answer) image = image_generator(answer)
final_answer(image) final_answer(image)
```<end_action> ```<end_code>
--- ---
Task: "Generate an image using the text given in the variable `caption`." Task: "Generate an image using the text given in the variable `caption`."
@ -95,7 +58,7 @@ Code:
```py ```py
image = image_generator(prompt=caption) image = image_generator(prompt=caption)
final_answer(image) final_answer(image)
```<end_action> ```<end_code>
--- ---
Task: "Summarize the text given in the variable `text` and read it out loud." Task: "Summarize the text given in the variable `text` and read it out loud."
@ -107,7 +70,7 @@ summarized_text = summarizer(text)
print(f"Summary: {summarized_text}") print(f"Summary: {summarized_text}")
audio_summary = text_reader(summarized_text) audio_summary = text_reader(summarized_text)
final_answer(audio_summary) final_answer(audio_summary)
```<end_action> ```<end_code>
--- ---
Task: "Answer the question in the variable `question` about the text in the variable `text`. Use the answer to generate an image." Task: "Answer the question in the variable `question` about the text in the variable `text`. Use the answer to generate an image."
@ -119,7 +82,7 @@ answer = text_qa(text=text, question=question)
print(f"The answer is {answer}.") print(f"The answer is {answer}.")
image = image_generator(answer) image = image_generator(answer)
final_answer(image) final_answer(image)
```<end_action> ```<end_code>
--- ---
Task: "Caption the following `image`." Task: "Caption the following `image`."
@ -129,7 +92,7 @@ Code:
```py ```py
caption = image_captioner(image) caption = image_captioner(image)
final_answer(caption) final_answer(caption)
```<end_action> ```<end_code>
--- ---
Above example were using tools that might not exist for you. You only have access to these tools: Above example were using tools that might not exist for you. You only have access to these tools:
@ -138,145 +101,13 @@ Above example were using tools that might not exist for you. You only have acces
{{managed_agents_descriptions}} {{managed_agents_descriptions}}
Remember to make sure that variables you use are all defined. In particular don't import packages! Remember to make sure that variables you use are all defined. In particular don't import packages!
Be sure to provide a 'Code:\n```' sequence before the code and '```<end_action>' after, else you will get an error. Be sure to provide a 'Code:\n```' sequence before the code and '```<end_code>' after, else you will get an error.
DO NOT pass the arguments as a dict as in 'answer = ask_search_agent({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = ask_search_agent(query="What is the place where James Bond lives?")'. DO NOT pass the arguments as a dict as in 'answer = ask_search_agent({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = ask_search_agent(query="What is the place where James Bond lives?")'.
Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
""" """
JSON_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using JSON tool calls. You will be given a task to solve as best you can.
To do so, you have been given access to the following tools: {{tool_names}}
The way you use the tools is by specifying a json blob, ending with '<end_action>'.
Specifically, this json should have an `action` key (name of the tool to use) and an `action_input` key (input to the tool).
The $ACTION_JSON_BLOB should only contain a SINGLE action, do NOT return a list of multiple actions. It should be formatted in json. Do not try to escape special characters. Here is the template of a valid $ACTION_JSON_BLOB:
{
"tool_name": $TOOL_NAME,
"tool_arguments": $INPUT
}<end_action>
Make sure to have the $INPUT as a dictionary in the right format for the tool you are using, and do not put variable names as input if you can find the right values.
You should ALWAYS use the following format:
Thought: you should always think about one action to take. Then use the action as follows:
Action:
$ACTION_JSON_BLOB
Observation: the result of the action
... (this Thought/Action/Observation can repeat N times, you should take several steps when needed. The $ACTION_JSON_BLOB must only use a SINGLE action at a time.)
You can use the result of the previous action as input for the next action.
The observation will always be a string: it can represent a file, like "image_1.jpg".
Then you can use it as input for the next action. You can do it for instance as follows:
Observation: "image_1.jpg"
Thought: I need to transform the image that I received in the previous observation to make it green.
Action:
{
"tool_name": "image_transformer",
"tool_arguments": {"image": "image_1.jpg"}
}<end_action>
To provide the final answer to the task, use an action blob with "tool_name": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
Action:
{
"tool_name": "final_answer",
"tool_arguments": {"answer": "insert your final answer here"}
}<end_action>
Here are a few examples using notional tools:
---
Task: "Generate an image of the oldest person in this document."
Thought: I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.
Action:
{
"tool_name": "document_qa",
"tool_arguments": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
}<end_action>
Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
Thought: I will now generate an image showcasing the oldest person.
Action:
{
"tool_name": "image_generator",
"tool_arguments": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
}<end_action>
Observation: "image.png"
Thought: I will now return the generated image.
Action:
{
"tool_name": "final_answer",
"tool_arguments": "image.png"
}<end_action>
---
Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
Thought: I will use python code evaluator to compute the result of the operation and then return the final answer using the `final_answer` tool
Action:
{
"tool_name": "python_interpreter",
"tool_arguments": {"code": "5 + 3 + 1294.678"}
}<end_action>
Observation: 1302.678
Thought: Now that I know the result, I will now return it.
Action:
{
"tool_name": "final_answer",
"tool_arguments": "1302.678"
}<end_action>
---
Task: "Which city has the highest population , Guangzhou or Shanghai?"
Thought: I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.
Action:
{
"tool_name": "search",
"tool_arguments": "Population Guangzhou"
}<end_action>
Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
Thought: Now let's get the population of Shanghai using the tool 'search'.
Action:
{
"tool_name": "search",
"tool_arguments": "Population Shanghai"
}
Observation: '26 million (2019)'
Thought: Now I know that Shanghai has a larger population. Let's return the result.
Action:
{
"tool_name": "final_answer",
"tool_arguments": "Shanghai"
}<end_action>
Above example were using notional tools that might not exist for you. You only have access to these tools:
{{tool_descriptions}}
{{managed_agents_descriptions}}
Here are the rules you should always follow to solve your task:
1. ALWAYS provide a 'Thought:' sequence, and an 'Action:' sequence that ends with <end_action>, else you will fail.
2. Always use the right arguments for the tools. Never use variable names in the 'action_input' field, use the value instead.
3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
4. Never re-do a tool call that you previously did with the exact same parameters.
Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
"""
TOOL_CALLING_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can. TOOL_CALLING_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
To do so, you have been given access to the following tools: {{tool_names}} To do so, you have been given access to the following tools: {{tool_names}}
@ -389,7 +220,7 @@ To do so, you have been given access to a list of tools: these tools are basical
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use. At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task and the tools that you want to use.
Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_action>' sequence. Then in the 'Code:' sequence, you should write the code in simple Python. The code sequence must end with '<end_code>' sequence.
During each intermediate step, you can use 'print()' to save whatever important information you will then need. During each intermediate step, you can use 'print()' to save whatever important information you will then need.
These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step. These print outputs will then appear in the 'Observation:' field, which will be available as input for the next step.
In the end you have to return a final answer using the `final_answer` tool. In the end you have to return a final answer using the `final_answer` tool.
@ -403,7 +234,7 @@ Code:
```py ```py
answer = document_qa(document=document, question="Who is the oldest person mentioned?") answer = document_qa(document=document, question="Who is the oldest person mentioned?")
print(answer) print(answer)
```<end_action> ```<end_code>
Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland." Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
Thought: I will now generate an image showcasing the oldest person. Thought: I will now generate an image showcasing the oldest person.
@ -411,7 +242,7 @@ Code:
```py ```py
image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.") image = image_generator("A portrait of John Doe, a 55-year-old man living in Canada.")
final_answer(image) final_answer(image)
```<end_action> ```<end_code>
--- ---
Task: "What is the result of the following operation: 5 + 3 + 1294.678?" Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
@ -421,7 +252,7 @@ Code:
```py ```py
result = 5 + 3 + 1294.678 result = 5 + 3 + 1294.678
final_answer(result) final_answer(result)
```<end_action> ```<end_code>
--- ---
Task: "Which city has the highest population: Guangzhou or Shanghai?" Task: "Which city has the highest population: Guangzhou or Shanghai?"
@ -433,7 +264,7 @@ population_guangzhou = search("Guangzhou population")
print("Population Guangzhou:", population_guangzhou) print("Population Guangzhou:", population_guangzhou)
population_shanghai = search("Shanghai population") population_shanghai = search("Shanghai population")
print("Population Shanghai:", population_shanghai) print("Population Shanghai:", population_shanghai)
```<end_action> ```<end_code>
Observation: Observation:
Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.'] Population Guangzhou: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
Population Shanghai: '26 million (2019)' Population Shanghai: '26 million (2019)'
@ -442,7 +273,7 @@ Thought: Now I know that Shanghai has the highest population.
Code: Code:
```py ```py
final_answer("Shanghai") final_answer("Shanghai")
```<end_action> ```<end_code>
--- ---
Task: "What is the current age of the pope, raised to the power 0.36?" Task: "What is the current age of the pope, raised to the power 0.36?"
@ -452,7 +283,7 @@ Code:
```py ```py
pope_age = wiki(query="current pope age") pope_age = wiki(query="current pope age")
print("Pope age:", pope_age) print("Pope age:", pope_age)
```<end_action> ```<end_code>
Observation: Observation:
Pope age: "The pope Francis is currently 85 years old." Pope age: "The pope Francis is currently 85 years old."
@ -461,7 +292,7 @@ Code:
```py ```py
pope_current_age = 85 ** 0.36 pope_current_age = 85 ** 0.36
final_answer(pope_current_age) final_answer(pope_current_age)
```<end_action> ```<end_code>
Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools: Above example were using notional tools that might not exist for you. On top of performing computations in the Python code snippets that you create, you only have access to these tools:
@ -470,7 +301,7 @@ Above example were using notional tools that might not exist for you. On top of
{{managed_agents_descriptions}} {{managed_agents_descriptions}}
Here are the rules you should always follow to solve your task: Here are the rules you should always follow to solve your task:
1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_action>' sequence, else you will fail. 1. Always provide a 'Thought:' sequence, and a 'Code:\n```py' sequence ending with '```<end_code>' sequence, else you will fail.
2. Use only variables that you have defined! 2. Use only variables that you have defined!
3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'. 3. Always use the right arguments for the tools. DO NOT pass the arguments as a dict as in 'answer = wiki({'query': "What is the place where James Bond lives?"})', but use the arguments directly as in 'answer = wiki(query="What is the place where James Bond lives?")'.
4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block.
@ -620,6 +451,5 @@ __all__ = [
"PLAN_UPDATE_FINAL_PLAN_REDACTION", "PLAN_UPDATE_FINAL_PLAN_REDACTION",
"SINGLE_STEP_CODE_SYSTEM_PROMPT", "SINGLE_STEP_CODE_SYSTEM_PROMPT",
"CODE_SYSTEM_PROMPT", "CODE_SYSTEM_PROMPT",
"JSON_SYSTEM_PROMPT",
"MANAGED_AGENT_PROMPT", "MANAGED_AGENT_PROMPT",
] ]

View File

@ -182,7 +182,9 @@ class AgentTests(unittest.TestCase):
assert output == "7.2904" assert output == "7.2904"
def test_fake_json_agent(self): def test_fake_json_agent(self):
agent = ToolCallingAgent(tools=[PythonInterpreterTool()], llm_engine=fake_json_llm) agent = ToolCallingAgent(
tools=[PythonInterpreterTool()], llm_engine=fake_json_llm
)
output = agent.run("What is 2 multiplied by 3.6452?") output = agent.run("What is 2 multiplied by 3.6452?")
assert isinstance(output, str) assert isinstance(output, str)
assert output == "7.2904" assert output == "7.2904"

View File

@ -15,7 +15,13 @@
import unittest import unittest
from smolagents import AgentImage, AgentError, CodeAgent, ToolCallingAgent, stream_to_gradio from smolagents import (
AgentImage,
AgentError,
CodeAgent,
ToolCallingAgent,
stream_to_gradio,
)
class MonitoringTester(unittest.TestCase): class MonitoringTester(unittest.TestCase):