Improve documentation on agent building tips
Parent: 0a0402d090
Commit: 465614295d
@@ -492,8 +492,8 @@ class ReactAgent(BaseAgent):
     Example:
     ```py
-    from transformers.agents import ReactCodeAgent
-    agent = ReactCodeAgent(tools=[])
+    from transformers.agents import CodeAgent
+    agent = CodeAgent(tools=[])
     agent.run("What is the result of 2 power 3.7384?")
     ```
     """
@@ -1029,10 +1029,10 @@ class ToolCollection:
     Example:

     ```py
-    >>> from transformers import ToolCollection, ReactCodeAgent
+    >>> from transformers import ToolCollection, CodeAgent

     >>> image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
-    >>> agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
+    >>> agent = CodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)

     >>> agent.run("Please draw me a picture of rivers and lakes.")
     ```
@@ -29,7 +29,7 @@ These *tools* are functions for performing a task, and they contain all necessar

 The agent can be programmed to:
 - devise a series of actions/tools and run them all at once, like the [`CodeAgent`]
-- plan and execute actions/tools one by one and wait for the outcome of each action before launching the next one, like the [`ReactJsonAgent`]
+- plan and execute actions/tools one by one and wait for the outcome of each action before launching the next one, like the [`JsonAgent`]

 ### Types of agents
@@ -41,9 +41,9 @@ This agent has a planning step, then generates python code to execute all its ac

 This is the go-to agent to solve reasoning tasks, since the ReAct framework ([Yao et al., 2022](https://huggingface.co/papers/2210.03629)) makes it really efficient to think on the basis of its previous observations.

-We implement two versions of ReactJsonAgent:
-- [`ReactJsonAgent`] generates tool calls as a JSON in its output.
-- [`ReactCodeAgent`] is a new type of ReactJsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance.
+We implement two versions of JsonAgent:
+- [`JsonAgent`] generates tool calls as a JSON in its output.
+- [`CodeAgent`] is a new type of JsonAgent that generates its tool calls as blobs of code, which works really well for LLMs that have strong coding performance.

 > [!TIP]
 > Read [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) blog post to learn more about ReAct agents.
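To make the distinction concrete, here is a minimal sketch (assuming the renamed `JsonAgent`/`CodeAgent` classes introduced by this commit, with the default llm_engine):

```py
from transformers.agents import CodeAgent, JsonAgent

# JsonAgent expresses each action as a JSON tool call; CodeAgent expresses
# its actions as Python snippets, which suits LLMs with strong coding ability.
json_agent = JsonAgent(tools=[], add_base_tools=True)
code_agent = CodeAgent(tools=[], add_base_tools=True)

code_agent.run("What is the result of 2 power 3.7384?")
```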
@@ -171,9 +171,9 @@ Note that we used an additional `sentence` argument: you can pass text as additi
 You can also use this to indicate the path to local or remote files for the model to use:

 ```py
-from transformers import ReactCodeAgent
+from transformers import CodeAgent

-agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
+agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)

 agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3")
 ```
@@ -196,12 +196,12 @@ A Python interpreter executes the code on a set of inputs passed along with your
 This should be safe because the only functions that can be called are the tools you provided (especially if it's only tools by Hugging Face) and the print function, so you're already limited in what can be executed.

 The Python interpreter also doesn't allow imports by default outside of a safe list, so all the most obvious attacks shouldn't be an issue.
-You can still authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`ReactCodeAgent`] or [`CodeAgent`]:
+You can still authorize additional imports by passing the authorized modules as a list of strings in argument `additional_authorized_imports` upon initialization of your [`CodeAgent`]:

 ```py
->>> from transformers import ReactCodeAgent
+>>> from transformers import CodeAgent

->>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
+>>> agent = CodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4'])
 >>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")

 (...)
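A quick sketch of the failure mode described above (assuming the renamed `CodeAgent`; the exact error message depends on the interpreter version):

```py
from transformers import CodeAgent

# 'requests' is NOT in additional_authorized_imports here, so if the LLM
# generates `import requests`, the restricted interpreter refuses to run it
# and the error surfaces in the agent's logs instead of being executed.
agent = CodeAgent(tools=[])
agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?")
```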
@@ -215,7 +215,7 @@ The execution will stop at any code trying to perform an illegal operation or if

 ### The system prompt

-An agent, or rather the LLM that drives the agent, generates an output based on the system prompt. The system prompt can be customized and tailored to the intended task. For example, check the system prompt for the [`ReactCodeAgent`] (below version is slightly simplified).
+An agent, or rather the LLM that drives the agent, generates an output based on the system prompt. The system prompt can be customized and tailored to the intended task. For example, check the system prompt for the [`CodeAgent`] (below version is slightly simplified).

 ```text
 You will be given a task to solve as best you can.
@@ -260,10 +260,10 @@ You could improve the system prompt, for example, by adding an explanation of th
 For maximum flexibility, you can overwrite the whole system prompt template by passing your custom prompt as an argument to the `system_prompt` parameter.

 ```python
-from transformers import ReactJsonAgent
+from transformers import JsonAgent
 from transformers.agents import PythonInterpreterTool

-agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
+agent = JsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}")
 ```

 > [!WARNING]
@@ -295,7 +295,7 @@ Transformers comes with a default toolbox for empowering agents, that you can ad
 - **Text to speech**: convert text to speech ([SpeechT5](./model_doc/speecht5))
 - **Translation**: translates a given sentence from source language to target language.
 - **DuckDuckGo search***: performs a web search using DuckDuckGo browser.
-- **Python code interpreter**: runs your the LLM generated Python code in a secure environment. This tool will only be added to [`ReactJsonAgent`] if you initialize it with `add_base_tools=True`, since code-based agent can already natively execute Python code
+- **Python code interpreter**: runs the LLM-generated Python code in a secure environment. This tool will only be added to [`JsonAgent`] if you initialize it with `add_base_tools=True`, since a code-based agent can already natively execute Python code

 You can manually use a tool by calling the [`load_tool`] function and a task to perform.
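Manual use of a default tool looks like this (a sketch, assuming the text-to-speech tool is registered under the name `text-to-speech`):

```py
from transformers import load_tool

# Load a single tool by name and call it directly, outside any agent loop.
tts_tool = load_tool("text-to-speech")
audio = tts_tool("This is a test of the text to speech tool.")
```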
@@ -375,57 +375,89 @@ print(f"The most downloaded model for the 'text-to-video' task is {most_download

 And the output:
 `"The most downloaded model for the 'text-to-video' task is ByteDance/AnimateDiff-Lightning."`

-### Manage your agent's toolbox
-
-If you have already initialized an agent, it is inconvenient to reinitialize it from scratch with a tool you want to use. With Transformers, you can manage an agent's toolbox by adding or replacing a tool.
-
-Let's add the `model_download_tool` to an existing agent initialized with only the default toolbox.
-
-```python
-from transformers import CodeAgent
-
-agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True)
-agent.toolbox.add_tool(model_download_tool)
-```
-Now we can leverage both the new tool and the previous text-to-speech tool:
-
-```python
-agent.run(
-    "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?"
-)
-```
-
-| **Audio**                                                                                                                                             |
-|-------------------------------------------------------------------------------------------------------------------------------------------------------|
-| <audio controls><source src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/damo.wav" type="audio/wav"/> |
-
-> [!WARNING]
-> Beware when adding tools to an agent that already works well because it can bias selection towards your tool or select another tool other than the one already defined.
-
-Use the `agent.toolbox.update_tool()` method to replace an existing tool in the agent's toolbox.
-This is useful if your new tool is a one-to-one replacement of the existing tool because the agent already knows how to perform that specific task.
-Just make sure the new tool follows the same API as the replaced tool or adapt the system prompt template to ensure all examples using the replaced tool are updated.
-
-### Use a collection of tools
-
-You can leverage tool collections by using the ToolCollection object, with the slug of the collection you want to use.
-Then pass them as a list to initialize you agent, and start using them!
-
-```py
-from transformers import ToolCollection, ReactCodeAgent
-
-image_tool_collection = ToolCollection(collection_slug="huggingface-tools/diffusion-tools-6630bb19a942c2306a2cdb6f")
-agent = ReactCodeAgent(tools=[*image_tool_collection.tools], add_base_tools=True)
-
-agent.run("Please draw me a picture of rivers and lakes.")
-```
-
-To speed up the start, tools are loaded only if called by the agent.
-
-This gets you this image:
-
-<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rivers_and_lakes.png">
+## Multi-agents
+
+Multi-agent has been introduced in Microsoft's framework [Autogen](https://huggingface.co/papers/2308.08155).
+It simply means having several agents working together to solve your task instead of only one.
+It empirically yields better performance on most benchmarks. The reason for this better performance is conceptually simple: for many tasks, rather than using a do-it-all system, you would prefer to specialize units on sub-tasks. Here, having agents with separate tool sets and memories allows to achieve efficient specialization.
+
+You can easily build hierarchical multi-agent systems with `transformers.agents`.
+
+To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs arguments `agent`, `name`, and a `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools.
+
+Here's an example of making an agent that manages a specific web search agent using our [`DuckDuckGoSearchTool`]:
+
+```py
+from transformers.agents import CodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent
+
+llm_engine = HfApiEngine()
+
+web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], llm_engine=llm_engine)
+
+managed_web_agent = ManagedAgent(
+    agent=web_agent,
+    name="web_search",
+    description="Runs web searches for you. Give it your query as an argument."
+)
+
+manager_agent = CodeAgent(
+    tools=[], llm_engine=llm_engine, managed_agents=[managed_web_agent]
+)
+
+manager_agent.run("Who is the CEO of Hugging Face?")
+```
+
+> [!TIP]
+> For an in-depth example of an efficient multi-agent implementation, see [how we pushed our multi-agent system to the top of the GAIA leaderboard](https://huggingface.co/blog/beating-gaia).
+
+## Display your agent run in a cool Gradio interface
+
+You can leverage `gradio.Chatbot` to display your agent's thoughts using `stream_to_gradio`, here is an example:
+
+```py
+import gradio as gr
+from transformers import (
+    load_tool,
+    CodeAgent,
+    HfApiEngine,
+    stream_to_gradio,
+)
+
+# Import tool from Hub
+image_generation_tool = load_tool("m-ric/text-to-image")
+
+llm_engine = HfApiEngine("meta-llama/Meta-Llama-3-70B-Instruct")
+
+# Initialize the agent with the image generation tool
+agent = CodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
+
+
+def interact_with_agent(task):
+    messages = []
+    messages.append(gr.ChatMessage(role="user", content=task))
+    yield messages
+    for msg in stream_to_gradio(agent, task):
+        messages.append(msg)
+        yield messages + [
+            gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
+        ]
+    yield messages
+
+
+with gr.Blocks() as demo:
+    text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
+    submit = gr.Button("Run illustrator agent!")
+    chatbot = gr.Chatbot(
+        label="Agent",
+        type="messages",
+        avatar_images=(
+            None,
+            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
+        ),
+    )
+    submit.click(interact_with_agent, [text_input], [chatbot])
+
+if __name__ == "__main__":
+    demo.launch()
+```
@@ -1,261 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
-# Agents, supercharged - Multi-agents, External tools, and more
-
-[[open-in-colab]]
-
-### What is an agent?
-
-> [!TIP]
-> If you're new to `transformers.agents`, make sure to first read the main [agents documentation](./agents).
-
-In this page we're going to highlight several advanced uses of `transformers.agents`.
-
-## Multi-agents
-
-Multi-agent has been introduced in Microsoft's framework [Autogen](https://huggingface.co/papers/2308.08155).
-It simply means having several agents working together to solve your task instead of only one.
-It empirically yields better performance on most benchmarks. The reason for this better performance is conceptually simple: for many tasks, rather than using a do-it-all system, you would prefer to specialize units on sub-tasks. Here, having agents with separate tool sets and memories allows to achieve efficient specialization.
-
-You can easily build hierarchical multi-agent systems with `transformers.agents`.
-
-To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs arguments `agent`, `name`, and a `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools.
-
-Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]:
-
-```py
-from transformers.agents import ReactCodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent
-
-llm_engine = HfApiEngine()
-
-web_agent = ReactCodeAgent(tools=[DuckDuckGoSearchTool()], llm_engine=llm_engine)
-
-managed_web_agent = ManagedAgent(
-    agent=web_agent,
-    name="web_search",
-    description="Runs web searches for you. Give it your query as an argument."
-)
-
-manager_agent = ReactCodeAgent(
-    tools=[], llm_engine=llm_engine, managed_agents=[managed_web_agent]
-)
-
-manager_agent.run("Who is the CEO of Hugging Face?")
-```
-
-> [!TIP]
-> For an in-depth example of an efficient multi-agent implementation, see [how we pushed our multi-agent system to the top of the GAIA leaderboard](https://huggingface.co/blog/beating-gaia).
-
-## Advanced tool usage
-
-### Directly define a tool by subclassing Tool, and share it to the Hub
-
-Let's take again the tool example from main documentation, for which we had implemented a `tool` decorator.
-
-If you need to add variation, like custom attributes for your tool, you can build your tool following the fine-grained method: building a class that inherits from the [`Tool`] superclass.
-
-The custom tool needs:
-- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`.
-- An attribute `description` is used to populate the agent's system prompt.
-- An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input.
-- An `output_type` attribute, which specifies the output type.
-- A `forward` method which contains the inference code to be executed.
-
-The types for both `inputs` and `output_type` should be amongst [Pydantic formats](https://docs.pydantic.dev/latest/concepts/json_schema/#generating-json-schema).
-
-```python
-from transformers import Tool
-from huggingface_hub import list_models
-
-class HFModelDownloadsTool(Tool):
-    name = "model_download_counter"
-    description = """
-    This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub.
-    It returns the name of the checkpoint."""
-
-    inputs = {
-        "task": {
-            "type": "string",
-            "description": "the task category (such as text-classification, depth-estimation, etc)",
-        }
-    }
-    output_type = "string"
-
-    def forward(self, task: str):
-        model = next(iter(list_models(filter=task, sort="downloads", direction=-1)))
-        return model.id
-```
-
-Now that the custom `HfModelDownloadsTool` class is ready, you can save it to a file named `model_downloads.py` and import it for use.
-
-```python
-from model_downloads import HFModelDownloadsTool
-
-tool = HFModelDownloadsTool()
-```
-
-You can also share your custom tool to the Hub by calling [`~Tool.push_to_hub`] on the tool. Make sure you've created a repository for it on the Hub and are using a token with read access.
-
-```python
-tool.push_to_hub("{your_username}/hf-model-downloads")
-```
-
-Load the tool with the [`~Tool.load_tool`] function and pass it to the `tools` parameter in your agent.
-
-```python
-from transformers import load_tool, CodeAgent
-
-model_download_tool = load_tool("m-ric/hf-model-downloads")
-```
-
-### Import a Space as a tool 🚀
-
-You can directly import a Space from the Hub as a tool using the [`Tool.from_space`] method!
-
-You only need to provide the id of the Space on the Hub, its name, and a description that will help you agent understand what the tool does. Under the hood, this will use [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space.
-
-For instance, let's import the [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) Space from the Hub and use it to generate an image.
-
-```
-from transformers import Tool
-
-image_generation_tool = Tool.from_space(
-    "black-forest-labs/FLUX.1-dev",
-    name="image_generator",
-    description="Generate an image from a prompt")
-
-image_generation_tool("A sunny beach")
-```
-And voilà, here's your image! 🏖️
-
-<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/sunny_beach.webp">
-
-Then you can use this tool just like any other tool. For example, let's improve the prompt `a rabbit wearing a space suit` and generate an image of it.
-
-```python
-from transformers import ReactCodeAgent
-
-agent = ReactCodeAgent(tools=[image_generation_tool])
-
-agent.run(
-    "Improve this prompt, then generate an image of it.", prompt='A rabbit wearing a space suit'
-)
-```
-
-```text
-=== Agent thoughts:
-improved_prompt could be "A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background"
-
-Now that I have improved the prompt, I can use the image generator tool to generate an image based on this prompt.
->>> Agent is executing the code below:
-image = image_generator(prompt="A bright blue space suit wearing rabbit, on the surface of the moon, under a bright orange sunset, with the Earth visible in the background")
-final_answer(image)
-```
-
-<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit_spacesuit_flux.webp">
-
-How cool is this? 🤩
-
-### Use gradio-tools
-
-[gradio-tools](https://github.com/freddyaboulton/gradio-tools) is a powerful library that allows using Hugging
-Face Spaces as tools. It supports many existing Spaces as well as custom Spaces.
-
-Transformers supports `gradio_tools` with the [`Tool.from_gradio`] method. For example, let's use the [`StableDiffusionPromptGeneratorTool`](https://github.com/freddyaboulton/gradio-tools/blob/main/gradio_tools/tools/prompt_generator.py) from `gradio-tools` toolkit for improving prompts to generate better images.
-
-Import and instantiate the tool, then pass it to the `Tool.from_gradio` method:
-
-```python
-from gradio_tools import StableDiffusionPromptGeneratorTool
-from transformers import Tool, load_tool, CodeAgent
-
-gradio_prompt_generator_tool = StableDiffusionPromptGeneratorTool()
-prompt_generator_tool = Tool.from_gradio(gradio_prompt_generator_tool)
-```
-
-> [!WARNING]
-> gradio-tools require *textual* inputs and outputs even when working with different modalities like image and audio objects. Image and audio inputs and outputs are currently incompatible.
-
-### Use LangChain tools
-
-We love Langchain and think it has a very compelling suite of tools.
-To import a tool from LangChain, use the `from_langchain()` method.
-
-Here is how you can use it to recreate the intro's search result using a LangChain web search tool.
-This tool will need `pip install google-search-results` to work properly.
-```python
-from langchain.agents import load_tools
-from transformers import Tool, ReactCodeAgent
-
-search_tool = Tool.from_langchain(load_tools(["serpapi"])[0])
-
-agent = ReactCodeAgent(tools=[search_tool])
-
-agent.run("How many more blocks (also denoted as layers) are in BERT base encoder compared to the encoder from the architecture proposed in Attention is All You Need?")
-```
-
-## Display your agent run in a cool Gradio interface
-
-You can leverage `gradio.Chatbot` to display your agent's thoughts using `stream_to_gradio`, here is an example:
-
-```py
-import gradio as gr
-from transformers import (
-    load_tool,
-    ReactCodeAgent,
-    HfApiEngine,
-    stream_to_gradio,
-)
-
-# Import tool from Hub
-image_generation_tool = load_tool("m-ric/text-to-image")
-
-llm_engine = HfApiEngine("meta-llama/Meta-Llama-3-70B-Instruct")
-
-# Initialize the agent with the image generation tool
-agent = ReactCodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
-
-
-def interact_with_agent(task):
-    messages = []
-    messages.append(gr.ChatMessage(role="user", content=task))
-    yield messages
-    for msg in stream_to_gradio(agent, task):
-        messages.append(msg)
-        yield messages + [
-            gr.ChatMessage(role="assistant", content="⏳ Task not finished yet!")
-        ]
-    yield messages
-
-
-with gr.Blocks() as demo:
-    text_input = gr.Textbox(lines=1, label="Chat Message", value="Make me a picture of the Statue of Liberty.")
-    submit = gr.Button("Run illustrator agent!")
-    chatbot = gr.Chatbot(
-        label="Agent",
-        type="messages",
-        avatar_images=(
-            None,
-            "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png",
-        ),
-    )
-    submit.click(interact_with_agent, [text_input], [chatbot])
-
-if __name__ == "__main__":
-    demo.launch()
-```
@@ -1,123 +0,0 @@
-<!--Copyright 2024 The HuggingFace Team. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
-an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
-specific language governing permissions and limitations under the License.
-
-⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
-rendered properly in your Markdown viewer.
-
--->
-### The best agentic systems are the simplest: simplify the workflow as much as you can
-
-Giving an LLM some agency in your workflow introduces some risk of errors.
-
-Well-programmed agentic systems have good error logging and retry mechanisms anyway, so the LLM engine has a chance to self-correct its mistakes. But to reduce the risk of LLM error to the maximum, you should simplify your workflow!
-
-Let's take again the example from [intro_agents]: a bot that answers user queries on a surf trip company.
-Instead of letting the agent do 2 different calls for "travel distance API" and "weather API" each time they are asked about a new surf spot, you could just make one unified tool "return_spot_information", a function that calls both APIs at once and returns their concatenated outputs to the user.
-
-This will reduce costs, latency, and error risk!
-
-So our first actionable takeaway is *you should group tools if possible*.
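For illustration, the grouped tool could look like this minimal sketch (the two helper functions are hypothetical stand-ins for the travel-distance and weather APIs named above):

```py
from transformers import tool

# Hypothetical stand-ins for the two separate APIs mentioned above
def get_travel_distance(spot_name: str) -> str:
    return "3h20 by car"

def get_weather_report(spot_name: str) -> str:
    return "18°C, low rain risk, 1.5m waves"

@tool
def return_spot_information(spot_name: str) -> str:
    """
    Returns travel distance and weather report for a surf spot in a single call.

    Args:
        spot_name: the name of the surf spot, like "Anchor Point, Taghazout, Morocco".
    """
    return f"Travel: {get_travel_distance(spot_name)}. Weather: {get_weather_report(spot_name)}."
```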
-### Improve the information flow to the LLM engine
-
-Remember that your LLM engine is like an ~intelligent~ robot, trapped in a room with the only communication with the outside world being notes passed under a door.
-
-It won't know about anything that happened unless you explicitly put it into its prompt.
-
-For a `CodeAgent` using variables, it cannot access any variable not saved into its state.
-For instance, check out this agent trace for an LLM that I asked to make me a car picture:
-
-```text
-==== New task ====
-Make me a cool car picture
-──── New step ────
-Agent is executing the code below:
-image_generator(prompt="A cool, futuristic sports car with LED headlights, aerodynamic design, and vibrant color, high-res, photorealistic")
-
-Last output from code snippet:
-/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png
-Step 1:
-- Time taken: 16.35 seconds
-- Input tokens: 1,383
-- Output tokens: 77
-──── New step ────
-Agent is executing the code below:
-final_answer("/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png")
-
-Print outputs:
-
-Last output from code snippet:
-/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png
-Final answer:
-/var/folders/6m/9b1tts6d5w960j80wbw9tx3m0000gn/T/tmpx09qfsdd/652f0007-3ee9-44e2-94ac-90dae6bb89a4.png
-```
-
-The LLM never explicitly saved the image output into a variable, so it cannot access it again except by leveraging the path that was logged while saving the image. So properly logging the code execution made a big difference!
-
-Particular guidelines to follow:
-- Each tool should log (by simply using `print` statements inside the tool's `forward` method) everything that could be useful for the LLM engine.
-- In particular, logging detail on tool execution errors would help a lot!
-
-For instance, here are two versions of a tool that gets a weather report.
-
-First, here's a poor version:
-```py
-from transformers import tool
-from my_weather_api import convert_location_to_coordinates, get_weather_report_at_coordinates
-# Let's say "get_weather_report_at_coordinates" returns a list of [temperature in °C, risk of rain on a scale 0-1, wave height in m]
-from datetime import datetime
-
-@tool
-def get_weather_api(location: str, date_time: str) -> str:
-    """
-    Returns the weather report.
-
-    Args:
-        - location (`str`): the name of the place that you want the weather for.
-        - date_time (`str`): the date and time for which you want the report.
-    """
-    lon, lat = convert_location_to_coordinates(location)
-    date_time = datetime.strptime(date_time, '%m/%d/%y %H:%M:%S')
-    return str(get_weather_report_at_coordinates((lon, lat), date_time))
-```
-
-Why is it bad?
-- there's no specification of the format that should be used for `date_time`
-- there's no detail on how `location` should be specified
-- there's no logging mechanism tied to explicit failure cases like the location not being in a proper format, or `date_time` not being properly formatted
-- the output format is hard to understand
-
-If the tool call fails, the error trace logged in memory can help the LLM reverse engineer the tool to fix the errors. But why leave it so much heavy lifting to do?
-
-Here's a better way to build this tool:
-```py
-from transformers import tool
-from my_weather_api import convert_location_to_coordinates, get_weather_report_at_coordinates
-# Let's say "get_weather_report_at_coordinates" returns a list of [temperature in °C, risk of rain on a scale 0-1, wave height in m]
-from datetime import datetime
-
-@tool
-def get_weather_api(location: str, date_time: str) -> str:
-    """
-    Returns the weather report.
-
-    Args:
-        - location (`str`): the name of the place that you want the weather for. Should be a place name, followed by possibly a city name, then a country, like "Anchor Point, Taghazout, Morocco".
-        - date_time (`str`): the date and time for which you want the report, formatted as '%m/%d/%y %H:%M:%S'.
-    """
-    lon, lat = convert_location_to_coordinates(location)
-    try:
-        date_time = datetime.strptime(date_time, '%m/%d/%y %H:%M:%S')
-    except Exception as e:
-        raise ValueError("Conversion of `date_time` to datetime format failed, make sure to provide a string in format '%m/%d/%y %H:%M:%S'. Full trace: " + str(e))
-    temperature_celsius, risk_of_rain, wave_height = get_weather_report_at_coordinates((lon, lat), date_time)
-    return f"Weather report for {location}, {date_time}: Temperature will be {temperature_celsius}°C, risk of rain is {risk_of_rain*100:.0f}%, wave height is {wave_height}m."
-```
@@ -31,8 +31,8 @@ contains the API docs for the underlying classes.
 We provide two types of agents, based on the main [`Agent`] class:
 - [`CodeAgent`] acts in one shot, generating code to solve the task, then executes it at once.
 - [`ReactAgent`] acts step by step, each step consisting of one thought, then one tool call and execution. It has two classes:
-  - [`ReactJsonAgent`] writes its tool calls in JSON.
-  - [`ReactCodeAgent`] writes its tool calls in Python code.
+  - [`JsonAgent`] writes its tool calls in JSON.
+  - [`CodeAgent`] writes its tool calls in Python code.

 ### Agent
@@ -46,9 +46,9 @@ We provide two types of agents, based on the main [`Agent`] class:

 [[autodoc]] ReactAgent

-[[autodoc]] ReactJsonAgent
+[[autodoc]] JsonAgent

-[[autodoc]] ReactCodeAgent
+[[autodoc]] CodeAgent

 ### ManagedAgent
@@ -233,6 +233,22 @@ Action:
             tool_arguments="final_answer(7.2904)",
         )

+    def test_reset_conversations(self):
+        agent = CodeAgent(
+            tools=[PythonInterpreterTool()], llm_engine=fake_code_llm
+        )
+        output = agent.run("What is 2 multiplied by 3.6452?", reset=True)
+        assert output == 7.2904
+        assert len(agent.logs) == 4
+
+        output = agent.run("What is 2 multiplied by 3.6452?", reset=False)
+        assert output == 7.2904
+        assert len(agent.logs) == 6
+
+        output = agent.run("What is 2 multiplied by 3.6452?", reset=True)
+        assert output == 7.2904
+        assert len(agent.logs) == 4
+
     def test_code_agent_code_errors_show_offending_lines(self):
         agent = CodeAgent(
             tools=[PythonInterpreterTool()], llm_engine=fake_code_llm_error
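In usage terms, the memory semantics exercised by this test look like the following sketch (assuming the renamed `CodeAgent` and an already-initialized `agent`):

```py
# reset=True (the default) starts from a clean memory; reset=False appends
# the new task to the existing logs, so earlier steps stay visible to the LLM.
output = agent.run("What is 2 multiplied by 3.6452?")
followup = agent.run("Now multiply the previous result by 10.", reset=False)
```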
@@ -16,7 +16,7 @@
 import unittest

 from transformers.agents.agent_types import AgentImage
-from transformers.agents.agents import AgentError, ReactCodeAgent, ReactJsonAgent
+from transformers.agents.agents import AgentError, CodeAgent, JsonAgent
 from transformers.agents.monitoring import stream_to_gradio
@@ -34,7 +34,7 @@ Code:
 final_answer('This is the final answer.')
 ```"""

-        agent = ReactCodeAgent(
+        agent = CodeAgent(
             tools=[],
             llm_engine=FakeLLMEngine(),
             max_iterations=1,
@@ -54,7 +54,7 @@ final_answer('This is the final answer.')
         def __call__(self, prompt, **kwargs):
             return 'Action:{"action": "final_answer", "action_input": {"answer": "image"}}'

-        agent = ReactJsonAgent(
+        agent = JsonAgent(
             tools=[],
             llm_engine=FakeLLMEngine(),
             max_iterations=1,
@@ -74,7 +74,7 @@ final_answer('This is the final answer.')
         def __call__(self, prompt, **kwargs):
             return "Malformed answer"

-        agent = ReactCodeAgent(
+        agent = CodeAgent(
             tools=[],
             llm_engine=FakeLLMEngine(),
             max_iterations=1,
@@ -94,7 +94,7 @@ final_answer('This is the final answer.')
         def __call__(self, prompt, **kwargs):
             raise AgentError

-        agent = ReactCodeAgent(
+        agent = CodeAgent(
             tools=[],
             llm_engine=FakeLLMEngine(),
             max_iterations=1,
@@ -113,7 +113,7 @@ Code:
 final_answer('This is the final answer.')
 ```"""

-        agent = ReactCodeAgent(
+        agent = CodeAgent(
             tools=[],
             llm_engine=dummy_llm_engine,
             max_iterations=1,
@@ -133,7 +133,7 @@ final_answer('This is the final answer.')
             'Action:{"action": "final_answer", "action_input": {"answer": "image"}}'
         )

-        agent = ReactJsonAgent(
+        agent = JsonAgent(
             tools=[],
             llm_engine=dummy_llm_engine,
             max_iterations=1,
@@ -160,7 +160,7 @@ final_answer('This is the final answer.')
         def dummy_llm_engine(prompt, **kwargs):
             raise AgentError("Simulated agent error")

-        agent = ReactCodeAgent(
+        agent = CodeAgent(
             tools=[],
             llm_engine=dummy_llm_engine,
             max_iterations=1,