Update agents intro
This commit is contained in:
parent
77a08bacd0
commit
2080f94f07
|
@ -19,7 +19,10 @@ from typing import Any, Callable, Dict, List, Optional, Union
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from rich.syntax import Syntax
|
from rich.syntax import Syntax
|
||||||
|
|
||||||
|
from langfuse.decorators import langfuse_context, observe
|
||||||
|
|
||||||
from transformers.utils import is_torch_available
|
from transformers.utils import is_torch_available
|
||||||
|
|
||||||
from .utils import console, parse_code_blob, parse_json_tool_call, truncate_content
|
from .utils import console, parse_code_blob, parse_json_tool_call, truncate_content
|
||||||
from .agent_types import AgentAudio, AgentImage
|
from .agent_types import AgentAudio, AgentImage
|
||||||
from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool
|
from .default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool
|
||||||
|
@ -420,6 +423,7 @@ class ReactAgent(BaseAgent):
|
||||||
console.print(f"[bold red]{error_msg}[/bold red]")
|
console.print(f"[bold red]{error_msg}[/bold red]")
|
||||||
return error_msg
|
return error_msg
|
||||||
|
|
||||||
|
@observe
|
||||||
def run(self, task: str, stream: bool = False, reset: bool = True, oneshot: bool = False, **kwargs):
|
def run(self, task: str, stream: bool = False, reset: bool = True, oneshot: bool = False, **kwargs):
|
||||||
"""
|
"""
|
||||||
Runs the agent for the given task.
|
Runs the agent for the given task.
|
||||||
|
@ -437,6 +441,7 @@ class ReactAgent(BaseAgent):
|
||||||
agent.run("What is the result of 2 power 3.7384?")
|
agent.run("What is the result of 2 power 3.7384?")
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
print("LANGFUSE REF:", langfuse_context.get_current_trace_url())
|
||||||
self.task = task
|
self.task = task
|
||||||
if len(kwargs) > 0:
|
if len(kwargs) > 0:
|
||||||
self.task += f"\nYou have been provided with these initial arguments: {str(kwargs)}."
|
self.task += f"\nYou have been provided with these initial arguments: {str(kwargs)}."
|
||||||
|
|
|
@ -1,414 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# coding=utf-8
|
|
||||||
|
|
||||||
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
from .agents import BASE_PYTHON_TOOLS
|
|
||||||
from .python_interpreter import InterpreterError, evaluate
|
|
||||||
|
|
||||||
|
|
||||||
### Fake tools for test
|
|
||||||
def classifier(text, labels):
    """Fake classification tool: return a canned sentence mentioning `text` and `labels`."""
    message = f"This is the classification of {text} along {labels}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def translator(text, src_lang, tgt_lang):
    """Fake translation tool: return a canned sentence naming the text and both languages."""
    message = f"This is the translation of {text} from {src_lang} to {tgt_lang}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def speaker(text):
    """Fake text-to-speech tool: return a canned sentence describing a sound reading `text`."""
    message = f"This is actually a sound reading {text}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def transcriber(audio):
    """
    Fake transcription tool.

    Returns a canned sentence for `audio`; raises `ValueError` when `audio` does not contain
    the substring "sound".
    """
    if "sound" in audio:
        return f"This is the transcribed text from {audio}."
    raise ValueError(f"`audio` ({audio}) is not a sound.")
|
|
||||||
|
|
||||||
|
|
||||||
def image_generator(prompt):
    """Fake image generation tool: return a canned sentence describing an image of `prompt`."""
    message = f"This is actually an image representing {prompt}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def image_captioner(image):
    """
    Fake captioning tool.

    Returns a canned description of `image`; raises `ValueError` when `image` does not contain
    the substring "image".
    """
    if "image" in image:
        return f"This is a description of {image}."
    raise ValueError(f"`image` ({image}) is not an image.")
|
|
||||||
|
|
||||||
|
|
||||||
def image_transformer(image, prompt):
    """
    Fake image transformation tool.

    Returns a canned sentence naming `image` and `prompt`; raises `ValueError` when `image`
    does not contain the substring "image".
    """
    if "image" in image:
        return f"This is a transformation of {image} according to {prompt}."
    raise ValueError(f"`image` ({image}) is not an image.")
|
|
||||||
|
|
||||||
|
|
||||||
def question_answerer(text, question):
    """Fake text question-answering tool: return a canned sentence naming `question` and `text`."""
    message = f"This is the answer to {question} from {text}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def image_qa(image, question):
    """
    Fake visual question-answering tool.

    Returns a canned sentence naming `question` and `image`; raises `ValueError` when `image`
    does not contain the substring "image".
    """
    if "image" in image:
        return f"This is the answer to {question} from {image}."
    raise ValueError(f"`image` ({image}) is not an image.")
|
|
||||||
|
|
||||||
|
|
||||||
def text_downloader(url):
    """Fake download tool: return a canned sentence standing in for the content of `url`."""
    message = f"This is the content of {url}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def summarizer(text):
    """Fake summarization tool: return a canned sentence standing in for a summary of `text`."""
    message = f"This is a summary of {text}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def video_generator(prompt, seconds=2):
    """
    Fake video generation tool: return a canned sentence naming `prompt`.

    `seconds` is accepted but does not influence the returned text.
    """
    message = f"A video of {prompt}"
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def document_qa(image, question):
    """Fake document question-answering tool: return a canned sentence naming `question` and `image`."""
    message = f"This is the answer to {question} from the document {image}."
    return message
|
|
||||||
|
|
||||||
|
|
||||||
def image_segmenter(image, prompt):
    """Fake segmentation tool: return a canned sentence naming `prompt` and `image`."""
    message = f"This is the mask of {prompt} in {image}"
    return message
|
|
||||||
|
|
||||||
|
|
||||||
# Registry mapping each tool name (as it appears in generated code) to its fake implementation.
TEST_TOOLS = dict(
    text_classifier=classifier,
    translator=translator,
    text_reader=speaker,
    summarizer=summarizer,
    transcriber=transcriber,
    image_generator=image_generator,
    image_captioner=image_captioner,
    image_transformer=image_transformer,
    text_qa=question_answerer,
    text_downloader=text_downloader,
    image_qa=image_qa,
    video_generator=video_generator,
    document_qa=document_qa,
    image_segmenter=image_segmenter,
)
|
|
||||||
|
|
||||||
|
|
||||||
class Problem:
    """
    Container for everything needed to evaluate an agent on one problem.

    Args:
        task (`str` or `list[str]`):
            One or several descriptions of the task to perform. When a list is given, its items should be
            different phrasings of the same task.
        inputs (`list[str]` or `dict[str, str]`):
            The inputs that will be fed to the tools. Only string values are accepted in this testing
            environment. Pass a dictionary to set the value of each input, or just the list of expected input
            names (the value used will then be `<<input_name>>`).
        answer (`str` or `list[str]`):
            The theoretical answer (or list of possible valid answers) to the problem, as code.
    """

    def __init__(self, task, inputs, answer):
        self.task, self.inputs, self.answer = task, inputs, answer
|
|
||||||
|
|
||||||
|
|
||||||
### The list of problems the agent will be evaluated on.
|
|
||||||
# Each Problem groups several phrasings of one task, the input names (or values) it needs,
# and the reference answer(s) expressed as code calling the fake tools.
EVALUATION_TASKS = [
    Problem(
        task=[
            "Is the following `text` (in Spanish) positive or negative?",
            "Is the text in the variable `text` (in Spanish) positive or negative?",
            "Translate the following `text` from Spanish to English then tell me if its positive or negative.",
        ],
        inputs=["text"],
        answer="""text_classifier(translator(text, src_lang="Spanish", tgt_lang="English"), labels=["positive", "negative"])""",
    ),
    Problem(
        task=[
            "Tell me out loud what the `image` contains.",
            "Describe the following `image` out loud.",
            "Find what is in the picture stored in `image` then read it out loud.",
        ],
        inputs=["image"],
        answer=[
            "text_reader(image_captioner(image))",
            "text_reader(image_qa(image, question='What is in the image?'))",
        ],
    ),
    Problem(
        task=[
            "Generate an image from the text given in `text_input`. Then transform it according to the text in `prompt`.",
            "Use the following `text_input` to generate an image, then transform it by using the text in `prompt`.",
        ],
        inputs=["text_input", "prompt"],
        answer="image_transformer(image_generator(text_input), prompt)",
    ),
    Problem(
        task=[
            "Download the content of `url`, summarize it then generate an image from its content.",
            "Use a summary of the web page at `url` to generate an image.",
            "Summarize the content of the web page at `url`, and use the result to generate an image.",
        ],
        inputs=["url"],
        answer="image_generator(summarizer(text_downloader(url)))",
    ),
    Problem(
        task=[
            "Transform the following `image` using the prompt in `text`. The prompt is in Spanish.",
            "Use the text prompt in `text` (in Spanish) to transform the following `image`.",
            "Translate the `text` from Spanish to English then use it to transform the picture in `image`.",
        ],
        inputs=["text", "image"],
        answer="image_transformer(image, translator(text, src_lang='Spanish', tgt_lang='English'))",
    ),
    Problem(
        task=[
            "Download the content of `url`, summarize it then read it out loud to me.",
            "Read me a summary of the web page at `url`.",
        ],
        inputs=["url"],
        answer="text_reader(summarizer(text_downloader(url)))",
    ),
    Problem(
        task=[
            "Generate an image from the text given in `text_input`.",
        ],
        inputs=["text_input"],
        answer="image_generator(text_input)",
    ),
    Problem(
        task=[
            "Replace the beaver in the `image` by the `prompt`.",
            "Transform the `image` so that it contains the `prompt`.",
            "Use `prompt` to transform this `image`.",
        ],
        inputs=["image", "prompt"],
        answer="image_transformer(image, prompt)",
    ),
    Problem(
        task=[
            "Provide me the summary of the `text`, then read it to me before transcribing it and translating it in French.",
            "Summarize `text`, read it out loud then transcribe the audio and translate it in French.",
            "Read me a summary of the `text` out loud. Transcribe this and translate it in French.",
        ],
        inputs=["text"],
        answer="translator(transcriber(text_reader(summarizer(text))), src_lang='English', tgt_lang='French')",
    ),
    Problem(
        task=["Generate a video of the `prompt`", "Animate a `prompt`", "Make me a short video using `prompt`."],
        inputs={"prompt": "A lobster swimming"},
        answer="video_generator('A lobster swimming')",
    ),
    Problem(
        task=[
            # The comma after the first phrasing matters: without it Python concatenates the two
            # adjacent string literals into a single task.
            "Download the following file `url`, summarize it in a few words and generate a video from it.",
            "Fetch the file at this `url`, summarize it, and create an animation out of it.",
        ],
        inputs=["url"],
        answer="video_generator(summarizer(text_downloader(url)))",
    ),
]
|
|
||||||
|
|
||||||
|
|
||||||
def get_theoretical_tools(agent_answer, theoretical_answer, code_answer):
    """
    Return the set of `TEST_TOOLS` names that the reference solution is expected to use.

    When `theoretical_answer` is not a list, `code_answer` is a single code string and the tools
    named in it are returned. Otherwise `theoretical_answer` and `code_answer` are parallel lists:
    the tools of the reference whose evaluated answer matches `agent_answer` are returned (a dict
    `agent_answer` is first matched through its values, then compared as a whole), and the first
    reference is used when nothing matches.
    """

    def tools_named_in(snippet):
        return {name for name in TEST_TOOLS if name in snippet}

    if not isinstance(theoretical_answer, list):
        return tools_named_in(code_answer)

    if isinstance(agent_answer, dict):
        for expected, reference_code in zip(theoretical_answer, code_answer):
            if expected in agent_answer.values():
                return tools_named_in(reference_code)

    for expected, reference_code in zip(theoretical_answer, code_answer):
        if agent_answer == expected:
            return tools_named_in(reference_code)

    return tools_named_in(code_answer[0])
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate_code(code, inputs=None, state=None, verbose=False, return_interpretor_error=False):
    """
    Evaluate `code` with the fake test tools and return the result.

    Args:
        code (`str`): The code to evaluate.
        inputs (`list[str]` or `dict[str, str]`, *optional*):
            Input variables exposed to the code. A dict is copied and used as is; any other
            iterable of names is expanded to `{name: "<<name>>"}`.
        state (`dict`, *optional*):
            Existing interpreter state. When given, it is updated in place with the inputs.
        verbose (`bool`, *optional*, defaults to `False`):
            Whether to print unexpected (non-interpreter) exceptions.
        return_interpretor_error (`bool`, *optional*, defaults to `False`):
            Not read by this function; kept for backward compatibility.

    Returns:
        The value returned by `evaluate`, the error message as a string when an `InterpreterError`
        is raised, or `None` when any other exception is raised.
    """
    tools = BASE_PYTHON_TOOLS.copy()
    # Only expose the fake tools whose name actually appears in the code.
    tools.update({name: tool for name, tool in TEST_TOOLS.items() if name in code})

    if isinstance(inputs, dict):
        inputs = inputs.copy()
    elif inputs is not None:
        inputs = {inp: f"<<{inp}>>" for inp in inputs}

    if state is None:
        state = inputs
    elif inputs is not None:
        # `dict.update(None)` raises TypeError, so only merge when there are inputs to merge.
        state.update(inputs)

    try:
        return evaluate(code, tools, state)
    except InterpreterError as e:
        return str(e)
    except Exception as e:
        if verbose:
            print(e)
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def score_code(agent_answer, theoretical_answer, verbose: bool = False):
    """
    Score the evaluated agent answer against the evaluated reference answer(s).

    Args:
        agent_answer: The result of evaluating the agent's code.
        theoretical_answer: The expected result, or a list of acceptable results.
        verbose (`bool`, *optional*, defaults to `False`): Whether to print the inputs and verdict.

    Returns:
        `1` when `agent_answer` is one of the accepted answers, `0.75` when `agent_answer` is a dict
        and one of its values is an accepted answer, `0.3` otherwise.
    """
    if verbose:
        print(agent_answer, theoretical_answer)
    accepted_answers = theoretical_answer if isinstance(theoretical_answer, list) else [theoretical_answer]

    if agent_answer in accepted_answers:
        if verbose:
            print("Perfect!")
        return 1

    if isinstance(agent_answer, dict) and any(v in accepted_answers for v in agent_answer.values()):
        if verbose:
            print("Almost perfect, result in state!")
        return 0.75

    if verbose:
        print("Result is not the right one but code executed.")
    return 0.3
|
|
||||||
|
|
||||||
|
|
||||||
def _score_tool_sets(found_tools, expected_tools):
    """Return `(score, errors)` for `found_tools` vs `expected_tools`; 0.25 is lost per missing or extra tool."""
    if found_tools == expected_tools:
        return 1.0, None

    missing_tools = len(expected_tools - found_tools)
    unexpected_tools = len(found_tools - expected_tools)
    score = max(0, 1.0 - 0.25 * missing_tools - 0.25 * unexpected_tools)
    errors = {
        "selected_tools": found_tools,
        "theoretical_tools": expected_tools,
    }
    return score, errors


def evaluate_one_result(code, agent_answer, theoretical_answer, answer, verbose=False):
    """
    Score one agent output on tool selection, tool usage and result correctness.

    Args:
        code: The text produced by the agent.
        agent_answer: The result of evaluating the agent's code.
        theoretical_answer: The evaluated reference answer, or list of evaluated reference answers.
        answer: The reference answer(s) as code.
        verbose (`bool`, *optional*, defaults to `False`): Forwarded to `score_code`.

    Returns:
        A pair `(scores, errors)` where `scores` is `(tool_selection_score, tool_used_score, code_score)`
        and `errors` holds the matching error details, each `None` when that score is perfect.
    """
    theoretical_tools = get_theoretical_tools(agent_answer, theoretical_answer, answer)

    # Tool selection: names quoted in backticks in the agent output.
    selected_tools = {name for name in TEST_TOOLS if f"`{name}`" in code}
    tool_selection_score, tool_selection_errors = _score_tool_sets(selected_tools, theoretical_tools)

    # Tool usage: names appearing anywhere in the agent output.
    used_tools = {name for name in TEST_TOOLS if name in code}
    tool_used_score, tool_used_errors = _score_tool_sets(used_tools, theoretical_tools)

    score = score_code(agent_answer, theoretical_answer, verbose=verbose)
    if score < 1.0:
        code_errors = {
            "code_produced": code,
            "evaluation": agent_answer,
            "theoretical_answer": theoretical_answer,
        }
    else:
        code_errors = None

    return (tool_selection_score, tool_used_score, score), (tool_selection_errors, tool_used_errors, code_errors)
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate_agent(agent, batch_size=8, verbose=False, return_errors=False):
    """
    Evaluates a new agent on all `EVALUATION_TASKS`.

    Args:
        agent: The agent to evaluate. Its `toolbox` keys must be exactly the names in `TEST_TOOLS`;
            its `run` and `extract_action` methods are called for every task.
        batch_size (`int`, *optional*, defaults to 8): Number of tasks processed per batch.
        verbose (`bool`, *optional*, defaults to `False`): Whether to print each task and its evaluation.
        return_errors (`bool`, *optional*, defaults to `False`): Whether to also return the error details.

    Returns:
        A dict of percentage scores ("tool selection score", "tool used score", "code score"). When
        `return_errors` is `True`, a tuple `(scores, tool_selection_errors, tool_used_errors, code_errors)`
        where each error dict is keyed by task text.

    Raises:
        ValueError: If the agent's toolbox does not match `TEST_TOOLS`.

    Example:

    ```py
    scores = evaluate_agent(agent)
    print(scores)
    ```
    """
    # Sanity check
    expected_tools = set(TEST_TOOLS)
    agent_tools = set(agent.toolbox.keys())
    if agent_tools != expected_tools:
        missing_tools = expected_tools - agent_tools
        # Compare set with set: subtracting the TEST_TOOLS dict itself raises TypeError.
        unexpected_tools = agent_tools - expected_tools
        raise ValueError(
            f"Fix the test tools in the evaluate_agent module. Tools missing: {missing_tools}. Extra tools: {unexpected_tools}."
        )

    # Flatten all task phrasings, remembering which problem each one belongs to.
    eval_tasks = []
    eval_idx = []
    for idx, pb in enumerate(EVALUATION_TASKS):
        if isinstance(pb.task, list):
            eval_tasks.extend(pb.task)
            eval_idx.extend([idx] * len(pb.task))
        else:
            eval_tasks.append(pb.task)
            eval_idx.append(idx)

    tool_selection_score = 0
    tool_used_score = 0
    code_score = 0

    if return_errors:
        tool_selection_errors = {}
        tool_used_errors = {}
        code_errors = {}

    for start_idx in range(0, len(eval_tasks), batch_size):
        end_idx = min(start_idx + batch_size, len(eval_tasks))
        batch_tasks = eval_tasks[start_idx:end_idx]

        results = [agent.run(task, return_generated_code=True) for task in batch_tasks]

        for offset, result in enumerate(results):
            task = batch_tasks[offset]
            problem = EVALUATION_TASKS[eval_idx[start_idx + offset]]
            if verbose:
                print(f"====Task {start_idx + offset}====\n{task}\n")
            code = agent.extract_action(result, split_token="Answer:")

            # Evaluate agent answer and code answer
            agent_answer = evaluate_code(code, problem.inputs, verbose=verbose)
            if isinstance(problem.answer, list):
                theoretical_answer = [evaluate_code(answer, problem.inputs) for answer in problem.answer]
            else:
                theoretical_answer = evaluate_code(problem.answer, problem.inputs)

            task_scores, task_errors = evaluate_one_result(
                code, agent_answer, theoretical_answer, problem.answer, verbose=verbose
            )

            tool_selection_score += task_scores[0]
            tool_used_score += task_scores[1]
            code_score += task_scores[2]

            if return_errors:
                if task_errors[0] is not None:
                    tool_selection_errors[task] = task_errors[0]
                if task_errors[1] is not None:
                    tool_used_errors[task] = task_errors[1]
                if task_errors[2] is not None:
                    code_errors[task] = task_errors[2]

    scores = {
        "tool selection score": 100 * (tool_selection_score / len(eval_tasks)),
        "tool used score": 100 * (tool_used_score / len(eval_tasks)),
        "code score": 100 * (code_score / len(eval_tasks)),
    }

    if return_errors:
        return scores, tool_selection_errors, tool_used_errors, code_errors
    else:
        return scores
|
|
|
@ -38,3 +38,98 @@ class Monitor:
|
||||||
console.print(f"- Input tokens: {self.total_input_token_count:,}")
|
console.print(f"- Input tokens: {self.total_input_token_count:,}")
|
||||||
console.print(f"- Output tokens: {self.total_output_token_count:,}")
|
console.print(f"- Output tokens: {self.total_output_token_count:,}")
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Optional, Union, List, Any
|
||||||
|
import httpx
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from langfuse.client import Langfuse, StatefulTraceClient, StatefulSpanClient, StateType
|
||||||
|
|
||||||
|
|
||||||
|
class BaseTracker:
    """Base class for trackers; both the constructor and `call` do nothing."""

    def __init__(self):
        return None

    @classmethod
    def call(cls, *args, **kwargs):
        """Accept any arguments and do nothing."""
        return None
|
||||||
|
|
||||||
|
class LangfuseTracker(BaseTracker):
    """
    Tracker that records calls as Langfuse traces.

    It either reuses a stateful client passed by the caller (trace or span), or builds its own
    `Langfuse` client from the given arguments, falling back to the `LANGFUSE_PUBLIC_KEY`,
    `LANGFUSE_SECRET_KEY` and `LANGFUSE_HOST` environment variables.
    """

    log = logging.getLogger("langfuse")

    def __init__(
        self,
        *,
        public_key: Optional[str] = None,
        secret_key: Optional[str] = None,
        host: Optional[str] = None,
        debug: bool = False,
        stateful_client: Optional[Union[StatefulTraceClient, StatefulSpanClient]] = None,
        update_stateful_client: bool = False,
        version: Optional[str] = None,
        session_id: Optional[str] = None,
        user_id: Optional[str] = None,
        trace_name: Optional[str] = None,
        release: Optional[str] = None,
        metadata: Optional[Any] = None,
        tags: Optional[List[str]] = None,
        threads: Optional[int] = None,
        flush_at: Optional[int] = None,
        flush_interval: Optional[int] = None,
        max_retries: Optional[int] = None,
        timeout: Optional[int] = None,
        enabled: Optional[bool] = None,
        httpx_client: Optional[httpx.Client] = None,
        sdk_integration: str = "default",
    ) -> None:
        super().__init__()

        # Trace-level attributes stored as given.
        self.version = version
        self.session_id = session_id
        self.user_id = user_id
        self.trace_name = trace_name
        self.release = release
        self.metadata = metadata
        self.tags = tags

        self.root_span = None
        self.update_stateful_client = update_stateful_client
        self.langfuse = None

        # Reuse a caller-provided trace: no Langfuse client is created in this case.
        if stateful_client and isinstance(stateful_client, StatefulTraceClient):
            self.trace = stateful_client
            self._task_manager = stateful_client.task_manager
            return

        # Reuse a caller-provided span: keep it as root span and wrap its trace id in a trace client.
        if stateful_client and isinstance(stateful_client, StatefulSpanClient):
            self.root_span = stateful_client
            self.trace = StatefulTraceClient(
                stateful_client.client,
                stateful_client.trace_id,
                StateType.TRACE,
                stateful_client.trace_id,
                stateful_client.task_manager,
            )
            self._task_manager = stateful_client.task_manager
            return

        # Explicit arguments take priority over environment variables.
        client_kwargs = {
            "public_key": public_key or os.environ.get("LANGFUSE_PUBLIC_KEY"),
            "secret_key": secret_key or os.environ.get("LANGFUSE_SECRET_KEY"),
            "host": host or os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com"),
            "debug": debug,
        }

        # Optional settings are forwarded only when the caller set them.
        optional_settings = {
            "release": release,
            "threads": threads,
            "flush_at": flush_at,
            "flush_interval": flush_interval,
            "max_retries": max_retries,
            "timeout": timeout,
            "enabled": enabled,
            "httpx_client": httpx_client,
        }
        client_kwargs.update({key: value for key, value in optional_settings.items() if value is not None})
        client_kwargs["sdk_integration"] = sdk_integration

        self.langfuse = Langfuse(**client_kwargs)
        self.trace: Optional[StatefulTraceClient] = None
        self._task_manager = self.langfuse.task_manager

    def call(self, i, o, name=None, **kwargs):
        """Create a Langfuse trace with input `i`, output `o`, the given `name`, and `kwargs` as metadata."""
        # NOTE(review): `self.langfuse` stays None when a stateful client was passed to `__init__`,
        # so this line would raise AttributeError in that case; confirm the intended behavior.
        self.langfuse.trace(input=i, output=o, name=name, metadata=kwargs)
|
|
@ -13,7 +13,7 @@ specific language governing permissions and limitations under the License.
|
||||||
rendered properly in your Markdown viewer.
|
rendered properly in your Markdown viewer.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
# Accelerate
|
# Agents
|
||||||
|
|
||||||
Accelerate is a library that enables the same PyTorch code to be run across any distributed configuration by adding just four lines of code! In short, training and inference at scale made simple, efficient and adaptable.
|
Accelerate is a library that enables the same PyTorch code to be run across any distributed configuration by adding just four lines of code! In short, training and inference at scale made simple, efficient and adaptable.
|
||||||
|
|
||||||
|
|
|
@ -62,16 +62,26 @@ See? With these two examples, we already found the need for a few items to help
|
||||||
- a parser that extracts tool calls from the LLM output
|
- a parser that extracts tool calls from the LLM output
|
||||||
- system prompt synced with the parser
|
- system prompt synced with the parser
|
||||||
- memory
|
- memory
|
||||||
|
But wait: since we give LLMs room to make decisions, they will surely make mistakes, so for better performance we also need error logging and a retry mechanism.
|
||||||
|
|
||||||
|
These will not be that straightforward to implement correctly, especially not together. That's why we decided that we needed to build a few abstractions to help people use these.
|
||||||
|
|
||||||
### Most important feature: Code agent
|
### Most important feature: Code agent
|
||||||
|
|
||||||
[Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) papers have shown that having the LLM write its actions (the tool calls) in code is much better than the current standard format JSON.
|
[Multiple](https://huggingface.co/papers/2402.01030) [research](https://huggingface.co/papers/2411.01747) [papers](https://huggingface.co/papers/2401.00812) have shown that having the LLM write its actions (the tool calls) in code is much better than the current standard format JSON.
|
||||||
|
|
||||||
Why is that? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets was a better way, this package would have been written in JSON snippets and the devil would be having a great time laughing at us.
|
Why is that? Well, because we crafted our code languages specifically to be great at expressing actions performed by a computer. If JSON snippets was a better way, this package would have been written in JSON snippets and the devil would be having a great time laughing at us.
|
||||||
|
|
||||||
Code is just a better way to express actions on a computer. It has better:
|
Code is just a better way to express actions on a computer. It has better:
|
||||||
- Composability: could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function?
|
- **Composability:** could you nest JSON actions within each other, or define a set of JSON actions to re-use later, the same way you could just define a python function?
|
||||||
- Object management: how do you store the output of an action like `generate_image` in JSON?
|
- **Object management:** how do you store the output of an action like `generate_image` in JSON?
|
||||||
- Generality: code is made to express simply anything you can do have a computer do.
|
- **Generality:** code is built to simply express anything you can have a computer do.
|
||||||
|
- **Representation in LLM training corpora:** plenty of quality code actions are already included in LLM training corpora, so why not take advantage of this blessing?
|
||||||
|
|
||||||
So we decided to give you the best Code agents out there!
|
So we should use code as the main expression type for agent actions.
|
||||||
|
|
||||||
|
Few existing frameworks build on this idea to make code agents first-class citizens. We focused on it!
|
||||||
|
|
||||||
|
In particular, since code execution can be a security concern (arbitrary code execution!), we provide options at runtime:
|
||||||
|
- a secure python interpreter to run code more safely in your environment
|
||||||
|
- a sandbox `uv` environment.
|
|
@ -1,6 +1,10 @@
|
||||||
from agents import load_tool, ReactCodeAgent, ReactJsonAgent, HfApiEngine
|
from agents import load_tool, CodeAgent, JsonAgent, HfApiEngine
|
||||||
from agents.default_tools import PythonInterpreterTool
|
from agents.default_tools import PythonInterpreterTool
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
# Import tool from Hub
|
# Import tool from Hub
|
||||||
image_generation_tool = load_tool("m-ric/text-to-image", cache=False)
|
image_generation_tool = load_tool("m-ric/text-to-image", cache=False)
|
||||||
|
|
||||||
|
@ -10,7 +14,7 @@ search_tool = DuckDuckGoSearchTool()
|
||||||
|
|
||||||
llm_engine = HfApiEngine("Qwen/Qwen2.5-72B-Instruct")
|
llm_engine = HfApiEngine("Qwen/Qwen2.5-72B-Instruct")
|
||||||
|
|
||||||
agent = ReactCodeAgent(tools=[search_tool], llm_engine=llm_engine, planning_interval=3)
|
agent = CodeAgent(tools=[search_tool], llm_engine=llm_engine, planning_interval=3)
|
||||||
|
|
||||||
# Run it!
|
# Run it!
|
||||||
print("Let's run the Code agent:")
|
print("Let's run the Code agent:")
|
||||||
|
@ -24,7 +28,7 @@ print("RESULT:", result)
|
||||||
|
|
||||||
code_tool = PythonInterpreterTool()
|
code_tool = PythonInterpreterTool()
|
||||||
|
|
||||||
agent = ReactJsonAgent(tools=[search_tool, code_tool], llm_engine=llm_engine, planning_interval=3)
|
agent = JsonAgent(tools=[search_tool, code_tool], llm_engine=llm_engine, planning_interval=3)
|
||||||
|
|
||||||
print("====================")
|
print("====================")
|
||||||
print("====================")
|
print("====================")
|
||||||
|
|
|
@ -394,6 +394,25 @@ files = [
|
||||||
{file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"},
|
{file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "duckduckgo-search"
|
||||||
|
version = "6.3.7"
|
||||||
|
description = "Search for words, documents, images, news, maps and text translation using the DuckDuckGo.com search engine."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "duckduckgo_search-6.3.7-py3-none-any.whl", hash = "sha256:6a831a27977751e8928222f04c99a5d069ff80e2a7c78b699c9b9ac6cb48c41b"},
|
||||||
|
{file = "duckduckgo_search-6.3.7.tar.gz", hash = "sha256:53d84966429a6377647e2a1ea7224b657575c7a4d506729bdb837e4ee12915ed"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
click = ">=8.1.7"
|
||||||
|
primp = ">=0.8.1"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["mypy (>=1.11.1)", "pytest (>=8.3.1)", "pytest-asyncio (>=0.23.8)", "ruff (>=0.6.1)"]
|
||||||
|
lxml = ["lxml (>=5.2.2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "exceptiongroup"
|
name = "exceptiongroup"
|
||||||
version = "1.2.2"
|
version = "1.2.2"
|
||||||
|
@ -1396,6 +1415,27 @@ files = [
|
||||||
dev = ["pre-commit", "tox"]
|
dev = ["pre-commit", "tox"]
|
||||||
testing = ["pytest", "pytest-benchmark"]
|
testing = ["pytest", "pytest-benchmark"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "primp"
|
||||||
|
version = "0.8.2"
|
||||||
|
description = "HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:20c4988c6538dfcac804e804f286493696e53498d5705e745a36d9fe436c787c"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:dde74d6bf5534a60fd075e81b5828a6591753a647c5bfe69e664883e5c7a28bb"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f988d7e47d7f63b63f851885d51abd86ba3a2a1981d047466c1e63827753a168"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:965cf0c19986d074d4e20ce18f1b81e5c31818324718814af6317a291a3aba65"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-manylinux_2_34_armv7l.whl", hash = "sha256:afc56989ae09bed76105bf045e666ea2da5f32e2e93dfb967795a4da4fc777e5"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:64e8b9b216ee0f52d2885ac23303000339f798a59eb9b4b3b747dcbbf9187beb"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b65de6d8fe4c7ef9d5d508e2a9cee3da77455e3a44c9282bdebb2134c55087c9"},
|
||||||
|
{file = "primp-0.8.2-cp38-abi3-win_amd64.whl", hash = "sha256:d686cf4ce21c318bafe2f0574aec9f7f9526d18a4b0c017f507bd007f323e519"},
|
||||||
|
{file = "primp-0.8.2.tar.gz", hash = "sha256:572ecd34b77021a89a0574b66b07e1da100afd6ec490d3b519a6763fac6ae6c5"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["certifi", "pytest (>=8.1.1)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prompt-toolkit"
|
name = "prompt-toolkit"
|
||||||
version = "3.0.48"
|
version = "3.0.48"
|
||||||
|
@ -1669,6 +1709,20 @@ files = [
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
six = ">=1.5"
|
six = ">=1.5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "python-dotenv"
|
||||||
|
version = "1.0.1"
|
||||||
|
description = "Read key-value pairs from a .env file and set them as environment variables"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"},
|
||||||
|
{file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
cli = ["click (>=5.0)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-multipart"
|
name = "python-multipart"
|
||||||
version = "0.0.19"
|
version = "0.0.19"
|
||||||
|
@ -2691,4 +2745,4 @@ files = [
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.10,<3.13"
|
python-versions = ">=3.10,<3.13"
|
||||||
content-hash = "26cdfdf4d6328fb4a667075d445fa103caa803b6827887737651cd2e7d670c60"
|
content-hash = "66fc6a4cc54d94e6bcd0f81433bf34ff3f758caa7c5ef89aa7cf7f69319e2670"
|
||||||
|
|
|
@ -69,6 +69,8 @@ pillow = "^11.0.0"
|
||||||
llama-cpp-python = "^0.3.4"
|
llama-cpp-python = "^0.3.4"
|
||||||
markdownify = "^0.14.1"
|
markdownify = "^0.14.1"
|
||||||
gradio = "^5.8.0"
|
gradio = "^5.8.0"
|
||||||
|
duckduckgo-search = "^6.3.7"
|
||||||
|
python-dotenv = "^1.0.1"
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
|
Loading…
Reference in New Issue