Start including standard tool calling agent
This commit is contained in:
parent
3a28bda490
commit
4d4bf13152
|
@ -33,6 +33,7 @@ from .monitoring import Monitor
|
|||
from .prompts import (
|
||||
CODE_SYSTEM_PROMPT,
|
||||
JSON_SYSTEM_PROMPT,
|
||||
TOOL_CALLING_SYSTEM_PROMPT,
|
||||
PLAN_UPDATE_FINAL_PLAN_REDACTION,
|
||||
SYSTEM_PROMPT_FACTS,
|
||||
SYSTEM_PROMPT_FACTS_UPDATE,
|
||||
|
@ -870,6 +871,119 @@ class JsonAgent(ReactAgent):
|
|||
log_entry.observations = updated_information
|
||||
return None
|
||||
|
||||
class ToolCallingAgent(ReactAgent):
    """
    In this agent, the tool calls will be formulated and parsed using the underlying library, before execution.

    NOTE(review): delegating the parsing to the engine library is not wired up in this class yet.
    Until it is, `step` parses the textual `Action:` JSON blob that the default
    `TOOL_CALLING_SYSTEM_PROMPT` asks the model to write (see `_parse_tool_call`).
    """

    def __init__(
        self,
        tools: List[Tool],
        llm_engine: Optional[Callable] = None,
        system_prompt: Optional[str] = None,
        tool_description_template: Optional[str] = None,
        planning_interval: Optional[int] = None,
        **kwargs,
    ):
        """
        Build the agent, filling in defaults for every argument left as None.

        Args:
            tools: Tools handed to the parent `ReactAgent`.
            llm_engine: Callable invoked with the prompt messages; defaults to `HfApiEngine()`.
            system_prompt: Defaults to `TOOL_CALLING_SYSTEM_PROMPT`.
            tool_description_template: Defaults to `DEFAULT_TOOL_DESCRIPTION_TEMPLATE`.
            planning_interval: Forwarded unchanged to the parent constructor.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if llm_engine is None:
            llm_engine = HfApiEngine()
        if system_prompt is None:
            system_prompt = TOOL_CALLING_SYSTEM_PROMPT
        if tool_description_template is None:
            tool_description_template = DEFAULT_TOOL_DESCRIPTION_TEMPLATE
        super().__init__(
            tools=tools,
            llm_engine=llm_engine,
            system_prompt=system_prompt,
            tool_description_template=tool_description_template,
            planning_interval=planning_interval,
            **kwargs,
        )

    @staticmethod
    def _parse_tool_call(llm_output: Any) -> tuple:
        """
        Split an LLM output into `(rationale, tool_name, arguments)`.

        The expected shape is the one shown in the default system prompt: optional free text,
        then `Action:` followed by a JSON object with an "action" key (the tool name) and an
        optional "action_input" key (the tool arguments).

        Raises:
            ValueError: if no JSON object with an "action" key can be extracted.
        """
        import json  # local import: the file-level import block is not visible from this block

        text = str(llm_output)
        # Everything before the last "Action:" marker is the model's reasoning. When the marker
        # is absent, rpartition yields an empty rationale and the whole text as the blob.
        rationale, _, action_blob = text.rpartition("Action:")

        # Tolerate code fences or trailing chatter around the JSON object.
        start = action_blob.find("{")
        end = action_blob.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON action blob found in the output")
        try:
            call = json.loads(action_blob[start : end + 1])
        except json.JSONDecodeError as e:
            raise ValueError(f"the action blob is not valid JSON ({e})") from e
        if not isinstance(call, dict) or "action" not in call:
            raise ValueError("the action blob has no 'action' key")

        return rationale.strip(), call["action"], call.get("action_input")

    def step(self, log_entry: ActionStep) -> Union[None, Any]:
        """
        Perform one step in the ReAct framework: the agent thinks, acts, and observes the result.

        Returns None if the step is not final; otherwise returns the final answer.

        Side effects on `log_entry`: sets `agent_memory`, `llm_output`, `tool_call`, and then
        either `action_output` (final step) or `observations` (intermediate step).

        Raises:
            AgentGenerationError: if the engine call fails, or if no tool call can be parsed
                from its output.
        """
        agent_memory = self.write_inner_memory_from_logs()
        self.prompt_messages = agent_memory

        # Add new step in logs (a copy, so later changes to the memory list do not alter this entry)
        log_entry.agent_memory = agent_memory.copy()

        if self.verbose:
            # NOTE(review): confirm that "orange" is a style name the console accepts.
            console.print(
                Group(
                    Rule(
                        "[italic]Calling LLM engine with this last message:",
                        align="left",
                        style="orange",
                    ),
                    Text(str(self.prompt_messages[-1])),
                )
            )

        try:
            llm_output = self.llm_engine(self.prompt_messages)
        except Exception as e:
            # Chain the cause so the engine's own traceback is not lost.
            raise AgentGenerationError(f"Error in generating llm_engine output: {e}.") from e
        log_entry.llm_output = llm_output

        if self.verbose:
            console.print(
                Group(
                    Rule(
                        "[italic]Output message of the LLM:",
                        align="left",
                        style="orange",
                    ),
                    Text(str(llm_output)),
                )
            )

        # Previously `tool_name`, `arguments` and `rationale` were read below without ever
        # being assigned, so every step ended in a NameError.
        try:
            rationale, tool_name, arguments = self._parse_tool_call(llm_output)
        except ValueError as e:
            raise AgentGenerationError(f"Error in parsing tool call from llm_engine output: {e}.") from e

        log_entry.tool_call = ToolCall(tool_name=tool_name, tool_arguments=arguments)

        # Execute
        console.print(Rule("Agent thoughts:", align="left"), Text(rationale))
        console.print(Panel(Text(f"Calling tool: '{tool_name}' with arguments: {arguments}")))

        if tool_name == "final_answer":
            # Accept both {"answer": value} and a bare value.
            if isinstance(arguments, dict) and "answer" in arguments:
                answer = arguments["answer"]
            else:
                answer = arguments
            # if the answer is a state variable, return the value
            if isinstance(answer, str) and answer in self.state:
                answer = self.state[answer]
            log_entry.action_output = answer
            return answer

        if arguments is None:
            arguments = {}
        observation = self.execute_tool_call(tool_name, arguments)

        # Media observations are kept in state under a fixed name; only that name is logged.
        # TODO: observation naming could allow for different names of same type
        if isinstance(observation, AgentImage):
            observation_name = "image.png"
        elif isinstance(observation, AgentAudio):
            observation_name = "audio.mp3"
        else:
            observation_name = None

        if observation_name is not None:
            self.state[observation_name] = observation
            updated_information = f"Stored '{observation_name}' in memory."
        else:
            updated_information = str(observation).strip()

        log_entry.observations = updated_information
        return None
|
||||
|
||||
|
||||
class CodeAgent(ReactAgent):
|
||||
"""
|
||||
|
|
|
@ -277,6 +277,113 @@ Now Begin! If you solve the task correctly, you will receive a reward of $1,000,
|
|||
"""
|
||||
|
||||
|
||||
TOOL_CALLING_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can.
|
||||
To do so, you have been given access to the following tools: {{tool_names}}
|
||||
|
||||
The tool call you write is an action: after the tool is executed, you will get the result of the tool call as an "observation".
|
||||
This Action/Observation can repeat N times, you should take several steps when needed.
|
||||
|
||||
You can use the result of the previous action as input for the next action.
|
||||
The observation will always be a string: it can represent a file, like "image_1.jpg".
|
||||
Then you can use it as input for the next action. You can do it for instance as follows:
|
||||
|
||||
Observation: "image_1.jpg"
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "image_transformer",
|
||||
"action_input": {"image": "image_1.jpg"}
|
||||
}
|
||||
|
||||
To provide the final answer to the task, use an action blob with "action": "final_answer" tool. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this:
|
||||
Action:
|
||||
{
|
||||
"action": "final_answer",
|
||||
"action_input": {"answer": "insert your final answer here"}
|
||||
}
|
||||
|
||||
|
||||
Here are a few examples using notional tools:
|
||||
---
|
||||
Task: "Generate an image of the oldest person in this document."
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "document_qa",
|
||||
"action_input": {"document": "document.pdf", "question": "Who is the oldest person mentioned?"}
|
||||
}
|
||||
Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "image_generator",
|
||||
"action_input": {"prompt": "A portrait of John Doe, a 55-year-old man living in Canada."}
|
||||
}
|
||||
Observation: "image.png"
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "final_answer",
|
||||
"action_input": "image.png"
|
||||
}
|
||||
|
||||
---
|
||||
Task: "What is the result of the following operation: 5 + 3 + 1294.678?"
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "python_interpreter",
|
||||
"action_input": {"code": "5 + 3 + 1294.678"}
|
||||
}
|
||||
Observation: 1302.678
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "final_answer",
|
||||
"action_input": "1302.678"
|
||||
}
|
||||
|
||||
---
|
||||
Task: "Which city has the highest population , Guangzhou or Shanghai?"
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "search",
|
||||
"action_input": "Population Guangzhou"
|
||||
}
|
||||
Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.']
|
||||
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "search",
|
||||
"action_input": "Population Shanghai"
|
||||
}
|
||||
Observation: '26 million (2019)'
|
||||
|
||||
Action:
|
||||
{
|
||||
"action": "final_answer",
|
||||
"action_input": "Shanghai"
|
||||
}
|
||||
|
||||
|
||||
Above example were using notional tools that might not exist for you. You only have access to these tools:
|
||||
|
||||
{{tool_descriptions}}
|
||||
|
||||
{{managed_agents_descriptions}}
|
||||
|
||||
Here are the rules you should always follow to solve your task:
|
||||
1. ALWAYS provide a tool call, else you will fail.
|
||||
2. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.
|
||||
3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.
|
||||
If no tool call is needed, use final_answer tool to return your answer.
|
||||
4. Never re-do a tool call that you previously did with the exact same parameters.
|
||||
|
||||
Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000.
|
||||
"""
|
||||
|
||||
CODE_SYSTEM_PROMPT = """You are an expert assistant who can solve any task using code blobs. You will be given a task to solve as best you can.
|
||||
To do so, you have been given access to a list of tools: these tools are basically Python functions which you can call with code.
|
||||
To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences.
|
||||
|
|
Loading…
Reference in New Issue