From 12822e280dc1382bd5570a07abe4d45ee1f665bd Mon Sep 17 00:00:00 2001
From: Aymeric <aymeric.roucher@gmail.com>
Date: Tue, 10 Dec 2024 11:27:24 +0100
Subject: [PATCH] Support one-liner UI

---
 agents/agents.py                | 12 +++--
 agents/gradio_ui.py             | 92 +++++++++++++++++++++++++++++++++
 agents/monitoring.py            | 51 ------------------
 examples/quick_launch_gradio.py | 10 ++++
 4 files changed, 110 insertions(+), 55 deletions(-)
 create mode 100644 agents/gradio_ui.py
 create mode 100644 examples/quick_launch_gradio.py

diff --git a/agents/agents.py b/agents/agents.py
index 709ea76..01a3349 100644
--- a/agents/agents.py
+++ b/agents/agents.py
@@ -81,8 +81,11 @@ class AgentGenerationError(AgentError):
 
     pass
 
+class AgentStep:  # Empty common base for the step dataclasses below; lets callers annotate "any step".
+    pass
+
 @dataclass
-class ActionStep:
+class ActionStep(AgentStep):
     tool_call: str | None = None
     start_time: float | None = None
     step_end_time: float | None = None
@@ -93,18 +96,19 @@ class ActionStep:
     llm_output: str | None = None
 
 @dataclass
-class PlanningStep:
+class PlanningStep(AgentStep):
     plan: str
     facts: str
 
 @dataclass
-class TaskStep:
+class TaskStep(AgentStep):
     task: str
 
 @dataclass
-class SystemPromptStep:
+class SystemPromptStep(AgentStep):
     system_prompt: str
 
+
 def format_prompt_with_tools(toolbox: Toolbox, prompt_template: str, tool_description_template: str) -> str:
     tool_descriptions = toolbox.show_tool_descriptions(tool_description_template)
     prompt = prompt_template.replace("{{tool_descriptions}}", tool_descriptions)
diff --git a/agents/gradio_ui.py b/agents/gradio_ui.py
new file mode 100644
index 0000000..34db002
--- /dev/null
+++ b/agents/gradio_ui.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .agent_types import AgentAudio, AgentImage, AgentText
+from .agents import BaseAgent, AgentStep, ActionStep
+import gradio as gr
+
+def pull_messages_from_step(step_log: AgentStep, test_mode: bool = True):
+    """Yield gradio ChatMessages for an ActionStep's populated fields; other step types yield nothing."""
+    # NOTE: `test_mode` is accepted but not used anywhere in this function.
+    if isinstance(step_log, ActionStep):
+        if step_log.rationale is not None:  # skip steps without a rationale instead of sending content=None
+            yield gr.ChatMessage(role="assistant", content=step_log.rationale)
+        if step_log.tool_call is not None:
+            tool_name = step_log.tool_call["tool_name"]
+            content = step_log.tool_call["tool_arguments"]
+            if tool_name == "code interpreter":  # code arguments are shown inside a Python code fence
+                content = f"```py\n{content}\n```"
+            yield gr.ChatMessage(
+                role="assistant",
+                metadata={"title": f"🛠️ Used tool {tool_name}"},
+                content=str(content),
+            )
+        if step_log.observation is not None:
+            yield gr.ChatMessage(role="assistant", content=f"```\n{step_log.observation}\n```")
+        if step_log.error is not None:
+            yield gr.ChatMessage(
+                role="assistant", content=str(step_log.error), metadata={"title": "💥 Error"}
+            )
+
+
+def stream_to_gradio(agent, task: str, test_mode: bool = False, reset_agent_memory: bool=False, **kwargs):
+    """Run `agent` on `task`; stream each step's messages, then the final answer, as gradio ChatMessages."""
+    no_output = object()  # sentinel: tells "the run yielded nothing" apart from a final answer of None
+    final_answer = no_output
+    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, **kwargs):
+        final_answer = step_log  # the last item yielded by the run is treated as its final answer
+        yield from pull_messages_from_step(step_log, test_mode=test_mode)
+    if final_answer is no_output:
+        return  # empty run: nothing to display (reading `step_log` here used to raise UnboundLocalError)
+    if isinstance(final_answer, AgentText):
+        yield gr.ChatMessage(role="assistant", content=f"**Final answer:**\n```\n{final_answer.to_string()}\n```")
+    elif isinstance(final_answer, AgentImage):
+        yield gr.ChatMessage(
+            role="assistant",
+            content={"path": final_answer.to_string(), "mime_type": "image/png"},
+        )
+    elif isinstance(final_answer, AgentAudio):
+        yield gr.ChatMessage(
+            role="assistant",
+            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
+        )
+    else:
+        yield gr.ChatMessage(role="assistant", content=str(final_answer))
+
+
+class GradioUI():
+    """A one-line interface to launch your agent in Gradio"""
+    def __init__(self, agent: BaseAgent):
+        self.agent = agent  # the agent that every submitted prompt is run against
+
+    def interact_with_agent(self, prompt, messages):  # generator: yields the same, growing `messages` list
+        messages.append(gr.ChatMessage(role="user", content=prompt))
+        yield messages  # first yield: history plus the user's new message, before the agent runs
+        for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
+            messages.append(msg)
+            yield messages  # re-yield after each agent message is appended
+        yield messages  # final yield repeats the completed list
+
+    def run(self):  # builds the Blocks layout, wires the submit handler, then calls demo.launch()
+        with gr.Blocks() as demo:
+            stored_message = gr.State([])  # receives the submitted text (see the submit lambda below)
+            chatbot = gr.Chatbot(label="Agent",
+                                type="messages",
+                                avatar_images=(None, "https://em-content.zobj.net/source/twitter/53/robot-face_1f916.png"))
+            text_input = gr.Textbox(lines=1, label="Chat Message")  # next line: copy text to state, clear box, then run agent
+            text_input.submit(lambda s: (s, ""), [text_input], [stored_message, text_input]).then(self.interact_with_agent, [stored_message, chatbot], [chatbot])
+
+        demo.launch()
\ No newline at end of file
diff --git a/agents/monitoring.py b/agents/monitoring.py
index 72fc1b6..82ee529 100644
--- a/agents/monitoring.py
+++ b/agents/monitoring.py
@@ -14,59 +14,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .agent_types import AgentAudio, AgentImage, AgentText
 from .utils import console
 
-def pull_message(step_log: dict, test_mode: bool = True):
-    from gradio import ChatMessage
-
-    if step_log.get("rationale"):
-        yield ChatMessage(role="assistant", content=step_log["rationale"])
-    if step_log.get("tool_call"):
-        used_code = step_log["tool_call"]["tool_name"] == "code interpreter"
-        content = step_log["tool_call"]["tool_arguments"]
-        if used_code:
-            content = f"```py\n{content}\n```"
-        yield ChatMessage(
-            role="assistant",
-            metadata={"title": f"🛠️ Used tool {step_log['tool_call']['tool_name']}"},
-            content=str(content),
-        )
-    if step_log.get("observation"):
-        yield ChatMessage(role="assistant", content=f"```\n{step_log['observation']}\n```")
-    if step_log.get("error"):
-        yield ChatMessage(
-            role="assistant",
-            content=str(step_log["error"]),
-            metadata={"title": "💥 Error"},
-        )
-
-
-def stream_to_gradio(agent, task: str, test_mode: bool = False, reset_agent_memory: bool=False, **kwargs):
-    """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
-    from gradio import ChatMessage
-
-    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, **kwargs):
-        if isinstance(step_log, dict):
-            for message in pull_message(step_log, test_mode=test_mode):
-                yield message
-
-    final_answer = step_log  # Last log is the run's final_answer
-
-    if isinstance(final_answer, AgentText):
-        yield ChatMessage(role="assistant", content=f"**Final answer:**\n```\n{final_answer.to_string()}\n```")
-    elif isinstance(final_answer, AgentImage):
-        yield ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "image/png"},
-        )
-    elif isinstance(final_answer, AgentAudio):
-        yield ChatMessage(
-            role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
-        )
-    else:
-        yield ChatMessage(role="assistant", content=str(final_answer))
 
 
 class Monitor:
diff --git a/examples/quick_launch_gradio.py b/examples/quick_launch_gradio.py
new file mode 100644
index 0000000..19c2380
--- /dev/null
+++ b/examples/quick_launch_gradio.py
@@ -0,0 +1,10 @@
+from agents.gradio_ui import GradioUI
+from agents import HfApiEngine, load_tool, CodeAgent
+
+image_generation_tool = load_tool("m-ric/text-to-image")  # presumably a Hub repo id — TODO confirm
+
+llm_engine = HfApiEngine("Qwen/Qwen2.5-72B-Instruct")  # model identifier handed to the engine
+
+agent = CodeAgent(tools=[image_generation_tool], llm_engine=llm_engine)
+
+GradioUI(agent).run()  # builds the Gradio Blocks demo and calls demo.launch()
\ No newline at end of file