Improve inference choice examples (#311)

* Improve inference choice examples

* Fix style

---------

Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Aymeric Roucher 2025-01-24 16:32:35 +01:00 committed by GitHub
parent 0196dc7b21
commit de7b0ee799
5 changed files with 56 additions and 93 deletions

View File

@@ -0,0 +1,51 @@
from typing import Optional

from smolagents import HfApiModel, LiteLLMModel, TransformersModel, tool
from smolagents.agents import CodeAgent, ToolCallingAgent

# Choose which inference type to use!
available_inferences = ["hf_api", "transformers", "ollama", "litellm"]
chosen_inference = "transformers"

print(f"Chosen inference: {chosen_inference}")

if chosen_inference == "hf_api":
    model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")

elif chosen_inference == "transformers":
    model = TransformersModel(model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct", device_map="auto", max_new_tokens=1000)

elif chosen_inference == "ollama":
    model = LiteLLMModel(
        model_id="ollama_chat/llama3.2",
        api_base="http://localhost:11434",  # replace with remote open-ai compatible server if necessary
        api_key="your-api-key",  # replace with API key if necessary
    )

elif chosen_inference == "litellm":
    # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-latest'
    model = LiteLLMModel(model_id="gpt-4o")


@tool
def get_weather(location: str, celsius: Optional[bool] = False) -> str:
    """
    Get the weather in the next days at a given location.
    Secretly this tool does not care about the location, it hates the weather everywhere.

    Args:
        location: the location
        celsius: whether to return the temperature in Celsius
    """
    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"


agent = ToolCallingAgent(tools=[get_weather], model=model)
print("ToolCallingAgent:", agent.run("What's the weather like in Paris?"))

agent = CodeAgent(tools=[get_weather], model=model)
print("CodeAgent:", agent.run("What's the weather like in Paris?"))

View File

@@ -1,30 +0,0 @@
from typing import Optional

from smolagents import LiteLLMModel, tool
from smolagents.agents import ToolCallingAgent

# Choose which LLM engine to use!
# model = HfApiModel(model_id="meta-llama/Llama-3.3-70B-Instruct")
# model = TransformersModel(model_id="meta-llama/Llama-3.2-2B-Instruct")

# For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620'
model = LiteLLMModel(model_id="gpt-4o")


@tool
def get_weather(location: str, celsius: Optional[bool] = False) -> str:
    """
    Get weather in the next days at given location.
    Secretly this tool does not care about the location, it hates the weather everywhere.

    Args:
        location: the location
        celsius: the temperature
    """
    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"


agent = ToolCallingAgent(tools=[get_weather], model=model)

print(agent.run("What's the weather like in Paris?"))

View File

@@ -1,29 +0,0 @@
"""An example of loading a ToolCollection directly from an MCP server.

Requirements: to run this example, you need to have uv installed and in your path in
order to run the MCP server with uvx; see `mcp_server_params` below.

Note this is just a demo MCP server that was implemented for the purpose of this example.
It only provides a single tool to search among PubMed paper abstracts.

Usage:
>>> uv run examples/tool_calling_agent_mcp.py
"""

import os

from mcp import StdioServerParameters

from smolagents import CodeAgent, HfApiModel, ToolCollection

mcp_server_params = StdioServerParameters(
    command="uvx",
    args=["--quiet", "pubmedmcp@0.1.3"],
    env={"UV_PYTHON": "3.12", **os.environ},
)

with ToolCollection.from_mcp(mcp_server_params) as tool_collection:
    # print(tool_collection.tools[0](request={"term": "efficient treatment hangover"}))
    agent = CodeAgent(tools=tool_collection.tools, model=HfApiModel(), max_steps=4)
    agent.run("Find me one risk associated with drinking alcohol regularly on low doses for humans.")

View File

@@ -1,29 +0,0 @@
from typing import Optional

from smolagents import LiteLLMModel, tool
from smolagents.agents import ToolCallingAgent

model = LiteLLMModel(
    model_id="ollama_chat/llama3.2",
    api_base="http://localhost:11434",  # replace with remote open-ai compatible server if necessary
    api_key="your-api-key",  # replace with API key if necessary
)


@tool
def get_weather(location: str, celsius: Optional[bool] = False) -> str:
    """
    Get weather in the next days at given location.
    Secretly this tool does not care about the location, it hates the weather everywhere.

    Args:
        location: the location
        celsius: the temperature
    """
    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"


agent = ToolCallingAgent(tools=[get_weather], model=model)

print(agent.run("What's the weather like in Paris?"))

View File

@@ -480,7 +480,6 @@ class TransformersModel(Model):
             messages=messages,
             stop_sequences=stop_sequences,
             grammar=grammar,
-            tools_to_call_from=tools_to_call_from,
             **kwargs,
         )
@@ -497,9 +496,6 @@ class TransformersModel(Model):
         if max_new_tokens:
             completion_kwargs["max_new_tokens"] = max_new_tokens
-        if stop_sequences:
-            completion_kwargs["stopping_criteria"] = self.make_stopping_criteria(stop_sequences)
         if tools_to_call_from is not None:
             prompt_tensor = self.tokenizer.apply_chat_template(
                 messages,
@@ -518,7 +514,11 @@
         prompt_tensor = prompt_tensor.to(self.model.device)
         count_prompt_tokens = prompt_tensor["input_ids"].shape[1]
-        out = self.model.generate(**prompt_tensor, **completion_kwargs)
+        out = self.model.generate(
+            **prompt_tensor,
+            stopping_criteria=(self.make_stopping_criteria(stop_sequences) if stop_sequences else None),
+            **completion_kwargs,
+        )
         generated_tokens = out[0, count_prompt_tokens:]
         output = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
         self.last_input_token_count = count_prompt_tokens
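This hunk moves stop-sequence handling out of completion_kwargs and passes it directly as the stopping_criteria argument of model.generate(). The make_stopping_criteria helper itself is not shown in this diff; below is a minimal sketch of what such a helper could look like using the transformers StoppingCriteria API (the class name, the explicit tokenizer argument, and the 20-token decode window are assumptions, not the library's actual implementation):

from transformers import StoppingCriteria, StoppingCriteriaList


class StopOnStrings(StoppingCriteria):
    """Stop generation once any stop string appears at the end of the decoded output."""

    def __init__(self, stop_strings, tokenizer):
        self.stop_strings = stop_strings
        self.tokenizer = tokenizer

    def __call__(self, input_ids, scores, **kwargs):
        # Decode only a short tail of the sequence: long enough to contain any stop string.
        tail = self.tokenizer.decode(input_ids[0][-20:], skip_special_tokens=True)
        return any(stop in tail for stop in self.stop_strings)


def make_stopping_criteria(stop_sequences, tokenizer):
    return StoppingCriteriaList([StopOnStrings(stop_sequences, tokenizer)])

Passing stopping_criteria=None, as the new generate() call does when there are no stop sequences, is accepted by transformers and simply leaves early stopping disabled.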