From d3912c70cf7c0c44c37cac453052e7f3039155c4 Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Thu, 30 Jan 2025 01:03:09 +0100
Subject: [PATCH] Support third-party Inference providers in `HfApiModel`
 (#422)

* Add `provider` param to `HfApiModel`, update guided_tour.md

---------

Co-authored-by: Aymeric
---
 docs/source/en/guided_tour.md      | 11 ++++++-----
 docs/source/en/index.md            |  2 +-
 docs/source/en/reference/models.md |  2 +-
 pyproject.toml                     |  2 +-
 src/smolagents/__init__.py         |  2 ++
 src/smolagents/models.py           |  7 ++++++-
 tests/test_agents.py               |  2 +-
 tests/test_all_docs.py             |  1 +
 tests/test_models.py               |  8 ++++++++
 9 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/docs/source/en/guided_tour.md b/docs/source/en/guided_tour.md
index cbeaba8..7e0545b 100644
--- a/docs/source/en/guided_tour.md
+++ b/docs/source/en/guided_tour.md
@@ -25,18 +25,18 @@ To initialize a minimal agent, you need at least these two arguments:
 - `model`, a text-generation model to power your agent - because the agent is different from a simple LLM, it is a system that uses an LLM as its engine. You can use any of these options:
   - [`TransformersModel`] takes a pre-initialized `transformers` pipeline to run inference on your local machine using `transformers`.
-  - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood.
-  - [`LiteLLMModel`] lets you call 100+ different models through [LiteLLM](https://docs.litellm.ai/)!
+  - [`HfApiModel`] leverages a `huggingface_hub.InferenceClient` under the hood and supports all Inference Providers on the Hub.
+  - [`LiteLLMModel`] similarly lets you call 100+ different models and providers through [LiteLLM](https://docs.litellm.ai/)!
   - [`AzureOpenAIServerModel`] allows you to use OpenAI models deployed in [Azure](https://azure.microsoft.com/en-us/products/ai-services/openai-service).
 - `tools`, a list of `Tools` that the agent can use to solve the task. It can be an empty list. You can also add the default toolbox on top of your `tools` list by defining the optional argument `add_base_tools=True`.
 
-Once you have these two arguments, `tools` and `model`, you can create an agent and run it. You can use any LLM you'd like, either through [Hugging Face API](https://huggingface.co/docs/api-inference/en/index), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), [LiteLLM](https://www.litellm.ai/), or [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service).
+Once you have these two arguments, `tools` and `model`, you can create an agent and run it. You can use any LLM you'd like, either through [Inference Providers](https://huggingface.co/blog/inference-providers), [transformers](https://github.com/huggingface/transformers/), [ollama](https://ollama.com/), [LiteLLM](https://www.litellm.ai/), or [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service).
 
-Hugging Face API is free to use without a token, but then it will have a rate limitation.
+HF Inference API is free to use without a token, but then it will have a rate limit.
 To access gated models or raise your rate limits with a PRO account, you need to set the environment variable `HF_TOKEN` or pass the `token` variable upon initialization of `HfApiModel`.
 You can get your token from your [settings page](https://huggingface.co/settings/tokens)
@@ -46,6 +46,7 @@ from smolagents import CodeAgent, HfApiModel
 model_id = "meta-llama/Llama-3.3-70B-Instruct"
 
 model = HfApiModel(model_id=model_id, token="") # You can choose to not pass any model_id to HfApiModel to use a default free model
+# You can also specify a particular provider, e.g. provider="together" or provider="sambanova"
 agent = CodeAgent(tools=[], model=model, add_base_tools=True)
 
 agent.run(
diff --git a/docs/source/en/index.md b/docs/source/en/index.md
index 90f5c78..14f80ff 100644
--- a/docs/source/en/index.md
+++ b/docs/source/en/index.md
@@ -25,7 +25,7 @@ This library offers:
 
 ✨ **Simplicity**: the logic for agents fits in ~thousand lines of code. We kept abstractions to their minimal shape above raw code!
 
-🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM.
+🌐 **Support for any LLM**: it supports models hosted on the Hub loaded in their `transformers` version or through our inference API and Inference Providers, but also models from OpenAI, Anthropic... it's really easy to power an agent with any LLM.
 
 🧑‍💻 **First-class support for Code Agents**, i.e. agents that write their actions in code (as opposed to "agents being used to write code"), [read more here](tutorials/secure_code_execution).
 
diff --git a/docs/source/en/reference/models.md b/docs/source/en/reference/models.md
index 3c4297a..d2d3db9 100644
--- a/docs/source/en/reference/models.md
+++ b/docs/source/en/reference/models.md
@@ -74,7 +74,7 @@ print(model([{"role": "user", "content": "Ok!"}], stop_sequences=["great"]))
 
 ### HfApiModel
 
-The `HfApiModel` wraps an [HF Inference API](https://huggingface.co/docs/api-inference/index) client for the execution of the LLM.
+The `HfApiModel` wraps huggingface_hub's [InferenceClient](https://huggingface.co/docs/huggingface_hub/main/en/guides/inference) for the execution of the LLM. It supports both HF's own [Inference API](https://huggingface.co/docs/api-inference/index) and all [Inference Providers](https://huggingface.co/blog/inference-providers) available on the Hub.
 
 ```python
 from smolagents import HfApiModel
diff --git a/pyproject.toml b/pyproject.toml
index d964ee4..db766ef 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ authors = [
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "huggingface-hub>=0.24.0",
+    "huggingface-hub>=0.28.0",
     "requests>=2.32.3",
     "rich>=13.9.4",
     "pandas>=2.2.3",
diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py
index be4f6ce..1507c70 100644
--- a/src/smolagents/__init__.py
+++ b/src/smolagents/__init__.py
@@ -21,6 +21,8 @@ from .default_tools import *
 from .e2b_executor import *
 from .gradio_ui import *
 from .local_python_executor import *
+from .logger import *
+from .memory import *
 from .models import *
 from .monitoring import *
 from .prompts import *
diff --git a/src/smolagents/models.py b/src/smolagents/models.py
index 2b91d9b..ffe967b 100644
--- a/src/smolagents/models.py
+++ b/src/smolagents/models.py
@@ -338,6 +338,9 @@ class HfApiModel(Model):
     Parameters:
         model_id (`str`, *optional*, defaults to `"Qwen/Qwen2.5-Coder-32B-Instruct"`):
             The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub.
+        provider (`str`, *optional*):
+            Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"`, or `"hf-inference"`.
+            Defaults to `"hf-inference"` (HF Inference API).
         token (`str`, *optional*):
             Token used by the Hugging Face API for authentication. This token needs to be authorized to 'Make calls to the serverless Inference API'.
             If the model is gated (like Llama-3 models), the token also needs 'Read access to contents of all public gated repos you can access'.
@@ -368,15 +371,17 @@ class HfApiModel(Model):
     def __init__(
         self,
         model_id: str = "Qwen/Qwen2.5-Coder-32B-Instruct",
+        provider: Optional[str] = None,
         token: Optional[str] = None,
         timeout: Optional[int] = 120,
         **kwargs,
     ):
         super().__init__(**kwargs)
         self.model_id = model_id
+        self.provider = provider
         if token is None:
             token = os.getenv("HF_TOKEN")
-        self.client = InferenceClient(self.model_id, token=token, timeout=timeout)
+        self.client = InferenceClient(self.model_id, provider=provider, token=token, timeout=timeout)
 
     def __call__(
         self,
diff --git a/tests/test_agents.py b/tests/test_agents.py
index 8083d2a..d123d83 100644
--- a/tests/test_agents.py
+++ b/tests/test_agents.py
@@ -474,7 +474,7 @@ class AgentTests(unittest.TestCase):
         with agent.logger.console.capture() as capture:
             agent.run("Count to 3")
         str_output = capture.get()
-        assert "Consider passing said import under" in str_output.replace("\n", "")
+        assert "`additional_authorized_imports`" in str_output.replace("\n", "")
 
     def test_multiagents(self):
         class FakeModelMultiagentsManagerAgent:
diff --git a/tests/test_all_docs.py b/tests/test_all_docs.py
index 68a88d3..7dcbf58 100644
--- a/tests/test_all_docs.py
+++ b/tests/test_all_docs.py
@@ -78,6 +78,7 @@ class DocCodeExtractor:
         return tmp_file
 
 
+@pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="RUN_ALL environment variable not set")
 class TestDocs:
     """Test case for documentation code testing."""
 
diff --git a/tests/test_models.py b/tests/test_models.py
index 1857ccd..aa9024c 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -13,10 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+import os
 import unittest
 from pathlib import Path
 from typing import Optional
 
+import pytest
 from transformers.testing_utils import get_tests_dir
 
 from smolagents import ChatMessage, HfApiModel, TransformersModel, models, tool
@@ -51,6 +53,12 @@ class ModelTests(unittest.TestCase):
         messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
         model(messages, stop_sequences=["great"])
 
+    @pytest.mark.skipif(not os.getenv("RUN_ALL"), reason="RUN_ALL environment variable not set")
+    def test_get_hfapi_message_no_tool_external_provider(self):
+        model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
+        messages = [{"role": "user", "content": [{"type": "text", "text": "Hello!"}]}]
+        model(messages, stop_sequences=["great"])
+
     def test_transformers_message_no_tool(self):
         model = TransformersModel(
             model_id="HuggingFaceTB/SmolLM2-135M-Instruct",
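
Taken together, the patch lets callers route `HfApiModel` requests through any Inference Provider on the Hub rather than only HF's own Inference API. Below is a minimal usage sketch; the model ID, provider name, and prompt are illustrative examples, omitting `provider` falls back to the default HF Inference API, and third-party providers assume a valid token via the `token` argument or the `HF_TOKEN` environment variable:

```python
from smolagents import CodeAgent, HfApiModel

# Route inference through a third-party provider instead of the default
# "hf-inference" backend; requires huggingface-hub>=0.28.0 as pinned above.
model = HfApiModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    provider="together",  # or "replicate", "fal-ai", "sambanova", "hf-inference"
)

agent = CodeAgent(tools=[], model=model, add_base_tools=True)
agent.run("Count to 3")  # arbitrary example prompt
```

Since `provider` is simply forwarded to `huggingface_hub.InferenceClient`, any provider accepted by that client should work without further changes in `smolagents`.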