diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
index 2e4f5c6..774fa52 100644
--- a/.github/workflows/quality.yml
+++ b/.github/workflows/quality.yml
@@ -16,20 +16,15 @@ jobs:
           python-version: "3.12"
 
       # Setup venv
-      # TODO: revisit when https://github.com/astral-sh/uv/issues/1526 is addressed.
       - name: Setup venv + uv
         run: |
           pip install --upgrade uv
           uv venv
 
       - name: Install dependencies
-        run: uv pip install "smolagents[test] @ ."
-      - run: uv run ruff check tests src # linter
-      - run: uv run ruff format --check tests src # formatter
+        run: uv pip install "smolagents[quality] @ ."
 
-      # Run type checking at least on smolagents root file to check all modules
-      # that can be lazy-loaded actually exist.
-      # - run: uv run mypy src/smolagents/__init__.py --follow-imports=silent --show-traceback
-
-      # Run mypy on full package
-      # - run: uv run mypy src
\ No newline at end of file
+      # Equivalent of "make quality" but step by step
+      - run: uv run ruff check examples src tests utils # linter
+      - run: uv run ruff format --check examples src tests utils # formatter
+      - run: uv run python utils/check_tests_in_ci.py
\ No newline at end of file
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c720ec0..412c1e8 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -20,9 +20,7 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
 
-
       # Setup venv
-      # TODO: revisit when https://github.com/astral-sh/uv/issues/1526 is addressed.
       - name: Setup venv + uv
         run: |
           pip install --upgrade uv
@@ -33,33 +31,59 @@ jobs:
         run: |
           uv pip install "smolagents[test] @ ."
 
+      # Run all tests separately for individual feedback
+      # Use 'if success() || failure()' so that all tests are run even if one failed
+      # See https://stackoverflow.com/a/62112985
       - name: Agent tests
         run: |
-          uv run pytest -sv ./tests/test_agents.py
+          uv run pytest ./tests/test_agents.py
+        if: ${{ success() || failure() }}
+
       - name: Default tools tests
         run: |
-          uv run pytest -sv ./tests/test_default_tools.py
+          uv run pytest ./tests/test_default_tools.py
+        if: ${{ success() || failure() }}
+
+      # - name: Docs tests # Disabled for now (slow test + requires API keys)
+      #   run: |
+      #     uv run pytest ./tests/test_all_docs.py
+
       - name: Final answer tests
         run: |
-          uv run pytest -sv ./tests/test_final_answer.py
+          uv run pytest ./tests/test_final_answer.py
+        if: ${{ success() || failure() }}
+
       - name: Models tests
         run: |
-          uv run pytest -sv ./tests/test_models.py
+          uv run pytest ./tests/test_models.py
+        if: ${{ success() || failure() }}
+
       - name: Monitoring tests
         run: |
-          uv run pytest -sv ./tests/test_monitoring.py
+          uv run pytest ./tests/test_monitoring.py
+        if: ${{ success() || failure() }}
+
       - name: Python interpreter tests
         run: |
-          uv run pytest -sv ./tests/test_python_interpreter.py
+          uv run pytest ./tests/test_python_interpreter.py
+        if: ${{ success() || failure() }}
+
       - name: Search tests
         run: |
-          uv run pytest -sv ./tests/test_search.py
+          uv run pytest ./tests/test_search.py
+        if: ${{ success() || failure() }}
+
       - name: Tools tests
         run: |
-          uv run pytest -sv ./tests/test_tools.py
+          uv run pytest ./tests/test_tools.py
+        if: ${{ success() || failure() }}
+
       - name: Types tests
         run: |
-          uv run pytest -sv ./tests/test_types.py
+          uv run pytest ./tests/test_types.py
+        if: ${{ success() || failure() }}
+
       - name: Utils tests
         run: |
-          uv run pytest -sv ./tests/test_utils.py
\ No newline at end of file
+          uv run pytest ./tests/test_utils.py
+        if: ${{ success() || failure() }}
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e6cfb91..0e346b7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -91,7 +91,7 @@ happy to make the changes or help you make a contribution if you're interested!
 
 ## I want to become a maintainer of the project. How do I get there?
 
-smolagents is a project led and managed by Hugging Face. We are  more than 
+smolagents is a project led and managed by Hugging Face. We are more than
 happy to have motivated individuals from other organizations join us as maintainers with the goal of helping smolagents
 make a dent in the world of Agents.
 
diff --git a/Makefile b/Makefile
index a24c1ae..c8e7c04 100644
--- a/Makefile
+++ b/Makefile
@@ -1,53 +1,18 @@
 .PHONY: quality style test docs utils
 
-check_dirs := .
+check_dirs := examples src tests utils
 
-# Check that source code meets quality standards
-
-extra_quality_checks:
-	python utils/check_copies.py
-	python utils/check_dummies.py
-	python utils/check_repo.py
-	doc-builder style smolagents docs/source --max_len 119
-
-# this target runs checks on all files
+# Check code quality of the source code
 quality:
 	ruff check $(check_dirs)
 	ruff format --check $(check_dirs)
-	doc-builder style smolagents docs/source --max_len 119 --check_only
+	python utils/check_tests_in_ci.py
 
-# Format source code automatically and check is there are any problems left that need manual fixing
+# Format source code automatically
 style:
 	ruff check $(check_dirs) --fix
 	ruff format $(check_dirs)
-	doc-builder style smolagents docs/source --max_len 119
 	
-# Run tests for the library
-test_big_modeling:
-	python -m pytest -s -v ./tests/test_big_modeling.py ./tests/test_modeling_utils.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_big_modeling.log",)
-
-test_core:
-	python -m pytest -s -v ./tests/ --ignore=./tests/test_examples.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_core.log",)
-
-test_cli:
-	python -m pytest -s -v ./tests/test_cli.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_cli.log",)
-
-
-# Since the new version of pytest will *change* how things are collected, we need `deepspeed` to 
-# run after test_core and test_cli
+# Run smolagents tests
 test:
-	$(MAKE) test_core
-	$(MAKE) test_cli
-	$(MAKE) test_big_modeling
-	$(MAKE) test_deepspeed
-	$(MAKE) test_fsdp
-
-test_examples:
-	python -m pytest -s -v ./tests/test_examples.py $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_examples.log",)
-
-# Same as test but used to install only the base dependencies
-test_prod:
-	$(MAKE) test_core
-
-test_rest:
-	python -m pytest -s -v ./tests/test_examples.py::FeatureExamplesTests $(if $(IS_GITHUB_CI),--report-log "$(PYTORCH_VERSION)_rest.log",)
+	pytest ./tests/
\ No newline at end of file
diff --git a/README.md b/README.md
index 80e3f84..c0b14c7 100644
--- a/README.md
+++ b/README.md
@@ -98,9 +98,27 @@ To contribute, follow our [contribution guide](https://github.com/huggingface/sm
 At any moment, feel welcome to open an issue, citing your exact error traces and package versions if it's a bug.
 It's often even better to open a PR with your proposed fixes/changes!
 
+To install dev dependencies, run:
+```
+pip install -e ".[dev]"
+```
+
+When making changes to the codebase, please check that it follows the repo's code quality requirements by running:
+To check code quality of the source code:
+```
+make quality
+```
+
+If the checks fail, you can run the formatter with:
+```
+make style
+```
+
+And commit the changes.
+
 To run tests locally, run this command:
 ```bash
-pytest -sv .
+pytest .
 ```
 
 ## Citing smolagents
diff --git a/examples/benchmark.ipynb b/examples/benchmark.ipynb
index 758b19a..e0b59d5 100644
--- a/examples/benchmark.ipynb
+++ b/examples/benchmark.ipynb
@@ -254,7 +254,10 @@
     "            if is_vanilla_llm:\n",
     "                llm = agent\n",
     "                answer = str(llm([{\"role\": \"user\", \"content\": question}]).content)\n",
-    "                token_count = {\"input\": llm.last_input_token_count, \"output\": llm.last_output_token_count}\n",
+    "                token_count = {\n",
+    "                    \"input\": llm.last_input_token_count,\n",
+    "                    \"output\": llm.last_output_token_count,\n",
+    "                }\n",
     "                intermediate_steps = str([])\n",
     "            else:\n",
     "                answer = str(agent.run(question))\n",
@@ -983,7 +986,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -1043,8 +1046,8 @@
     "\n",
     "\n",
     "# Usage (after running your previous data processing code):\n",
-    "mathjax_table = create_mathjax_table(pivot_df, formatted_df)\n",
-    "print(mathjax_table)"
+    "# mathjax_table = create_mathjax_table(pivot_df, formatted_df)\n",
+    "# print(mathjax_table)"
    ]
   }
  ],
diff --git a/examples/e2b_example.py b/examples/e2b_example.py
index 049dc15..843e144 100644
--- a/examples/e2b_example.py
+++ b/examples/e2b_example.py
@@ -4,8 +4,9 @@ from dotenv import load_dotenv
 
 load_dotenv()
 
+
 class GetCatImageTool(Tool):
-    name="get_cat_image"
+    name = "get_cat_image"
     description = "Get a cat image"
     inputs = {}
     output_type = "image"
@@ -27,17 +28,22 @@ class GetCatImageTool(Tool):
 get_cat_image = GetCatImageTool()
 
 agent = CodeAgent(
-    tools = [get_cat_image, VisitWebpageTool()],
+    tools=[get_cat_image, VisitWebpageTool()],
     model=HfApiModel(),
-    additional_authorized_imports=["Pillow", "requests", "markdownify"], # "duckduckgo-search", 
-    use_e2b_executor=True
+    additional_authorized_imports=[
+        "Pillow",
+        "requests",
+        "markdownify",
+    ],  # "duckduckgo-search",
+    use_e2b_executor=True,
 )
 
 agent.run(
-    "Return me an image of a cat. Directly use the image provided in your state.", additional_args={"cat_image":get_cat_image()}
-) # Asking to directly return the image from state tests that additional_args are properly sent to server.
+    "Return me an image of a cat. Directly use the image provided in your state.",
+    additional_args={"cat_image": get_cat_image()},
+)  # Asking to directly return the image from state tests that additional_args are properly sent to server.
 
 # Try the agent in a Gradio UI
 from smolagents import GradioUI
 
-GradioUI(agent).launch()
\ No newline at end of file
+GradioUI(agent).launch()
diff --git a/examples/gradio_upload.py b/examples/gradio_upload.py
index 1db1464..061d226 100644
--- a/examples/gradio_upload.py
+++ b/examples/gradio_upload.py
@@ -1,11 +1,5 @@
-from smolagents import (
-    CodeAgent,
-    HfApiModel,
-    GradioUI
-)
+from smolagents import CodeAgent, HfApiModel, GradioUI
 
-agent = CodeAgent(
-    tools=[], model=HfApiModel(), max_steps=4, verbosity_level=1
-)
+agent = CodeAgent(tools=[], model=HfApiModel(), max_steps=4, verbosity_level=1)
 
-GradioUI(agent, file_upload_folder='./data').launch()
+GradioUI(agent, file_upload_folder="./data").launch()
diff --git a/examples/inspect_runs.py b/examples/inspect_runs.py
index 69ce720..3e24efa 100644
--- a/examples/inspect_runs.py
+++ b/examples/inspect_runs.py
@@ -16,7 +16,9 @@ from smolagents import (
 # Let's setup the instrumentation first
 
 trace_provider = TracerProvider()
-trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter("http://0.0.0.0:6006/v1/traces")))
+trace_provider.add_span_processor(
+    SimpleSpanProcessor(OTLPSpanExporter("http://0.0.0.0:6006/v1/traces"))
+)
 
 SmolagentsInstrumentor().instrument(tracer_provider=trace_provider, skip_dep_check=True)
 
diff --git a/examples/rag.py b/examples/rag.py
index 4096d57..83a201d 100644
--- a/examples/rag.py
+++ b/examples/rag.py
@@ -8,7 +8,9 @@ from langchain_community.retrievers import BM25Retriever
 
 
 knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
-knowledge_base = knowledge_base.filter(lambda row: row["source"].startswith("huggingface/transformers"))
+knowledge_base = knowledge_base.filter(
+    lambda row: row["source"].startswith("huggingface/transformers")
+)
 
 source_docs = [
     Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
@@ -26,6 +28,7 @@ docs_processed = text_splitter.split_documents(source_docs)
 
 from smolagents import Tool
 
+
 class RetrieverTool(Tool):
     name = "retriever"
     description = "Uses semantic search to retrieve the parts of transformers documentation that could be most relevant to answer your query."
@@ -39,9 +42,7 @@ class RetrieverTool(Tool):
 
     def __init__(self, docs, **kwargs):
         super().__init__(**kwargs)
-        self.retriever = BM25Retriever.from_documents(
-            docs, k=10
-        )
+        self.retriever = BM25Retriever.from_documents(docs, k=10)
 
     def forward(self, query: str) -> str:
         assert isinstance(query, str), "Your search query must be a string"
@@ -56,14 +57,20 @@ class RetrieverTool(Tool):
             ]
         )
 
+
 from smolagents import HfApiModel, CodeAgent
 
 retriever_tool = RetrieverTool(docs_processed)
 agent = CodeAgent(
-    tools=[retriever_tool], model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"), max_steps=4, verbosity_level=2
+    tools=[retriever_tool],
+    model=HfApiModel("meta-llama/Llama-3.3-70B-Instruct"),
+    max_steps=4,
+    verbosity_level=2,
 )
 
-agent_output = agent.run("For a transformers model training, which is slower, the forward or the backward pass?")
+agent_output = agent.run(
+    "For a transformers model training, which is slower, the forward or the backward pass?"
+)
 
 print("Final output:")
 print(agent_output)
diff --git a/examples/text_to_sql.py b/examples/text_to_sql.py
index 9a8b201..60b84f6 100644
--- a/examples/text_to_sql.py
+++ b/examples/text_to_sql.py
@@ -40,11 +40,14 @@ for row in rows:
 inspector = inspect(engine)
 columns_info = [(col["name"], col["type"]) for col in inspector.get_columns("receipts")]
 
-table_description = "Columns:\n" + "\n".join([f"  - {name}: {col_type}" for name, col_type in columns_info])
+table_description = "Columns:\n" + "\n".join(
+    [f"  - {name}: {col_type}" for name, col_type in columns_info]
+)
 print(table_description)
 
 from smolagents import tool
 
+
 @tool
 def sql_engine(query: str) -> str:
     """
@@ -66,10 +69,11 @@ def sql_engine(query: str) -> str:
             output += "\n" + str(row)
     return output
 
+
 from smolagents import CodeAgent, HfApiModel
 
 agent = CodeAgent(
     tools=[sql_engine],
     model=HfApiModel("meta-llama/Meta-Llama-3.1-8B-Instruct"),
 )
-agent.run("Can you give me the name of the client who got the most expensive receipt?")
\ No newline at end of file
+agent.run("Can you give me the name of the client who got the most expensive receipt?")
diff --git a/examples/tool_calling_agent_from_any_llm.py b/examples/tool_calling_agent_from_any_llm.py
index 68155fe..05daaa5 100644
--- a/examples/tool_calling_agent_from_any_llm.py
+++ b/examples/tool_calling_agent_from_any_llm.py
@@ -9,6 +9,7 @@ from typing import Optional
 # For anthropic: change model_id below to 'anthropic/claude-3-5-sonnet-20240620'
 model = LiteLLMModel(model_id="gpt-4o")
 
+
 @tool
 def get_weather(location: str, celsius: Optional[bool] = False) -> str:
     """
@@ -21,6 +22,7 @@ def get_weather(location: str, celsius: Optional[bool] = False) -> str:
     """
     return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
 
+
 agent = ToolCallingAgent(tools=[get_weather], model=model)
 
-print(agent.run("What's the weather like in Paris?"))
\ No newline at end of file
+print(agent.run("What's the weather like in Paris?"))
diff --git a/examples/tool_calling_agent_ollama.py b/examples/tool_calling_agent_ollama.py
index ad914f8..c7198d6 100644
--- a/examples/tool_calling_agent_ollama.py
+++ b/examples/tool_calling_agent_ollama.py
@@ -4,10 +4,11 @@ from typing import Optional
 
 model = LiteLLMModel(
     model_id="ollama_chat/llama3.2",
-    api_base="http://localhost:11434", # replace with remote open-ai compatible server if necessary
-    api_key="your-api-key" # replace with API key if necessary
+    api_base="http://localhost:11434",  # replace with remote open-ai compatible server if necessary
+    api_key="your-api-key",  # replace with API key if necessary
 )
 
+
 @tool
 def get_weather(location: str, celsius: Optional[bool] = False) -> str:
     """
@@ -20,6 +21,7 @@ def get_weather(location: str, celsius: Optional[bool] = False) -> str:
     """
     return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
 
+
 agent = ToolCallingAgent(tools=[get_weather], model=model)
 
 print(agent.run("What's the weather like in Paris?"))
diff --git a/pyproject.toml b/pyproject.toml
index c530189..871d499 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,27 +26,37 @@ dependencies = [
   "openai>=1.58.1",
 ]
 
+[project.optional-dependencies]
+audio = [
+  "soundfile",
+]
+torch = [
+  "torch",
+  "accelerate",
+]
+litellm = [
+  "litellm>=1.55.10",
+]
+quality = [
+  "ruff>=0.9.0",
+]
+test = [
+  "pytest>=8.1.0",
+  "smolagents[audio,litellm,torch]",
+]
+dev = [
+  "smolagents[quality,test]",
+  "sqlalchemy", # for ./examples
+]
+
+[tool.pytest.ini_options]
+# Add the specified `OPTS` to the set of command line arguments as if they had been specified by the user.
+addopts = "-sv --durations=0"
+
 [tool.ruff]
 lint.ignore = ["F403"]
 
-[project.optional-dependencies]
-dev = [
-  "torch",
-  "torchaudio",
-  "torchvision",
-  "sqlalchemy",
-  "accelerate",
-  "soundfile",
-  "litellm>=1.55.10",
-]
-test = [
-  "torch",
-  "torchaudio",
-  "torchvision",
-  "pytest>=8.1.0",
-  "sqlalchemy",
-  "ruff>=0.5.0",
-  "accelerate",
-  "soundfile",
-  "litellm>=1.55.10",
-]
+[tool.ruff.lint.per-file-ignores]
+"examples/*" = [
+  "E402", # module-import-not-at-top-of-file
+]
\ No newline at end of file
diff --git a/utils/check_tests_in_ci.py b/utils/check_tests_in_ci.py
new file mode 100644
index 0000000..4c55ef0
--- /dev/null
+++ b/utils/check_tests_in_ci.py
@@ -0,0 +1,58 @@
+# coding=utf-8
+# Copyright 2025-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Check that all tests are called in CI."""
+
+from pathlib import Path
+
+ROOT = Path(__file__).parent.parent
+
+TESTS_FOLDER = ROOT / "tests"
+CI_WORKFLOW_FILE = ROOT / ".github" / "workflows" / "tests.yml"
+
+
+def check_tests_in_ci():
+    """List all test files in `./tests/` and check if they are listed in the CI workflow.
+
+    Since each test file is triggered separately in the CI workflow, it is easy to forget a new one when adding new
+    tests, hence this check.
+
+    NOTE: current implementation is quite naive but should work for now. Must be updated if one want to ignore some
+          tests or if file naming is updated (currently only files starting by `test_*` are cheked)
+    """
+    test_files = [
+        path.relative_to(TESTS_FOLDER).as_posix()
+        for path in TESTS_FOLDER.glob("**/*.py")
+        if path.name.startswith("test_")
+    ]
+    ci_workflow_file_content = CI_WORKFLOW_FILE.read_text()
+    missing_test_files = [
+        test_file
+        for test_file in test_files
+        if test_file not in ci_workflow_file_content
+    ]
+    if missing_test_files:
+        print(
+            "❌ Some test files seem to be ignored in the CI:\n"
+            + "\n".join(f"   - {test_file}" for test_file in missing_test_files)
+            + f"\n   Please add them manually in {CI_WORKFLOW_FILE}."
+        )
+        exit(1)
+    else:
+        print("✅ All good!")
+        exit(0)
+
+
+if __name__ == "__main__":
+    check_tests_in_ci()