From 81388b14f7f1857426cc31edbe83e5667bfabffe Mon Sep 17 00:00:00 2001 From: Izaak Curry <98251797+ScientistIzaak@users.noreply.github.com> Date: Wed, 1 Jan 2025 22:33:07 -0800 Subject: [PATCH 1/3] add device parameter to TransformersModel --- src/smolagents/models.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/smolagents/models.py b/src/smolagents/models.py index 6fc8dbb..6ad0ce9 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -284,7 +284,7 @@ class HfApiModel(Model): class TransformersModel(Model): """This engine initializes a model and tokenizer from the given `model_id`.""" - def __init__(self, model_id: Optional[str] = None): + def __init__(self, model_id: Optional[str] = None, device: Optional[str] = None): super().__init__() default_model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct" if model_id is None: @@ -293,15 +293,18 @@ class TransformersModel(Model): f"`model_id`not provided, using this default tokenizer for token counts: '{model_id}'" ) self.model_id = model_id + if device is None: + device = "cuda" if torch.cuda.is_available() else "cpu" + self.device = device try: self.tokenizer = AutoTokenizer.from_pretrained(model_id) - self.model = AutoModelForCausalLM.from_pretrained(model_id) + self.model = AutoModelForCausalLM.from_pretrained(model_id).to(self.device) except Exception as e: logger.warning( f"Failed to load tokenizer and model for {model_id=}: {e}. Loading default tokenizer and model instead from {model_id=}." 
) self.tokenizer = AutoTokenizer.from_pretrained(default_model_id) - self.model = AutoModelForCausalLM.from_pretrained(default_model_id) + self.model = AutoModelForCausalLM.from_pretrained(default_model_id).to(self.device) def make_stopping_criteria(self, stop_sequences: List[str]) -> StoppingCriteriaList: class StopOnStrings(StoppingCriteria): From 12ee33a8788305131471778a3cc8b1981b9bf887 Mon Sep 17 00:00:00 2001 From: Izaak Curry Date: Thu, 2 Jan 2025 20:54:32 -0800 Subject: [PATCH 2/3] add device parameter to TransformersModel --- src/smolagents/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/smolagents/models.py b/src/smolagents/models.py index 6ad0ce9..9c9a749 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -29,6 +29,7 @@ import litellm import logging import os import random +import torch from huggingface_hub import InferenceClient @@ -304,7 +305,7 @@ class TransformersModel(Model): f"Failed to load tokenizer and model for {model_id=}: {e}. Loading default tokenizer and model instead from {model_id=}." 
) self.tokenizer = AutoTokenizer.from_pretrained(default_model_id) - self.model = AutoModelForCausalLM.from_pretrained(default_model_id).to(self.device) + self.model = AutoModelForCausalLM.from_pretrained(default_model_id, device_map=device) def make_stopping_criteria(self, stop_sequences: List[str]) -> StoppingCriteriaList: class StopOnStrings(StoppingCriteria): From e2ac275d6ec56edf2e4ae668f2d58d44be901622 Mon Sep 17 00:00:00 2001 From: Izaak Curry Date: Thu, 2 Jan 2025 21:38:52 -0800 Subject: [PATCH 3/3] updated logging --- src/smolagents/models.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/smolagents/models.py b/src/smolagents/models.py index 9c9a749..2e5f601 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -283,7 +283,14 @@ class HfApiModel(Model): class TransformersModel(Model): - """This engine initializes a model and tokenizer from the given `model_id`.""" + """This engine initializes a model and tokenizer from the given `model_id`. + + Parameters: + model_id (`str`, *optional*, defaults to `"HuggingFaceTB/SmolLM2-1.7B-Instruct"`): + The Hugging Face model ID to be used for inference. This can be a path or model identifier from the Hugging Face model hub. + device (`str`, optional, defaults to `"cuda"` if available, else `"cpu"`.): + The device to load the model on (`"cpu"` or `"cuda"`). + """ def __init__(self, model_id: Optional[str] = None, device: Optional[str] = None): super().__init__() @@ -297,6 +304,7 @@ class TransformersModel(Model): if device is None: device = "cuda" if torch.cuda.is_available() else "cpu" self.device = device + logger.info(f"Using device: {self.device}") try: self.tokenizer = AutoTokenizer.from_pretrained(model_id) self.model = AutoModelForCausalLM.from_pretrained(model_id).to(self.device) @@ -305,7 +313,7 @@ class TransformersModel(Model): f"Failed to load tokenizer and model for {model_id=}: {e}. Loading default tokenizer and model instead from {model_id=}." 
) self.tokenizer = AutoTokenizer.from_pretrained(default_model_id) - self.model = AutoModelForCausalLM.from_pretrained(default_model_id, device_map=device) + self.model = AutoModelForCausalLM.from_pretrained(default_model_id).to(self.device) def make_stopping_criteria(self, stop_sequences: List[str]) -> StoppingCriteriaList: class StopOnStrings(StoppingCriteria):