From c5b92c8b59faba864d5cb1d2bd3ea9e2d1a45898 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 13:39:39 +0200
Subject: [PATCH 1/5] test(litellm): Replace mocks with httpx types in embedding
 tests

---
 tests/conftest.py                          |  19 +++
 tests/integrations/litellm/test_litellm.py | 141 +++++++++++----------
 2 files changed, 95 insertions(+), 65 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index effe758091..796cfaf310 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1218,6 +1218,25 @@ def nonstreaming_chat_completions_model_response():
     )
 
 
+@pytest.fixture
+def openai_embedding_model_response():
+    return openai.types.CreateEmbeddingResponse(
+        data=[
+            openai.types.Embedding(
+                embedding=[0.1, 0.2, 0.3],
+                index=0,
+                object="embedding",
+            )
+        ],
+        model="text-embedding-ada-002",
+        object="list",
+        usage=openai.types.create_embedding_response.Usage(
+            prompt_tokens=5,
+            total_tokens=5,
+        ),
+    )
+
+
 @pytest.fixture
 def nonstreaming_responses_model_response():
     return openai.types.responses.Response(
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 6c7184de71..1a94dd4d81 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -126,38 +126,6 @@ def __init__(
         self.created = 1234567890
 
 
-class MockEmbeddingData:
-    def __init__(self, embedding=None):
-        self.embedding = embedding or [0.1, 0.2, 0.3]
-        self.index = 0
-        self.object = "embedding"
-
-
-class MockEmbeddingResponse:
-    def __init__(self, model="text-embedding-ada-002", data=None, usage=None):
-        self.model = model
-        self.data = data or [MockEmbeddingData()]
-        self.usage = usage or MockUsage(
-            prompt_tokens=5, completion_tokens=0, total_tokens=5
-        )
-        self.object = "list"
-
-    def model_dump(self):
-        return {
-            "model": self.model,
-            "data": [
-                {"embedding": d.embedding, "index": d.index, "object": d.object}
-                for d in self.data
-            ],
-            "usage": {
-                "prompt_tokens": self.usage.prompt_tokens,
-                "completion_tokens": self.usage.completion_tokens,
-                "total_tokens": self.usage.total_tokens,
-            },
-            "object": self.object,
-        }
-
-
 @pytest.mark.parametrize(
     "send_default_pii, include_prompts",
     [
@@ -311,7 +279,13 @@ def test_streaming_chat_completion(
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
 
 
-def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
+def test_embeddings_create(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    openai_embedding_model_response,
+    clear_litellm_cache,
+):
     """
     Test that litellm.embedding() calls are properly instrumented.
 
@@ -325,20 +299,24 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
     )
     events = capture_events()
 
-    mock_response = MockEmbeddingResponse()
+    client = OpenAI(api_key="z")
 
-    # Mock within the test to ensure proper ordering with cache clearing
-    with mock.patch(
-        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
-    ) as mock_http:
-        # The function returns (headers, response)
-        mock_http.return_value = ({}, mock_response)
+    model_response = get_model_response(
+        openai_embedding_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "True"},
+    )
 
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
         with start_transaction(name="litellm test"):
             response = litellm.embedding(
                 model="text-embedding-ada-002",
                 input="Hello, world!",
-                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+                client=client,
             )
             # Allow time for callbacks to complete (they may run in separate threads)
             time.sleep(0.1)
@@ -349,8 +327,13 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
         (event,) = events
 
         assert event["type"] == "transaction"
-        assert len(event["spans"]) == 1
-        (span,) = event["spans"]
+        spans = list(
+            x
+            for x in event["spans"]
+            if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
+        )
+        assert len(spans) == 1
+        span = spans[0]
 
         assert span["op"] == OP.GEN_AI_EMBEDDINGS
         assert span["description"] == "embeddings text-embedding-ada-002"
@@ -363,7 +346,11 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache):
 
 
 def test_embeddings_create_with_list_input(
-    sentry_init, capture_events, clear_litellm_cache
+    sentry_init,
+    capture_events,
+    get_model_response,
+    openai_embedding_model_response,
+    clear_litellm_cache,
 ):
     """Test embedding with list input."""
     sentry_init(
@@ -373,20 +360,24 @@ def test_embeddings_create_with_list_input(
     )
     events = capture_events()
 
-    mock_response = MockEmbeddingResponse()
+    client = OpenAI(api_key="z")
 
-    # Mock within the test to ensure proper ordering with cache clearing
-    with mock.patch(
-        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
-    ) as mock_http:
-        # The function returns (headers, response)
-        mock_http.return_value = ({}, mock_response)
+    model_response = get_model_response(
+        openai_embedding_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "True"},
+    )
 
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
         with start_transaction(name="litellm test"):
             response = litellm.embedding(
                 model="text-embedding-ada-002",
                 input=["First text", "Second text", "Third text"],
-                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+                client=client,
             )
             # Allow time for callbacks to complete (they may run in separate threads)
             time.sleep(0.1)
@@ -397,8 +388,13 @@ def test_embeddings_create_with_list_input(
         (event,) = events
 
         assert event["type"] == "transaction"
-        assert len(event["spans"]) == 1
-        (span,) = event["spans"]
+        spans = list(
+            x
+            for x in event["spans"]
+            if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
+        )
+        assert len(spans) == 1
+        span = spans[0]
 
         assert span["op"] == OP.GEN_AI_EMBEDDINGS
         assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
@@ -411,7 +407,13 @@ def test_embeddings_create_with_list_input(
         ]
 
 
-def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
+def test_embeddings_no_pii(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    openai_embedding_model_response,
+    clear_litellm_cache,
+):
     """Test that PII is not captured when disabled."""
     sentry_init(
         integrations=[LiteLLMIntegration(include_prompts=True)],
@@ -420,20 +422,24 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
     )
     events = capture_events()
 
-    mock_response = MockEmbeddingResponse()
+    client = OpenAI(api_key="z")
 
-    # Mock within the test to ensure proper ordering with cache clearing
-    with mock.patch(
-        "litellm.openai_chat_completions.make_sync_openai_embedding_request"
-    ) as mock_http:
-        # The function returns (headers, response)
-        mock_http.return_value = ({}, mock_response)
+    model_response = get_model_response(
+        openai_embedding_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "True"},
+    )
 
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
         with start_transaction(name="litellm test"):
             response = litellm.embedding(
                 model="text-embedding-ada-002",
                 input="Hello, world!",
-                api_key="test-key",  # Provide a fake API key to avoid authentication errors
+                client=client,
             )
             # Allow time for callbacks to complete (they may run in separate threads)
             time.sleep(0.1)
@@ -444,8 +450,13 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache):
         (event,) = events
 
         assert event["type"] == "transaction"
-        assert len(event["spans"]) == 1
-        (span,) = event["spans"]
+        spans = list(
+            x
+            for x in event["spans"]
+            if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
+        )
+        assert len(spans) == 1
+        span = spans[0]
 
         assert span["op"] == OP.GEN_AI_EMBEDDINGS
         # Check that embeddings input is NOT captured when PII is disabled

From ad16c7f380a3c2fa99c0f4762c270c2aee340e35 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 13:58:11 +0200
Subject: [PATCH 2/5] test(litellm): Replace mocks with httpx types in
 rate-limit test

---
 tests/conftest.py                          | 22 +++++++++++++++
 tests/integrations/litellm/test_litellm.py | 33 +++++++++++++---------
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 796cfaf310..b8327622f4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1080,6 +1080,28 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
     return inner
 
 
+@pytest.fixture
+def get_rate_limit_model_response():
+    def inner(request_headers=None):
+        if request_headers is None:
+            request_headers = {}
+
+        model_request = HttpxRequest(
+            "POST",
+            "/responses",
+            headers=request_headers,
+        )
+
+        response = HttpxResponse(
+            429,
+            request=model_request,
+        )
+
+        return response
+
+    return inner
+
+
 @pytest.fixture
 def streaming_chat_completions_model_response():
     return [
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 1a94dd4d81..f98eb7c6d3 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -463,7 +463,9 @@ def test_embeddings_no_pii(
         assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
 
 
-def test_exception_handling(sentry_init, capture_events):
+def test_exception_handling(
+    reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
+):
     sentry_init(
         integrations=[LiteLLMIntegration()],
         traces_sample_rate=1.0,
@@ -472,19 +474,24 @@ def test_exception_handling(sentry_init, capture_events):
 
     messages = [{"role": "user", "content": "Hello!"}]
 
-    with start_transaction(name="litellm test"):
-        kwargs = {
-            "model": "gpt-3.5-turbo",
-            "messages": messages,
-        }
+    client = OpenAI(api_key="z")
 
-        _input_callback(kwargs)
-        _failure_callback(
-            kwargs,
-            Exception("API rate limit reached"),
-            datetime.now(),
-            datetime.now(),
-        )
+    model_response = get_rate_limit_model_response()
+
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            with pytest.raises(litellm.RateLimitError):
+                litellm.completion(
+                    model="gpt-3.5-turbo",
+                    messages=messages,
+                    client=client,
+                )
+
+            litellm_utils.executor.shutdown(wait=True)
 
     # Should have error event and transaction
     assert len(events) >= 1

From 598d6b5baba4f8890a6e11b2623616553823b39d Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 15:09:21 +0200
Subject: [PATCH 3/5] test(litellm): Undo merge of rate-limit test changes

---
 tests/integrations/litellm/test_litellm.py | 33 +++++++++-------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index d3bffa0eef..0196ff413f 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -465,9 +465,7 @@ def test_embeddings_no_pii(
         assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
 
 
-def test_exception_handling(
-    reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
-):
+def test_exception_handling(sentry_init, capture_events):
     sentry_init(
         integrations=[LiteLLMIntegration()],
         traces_sample_rate=1.0,
@@ -476,24 +474,19 @@ def test_exception_handling(
 
     messages = [{"role": "user", "content": "Hello!"}]
 
-    client = OpenAI(api_key="z")
-
-    model_response = get_rate_limit_model_response()
-
-    with mock.patch.object(
-        client.embeddings._client._client,
-        "send",
-        return_value=model_response,
-    ):
-        with start_transaction(name="litellm test"):
-            with pytest.raises(litellm.RateLimitError):
-                litellm.completion(
-                    model="gpt-3.5-turbo",
-                    messages=messages,
-                    client=client,
-                )
+    with start_transaction(name="litellm test"):
+        kwargs = {
+            "model": "gpt-3.5-turbo",
+            "messages": messages,
+        }
 
-            litellm_utils.executor.shutdown(wait=True)
+        _input_callback(kwargs)
+        _failure_callback(
+            kwargs,
+            Exception("API rate limit reached"),
+            datetime.now(),
+            datetime.now(),
+        )
 
     # Should have error event and transaction
     assert len(events) >= 1

From a8689cdca7f0d5444345e5042955b3933d353eb3 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 15:13:43 +0200
Subject: [PATCH 4/5] test: Remove get_rate_limit_model_response fixture

---
 tests/conftest.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index b8327622f4..796cfaf310 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1080,28 +1080,6 @@ def inner(response_content, serialize_pydantic=False, request_headers=None):
     return inner
 
 
-@pytest.fixture
-def get_rate_limit_model_response():
-    def inner(request_headers=None):
-        if request_headers is None:
-            request_headers = {}
-
-        model_request = HttpxRequest(
-            "POST",
-            "/responses",
-            headers=request_headers,
-        )
-
-        response = HttpxResponse(
-            429,
-            request=model_request,
-        )
-
-        return response
-
-    return inner
-
-
 @pytest.fixture
 def streaming_chat_completions_model_response():
     return [

From a2b35856bf8f0fa3cd71042d9c9bdfb936a3d653 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 10 Apr 2026 16:48:59 +0200
Subject: [PATCH 5/5] make request headers consistent

---
 tests/integrations/litellm/test_litellm.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 0196ff413f..40a7dd00c4 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -306,7 +306,7 @@ def test_embeddings_create(
     model_response = get_model_response(
         openai_embedding_model_response,
         serialize_pydantic=True,
-        request_headers={"X-Stainless-Raw-Response": "True"},
+        request_headers={"X-Stainless-Raw-Response": "true"},
     )
 
     with mock.patch.object(
@@ -367,7 +367,7 @@ def test_embeddings_create_with_list_input(
     model_response = get_model_response(
         openai_embedding_model_response,
         serialize_pydantic=True,
-        request_headers={"X-Stainless-Raw-Response": "True"},
+        request_headers={"X-Stainless-Raw-Response": "true"},
     )
 
     with mock.patch.object(
@@ -429,7 +429,7 @@ def test_embeddings_no_pii(
     model_response = get_model_response(
         openai_embedding_model_response,
         serialize_pydantic=True,
-        request_headers={"X-Stainless-Raw-Response": "True"},
+        request_headers={"X-Stainless-Raw-Response": "true"},
     )
 
     with mock.patch.object(