From c5b92c8b59faba864d5cb1d2bd3ea9e2d1a45898 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 13:39:39 +0200 Subject: [PATCH 1/5] test(litellm): Remove mocks with httpx types in embedding tests --- tests/conftest.py | 19 +++ tests/integrations/litellm/test_litellm.py | 141 +++++++++++---------- 2 files changed, 95 insertions(+), 65 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index effe758091..796cfaf310 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1218,6 +1218,25 @@ def nonstreaming_chat_completions_model_response(): ) +@pytest.fixture +def openai_embedding_model_response(): + return openai.types.CreateEmbeddingResponse( + data=[ + openai.types.Embedding( + embedding=[0.1, 0.2, 0.3], + index=0, + object="embedding", + ) + ], + model="text-embedding-ada-002", + object="list", + usage=openai.types.create_embedding_response.Usage( + prompt_tokens=5, + total_tokens=5, + ), + ) + + @pytest.fixture def nonstreaming_responses_model_response(): return openai.types.responses.Response( diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 6c7184de71..1a94dd4d81 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -126,38 +126,6 @@ def __init__( self.created = 1234567890 -class MockEmbeddingData: - def __init__(self, embedding=None): - self.embedding = embedding or [0.1, 0.2, 0.3] - self.index = 0 - self.object = "embedding" - - -class MockEmbeddingResponse: - def __init__(self, model="text-embedding-ada-002", data=None, usage=None): - self.model = model - self.data = data or [MockEmbeddingData()] - self.usage = usage or MockUsage( - prompt_tokens=5, completion_tokens=0, total_tokens=5 - ) - self.object = "list" - - def model_dump(self): - return { - "model": self.model, - "data": [ - {"embedding": d.embedding, "index": d.index, "object": d.object} - for d in self.data - ], - "usage": { - "prompt_tokens": self.usage.prompt_tokens, - "completion_tokens": self.usage.completion_tokens, - "total_tokens": self.usage.total_tokens, - }, - "object": self.object, - } - - @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -311,7 +279,13 @@ def test_streaming_chat_completion( assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True -def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache): +def test_embeddings_create( + sentry_init, + capture_events, + get_model_response, + openai_embedding_model_response, + clear_litellm_cache, +): """ Test that litellm.embedding() calls are properly instrumented. @@ -325,20 +299,24 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache): ) events = capture_events() - mock_response = MockEmbeddingResponse() + client = OpenAI(api_key="z") - # Mock within the test to ensure proper ordering with cache clearing - with mock.patch( - "litellm.openai_chat_completions.make_sync_openai_embedding_request" - ) as mock_http: - # The function returns (headers, response) - mock_http.return_value = ({}, mock_response) + model_response = get_model_response( + openai_embedding_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): with start_transaction(name="litellm test"): response = litellm.embedding( model="text-embedding-ada-002", input="Hello, world!", - api_key="test-key", # Provide a fake API key to avoid authentication errors + client=client, ) # Allow time for callbacks to complete (they may run in separate threads) time.sleep(0.1) @@ -349,8 +327,13 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache): (event,) = events assert event["type"] == "transaction" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] assert span["op"] == OP.GEN_AI_EMBEDDINGS assert span["description"] == "embeddings text-embedding-ada-002" @@ -363,7 +346,11 @@ def test_embeddings_create(sentry_init, capture_events, clear_litellm_cache): def test_embeddings_create_with_list_input( - sentry_init, capture_events, clear_litellm_cache + sentry_init, + capture_events, + get_model_response, + openai_embedding_model_response, + clear_litellm_cache, ): """Test embedding with list input.""" sentry_init( @@ -373,20 +360,24 @@ def test_embeddings_create_with_list_input( ) events = capture_events() - mock_response = MockEmbeddingResponse() + client = OpenAI(api_key="z") - # Mock within the test to ensure proper ordering with cache clearing - with mock.patch( - "litellm.openai_chat_completions.make_sync_openai_embedding_request" - ) as mock_http: - # The function returns (headers, response) - mock_http.return_value = ({}, mock_response) + model_response = get_model_response( + openai_embedding_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): with start_transaction(name="litellm test"): response = litellm.embedding( model="text-embedding-ada-002", input=["First text", "Second text", "Third text"], - api_key="test-key", # Provide a fake API key to avoid authentication errors + client=client, ) # Allow time for callbacks to complete (they may run in separate threads) time.sleep(0.1) @@ -397,8 +388,13 @@ def test_embeddings_create_with_list_input( (event,) = events assert event["type"] == "transaction" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] assert span["op"] == OP.GEN_AI_EMBEDDINGS assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" @@ -411,7 +407,13 @@ def test_embeddings_create_with_list_input( ] -def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache): +def test_embeddings_no_pii( + sentry_init, + capture_events, + get_model_response, + openai_embedding_model_response, + clear_litellm_cache, +): """Test that PII is not captured when disabled.""" sentry_init( integrations=[LiteLLMIntegration(include_prompts=True)], @@ -420,20 +422,24 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache): ) events = capture_events() - mock_response = MockEmbeddingResponse() + client = OpenAI(api_key="z") - # Mock within the test to ensure proper ordering with cache clearing - with mock.patch( - "litellm.openai_chat_completions.make_sync_openai_embedding_request" - ) as mock_http: - # The function returns (headers, response) - mock_http.return_value = ({}, mock_response) + model_response = get_model_response( + openai_embedding_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): with start_transaction(name="litellm test"): response = litellm.embedding( model="text-embedding-ada-002", input="Hello, world!", - api_key="test-key", # Provide a fake API key to avoid authentication errors + client=client, ) # Allow time for callbacks to complete (they may run in separate threads) time.sleep(0.1) @@ -444,8 +450,13 @@ def test_embeddings_no_pii(sentry_init, capture_events, clear_litellm_cache): (event,) = events assert event["type"] == "transaction" - assert len(event["spans"]) == 1 - (span,) = event["spans"] + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] assert span["op"] == OP.GEN_AI_EMBEDDINGS # Check that embeddings input is NOT captured when PII is disabled From ad16c7f380a3c2fa99c0f4762c270c2aee340e35 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 13:58:11 +0200 Subject: [PATCH 2/5] test(litellm): Replace mocks with httpx types in rate-limit test --- tests/conftest.py | 22 +++++++++++++++ tests/integrations/litellm/test_litellm.py | 33 +++++++++++++--------- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 796cfaf310..b8327622f4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1080,6 +1080,28 @@ def inner(response_content, serialize_pydantic=False, request_headers=None): return inner +@pytest.fixture +def get_rate_limit_model_response(): + def inner(request_headers=None): + if request_headers is None: + request_headers = {} + + model_request = HttpxRequest( + "POST", + "/responses", + headers=request_headers, + ) + + response = HttpxResponse( + 429, + request=model_request, + ) + + return response + + return inner + + @pytest.fixture def streaming_chat_completions_model_response(): return [ diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 1a94dd4d81..f98eb7c6d3 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -463,7 +463,9 @@ def test_embeddings_no_pii( assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] -def test_exception_handling(sentry_init, capture_events): +def test_exception_handling( + reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response +): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, @@ -472,19 +474,24 @@ def test_exception_handling(sentry_init, capture_events): messages = [{"role": "user", "content": "Hello!"}] - with start_transaction(name="litellm test"): - kwargs = { - "model": "gpt-3.5-turbo", - "messages": messages, - } + client = OpenAI(api_key="z") - _input_callback(kwargs) - _failure_callback( - kwargs, - Exception("API rate limit reached"), - datetime.now(), - datetime.now(), - ) + model_response = get_rate_limit_model_response() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + with pytest.raises(litellm.RateLimitError): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + litellm_utils.executor.shutdown(wait=True) # Should have error event and transaction assert len(events) >= 1 From 598d6b5baba4f8890a6e11b2623616553823b39d Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 15:09:21 +0200 Subject: [PATCH 3/5] undo merge --- tests/integrations/litellm/test_litellm.py | 33 +++++++++------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index d3bffa0eef..0196ff413f 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -465,9 +465,7 @@ def test_embeddings_no_pii( assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] -def test_exception_handling( - reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response -): +def test_exception_handling(sentry_init, capture_events): sentry_init( integrations=[LiteLLMIntegration()], traces_sample_rate=1.0, @@ -476,24 +474,19 @@ def test_exception_handling( messages = [{"role": "user", "content": "Hello!"}] - client = OpenAI(api_key="z") - - model_response = get_rate_limit_model_response() - - with mock.patch.object( - client.embeddings._client._client, - "send", - return_value=model_response, - ): - with start_transaction(name="litellm test"): - with pytest.raises(litellm.RateLimitError): - litellm.completion( - model="gpt-3.5-turbo", - messages=messages, - client=client, - ) + with start_transaction(name="litellm test"): + kwargs = { + "model": "gpt-3.5-turbo", + "messages": messages, + } - litellm_utils.executor.shutdown(wait=True) + _input_callback(kwargs) + _failure_callback( + kwargs, + Exception("API rate limit reached"), + datetime.now(), + datetime.now(), + ) # Should have error event and transaction assert len(events) >= 1 From a8689cdca7f0d5444345e5042955b3933d353eb3 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 15:13:43 +0200 Subject: [PATCH 4/5] remove fixture --- tests/conftest.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b8327622f4..796cfaf310 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1080,28 +1080,6 @@ def inner(response_content, serialize_pydantic=False, request_headers=None): return inner -@pytest.fixture -def get_rate_limit_model_response(): - def inner(request_headers=None): - if request_headers is None: - request_headers = {} - - model_request = HttpxRequest( - "POST", - "/responses", - headers=request_headers, - ) - - response = HttpxResponse( - 429, - request=model_request, - ) - - return response - - return inner - - @pytest.fixture def streaming_chat_completions_model_response(): return [ From a2b35856bf8f0fa3cd71042d9c9bdfb936a3d653 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Fri, 10 Apr 2026 16:48:59 +0200 Subject: [PATCH 5/5] make request headers consistent --- tests/integrations/litellm/test_litellm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 0196ff413f..40a7dd00c4 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -306,7 +306,7 @@ def test_embeddings_create( model_response = get_model_response( openai_embedding_model_response, serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "True"}, + request_headers={"X-Stainless-Raw-Response": "true"}, ) with mock.patch.object( @@ -367,7 +367,7 @@ def test_embeddings_create_with_list_input( model_response = get_model_response( openai_embedding_model_response, serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "True"}, + request_headers={"X-Stainless-Raw-Response": "true"}, ) with mock.patch.object( @@ -429,7 +429,7 @@ def test_embeddings_no_pii( model_response = get_model_response( openai_embedding_model_response, serialize_pydantic=True, - request_headers={"X-Stainless-Raw-Response": "True"}, + request_headers={"X-Stainless-Raw-Response": "true"}, ) with mock.patch.object(