Add unit tests for AnthropicTelegramInferenceBot
This commit is contained in:
@@ -0,0 +1,280 @@
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch, AsyncMock, ANY
|
||||
import os
|
||||
|
||||
# Assuming anthropic_telegram_inference_bot.py is in the parent directory or PYTHONPATH is set
|
||||
from anthropic_telegram_inference_bot import AnthropicTelegramInferenceBot
|
||||
|
||||
# Mock response from Anthropic client's messages.create
|
||||
def create_mock_anthropic_response(content_text=None, stop_reason="end_turn", tool_use_parts=None):
    """Build a ``MagicMock`` shaped like the result of ``messages.create``.

    Args:
        content_text: Text for a leading ``"text"`` content block. No text
            block is added when this is ``None`` or empty.
        stop_reason: Value exposed as ``response.stop_reason``.
        tool_use_parts: Optional iterable of dicts with ``"id"``, ``"name"``
            and ``"input"`` keys, e.g.
            ``{"id": "toolu_123", "name": "get_weather", "input": {}}``.
            One ``"tool_use"`` block is appended per entry, in order.

    Returns:
        A mock whose ``content`` is the list of blocks (text first, then
        tool-use blocks) and whose ``stop_reason`` is ``stop_reason``.
    """
    content_blocks = []

    if content_text:
        content_blocks.append(MagicMock(type="text", text=content_text))

    for part in tool_use_parts or ():
        tool_block = MagicMock(type="tool_use", id=part["id"], input=part["input"])
        # ``name`` is a reserved MagicMock constructor argument (it names the
        # mock itself), so it has to be assigned as an attribute afterwards.
        tool_block.name = part["name"]
        content_blocks.append(tool_block)

    mock_response = MagicMock()
    mock_response.stop_reason = stop_reason
    mock_response.content = content_blocks
    return mock_response
|
||||
|
||||
class TestAnthropicTelegramInferenceBot(unittest.IsolatedAsyncioTestCase):
    """Unit tests for ``AnthropicTelegramInferenceBot`` against a mocked Anthropic client."""

    # Environment variables removed before every test so that ambient
    # configuration on the developer/CI machine cannot influence the results.
    _ENV_KEYS = (
        "ANTHROPIC_API_KEY",
        "ANTHROPIC_SMALL_MODEL",
        "ANTHROPIC_LARGE_MODEL",
        "SYSTEM_PROMPT_PATH",
    )

    def setUp(self):
        """Isolate ``os.environ`` and create a fresh mocked Anthropic client."""
        # patch.dict snapshots the whole environment and restores it on stop,
        # which also removes any variable a test added (e.g. ANTHROPIC_API_KEY).
        env_patcher = patch.dict(os.environ)
        env_patcher.start()
        self.addCleanup(env_patcher.stop)
        for key in self._ENV_KEYS:
            os.environ.pop(key, None)

        self.mock_anthropic_client_instance = MagicMock()
        self.mock_anthropic_client_instance.messages.create = MagicMock()

    @patch('anthropic.Anthropic')
    def test_init_with_anthropic_defaults_env_key(self, mock_anthropic_cls):
        """Without an injected client, one is built from ``ANTHROPIC_API_KEY``."""
        mock_anthropic_cls.return_value = self.mock_anthropic_client_instance
        os.environ["ANTHROPIC_API_KEY"] = "test_anthropic_key"

        bot = AnthropicTelegramInferenceBot()

        mock_anthropic_cls.assert_called_once_with(api_key="test_anthropic_key")
        self.assertEqual(bot.anthropic_client, self.mock_anthropic_client_instance)
        self.assertEqual(
            bot.model,
            os.environ.get("ANTHROPIC_SMALL_MODEL", "claude-3-haiku-20240307"),
        )
        self.assertEqual(
            bot.max_tokens,
            int(os.environ.get("ANTHROPIC_SMALL_MODEL_MAX_TOKENS", 2000)),
        )

    @patch('anthropic.Anthropic')
    def test_init_with_provided_client_and_models(self, mock_anthropic_cls):
        """An injected client and explicit model settings are used verbatim."""
        preconfigured_client = MagicMock()

        bot = AnthropicTelegramInferenceBot(
            anthropic_client=preconfigured_client,
            small_model_name="custom-small",
            small_model_max_tokens=100,
            large_model_name="custom-large",
            large_model_max_tokens=200,
        )

        mock_anthropic_cls.assert_not_called()
        self.assertEqual(bot.anthropic_client, preconfigured_client)
        self.assertEqual(bot.model, "custom-small")
        self.assertEqual(bot.max_tokens, 100)
        self.assertEqual(bot.small_model_name, "custom-small")
        self.assertEqual(bot.large_model_name, "custom-large")

    def test_get_llm_description(self):
        """The description reports the active model name and token limit."""
        # Inject the mock client so the test never constructs a real one.
        bot = AnthropicTelegramInferenceBot(
            anthropic_client=self.mock_anthropic_client_instance,
            small_model_name="claude-test",
            small_model_max_tokens=500,
        )

        self.assertEqual(bot.get_llm_description(), "LLM: claude-test, Max Tokens: 500")

    async def test_switch_model(self):
        """``switch_model`` toggles between the small and large model settings."""
        # Inject the mock client so the test never constructs a real one.
        bot = AnthropicTelegramInferenceBot(
            anthropic_client=self.mock_anthropic_client_instance,
            small_model_name="claude-small",
            small_model_max_tokens=10,
            large_model_name="claude-large",
            large_model_max_tokens=20,
        )
        self.assertEqual(bot.model, "claude-small")
        self.assertEqual(bot.max_tokens, 10)

        status = await bot.switch_model()
        self.assertEqual(bot.model, "claude-large")
        self.assertEqual(bot.max_tokens, 20)
        self.assertEqual(status, "Switched to model: claude-large")

        status = await bot.switch_model()
        self.assertEqual(bot.model, "claude-small")
        self.assertEqual(bot.max_tokens, 10)
        self.assertEqual(status, "Switched to model: claude-small")

    def test_get_chat_response_success_text_only(self):
        """With no tools, the API is called with ``tools=None`` and ``tool_choice=None``."""
        bot = AnthropicTelegramInferenceBot(anthropic_client=self.mock_anthropic_client_instance)
        bot.model = "test-claude"
        bot.max_tokens = 150

        mock_api_response = create_mock_anthropic_response(content_text="Hello from Anthropic API")
        self.mock_anthropic_client_instance.messages.create.return_value = mock_api_response

        messages = [{"role": "user", "content": "Hi"}]  # Anthropic format
        response = bot.get_chat_response(messages, [])  # empty tool list

        self.mock_anthropic_client_instance.messages.create.assert_called_once_with(
            model="test-claude",
            max_tokens=150,
            messages=messages,
            system=bot.system_prompt,  # system prompt travels as its own kwarg
            tools=None,  # an empty tool list must not be forwarded to the API
            tool_choice=None,
        )
        self.assertEqual(response, mock_api_response)

    def test_get_chat_response_with_tools(self):
        """Tool specs are forwarded together with ``tool_choice={"type": "auto"}``."""
        bot = AnthropicTelegramInferenceBot(anthropic_client=self.mock_anthropic_client_instance)
        bot.model = "claude-toolmaster"
        bot.max_tokens = 300

        mock_tools_spec = [
            {
                "name": "get_weather",
                "description": "Gets weather",
                "input_schema": {"type": "object", "properties": {}},
            }
        ]
        mock_api_response = create_mock_anthropic_response(
            content_text="Thinking...",
            tool_use_parts=[
                {"id": "tool1", "name": "get_weather", "input": {"location": "here"}}
            ],
        )
        self.mock_anthropic_client_instance.messages.create.return_value = mock_api_response

        messages = [{"role": "user", "content": "Weather?"}]
        response = bot.get_chat_response(messages, mock_tools_spec)

        self.mock_anthropic_client_instance.messages.create.assert_called_once_with(
            model="claude-toolmaster",
            max_tokens=300,
            messages=messages,
            system=bot.system_prompt,
            tools=mock_tools_spec,
            tool_choice={"type": "auto"},
        )
        self.assertEqual(response.content[0].type, "text")  # a text block may precede tool use
        self.assertEqual(response.content[1].type, "tool_use")

    def test_get_chat_response_api_error(self):
        """Exceptions raised by the client propagate out of ``get_chat_response``."""
        bot = AnthropicTelegramInferenceBot(anthropic_client=self.mock_anthropic_client_instance)
        self.mock_anthropic_client_instance.messages.create.side_effect = Exception("Anthropic API Down")

        with self.assertRaisesRegex(Exception, "Anthropic API Down"):
            bot.get_chat_response([{"role": "user", "content": "trigger"}], [])

    async def test_handle_message_simple_response_no_tools(self):
        """A plain text reply is returned and recorded as a user/assistant pair."""
        bot = AnthropicTelegramInferenceBot(anthropic_client=self.mock_anthropic_client_instance)
        bot.system_prompt = "System prompt for Anthropic"

        # Only the Anthropic client is mocked; handle_message and
        # get_chat_response run for real against this canned API reply.
        api_response = create_mock_anthropic_response(content_text="Anthropic says hello.")
        self.mock_anthropic_client_instance.messages.create.return_value = api_response

        # No tools registered, so no tool-handling logic should be triggered.
        bot.functions = []
        bot.tools = []

        response_content = await bot.handle_message(user_id=101, user_message="Hello Anthropic")

        self.assertEqual(response_content, "Anthropic says hello.")
        self.assertIn(101, bot.conversation_history)

        # Expected history for a first turn: the user message followed by the
        # assistant's text reply. The system prompt is not stored as a history
        # entry; it is checked below as the separate `system` kwarg of the API call.
        history = bot.conversation_history[101]
        self.assertEqual(len(history), 2)  # user, assistant
        self.assertEqual(history[0]["role"], "user")
        self.assertEqual(history[0]["content"], "Hello Anthropic")
        self.assertEqual(history[1]["role"], "assistant")
        self.assertEqual(history[1]["content"], "Anthropic says hello.")

        # Check the single API call made through the mocked client.
        self.mock_anthropic_client_instance.messages.create.assert_called_once()
        call_args = self.mock_anthropic_client_instance.messages.create.call_args
        self.assertEqual(call_args.kwargs["system"], "System prompt for Anthropic")
        # On the first turn the API should receive just the user message.
        self.assertEqual(
            call_args.kwargs["messages"],
            [{"role": "user", "content": "Hello Anthropic"}],
        )

    async def test_handle_message_with_tool_calls(self):
        """A tool-use reply triggers ``call_tool`` and a follow-up API call."""
        bot = AnthropicTelegramInferenceBot(anthropic_client=self.mock_anthropic_client_instance)
        bot.system_prompt = "You are a helpful, tool-using assistant."

        # Tool definition in OpenAI function format, as stored on the bot.
        mock_tool_oai_format = {
            "type": "function",
            "function": {"name": "get_weather", "description": "Get weather", "parameters": {}},
        }
        bot.functions = [mock_tool_oai_format]

        # First API reply requests a tool call; the second is the final text.
        tool_use_part = {"id": "toolu_xyz", "name": "get_weather", "input": {"location": "paris"}}
        api_response_1 = create_mock_anthropic_response(tool_use_parts=[tool_use_part])
        api_response_2 = create_mock_anthropic_response(content_text="The weather in Paris is nice.")
        self.mock_anthropic_client_instance.messages.create.side_effect = [api_response_1, api_response_2]

        # Stub out tool execution.
        # NOTE(review): if call_tool is a coroutine on the bot, this should be
        # an AsyncMock instead — confirm against the base class.
        tool_result = '''{"weather": "sunny"}'''
        bot.call_tool = MagicMock(return_value=tool_result)

        user_id = 102
        user_message = "What's the weather in Paris?"
        final_text_response = await bot.handle_message(user_id, user_message)

        self.assertEqual(final_text_response, "The weather in Paris is nice.")
        self.assertEqual(self.mock_anthropic_client_instance.messages.create.call_count, 2)

        # The tool input is expected to be passed through as a dict.
        bot.call_tool.assert_called_once_with("get_weather", {"location": "paris"})

        history = bot.conversation_history[user_id]
        self.assertEqual(history[0]["role"], "user")
        self.assertEqual(history[0]["content"], user_message)

        # Assistant turn that requested the tool: content is a list of blocks.
        self.assertEqual(history[1]["role"], "assistant")
        self.assertIsInstance(history[1]["content"], list)
        self.assertEqual(history[1]["content"][0]["type"], "tool_use")
        self.assertEqual(history[1]["content"][0]["id"], "toolu_xyz")

        # Tool result entry, linked back to the request by its id.
        self.assertEqual(history[2]["role"], "tool")
        self.assertEqual(history[2]["tool_call_id"], "toolu_xyz")
        self.assertEqual(history[2]["name"], "get_weather")
        self.assertEqual(history[2]["content"], tool_result)

        # Final assistant turn is plain text.
        self.assertEqual(history[3]["role"], "assistant")
        self.assertIsInstance(history[3]["content"], str)
        self.assertEqual(history[3]["content"], "The weather in Paris is nice.")
||||
|
||||
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    unittest.main()
|
||||
Reference in New Issue
Block a user