"""Discord bot that forwards user messages to OpenAI chat models.

Per-user conversation history is kept in memory and trimmed to the most
recent ``MAX_HISTORY_MESSAGES`` entries.  Tool classes found in the
``tools`` package (subclasses of ``BaseTool``) are instantiated at import
time and their function definitions are offered to the model through the
legacy ``functions`` / ``function_call`` chat-completions parameters.
"""
import base64
import importlib
import inspect
import json
import os
import tempfile

import discord
from discord.ext import commands
from dotenv import load_dotenv
from openai import OpenAI

from tools.base_tool import BaseTool

# Load environment variables
load_dotenv()

client = OpenAI()

GPT_4O = "gpt-4o"
GPT_4O_MINI = "gpt-4o-mini"

# Number of history entries kept per user after each exchange.
MAX_HISTORY_MESSAGES = 10
# Upper bound on consecutive tool invocations while answering one message.
MAX_TOOL_CALLS = 10
# Discord rejects messages whose content exceeds this many characters.
DISCORD_MESSAGE_LIMIT = 2000
# Sent when the model returns no text content (e.g. the tool-call cap was hit).
EMPTY_REPLY_FALLBACK = "Sorry, I couldn't come up with a reply."

# Load system prompt
with open("prompts/developer_prompt.txt", "r", encoding="utf-8") as file:
    system_prompt = file.read().strip()

# Set up Discord bot
DISCORD_BOT_TOKEN = os.getenv('DISCORD_BOT_TOKEN')

# Dictionary to store conversation history for each user
conversation_history = {}

# Dictionary to store the last image file for each user
# NOTE(review): nothing in this file ever adds an entry to user_images, so
# the image branch in on_message is only reachable if other code fills it.
user_images = {}

# Load tools: instantiate every BaseTool subclass visible in tools/*.py
tools = []
tools_dir = os.path.join(os.path.dirname(__file__), 'tools')
for filename in sorted(os.listdir(tools_dir)):
    if not filename.endswith('.py') or filename in ('__init__.py', 'base_tool.py'):
        continue
    module = importlib.import_module(f'tools.{filename[:-3]}')
    for _, obj in inspect.getmembers(module, inspect.isclass):
        if issubclass(obj, BaseTool) and obj is not BaseTool:
            tools.append(obj())

# Collect all function definitions
functions = []
for tool in tools:
    functions.extend(tool.get_functions())

# discord.py 2.x requires explicit intents; reading message text needs the
# privileged message-content intent, which must also be enabled for the bot
# in the Discord developer portal.
intents = discord.Intents.default()
intents.message_content = True
bot = commands.Bot(command_prefix='!', intents=intents)


def _trim_history(user_id):
    """Keep only the last MAX_HISTORY_MESSAGES history entries for user_id."""
    history = conversation_history[user_id]
    if len(history) > MAX_HISTORY_MESSAGES:
        conversation_history[user_id] = history[-MAX_HISTORY_MESSAGES:]


def _discard_image(user_id):
    """Forget the stored image for user_id and delete its file if present."""
    image_path = user_images.pop(user_id, None)
    if image_path is None:
        return
    try:
        os.remove(image_path)
    except FileNotFoundError:
        # The file is already gone; there is nothing left to clean up.
        pass


async def _send_reply(channel, text):
    """Send text to channel, split into chunks that fit Discord's limit."""
    text = text or EMPTY_REPLY_FALLBACK
    for start in range(0, len(text), DISCORD_MESSAGE_LIMIT):
        await channel.send(text[start:start + DISCORD_MESSAGE_LIMIT])


@bot.event
async def on_ready():
    """Log the account the bot is connected as."""
    print(f'Bot is ready and logged in as {bot.user}')


@bot.command(name='start')
async def start(ctx):
    """Greet the user (``!start``)."""
    await ctx.send("Hello! I'm your AI assistant. How can I help you today? You can send me images and then ask questions about them.")


@bot.command(name='clear')
async def clear(ctx):
    """Drop the caller's conversation history and stored image (``!clear``)."""
    user_id = ctx.author.id
    conversation_history.pop(user_id, None)
    _discard_image(user_id)
    await ctx.send("Conversation history and image cleared. Let's start fresh!")


@bot.event
async def on_message(message):
    """Answer a user message with a model reply.

    Valid command invocations are dispatched to their command and are not
    forwarded to the model.  Everything else is appended to the author's
    history and sent to OpenAI; tool calls requested by the model are
    executed (at most MAX_TOOL_CALLS times) before the final reply is sent.
    """
    if message.author == bot.user:
        return

    # on_message overrides the default handler, so commands must be
    # dispatched here explicitly.  A message that is a valid command is
    # handled by that command only and never reaches the model.
    ctx = await bot.get_context(message)
    if ctx.valid:
        await bot.invoke(ctx)
        return

    user_id = message.author.id
    user_message = message.content

    # Initialize conversation history for new users, then record the message
    history = conversation_history.setdefault(user_id, [])
    history.append({"role": "user", "content": user_message})

    # Prepare messages for OpenAI API
    messages = [{"role": "system", "content": system_prompt}] + history

    # NOTE(review): the OpenAI calls below are synchronous and block the
    # event loop while they run.
    if user_id in user_images:
        # Read the image fully before calling the API so the file handle is
        # closed by the time the file is deleted.
        with open(user_images[user_id], "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
        response = client.chat.completions.create(
            model=GPT_4O_MINI,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_message},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{encoded_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=16384,
        )
        # Remove the temporary image file
        _discard_image(user_id)
    else:
        # Call OpenAI API for inference (text-only)
        response = get_chat_response(client, messages, 4096, GPT_4O)

    # Extract the assistant's reply, running requested tools first
    assistant_message = response.choices[0].message
    tool_use_count = 0
    while getattr(assistant_message, 'function_call', None) and tool_use_count < MAX_TOOL_CALLS:
        function_call = assistant_message.function_call
        tool_use_count += 1
        tool_response = call_tool(function_call, messages)
        function_message = {
            "role": "function",
            "name": function_call.name,
            "content": json.dumps(tool_response),
        }
        conversation_history[user_id].append(dict(function_message))
        # Echo the assistant's call before its result so the follow-up
        # request contains the full call/result pair.
        messages.append({
            "role": "assistant",
            "content": assistant_message.content,
            "function_call": {
                "name": function_call.name,
                "arguments": function_call.arguments,
            },
        })
        messages.append(function_message)
        # Call API again to get the next (possibly final) response
        assistant_message = get_chat_response(client, messages, 4096, GPT_4O).choices[0].message

    # content is None when the model answered with a (capped) tool call
    assistant_reply = assistant_message.content or EMPTY_REPLY_FALLBACK

    # Add assistant's reply to conversation history
    conversation_history[user_id].append({"role": "assistant", "content": assistant_reply})

    # Trim conversation history if it gets too long
    _trim_history(user_id)

    # Send the reply back to the user
    await _send_reply(message.channel, assistant_reply)


@bot.event
async def on_message_edit(before, after):
    """Re-run the message handler when a message's text was edited."""
    # Edit events also fire for changes that leave the text untouched;
    # those must not trigger another model call.
    if before.content == after.content:
        return
    await on_message(after)


@bot.event
async def on_reaction_add(reaction, user):
    """Treat a reaction to one of the bot's messages as user input."""
    if reaction.message.author != bot.user or user == bot.user:
        return

    user_id = user.id
    # The reacting user may never have written to the bot before.
    history = conversation_history.setdefault(user_id, [])

    # Save the reaction as an interaction
    history.append({"role": "user", "content": f"{user.name} reacted with {reaction.emoji}"})
    messages = [{"role": "system", "content": system_prompt}] + history

    # Call OpenAI API for inference
    response = get_chat_response(client, messages, 4096, GPT_4O)
    assistant_reply = response.choices[0].message.content or EMPTY_REPLY_FALLBACK
    conversation_history[user_id].append({"role": "assistant", "content": assistant_reply})

    # Trim conversation history if it gets too long
    _trim_history(user_id)

    # Send the reply back to the user
    await _send_reply(reaction.message.channel, f"{user.name}, {assistant_reply}")


def call_tool(function_call, messages):
    """Execute the tool function requested by the model.

    Args:
        function_call: Object with ``name`` and ``arguments`` attributes;
            ``arguments`` is parsed as a JSON object of keyword arguments.
        messages: Current request messages (unused; kept for callers).

    Returns:
        Whatever the matching tool's ``execute`` returns, or a dict with an
        ``"error"`` key when the arguments are not a JSON object or no
        loaded tool provides the function.
    """
    function_name = function_call.name
    # The arguments are model-generated text: parse them as JSON rather
    # than evaluating them as Python code.
    try:
        function_args = json.loads(function_call.arguments or "{}")
    except json.JSONDecodeError as exc:
        return {"error": f"Invalid JSON arguments for {function_name}: {exc}"}
    if not isinstance(function_args, dict):
        return {"error": f"Arguments for {function_name} must be a JSON object"}

    for tool in tools:
        if any(f["name"] == function_name for f in tool.get_functions()):
            return tool.execute(function_name, **function_args)
    return {"error": f"Unknown function: {function_name}"}


def get_chat_response(client, messages, max_tokens, model):
    """Request a chat completion, offering the loaded tool functions.

    Args:
        client: OpenAI client used to issue the request.
        messages: Chat messages to send.
        max_tokens: Completion token limit.
        model: Model name.

    Returns:
        The response object returned by ``client.chat.completions.create``.
    """
    request = {"model": model, "messages": messages, "max_tokens": max_tokens}
    # An empty functions list is rejected by the API, so the function-calling
    # parameters are only sent when at least one tool function is loaded.
    if functions:
        request["functions"] = functions
        request["function_call"] = "auto"
    return client.chat.completions.create(**request)


if __name__ == '__main__':
    if not DISCORD_BOT_TOKEN:
        raise SystemExit("DISCORD_BOT_TOKEN is not set; add it to the environment or .env file.")
    bot.run(DISCORD_BOT_TOKEN)