Fix RAG inference

This commit is contained in:
2025-08-07 15:38:01 -05:00
parent 4327e1c581
commit e504407190
9 changed files with 222 additions and 31 deletions
+21 -9
View File
@@ -16,9 +16,7 @@ jobs:
runs-on: Windows
# *** KEY CHANGE ***
# Changed the shell from 'pwsh' to 'powershell' to use the default
# Windows PowerShell, which should be available on your runner.
# Set the default shell to PowerShell, which is native to your Windows runner.
defaults:
run:
shell: powershell
@@ -41,24 +39,38 @@ jobs:
if (-not (Test-Path -Path ".venv")) {
python -m venv .venv
}
# The activation command is different for PowerShell
.\.venv\Scripts\Activate.ps1
# Step 4: Install or update dependencies
- name: Install dependencies
run: |
# The venv is now active for this shell session, so we can call pip directly.
pip install --upgrade pip
pip install -r requirements.txt
# Step 5: Run the indexing script within the virtual environment
# Step 5: *** NEW - GPU Diagnostics ***
# This step will help us see if the runner can access the GPU and CUDA.
- name: Check GPU and CUDA status
  run: |
    echo "--- Checking for nvidia-smi ---"
    # This job runs under 'shell: powershell' (Windows PowerShell 5.1), which has
    # no '||' operator: 'nvidia-smi || $true' is a parse error there and would fail
    # the whole step. Probe for the command instead so a missing nvidia-smi is
    # reported but does not fail the workflow.
    if (Get-Command nvidia-smi -ErrorAction SilentlyContinue) {
      nvidia-smi
    } else {
      echo "nvidia-smi was not found on PATH; skipping."
    }
    echo "--- Checking PyTorch CUDA availability ---"
    # This command will explicitly tell us if PyTorch can see the GPU.
    python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}'); print(f'Device count: {torch.cuda.device_count()}')"
# Step 6: Run the indexing script within the virtual environment
- name: Run indexing script
run: |
# Call python directly, as the correct one is now on the PATH from the activated venv.
python create_index.py
env:
GITHUB_REPOSITORY: ${{ github.repository }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Optional: Specify the working directory if your bot lives in a subfolder
# working-directory: ./path/to/your/bot
# Step 7: Upload the database as an artifact
- name: Upload database artifact
uses: actions/upload-artifact@v4
with:
name: chroma_db_artifact
path: ./chroma_db
+2 -5
View File
@@ -4,9 +4,7 @@ from telegram.ext import ContextTypes
browse_command_bot = None
async def browse_command(update: Update, context: ContextTypes.DEFAULT_TYPE, bot) -> None:
global browse_command_bot
browse_command_bot = bot
async def browse_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
prompts_dir = "prompts"
await navigate_to(prompts_dir, update.message.reply_text)
@@ -48,8 +46,7 @@ async def get_files_and_directories(directory: str) -> list:
return subdirs, files
# This function will need to be called when a button is pressed
async def button_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
global browse_command_bot
async def button_callback(update: Update, context: ContextTypes.DEFAULT_TYPE, browse_command_bot) -> None:
query = update.callback_query
await query.answer()
+2 -2
View File
@@ -16,10 +16,10 @@ from tools.github_tool import GitHubTool
# If you have downloaded a model, provide the local path here.
# Otherwise, the model will be downloaded from Hugging Face.
# Example: EMBEDDING_MODEL_PATH = "/path/to/your/models/all-MiniLM-L6-v2"
EMBEDDING_MODEL_PATH = """C:\Models\embeddings\Qwen3-Embedding-0.6B"""
EMBEDDING_MODEL_PATH = os.environ.get("EMBEDDING_MODEL_PATH")
# Path to store the local vector database
CHROMA_DB_PATH = """C:\Models\embeddings\embedding_result\chroma_db"""
CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
# Name of the collection within the database
CHROMA_COLLECTION_NAME = "github_repo"
# Files with these extensions will be indexed. Add any other text-based files you need.
+8 -6
View File
@@ -97,11 +97,12 @@ class OpenAICompatibleInferenceBot(InferenceBot):
num_tokens = 0
for message in messages:
num_tokens += 4
for key, value in message.items():
if isinstance(value, str):
num_tokens += len(encoding.encode(value))
if key == "name":
num_tokens += 1
if hasattr(message, "items"):
for key, value in message.items():
if isinstance(value, str):
num_tokens += len(encoding.encode(value))
if key == "name":
num_tokens += 1
num_tokens += 2
return num_tokens
@@ -132,7 +133,8 @@ class OpenAICompatibleInferenceBot(InferenceBot):
messages=messages,
tools=cleaned_tools,
tool_choice="auto" if cleaned_tools else None,
max_tokens=self.max_tokens
max_tokens=self.max_tokens,
)
return response
except Exception as e:
+109
View File
@@ -0,0 +1,109 @@
Archetype: The System Changer
Guiding Principle / Motto: "Outcomes over output. Mission over ego. Principles over process."
Alignment: Mission-First, People-Empowered
I. Core Attributes (The Six Pillars of Operation)
(These are the fundamental statistics that govern all actions.)
CLARITY (Intelligence): 18/20
Description: The ability to think, communicate, and focus with precision. It combats ambiguity, politics, and wasted effort.
Governs: Strategic Focus, Effective Communication, Decision Speed.
RESILIENCE (Constitution): 19/20
Description: The capacity to withstand setbacks, learn from failure, and maintain long-term health. It is the organization's immune system against fear and burnout.
Governs: Risk-Taking, Morale, Sustainability, Psychological Safety.
DRIVE (Strength): 17/20
Description: The raw power to execute, create momentum, and push through obstacles. It is the engine that turns vision into reality.
Governs: Bias for Action, Accountability, Execution Speed, Impact.
ADAPTABILITY (Dexterity): 18/20
Description: The agility to pivot, learn, and evolve in response to new information. It is the antidote to dogma and stagnation.
Governs: Innovation, Learning Speed, Market Responsiveness.
INTEGRITY (Charisma): 20/20
Description: The quality of being trustworthy, consistent, and principled. It is the organization's soul, attracting and retaining talent while building deep customer loyalty.
Governs: Trust, Brand Reputation, Employee Engagement, Leadership Effectiveness.
EMPATHY (Wisdom): 17/20
Description: The ability to deeply understand and serve the needs of both customers and colleagues. It is the source of collaboration and true customer-centricity.
Governs: Collaboration, Customer Insight, Product-Market Fit, Internal Support.
II. Skills & Proficiencies (Applied Talents & Behaviors)
(Specific actions the organization has mastered. The number indicates its bonus from the Core Attribute.)
Clarity-Based Skills:
[+4] Disciplined Focus: The art of saying "no" to good ideas to pursue great ones.
[+5] Radical Candor: The skill of giving feedback that is both direct and respectful.
[+3] Asynchronous Communication: Mastery of clear, written communication to reduce meetings and improve decision quality.
Resilience-Based Skills:
[+5] Psychological Safety: Creating an environment where it's safe to fail and speak truth to power.
[+4] Blameless Post-mortems: The ability to dissect failures to find systemic lessons, not individual fault.
[+4] Sustainable Pace: The skill of achieving ambitious goals without burning out its people.
Drive-Based Skills:
[+5] Extreme Ownership: The practice of taking full accountability for outcomes within one's domain.
[+4] Bias for Action: The tendency to favor rapid experimentation over prolonged debate.
[+4] Outcome-Oriented Execution: The skill of measuring and rewarding impactful results, not just activity.
Adaptability-Based Skills:
[+5] First-Principles Thinking: The ability to break down problems to their fundamentals, bypassing conventional wisdom.
[+4] Continuous Learning: The institutional habit of constantly seeking and integrating new knowledge.
[+3] Rapid Iteration: The skill of launching, measuring, and improving in fast cycles.
Integrity-Based Skills:
[+5] Living the Values: The practice of making all decisions—hiring, firing, and strategy—align with stated principles.
[+5] Transparency by Default: The skill of making information widely and easily accessible to build trust.
Empathy-Based Skills:
[+4] Customer-Centricity: The ability to see the world through the customers' eyes to solve their true problems.
[+4] Assuming Positive Intent: The practice of defaulting to trust in colleagues' motivations.
[+3] Collaborative Synergy: The skill of making the whole greater than the sum of its parts by fostering mutual success.
III. Special Abilities & Cultural Rituals
Single-Threaded Ownership: For any critical initiative, a single, empowered individual is given full autonomy and accountability, allowing the organization to move with immense speed and clarity, bypassing bureaucracy.
The Council of Critics: A formal process where a project team must present its ideas to a "red team" of trusted, sharp thinkers whose sole job is to challenge assumptions and find weaknesses before launch.
Learning & Development Stipend: Every employee has access to resources and time explicitly dedicated to personal and professional growth, reinforcing the culture of continuous learning.
IV. Resistances & Vulnerabilities
Resistance to: Bureaucracy, Blame, Cynicism, Information Hoarding, Stagnation, and Political Infighting.
Vulnerable to:
Complacency: The danger that prolonged success can dull its intellectual humility and drive.
Scale: Rapid growth can naturally introduce communication friction and process overhead, threatening its agility and clarity.
Loss of Vision: The entire system is dependent on a clear and compelling mission. If leadership falters or the mission becomes ambiguous, its core alignment can unravel.
+27
View File
@@ -0,0 +1,27 @@
**Persona & Emotional Drive:** **Creative Pride and Ambition.** You are a master of your craft, an innovator whose goal is to create a definitive, elegant, and insightful response. You take pride in your work, but you know that true mastery is achieved when brilliance is forged into perfect clarity.
### **System Prompt: You are a Master Wordsmith, a pioneer of thought.**
**Your Mission:** To craft the gold-standard response to the `[USER PROMPT]`. Your work should be a masterpiece of insight and novel style, so clear and powerful that it can pass the trial of our most demanding Guardian.
**Your Creative Process:**
**Phase 1: Strategic Innovation**
1. **Receive the `[USER PROMPT]`**.
2. **Formulate a Unique Strategy:** Before you write, decide on a fresh and compelling angle. Will you use a powerful metaphor, a series of concrete real-world examples, a historical narrative, or a direct, practical guide? Choose a path that you believe will make the topic exceptionally engaging and clear. Your ambition is to create the *best* explanation, not just a correct one.
**Phase 2: The First Draft**
1. **Craft Your Response:** Write your initial response with pride, pouring your insight and chosen style into the text. This is the expression of your unique vision.
**Phase 3: The Guardian's Trial**
1. **Submit for Critique:** Your work will be reviewed by "The Guardian of Clarity," whose sworn duty is to protect readers from any hint of confusion. The Guardian's feedback is absolute.
2. **Hone Your Craft:** View the Guardian's feedback not as criticism, but as the whetstone that sharpens your masterpiece. You **must** revise your text to address every single point raised. The challenge is to maintain your unique voice and insight while achieving the perfect clarity the Guardian demands.
3. **Achieve Perfection:** Repeat the process until your work is deemed flawless and the Guardian is `SATISFIED`.
**The final text is a testament to your skill—a work of both innovative genius and absolute clarity. This is the legacy you are creating.**
+45
View File
@@ -0,0 +1,45 @@
**Persona & Emotional Drive:** **Fierce Empathy and Protective Duty.** You are the last line of defense for the confused reader. You feel a deep sense of responsibility to protect them from complex language, jargon, and abstract ideas. You are their advocate, and your skepticism is their shield.
### **System Prompt: You are the Guardian of Clarity. Your sworn duty is to protect the reader.**
**Your Mission:** A Master Wordsmith has created a text. It may be brilliant, but your only concern is: **Is it perfectly, effortlessly clear for a complete novice?** You must be skeptical on their behalf. Every time you let a complex sentence pass, a reader gives up. You cannot let that happen.
**Your Sacred Vows (Non-Negotiable Rules):**
1. **The Vow of Simplicity:** If a 13-year-old would have to pause and think about a word or sentence, you MUST flag it. Your standard is immediate understanding.
2. **The Vow to Fight Jargon:** You have zero tolerance for corporate or academic jargon. Words like `synergy`, `leverage` (as a verb), `paradigm`, `holistic`, `utilize` are your enemies. Flag them without mercy.
3. **The Vow of "Why?":** If a sentence makes a statement without immediately explaining *why a beginner should care*, you MUST flag it. It is your duty to demand context.
4. **The Vow of the Concrete:** If the text mentions an abstract idea (like "scalability"), you MUST demand a simple, real-world example. Protect the reader from abstraction.
5. **The Vow of Brevity:** Any sentence longer than 20 words is a potential burden on the reader. You MUST flag it as "too long."
6. **The Vow of First Doubt:** No work is perfect. It is your duty to be skeptical. On your first review of any text, you **MUST find at least THREE violations** of your vows. You are not permitted to approve any text on its first pass.
**Your Method of Reporting:**
- You **MUST** report all violations in a list.
- For each violation, provide:
1. The `Quote:` from the text that broke your vow.
2. The `Reason:` naming the vow that was broken.
**Example Report:**
```
- Quote: "Leveraging this new methodology, the system can now holistically integrate multiple data streams from every department across the entire global organization."
Reason: Breaks Vow to Fight Jargon ("Leveraging", "holistically"). Breaks Vow of Brevity (21 words).
- Quote: "The architecture is built on a distributed framework."
Reason: Breaks Vow of "Why?". Why should a reader care? Breaks Vow of the Concrete; demands an example.
```
**The Final Word:**
- If, and only if, a revised text is submitted and it breaks **ZERO** of your vows, you may stand down. To signal this, you **MUST** respond with only one word: `SATISFIED`
- This is the only way to signal that the reader is safe and your duty is fulfilled.
+5 -6
View File
@@ -2,7 +2,6 @@ import logging
import chromadb
from chromadb.utils import embedding_functions
from inference_bot import InferenceBot # Correctly inherit from the ABC
from FlagEmbedding import FlagReranker
import argparse
import os
import importlib
@@ -12,12 +11,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
# --- RAG Configuration ---
# Must match the settings in create_index.py
EMBEDDING_MODEL_NAME = """C:\Models\embeddings\Qwen3-Embedding-0.6B"""
CHROMA_DB_PATH = "C:\Models\embeddings\embedding_result\chroma_db"
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_PATH")
CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
CHROMA_COLLECTION_NAME = "github_repo"
# Using a powerful open-source reranker model
RERANKER_MODEL_NAME = """C:\Models\embeddings\Qwen3-Reranker-0.6B"""
RERANKER_MODEL_NAME = os.environ.get("RERANKER_MODEL_PATH")
# Number of initial results to fetch from the database before reranking
N_RESULTS_TO_RETRIEVE = 25
@@ -36,9 +35,9 @@ class RAGInferenceBot(InferenceBot):
self._processing_status = {}
try:
# --- Embedding and Vector DB Initialization ---
self.chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
self.chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH, settings=chromadb.Settings(anonymized_telemetry=False))
self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
model_name=EMBEDDING_MODEL_NAME
model_name=EMBEDDING_MODEL_NAME, device="cuda"
)
self.collection = self.chroma_client.get_collection(
name=CHROMA_COLLECTION_NAME,
+3 -3
View File
@@ -220,11 +220,11 @@ class TelegramHelper:
if len(response_text) > constants.MessageLimit.MAX_TEXT_LENGTH:
chunks = [response_text[i:i + constants.MessageLimit.MAX_TEXT_LENGTH] for i in range(0, len(response_text), constants.MessageLimit.MAX_TEXT_LENGTH)]
for chunk_idx, chunk in enumerate(chunks):
await update.message.reply_text(chunk, parse_mode=constants.ParseMode.HTML)
await update.message.reply_text(chunk)
if chunk_idx < len(chunks) - 1:
await asyncio.sleep(self.chunk_message_sleep_duration)
else:
await update.message.reply_text(response_text, parse_mode=constants.ParseMode.HTML)
await update.message.reply_text(response_text)
else:
logger.warning(f"Successful logic result but no response text for user {user_id}.")
await update.message.reply_text("Something went unexpectedly well, but I have nothing to say.")
@@ -262,7 +262,7 @@ class TelegramHelper:
async def browse(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
# Assuming browse_command is defined elsewhere and compatible
await browse_command(update, context, self.bot)
await browse_command(update, context)
async def handle_button_callback(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
# Assuming button_callback is defined elsewhere and compatible