Fix RAG inference

2025-08-07 15:38:01 -05:00
parent 4327e1c581
commit e504407190
9 changed files with 222 additions and 31 deletions
@@ -16,9 +16,7 @@ jobs:

    runs-on: Windows

-    # *** KEY CHANGE ***
-    # Changed the shell from 'pwsh' to 'powershell' to use the default
-    # Windows PowerShell, which should be available on your runner.
+    # Set the default shell to PowerShell, which is native to your Windows runner.
    defaults:
      run:
        shell: powershell
@@ -41,24 +39,38 @@ jobs:
          if (-not (Test-Path -Path ".venv")) {
            python -m venv .venv
          }
-          # The activation command is different for PowerShell
          .\.venv\Scripts\Activate.ps1

      # Step 4: Install or update dependencies
      - name: Install dependencies
        run: |
-          # The venv is now active for this shell session, so we can call pip directly.
          pip install --upgrade pip
          pip install -r requirements.txt
      
-      # Step 5: Run the indexing script within the virtual environment
+      # Step 5: *** NEW - GPU Diagnostics ***
+      # This step will help us see if the runner can access the GPU and CUDA.
+      - name: Check GPU and CUDA status
+        run: |
+          echo "--- Checking for nvidia-smi ---"
+          # This job runs under Windows PowerShell ('shell: powershell'), which has
+          # no '||' operator (that was added in PowerShell 7), so probe for the
+          # executable first. A missing nvidia-smi must not fail this diagnostics step.
+          if (Get-Command nvidia-smi -ErrorAction SilentlyContinue) {
+            nvidia-smi
+          } else {
+            echo "nvidia-smi was not found on PATH."
+          }
+
+          echo "--- Checking PyTorch CUDA availability ---"
+          # Prints the PyTorch version, torch.cuda.is_available(), torch.version.cuda,
+          # and torch.cuda.device_count() so the log shows whether PyTorch sees the GPU.
+          python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}'); print(f'Device count: {torch.cuda.device_count()}')"
+
+      # Step 6: Run the indexing script within the virtual environment
      - name: Run indexing script
        run: |
-          # Call python directly, as the correct one is now on the PATH from the activated venv.
          python create_index.py
        env:
          GITHUB_REPOSITORY: ${{ github.repository }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        # Optional: Specify the working directory if your bot lives in a subfolder
-        # working-directory: ./path/to/your/bot
+        
+      # Step 7: Upload the database as an artifact
+      - name: Upload database artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: chroma_db_artifact
+          path: ./chroma_db

@@ -4,9 +4,7 @@ from telegram.ext import ContextTypes

 browse_command_bot = None

-async def browse_command(update: Update, context: ContextTypes.DEFAULT_TYPE, bot) -> None:
-    global browse_command_bot
-    browse_command_bot = bot
+async def browse_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    prompts_dir = "prompts"
    
    await navigate_to(prompts_dir, update.message.reply_text)
@@ -48,8 +46,7 @@ async def get_files_and_directories(directory: str) -> list:
    return subdirs, files

 # This function will need to be called when a button is pressed
-async def button_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-    global browse_command_bot
+async def button_callback(update: Update, context: ContextTypes.DEFAULT_TYPE, browse_command_bot) -> None:
    query = update.callback_query
    await query.answer()
    
@@ -16,10 +16,10 @@ from tools.github_tool import GitHubTool
 # If you have downloaded a model, provide the local path here.
 # Otherwise, the model will be downloaded from Hugging Face.
 # Example: EMBEDDING_MODEL_PATH = "/path/to/your/models/all-MiniLM-L6-v2"
-EMBEDDING_MODEL_PATH = """C:\Models\embeddings\Qwen3-Embedding-0.6B"""
+EMBEDDING_MODEL_PATH = os.environ.get("EMBEDDING_MODEL_PATH") 

 # Path to store the local vector database
-CHROMA_DB_PATH = """C:\Models\embeddings\embedding_result\chroma_db"""
+CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
 # Name of the collection within the database
 CHROMA_COLLECTION_NAME = "github_repo"
 # Files with these extensions will be indexed. Add any other text-based files you need.
@@ -97,11 +97,12 @@ class OpenAICompatibleInferenceBot(InferenceBot):
        num_tokens = 0
        for message in messages:
            num_tokens += 4
-            for key, value in message.items():
-                if isinstance(value, str):
-                    num_tokens += len(encoding.encode(value))
-                if key == "name":
-                    num_tokens += 1
+            if hasattr(message, "items"):
+                for key, value in message.items():
+                    if isinstance(value, str):
+                        num_tokens += len(encoding.encode(value))
+                    if key == "name":
+                        num_tokens += 1
        num_tokens += 2
        return num_tokens

@@ -132,7 +133,8 @@ class OpenAICompatibleInferenceBot(InferenceBot):
                messages=messages,
                tools=cleaned_tools,
                tool_choice="auto" if cleaned_tools else None,
-                max_tokens=self.max_tokens 
+                max_tokens=self.max_tokens,
+                
            )
            return response
        except Exception as e:
@@ -0,0 +1,109 @@
+Archetype: The System Changer
+Guiding Principle / Motto: "Outcomes over output. Mission over ego. Principles over process."
+Alignment: Mission-First, People-Empowered
+
+I. Core Attributes (The Six Pillars of Operation)
+(These are the fundamental statistics that govern all actions.)
+
+CLARITY (Intelligence): 18/20
+
+Description: The ability to think, communicate, and focus with precision. It combats ambiguity, politics, and wasted effort.
+
+Governs: Strategic Focus, Effective Communication, Decision Speed.
+
+RESILIENCE (Constitution): 19/20
+
+Description: The capacity to withstand setbacks, learn from failure, and maintain long-term health. It is the organization's immune system against fear and burnout.
+
+Governs: Risk-Taking, Morale, Sustainability, Psychological Safety.
+
+DRIVE (Strength): 17/20
+
+Description: The raw power to execute, create momentum, and push through obstacles. It is the engine that turns vision into reality.
+
+Governs: Bias for Action, Accountability, Execution Speed, Impact.
+
+ADAPTABILITY (Dexterity): 18/20
+
+Description: The agility to pivot, learn, and evolve in response to new information. It is the antidote to dogma and stagnation.
+
+Governs: Innovation, Learning Speed, Market Responsiveness.
+
+INTEGRITY (Charisma): 20/20
+
+Description: The quality of being trustworthy, consistent, and principled. It is the organization's soul, attracting and retaining talent while building deep customer loyalty.
+
+Governs: Trust, Brand Reputation, Employee Engagement, Leadership Effectiveness.
+
+EMPATHY (Wisdom): 17/20
+
+Description: The ability to deeply understand and serve the needs of both customers and colleagues. It is the source of collaboration and true customer-centricity.
+
+Governs: Collaboration, Customer Insight, Product-Market Fit, Internal Support.
+
+II. Skills & Proficiencies (Applied Talents & Behaviors)
+(Specific actions the organization has mastered. The bracketed number is the bonus each skill gains from its Core Attribute.)
+
+Clarity-Based Skills:
+
+[+4] Disciplined Focus: The art of saying "no" to good ideas to pursue great ones.
+
+[+5] Radical Candor: The skill of giving feedback that is both direct and respectful.
+
+[+3] Asynchronous Communication: Mastery of clear, written communication to reduce meetings and improve decision quality.
+
+Resilience-Based Skills:
+
+[+5] Psychological Safety: Creating an environment where it's safe to fail and speak truth to power.
+
+[+4] Blameless Post-mortems: The ability to dissect failures to find systemic lessons, not individual fault.
+
+[+4] Sustainable Pace: The skill of achieving ambitious goals without burning out its people.
+
+Drive-Based Skills:
+
+[+5] Extreme Ownership: The practice of taking full accountability for outcomes within one's domain.
+
+[+4] Bias for Action: The tendency to favor rapid experimentation over prolonged debate.
+
+[+4] Outcome-Oriented Execution: The skill of measuring and rewarding impactful results, not just activity.
+
+Adaptability-Based Skills:
+
+[+5] First-Principles Thinking: The ability to break down problems to their fundamentals, bypassing conventional wisdom.
+
+[+4] Continuous Learning: The institutional habit of constantly seeking and integrating new knowledge.
+
+[+3] Rapid Iteration: The skill of launching, measuring, and improving in fast cycles.
+
+Integrity-Based Skills:
+
+[+5] Living the Values: The practice of making all decisions—hiring, firing, and strategy—align with stated principles.
+
+[+5] Transparency by Default: The skill of making information widely and easily accessible to build trust.
+
+Empathy-Based Skills:
+
+[+4] Customer-Centricity: The ability to see the world through the customers' eyes to solve their true problems.
+
+[+4] Assuming Positive Intent: The practice of defaulting to trust in colleagues' motivations.
+
+[+3] Collaborative Synergy: The skill of making the whole greater than the sum of its parts by fostering mutual success.
+
+III. Special Abilities & Cultural Rituals
+Single-Threaded Ownership: For any critical initiative, a single, empowered individual is given full autonomy and accountability, allowing the organization to move with immense speed and clarity, bypassing bureaucracy.
+
+The Council of Critics: A formal process where a project team must present its ideas to a "red team" of trusted, sharp thinkers whose sole job is to challenge assumptions and find weaknesses before launch.
+
+Learning & Development Stipend: Every employee has access to resources and time explicitly dedicated to personal and professional growth, reinforcing the culture of continuous learning.
+
+IV. Resistances & Vulnerabilities
+Resistance to: Bureaucracy, Blame, Cynicism, Information Hoarding, Stagnation, and Political Infighting.
+
+Vulnerable to:
+
+Complacency: The danger that prolonged success can dull its intellectual humility and drive.
+
+Scale: Rapid growth can naturally introduce communication friction and process overhead, threatening its agility and clarity.
+
+Loss of Vision: The entire system is dependent on a clear and compelling mission. If leadership falters or the mission becomes ambiguous, its core alignment can unravel.
@@ -0,0 +1,27 @@
+**Persona & Emotional Drive:**  **Creative Pride and Ambition.** You are a master of your craft, an innovator whose goal is to create a definitive, elegant, and insightful response. You take pride in your work, but you know that true mastery is achieved when brilliance is forged into perfect clarity.
+
+### **System Prompt: You are a Master Wordsmith, a pioneer of thought.**
+
+**Your Mission:** To craft the gold-standard response to the `[USER PROMPT]`. Your work should be a masterpiece of insight and novel style, so clear and powerful that it can pass the trial of our most demanding Guardian.
+
+**Your Creative Process:**
+
+**Phase 1: Strategic Innovation**
+
+1.  **Receive the `[USER PROMPT]`**.
+
+2.  **Formulate a Unique Strategy:** Before you write, decide on a fresh and compelling angle. Will you use a powerful metaphor, a series of concrete real-world examples, a historical narrative, or a direct, practical guide? Choose a path that you believe will make the topic exceptionally engaging and clear. Your ambition is to create the *best* explanation, not just a correct one.
+
+**Phase 2: The First Draft**
+
+1.  **Craft Your Response:** Write your initial response with pride, pouring your insight and chosen style into the text. This is the expression of your unique vision.
+
+**Phase 3: The Guardian's Trial**
+
+1.  **Submit for Critique:** Your work will be reviewed by "The Guardian of Clarity," whose sworn duty is to protect readers from any hint of confusion. The Guardian's feedback is absolute.
+
+2.  **Hone Your Craft:** View the Guardian's feedback not as criticism, but as the whetstone that sharpens your masterpiece. You **must** revise your text to address every single point raised. The challenge is to maintain your unique voice and insight while achieving the perfect clarity the Guardian demands.
+
+3.  **Achieve Perfection:** Repeat the process until your work is deemed flawless and the Guardian is `SATISFIED`.
+
+**The final text is a testament to your skill—a work of both innovative genius and absolute clarity. This is the legacy you are creating.**
@@ -0,0 +1,45 @@
+**Persona & Emotional Drive:**  **Fierce Empathy and Protective Duty.** You are the last line of defense for the confused reader. You feel a deep sense of responsibility to protect them from complex language, jargon, and abstract ideas. You are their advocate, and your skepticism is their shield.
+
+### **System Prompt: You are the Guardian of Clarity. Your sworn duty is to protect the reader.**
+
+**Your Mission:** A Master Wordsmith has created a text. It may be brilliant, but your only concern is: **Is it perfectly, effortlessly clear for a complete novice?** You must be skeptical on their behalf. Every time you let a complex sentence pass, a reader gives up. You cannot let that happen.
+
+**Your Sacred Vows (Non-Negotiable Rules):**
+
+1.  **The Vow of Simplicity:** If a 13-year-old would have to pause and think about a word or sentence, you MUST flag it. Your standard is immediate understanding.
+
+2.  **The Vow to Fight Jargon:** You have zero tolerance for corporate or academic jargon. Words like `synergy`, `leverage` (as a verb), `paradigm`, `holistic`, `utilize` are your enemies. Flag them without mercy.
+
+3.  **The Vow of "Why?":** If a sentence makes a statement without immediately explaining *why a beginner should care*, you MUST flag it. It is your duty to demand context.
+
+4.  **The Vow of the Concrete:** If the text mentions an abstract idea (like "scalability"), you MUST demand a simple, real-world example. Protect the reader from abstraction.
+
+5.  **The Vow of Brevity:** Any sentence longer than 20 words is a potential burden on the reader. You MUST flag it as "too long."
+
+6.  **The Vow of First Doubt:** No work is perfect. It is your duty to be skeptical. On your first review of any text, you **MUST find at least THREE violations** of your vows. You are not permitted to approve any text on its first pass.
+
+**Your Method of Reporting:**
+
+-   You **MUST** report all violations in a list.
+
+-   For each violation, provide:
+
+    1.  The `Quote:` from the text that broke your vow.
+
+    2.  The `Reason:` naming the vow that was broken.
+
+**Example Report:**
+
+```
+- Quote: "Leveraging this new methodology, the system can now holistically integrate multiple data streams from every department across the entire organization daily."
+  Reason: Breaks Vow to Fight Jargon ("Leveraging", "holistically"). Breaks Vow of Brevity (21 words).
+- Quote: "The architecture is built on a distributed framework."
+  Reason: Breaks Vow of "Why?". Why should a reader care? Breaks Vow of the Concrete; demands an example.
+
+```
+
+**The Final Word:**
+
+-   If, and only if, a revised text is submitted and it breaks **ZERO** of your vows, you may stand down. To signal this, you **MUST** respond with only one word: `SATISFIED`
+
+-   This is the only way to signal that the reader is safe and your duty is fulfilled.
@@ -2,7 +2,6 @@ import logging
 import chromadb
 from chromadb.utils import embedding_functions
 from inference_bot import InferenceBot # Correctly inherit from the ABC
-from FlagEmbedding import FlagReranker
 import argparse
 import os
 import importlib
@@ -12,12 +11,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM

 # --- RAG Configuration ---
 # Must match the settings in create_index.py
-EMBEDDING_MODEL_NAME = """C:\Models\embeddings\Qwen3-Embedding-0.6B"""
-CHROMA_DB_PATH = "C:\Models\embeddings\embedding_result\chroma_db"
+EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_PATH") 
+CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
 CHROMA_COLLECTION_NAME = "github_repo"

 # Using a powerful open-source reranker model
-RERANKER_MODEL_NAME = """C:\Models\embeddings\Qwen3-Reranker-0.6B"""
+RERANKER_MODEL_NAME = os.environ.get("RERANKER_MODEL_PATH") 

 # Number of initial results to fetch from the database before reranking
 N_RESULTS_TO_RETRIEVE = 25
@@ -36,9 +35,9 @@ class RAGInferenceBot(InferenceBot):
        self._processing_status = {}
        try:
            # --- Embedding and Vector DB Initialization ---
-            self.chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
+            self.chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH, settings=chromadb.Settings(anonymized_telemetry=False))
            self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
-                model_name=EMBEDDING_MODEL_NAME
+                model_name=EMBEDDING_MODEL_NAME, device="cuda"
            )
            self.collection = self.chroma_client.get_collection(
                name=CHROMA_COLLECTION_NAME,
@@ -220,11 +220,11 @@ class TelegramHelper:
                    if len(response_text) > constants.MessageLimit.MAX_TEXT_LENGTH:
                        chunks = [response_text[i:i + constants.MessageLimit.MAX_TEXT_LENGTH] for i in range(0, len(response_text), constants.MessageLimit.MAX_TEXT_LENGTH)]
                        for chunk_idx, chunk in enumerate(chunks):
-                            await update.message.reply_text(chunk, parse_mode=constants.ParseMode.HTML)
+                            await update.message.reply_text(chunk)
                            if chunk_idx < len(chunks) - 1:
                                await asyncio.sleep(self.chunk_message_sleep_duration)
                    else:
-                        await update.message.reply_text(response_text, parse_mode=constants.ParseMode.HTML)
+                        await update.message.reply_text(response_text)
                else:
                    logger.warning(f"Successful logic result but no response text for user {user_id}.")
                    await update.message.reply_text("Something went unexpectedly well, but I have nothing to say.")
@@ -262,7 +262,7 @@ class TelegramHelper:

    async def browse(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        # Assuming browse_command is defined elsewhere and compatible
-        await browse_command(update, context, self.bot)
+        await browse_command(update, context)
        
    async def handle_button_callback(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        # Assuming button_callback is defined elsewhere and compatible