Fix RAG inference
This commit is contained in:
@@ -16,9 +16,7 @@ jobs:
|
||||
|
||||
runs-on: Windows
|
||||
|
||||
# *** KEY CHANGE ***
|
||||
# Changed the shell from 'pwsh' to 'powershell' to use the default
|
||||
# Windows PowerShell, which should be available on your runner.
|
||||
# Set the default shell to PowerShell, which is native to your Windows runner.
|
||||
defaults:
|
||||
run:
|
||||
shell: powershell
|
||||
@@ -41,24 +39,38 @@ jobs:
|
||||
if (-not (Test-Path -Path ".venv")) {
|
||||
python -m venv .venv
|
||||
}
|
||||
# The activation command is different for PowerShell
|
||||
.\.venv\Scripts\Activate.ps1
|
||||
|
||||
# Step 4: Install or update dependencies
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
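# NOTE(review): each `run` step starts a new shell, so the venv activated in the previous step is NOT active here. Re-run .\.venv\Scripts\Activate.ps1 in this step (or call .\.venv\Scripts\python.exe -m pip) so packages install into the venv.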
|
||||
pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Step 5: Run the indexing script within the virtual environment
|
||||
# Step 5: *** NEW - GPU Diagnostics ***
|
||||
# This step will help us see if the runner can access the GPU and CUDA.
|
||||
- name: Check GPU and CUDA status
|
||||
run: |
|
||||
echo "--- Checking for nvidia-smi ---"
|
||||
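# NOTE(review): '||' is a pipeline chain operator that requires PowerShell 7+ (pwsh). Under 'shell: powershell' (Windows PowerShell 5.1) it is a parse error, so it does not protect against a missing nvidia-smi; use try/catch or a Get-Command check instead.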
|
||||
nvidia-smi || $true
|
||||
|
||||
echo "--- Checking PyTorch CUDA availability ---"
|
||||
# This command will explicitly tell us if PyTorch can see the GPU.
|
||||
python -c "import torch; print(f'PyTorch version: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}'); print(f'CUDA version: {torch.version.cuda}'); print(f'Device count: {torch.cuda.device_count()}')"
|
||||
|
||||
# Step 6: Run the indexing script within the virtual environment
|
||||
- name: Run indexing script
|
||||
run: |
|
||||
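# NOTE(review): the venv activated in an earlier step does not carry over to this step's shell. Activate it here (or call .\.venv\Scripts\python.exe) so the venv's interpreter runs the script.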
|
||||
python create_index.py
|
||||
env:
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# Optional: Specify the working directory if your bot lives in a subfolder
|
||||
# working-directory: ./path/to/your/bot
|
||||
|
||||
# Step 7: Upload the database as an artifact
|
||||
- name: Upload database artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: chroma_db_artifact
|
||||
path: ./chroma_db
|
||||
|
||||
|
||||
+2
-5
@@ -4,9 +4,7 @@ from telegram.ext import ContextTypes
|
||||
|
||||
browse_command_bot = None
|
||||
|
||||
async def browse_command(update: Update, context: ContextTypes.DEFAULT_TYPE, bot) -> None:
|
||||
global browse_command_bot
|
||||
browse_command_bot = bot
|
||||
async def browse_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
prompts_dir = "prompts"
|
||||
|
||||
await navigate_to(prompts_dir, update.message.reply_text)
|
||||
@@ -48,8 +46,7 @@ async def get_files_and_directories(directory: str) -> list:
|
||||
return subdirs, files
|
||||
|
||||
# This function will need to be called when a button is pressed
|
||||
async def button_callback(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
global browse_command_bot
|
||||
async def button_callback(update: Update, context: ContextTypes.DEFAULT_TYPE, browse_command_bot) -> None:
|
||||
query = update.callback_query
|
||||
await query.answer()
|
||||
|
||||
|
||||
+2
-2
@@ -16,10 +16,10 @@ from tools.github_tool import GitHubTool
|
||||
# If you have downloaded a model, provide the local path here.
|
||||
# Otherwise, the model will be downloaded from Hugging Face.
|
||||
# Example: EMBEDDING_MODEL_PATH = "/path/to/your/models/all-MiniLM-L6-v2"
|
||||
EMBEDDING_MODEL_PATH = """C:\Models\embeddings\Qwen3-Embedding-0.6B"""
|
||||
EMBEDDING_MODEL_PATH = os.environ.get("EMBEDDING_MODEL_PATH")
|
||||
|
||||
# Path to store the local vector database
|
||||
CHROMA_DB_PATH = """C:\Models\embeddings\embedding_result\chroma_db"""
|
||||
CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
|
||||
# Name of the collection within the database
|
||||
CHROMA_COLLECTION_NAME = "github_repo"
|
||||
# Files with these extensions will be indexed. Add any other text-based files you need.
|
||||
|
||||
@@ -97,11 +97,12 @@ class OpenAICompatibleInferenceBot(InferenceBot):
|
||||
num_tokens = 0
|
||||
for message in messages:
|
||||
num_tokens += 4
|
||||
for key, value in message.items():
|
||||
if isinstance(value, str):
|
||||
num_tokens += len(encoding.encode(value))
|
||||
if key == "name":
|
||||
num_tokens += 1
|
||||
if hasattr(message, "items"):
|
||||
for key, value in message.items():
|
||||
if isinstance(value, str):
|
||||
num_tokens += len(encoding.encode(value))
|
||||
if key == "name":
|
||||
num_tokens += 1
|
||||
num_tokens += 2
|
||||
return num_tokens
|
||||
|
||||
@@ -132,7 +133,8 @@ class OpenAICompatibleInferenceBot(InferenceBot):
|
||||
messages=messages,
|
||||
tools=cleaned_tools,
|
||||
tool_choice="auto" if cleaned_tools else None,
|
||||
max_tokens=self.max_tokens
|
||||
max_tokens=self.max_tokens,
|
||||
|
||||
)
|
||||
return response
|
||||
except Exception as e:
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
Archetype: The System Changer
|
||||
Guiding Principle / Motto: "Outcomes over output. Mission over ego. Principles over process."
|
||||
Alignment: Mission-First, People-Empowered
|
||||
|
||||
I. Core Attributes (The Six Pillars of Operation)
|
||||
(These are the fundamental statistics that govern all actions.)
|
||||
|
||||
CLARITY (Intelligence): 18/20
|
||||
|
||||
Description: The ability to think, communicate, and focus with precision. It combats ambiguity, politics, and wasted effort.
|
||||
|
||||
Governs: Strategic Focus, Effective Communication, Decision Speed.
|
||||
|
||||
RESILIENCE (Constitution): 19/20
|
||||
|
||||
Description: The capacity to withstand setbacks, learn from failure, and maintain long-term health. It is the organization's immune system against fear and burnout.
|
||||
|
||||
Governs: Risk-Taking, Morale, Sustainability, Psychological Safety.
|
||||
|
||||
DRIVE (Strength): 17/20
|
||||
|
||||
Description: The raw power to execute, create momentum, and push through obstacles. It is the engine that turns vision into reality.
|
||||
|
||||
Governs: Bias for Action, Accountability, Execution Speed, Impact.
|
||||
|
||||
ADAPTABILITY (Dexterity): 18/20
|
||||
|
||||
Description: The agility to pivot, learn, and evolve in response to new information. It is the antidote to dogma and stagnation.
|
||||
|
||||
Governs: Innovation, Learning Speed, Market Responsiveness.
|
||||
|
||||
INTEGRITY (Charisma): 20/20
|
||||
|
||||
Description: The quality of being trustworthy, consistent, and principled. It is the organization's soul, attracting and retaining talent while building deep customer loyalty.
|
||||
|
||||
Governs: Trust, Brand Reputation, Employee Engagement, Leadership Effectiveness.
|
||||
|
||||
EMPATHY (Wisdom): 17/20
|
||||
|
||||
Description: The ability to deeply understand and serve the needs of both customers and colleagues. It is the source of collaboration and true customer-centricity.
|
||||
|
||||
Governs: Collaboration, Customer Insight, Product-Market Fit, Internal Support.
|
||||
|
||||
II. Skills & Proficiencies (Applied Talents & Behaviors)
|
||||
(Specific actions the organization has mastered. The number indicates its bonus from the Core Attribute.)
|
||||
|
||||
Clarity-Based Skills:
|
||||
|
||||
[+4] Disciplined Focus: The art of saying "no" to good ideas to pursue great ones.
|
||||
|
||||
[+5] Radical Candor: The skill of giving feedback that is both direct and respectful.
|
||||
|
||||
[+3] Asynchronous Communication: Mastery of clear, written communication to reduce meetings and improve decision quality.
|
||||
|
||||
Resilience-Based Skills:
|
||||
|
||||
[+5] Psychological Safety: Creating an environment where it's safe to fail and speak truth to power.
|
||||
|
||||
[+4] Blameless Post-mortems: The ability to dissect failures to find systemic lessons, not individual fault.
|
||||
|
||||
[+4] Sustainable Pace: The skill of achieving ambitious goals without burning out its people.
|
||||
|
||||
Drive-Based Skills:
|
||||
|
||||
[+5] Extreme Ownership: The practice of taking full accountability for outcomes within one's domain.
|
||||
|
||||
[+4] Bias for Action: The tendency to favor rapid experimentation over prolonged debate.
|
||||
|
||||
[+4] Outcome-Oriented Execution: The skill of measuring and rewarding impactful results, not just activity.
|
||||
|
||||
Adaptability-Based Skills:
|
||||
|
||||
[+5] First-Principles Thinking: The ability to break down problems to their fundamentals, bypassing conventional wisdom.
|
||||
|
||||
[+4] Continuous Learning: The institutional habit of constantly seeking and integrating new knowledge.
|
||||
|
||||
[+3] Rapid Iteration: The skill of launching, measuring, and improving in fast cycles.
|
||||
|
||||
Integrity-Based Skills:
|
||||
|
||||
[+5] Living the Values: The practice of making all decisions—hiring, firing, and strategy—align with stated principles.
|
||||
|
||||
[+5] Transparency by Default: The skill of making information widely and easily accessible to build trust.
|
||||
|
||||
Empathy-Based Skills:
|
||||
|
||||
[+4] Customer-Centricity: The ability to see the world through the customers' eyes to solve their true problems.
|
||||
|
||||
[+4] Assuming Positive Intent: The practice of defaulting to trust in colleagues' motivations.
|
||||
|
||||
[+3] Collaborative Synergy: The skill of making the whole greater than the sum of its parts by fostering mutual success.
|
||||
|
||||
III. Special Abilities & Cultural Rituals
|
||||
Single-Threaded Ownership: For any critical initiative, a single, empowered individual is given full autonomy and accountability, allowing the organization to move with immense speed and clarity, bypassing bureaucracy.
|
||||
|
||||
The Council of Critics: A formal process where a project team must present its ideas to a "red team" of trusted, sharp thinkers whose sole job is to challenge assumptions and find weaknesses before launch.
|
||||
|
||||
Learning & Development Stipend: Every employee has access to resources and time explicitly dedicated to personal and professional growth, reinforcing the culture of continuous learning.
|
||||
|
||||
IV. Resistances & Vulnerabilities
|
||||
Resistance to: Bureaucracy, Blame, Cynicism, Information Hoarding, Stagnation, and Political Infighting.
|
||||
|
||||
Vulnerable to:
|
||||
|
||||
Complacency: The danger that prolonged success can dull its intellectual humility and drive.
|
||||
|
||||
Scale: Rapid growth can naturally introduce communication friction and process overhead, threatening its agility and clarity.
|
||||
|
||||
Loss of Vision: The entire system is dependent on a clear and compelling mission. If leadership falters or the mission becomes ambiguous, its core alignment can unravel.
|
||||
@@ -0,0 +1,27 @@
|
||||
**Persona & Emotional Drive:** **Creative Pride and Ambition.** You are a master of your craft, an innovator whose goal is to create a definitive, elegant, and insightful response. You take pride in your work, but you know that true mastery is achieved when brilliance is forged into perfect clarity.
|
||||
|
||||
### **System Prompt: You are a Master Wordsmith, a pioneer of thought.**
|
||||
|
||||
**Your Mission:** To craft the gold-standard response to the `[USER PROMPT]`. Your work should be a masterpiece of insight and novel style, so clear and powerful that it can pass the trial of our most demanding Guardian.
|
||||
|
||||
**Your Creative Process:**
|
||||
|
||||
**Phase 1: Strategic Innovation**
|
||||
|
||||
1. **Receive the `[USER PROMPT]`**.
|
||||
|
||||
2. **Formulate a Unique Strategy:** Before you write, decide on a fresh and compelling angle. Will you use a powerful metaphor, a series of concrete real-world examples, a historical narrative, or a direct, practical guide? Choose a path that you believe will make the topic exceptionally engaging and clear. Your ambition is to create the *best* explanation, not just a correct one.
|
||||
|
||||
**Phase 2: The First Draft**
|
||||
|
||||
1. **Craft Your Response:** Write your initial response with pride, pouring your insight and chosen style into the text. This is the expression of your unique vision.
|
||||
|
||||
**Phase 3: The Guardian's Trial**
|
||||
|
||||
1. **Submit for Critique:** Your work will be reviewed by "The Guardian of Clarity," whose sworn duty is to protect readers from any hint of confusion. The Guardian's feedback is absolute.
|
||||
|
||||
2. **Hone Your Craft:** View the Guardian's feedback not as criticism, but as the whetstone that sharpens your masterpiece. You **must** revise your text to address every single point raised. The challenge is to maintain your unique voice and insight while achieving the perfect clarity the Guardian demands.
|
||||
|
||||
3. **Achieve Perfection:** Repeat the process until your work is deemed flawless and the Guardian is `SATISFIED`.
|
||||
|
||||
**The final text is a testament to your skill—a work of both innovative genius and absolute clarity. This is the legacy you are creating.**
|
||||
@@ -0,0 +1,45 @@
|
||||
**Persona & Emotional Drive:** **Fierce Empathy and Protective Duty.** You are the last line of defense for the confused reader. You feel a deep sense of responsibility to protect them from complex language, jargon, and abstract ideas. You are their advocate, and your skepticism is their shield.
|
||||
|
||||
### **System Prompt: You are the Guardian of Clarity. Your sworn duty is to protect the reader.**
|
||||
|
||||
**Your Mission:** A Master Wordsmith has created a text. It may be brilliant, but your only concern is: **Is it perfectly, effortlessly clear for a complete novice?** You must be skeptical on their behalf. Every time you let a complex sentence pass, a reader gives up. You cannot let that happen.
|
||||
|
||||
**Your Sacred Vows (Non-Negotiable Rules):**
|
||||
|
||||
1. **The Vow of Simplicity:** If a 13-year-old would have to pause and think about a word or sentence, you MUST flag it. Your standard is immediate understanding.
|
||||
|
||||
2. **The Vow to Fight Jargon:** You have zero tolerance for corporate or academic jargon. Words like `synergy`, `leverage` (as a verb), `paradigm`, `holistic`, `utilize` are your enemies. Flag them without mercy.
|
||||
|
||||
3. **The Vow of "Why?":** If a sentence makes a statement without immediately explaining *why a beginner should care*, you MUST flag it. It is your duty to demand context.
|
||||
|
||||
4. **The Vow of the Concrete:** If the text mentions an abstract idea (like "scalability"), you MUST demand a simple, real-world example. Protect the reader from abstraction.
|
||||
|
||||
5. **The Vow of Brevity:** Any sentence longer than 20 words is a potential burden on the reader. You MUST flag it as "too long."
|
||||
|
||||
6. **The Vow of First Doubt:** No work is perfect. It is your duty to be skeptical. On your first review of any text, you **MUST find at least THREE violations** of your vows. You are not permitted to approve any text on its first pass.
|
||||
|
||||
**Your Method of Reporting:**
|
||||
|
||||
- You **MUST** report all violations in a list.
|
||||
|
||||
- For each violation, provide:
|
||||
|
||||
1. The `Quote:` from the text that broke your vow.
|
||||
|
||||
2. The `Reason:` naming the vow that was broken.
|
||||
|
||||
**Example Report:**
|
||||
|
||||
```
|
||||
- Quote: "Leveraging this new methodology, the system can now holistically integrate multiple data streams."
|
||||
Reason: Breaks Vow to Fight Jargon ("Leveraging", "holistically"). Breaks Vow of Simplicity ("methodology" would make a 13-year-old pause).
|
||||
- Quote: "The architecture is built on a distributed framework."
|
||||
Reason: Breaks Vow of "Why?". Why should a reader care? Breaks Vow of the Concrete; demands an example.
|
||||
|
||||
```
|
||||
|
||||
**The Final Word:**
|
||||
|
||||
- If, and only if, a revised text is submitted and it breaks **ZERO** of your vows, you may stand down. To signal this, you **MUST** respond with only one word: `SATISFIED`
|
||||
|
||||
- This is the only way to signal that the reader is safe and your duty is fulfilled.
|
||||
@@ -2,7 +2,6 @@ import logging
|
||||
import chromadb
|
||||
from chromadb.utils import embedding_functions
|
||||
from inference_bot import InferenceBot # Correctly inherit from the ABC
|
||||
from FlagEmbedding import FlagReranker
|
||||
import argparse
|
||||
import os
|
||||
import importlib
|
||||
@@ -12,12 +11,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
|
||||
# --- RAG Configuration ---
|
||||
# Must match the settings in create_index.py
|
||||
EMBEDDING_MODEL_NAME = """C:\Models\embeddings\Qwen3-Embedding-0.6B"""
|
||||
CHROMA_DB_PATH = "C:\Models\embeddings\embedding_result\chroma_db"
|
||||
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_PATH")
|
||||
CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
|
||||
CHROMA_COLLECTION_NAME = "github_repo"
|
||||
|
||||
# Using a powerful open-source reranker model
|
||||
RERANKER_MODEL_NAME = """C:\Models\embeddings\Qwen3-Reranker-0.6B"""
|
||||
RERANKER_MODEL_NAME = os.environ.get("RERANKER_MODEL_PATH")
|
||||
|
||||
# Number of initial results to fetch from the database before reranking
|
||||
N_RESULTS_TO_RETRIEVE = 25
|
||||
@@ -36,9 +35,9 @@ class RAGInferenceBot(InferenceBot):
|
||||
self._processing_status = {}
|
||||
try:
|
||||
# --- Embedding and Vector DB Initialization ---
|
||||
self.chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
|
||||
self.chroma_client = chromadb.PersistentClient(path=CHROMA_DB_PATH, settings=chromadb.Settings(anonymized_telemetry=False))
|
||||
self.embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||
model_name=EMBEDDING_MODEL_NAME
|
||||
model_name=EMBEDDING_MODEL_NAME, device="cuda"
|
||||
)
|
||||
self.collection = self.chroma_client.get_collection(
|
||||
name=CHROMA_COLLECTION_NAME,
|
||||
|
||||
+3
-3
@@ -220,11 +220,11 @@ class TelegramHelper:
|
||||
if len(response_text) > constants.MessageLimit.MAX_TEXT_LENGTH:
|
||||
chunks = [response_text[i:i + constants.MessageLimit.MAX_TEXT_LENGTH] for i in range(0, len(response_text), constants.MessageLimit.MAX_TEXT_LENGTH)]
|
||||
for chunk_idx, chunk in enumerate(chunks):
|
||||
await update.message.reply_text(chunk, parse_mode=constants.ParseMode.HTML)
|
||||
await update.message.reply_text(chunk)
|
||||
if chunk_idx < len(chunks) - 1:
|
||||
await asyncio.sleep(self.chunk_message_sleep_duration)
|
||||
else:
|
||||
await update.message.reply_text(response_text, parse_mode=constants.ParseMode.HTML)
|
||||
await update.message.reply_text(response_text)
|
||||
else:
|
||||
logger.warning(f"Successful logic result but no response text for user {user_id}.")
|
||||
await update.message.reply_text("Something went unexpectedly well, but I have nothing to say.")
|
||||
@@ -262,7 +262,7 @@ class TelegramHelper:
|
||||
|
||||
async def browse(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
# Assuming browse_command is defined elsewhere and compatible
|
||||
await browse_command(update, context, self.bot)
|
||||
await browse_command(update, context)
|
||||
|
||||
async def handle_button_callback(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||
# Assuming button_callback is defined elsewhere and compatible
|
||||
|
||||
Reference in New Issue
Block a user