# Requirements - Install necessary packages if not installed
# py -3.10 -m pip install requests beautifulsoup4 langchain-community bs4 langchain-text-splitters langchain-chroma langchain-ollama

# --1: Import necessary libraries
import os
import bs4
import requests  # Needed for session handling
import hashlib  # To track URL changes
import shutil  # For clearing ChromaDB
import time  # To ensure cleanup before deletion
import platform
import psutil
import subprocess
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document  # ✅ Import Document wrapper
from langchain_ollama import OllamaLLM  # ✅ Import Ollama LLM
from langchain_community.vectorstores import Chroma  # ✅ Import ChromaDB

# ✅ Host environment report: OS, platform, CPU count and memory, so the
# console log records what machine this run happened on.
print("--")
for label, value in (
    ("OS Name:", os.name),
    ("Platform:", platform.platform()),
    ("CPU Count:", psutil.cpu_count()),
):
    print(label)
    print(value)
    print("--")

# ✅ Total physical memory, reported in GiB
total_gib = psutil.virtual_memory().total / 1024 ** 3
print("Physical memory = %0.2f GiB" % total_gib)
print("--")

# ✅ Full virtual-memory breakdown (available, used, free, ...)
vm_stats = psutil.virtual_memory()
print("Physical memory details:")
print(vm_stats)
print("--")

# ✅ Step 1: Stop all running Ollama processes
print("\n🔍 Searching for 'ollama' processes running...\n")

for p in psutil.process_iter(['pid', 'name']):
    # p.info['name'] can be None for some system processes — guard before
    # .lower(), otherwise the whole script crashes with AttributeError.
    name = p.info['name'] or ""
    if "ollama" in name.lower():
        print(f"🛑 Stopping: {p.info['name']} (PID: {p.info['pid']})")
        try:
            # Reuse the handle we already have (no second PID lookup).
            p.terminate()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # Process already exited or we lack permission — skip it.
            pass

# ✅ Step 2: Wait a moment to ensure processes are terminated
time.sleep(3)

# ✅ Step 3: Start a new Ollama process
print("\n🚀 Starting a new Ollama process...\n")
subprocess.Popen(["ollama", "serve"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

print("\n✅ Ollama has been restarted successfully!\n")

print("🔍 Searching for 'ollama app' process...\n")

for p in psutil.process_iter(['pid', 'name']):
    if "ollama" in (p.info['name'] or "").lower():  # same None guard as above
        print(f"✅ Found: {p.info['name']} (PID: {p.info['pid']})")

# ✅ Browser-like User-Agent, exported via the environment (for WebBaseLoader)
# and reused on the requests session below.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
os.environ["USER_AGENT"] = USER_AGENT

# ✅ On-disk locations: Chroma persistence folder and the last-URL hash file
chroma_db_path = "chroma_db"
url_hash_file = "last_url.txt"

# ✅ Single HTTP session that carries the User-Agent header on every request
session = requests.Session()
session.headers["User-Agent"] = USER_AGENT

# ✅ Module-level state shared with the helper functions below
vectorstore = None
article_title = "Unknown Title"  # default until a page is scraped


def clear_chroma_db():
    """Close the module-level ChromaDB handle and delete its on-disk folder.

    Drops the global ``vectorstore`` reference so Chroma can release its file
    handles, then removes ``chroma_db_path``. Deletion is retried a few times
    with short pauses because (notably on Windows) the files may stay locked
    briefly after the last reference is dropped.
    """
    global vectorstore

    if vectorstore:
        print("\n🔄 Closing ChromaDB connection before deleting the database...")
        vectorstore = None  # drop the only reference so resources are released

    # Nothing to delete — first run, or already cleared.
    if not os.path.exists(chroma_db_path):
        return

    # ✅ Retry deletion: file locks are usually released within a second or two.
    for _attempt in range(5):
        time.sleep(1)  # give the OS time to release file handles
        try:
            shutil.rmtree(chroma_db_path)
            print("\n✅ ChromaDB successfully cleared.")
            return
        except PermissionError:
            continue  # still locked — wait and try again

    print("\n🚨 ChromaDB is still in use. Retry after closing any processes using it.")

def set_new_url():
    """Prompt for a URL, scrape it, and (re)build the Chroma vector store.

    Side effects:
      * updates the globals ``vectorstore``, ``url`` and ``article_title``
      * clears and rebuilds the on-disk ChromaDB when the URL changed
      * persists the MD5 of the current URL in ``url_hash_file``

    Exits the process (SystemExit) when no content can be extracted or the
    embedding model is unavailable.
    """
    global vectorstore, url, article_title

    # Heavy third-party pieces are imported lazily, but all in one place at
    # the top of the function instead of scattered through the body.
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    from langchain_ollama import OllamaEmbeddings

    # ✅ Keep asking until a non-empty URL is entered
    while True:
        url = input("\n🌍 Enter the webpage URL: ").strip()
        if url:
            break

    # ✅ Compute hash of the URL to detect changes between runs
    url_hash = hashlib.md5(url.encode()).hexdigest()

    # ✅ Check if the URL has changed since the last run
    if os.path.exists(url_hash_file):
        with open(url_hash_file, "r") as f:
            last_url_hash = f.read().strip()
        if last_url_hash != url_hash:
            print("\n🔄 URL changed! Clearing ChromaDB to rebuild with new data...")
            clear_chroma_db()
    else:
        print("\n🆕 First-time run. Creating new ChromaDB.")

    # ✅ Save the new URL hash
    with open(url_hash_file, "w") as f:
        f.write(url_hash)

    # ✅ Load the webpage content. requests has NO default timeout — without
    # one, a hung server would block the script forever.
    print("\n🔗 Scraping content from:", url)
    page = session.get(url, timeout=30)
    soup = bs4.BeautifulSoup(page.content, "html.parser")

    # ✅ Extract the article title from <h1 class="post-title">
    title_tag = soup.find("h1", class_="post-title")
    article_title = title_tag.get_text(strip=True) if title_tag else "Unknown Title"

    # ✅ Extract text from <div class="post-content">. The separator keeps
    # words from adjacent tags from being glued together, which would hurt
    # both chunking and retrieval quality.
    post_content_div = soup.find("div", class_="post-content")

    if post_content_div:
        raw_text = post_content_div.get_text(separator=" ", strip=True)
    else:
        print("\n🚨 No content found inside <div class='post-content'>!")
        raise SystemExit(1)

    # ✅ Wrap the extracted text into LangChain Document format
    docs = [Document(page_content=raw_text)]

    # ✅ Debugging: Print extracted title and a text preview
    print(f"\n📜 Extracted Title: {article_title}")
    print(raw_text[:500] + "...\n" if raw_text else "No text extracted!")

    # --2: Split the text into smaller, overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1200, chunk_overlap=100, add_start_index=True
    )
    all_splits = text_splitter.split_documents(docs)

    # ✅ Debugging: Print number of text chunks created
    print(f"\n🔹 Number of text chunks created: {len(all_splits)}")

    # If no chunks are created, stop execution
    if not all_splits:
        print("\n🚨 No text chunks were created! The document might be empty or incorrectly processed.")
        raise SystemExit(1)

    # --3: Create embeddings and vector store
    try:
        local_embeddings = OllamaEmbeddings(model="all-minilm")  # model must be pulled
    except Exception as e:
        print("\n🚨 Error loading Ollama embeddings. Ensure Ollama is running and the model is installed.")
        print("Run: ollama serve and ollama pull all-minilm")
        print("Error Details:", e)
        raise SystemExit(1)

    # ✅ Sanity check: embed one chunk to confirm the model actually responds
    # (all_splits is guaranteed non-empty by the exit above).
    sample_embeddings = local_embeddings.embed_documents([all_splits[0].page_content])
    if not sample_embeddings:
        print("\n🚨 Embedding model failed! No embeddings were generated.")
        raise SystemExit(1)

    # ✅ (Re)build the persistent ChromaDB from the new chunks
    vectorstore = Chroma.from_documents(
        documents=all_splits, embedding=local_embeddings, persist_directory=chroma_db_path
    )

    print(f"\n✅ Webpage '{article_title}' has been processed! You can now ask questions.")


# ✅ Build the initial vector store from the first URL
set_new_url()

# ✅ Chat model used to answer questions over the retrieved context
llm = OllamaLLM(model="llama3.2:1b")

print("\n💬 You can now ask questions! Type 'change url' to scrape a new page or 'exit app' to quit.")

# ✅ Interactive REPL: answer questions until the user quits
while True:
    user_query = input(f"\n🔎 [{article_title}] Enter your question: ").strip()
    command = user_query.lower()

    # Sentinel commands are checked before treating the input as a question.
    if command == "exit app":
        print("\n👋 Exiting the app. Goodbye!")
        break

    if command == "change url":
        print("\n🔄 Switching to a new URL...\n")
        set_new_url()
        continue

    # ✅ Retrieve the 3 most similar chunks and merge them into one context
    matched_docs = vectorstore.as_retriever(
        search_type="similarity", search_kwargs={"k": 3}
    ).invoke(user_query)
    context = ' '.join(doc.page_content for doc in matched_docs)

    answer = llm.invoke(f"""Answer the question according to the context given very briefly:
               Question: {user_query}
               Context: {context}
    """)

    # ✅ Print the answer in a framed banner
    print("\n" + "="*80)
    print(f"📌 Question: '{user_query}' from: '{article_title}'")
    print("="*80)
    print(f"📌 Answer: {answer}")
    print("="*80 + "\n")