# semantic_search.py

import json
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Read the cleaned FAQ entries from disk. Every entry is indexed by its
# "question" key below, so each one must provide that key.
with open("cleaned_faqs.json", "r", encoding="utf-8") as f:
    data = json.loads(f.read())

# Question texts, kept in the same order as `data` so that a position in the
# embedding matrix maps straight back to its FAQ entry.
questions = [entry["question"] for entry in data]

# Lightweight, HuggingFace-friendly sentence-embedding model; the same
# instance is reused by search_faq() to embed incoming queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every FAQ question once, up front.
embeddings = model.encode(questions)

def search_faq(query, top_k=3):
    """Return the FAQ entries whose questions best match *query*.

    The query is embedded with the module-level ``model`` and scored by
    cosine similarity against the precomputed ``embeddings`` of all FAQ
    questions.

    Args:
        query: Text to search for.
        top_k: Maximum number of entries to return. ``None`` returns every
            entry; zero or a negative value returns an empty list.

    Returns:
        A list of items taken from ``data``, ordered from the highest to the
        lowest similarity score and holding at most ``top_k`` items.
    """
    # Without this guard a negative top_k becomes a negative slice bound
    # (e.g. -1 means "all but the last"), which is never what a caller wants.
    if top_k is not None and top_k <= 0:
        return []

    # Nothing to compare against when no FAQ entries were loaded.
    if not data:
        return []

    query_embedding = model.encode([query])
    # cosine_similarity returns a (1, n_questions) matrix; take its only row.
    scores = cosine_similarity(query_embedding, embeddings)[0]
    # argsort is ascending, so reverse it to rank the best matches first.
    top_indices = np.argsort(scores)[::-1][:top_k]

    return [data[idx] for idx in top_indices]

#----------------------------------------------------------------------------
# from rephrase_with_mistral import rephrase_with_mistral
# from semantic_search import search_faq

# api_key = "<YOUR_API_KEY>"  # <-- Supply your API key here (e.g. from an environment variable); never commit a real key. The key previously committed on this line should be treated as compromised and revoked.

# query = "how do I update my KYC?"
# top_faq = search_faq(query)[0]

# print("🔎 FAQ Retrieved:")
# print(top_faq['question'])
# print(top_faq['answer'])

# # Now rephrase
# print("\n💬 Rephrased Answer:")
# #print(rephrase_with_mistral(top_faq['question'], top_faq['answer'], api_key))
# print(rephrase_with_mistral(query, [top_faq], api_key))  # use a list of one FAQ