# semantic_search.py
"""Semantic FAQ search backed by sentence-transformer embeddings.

At import time this module reads ``cleaned_faqs.json`` from the current
working directory, encodes every FAQ's ``"question"`` field with the
``all-MiniLM-L6-v2`` model, and keeps the resulting embeddings in memory.
``search_faq`` then ranks the FAQs against a query by cosine similarity.
"""
import json

import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load cleaned FAQs
with open("cleaned_faqs.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Only the question text is embedded; the full FAQ entries stay in `data`.
questions = [item["question"] for item in data]

model = SentenceTransformer('all-MiniLM-L6-v2')  # lightweight and HuggingFace-friendly

# Generate embeddings once so each search only has to encode the query.
embeddings = model.encode(questions)


def search_faq(query, top_k=3):
    """Return the FAQ entries whose questions are most similar to *query*.

    Args:
        query: Free-text user question.
        top_k: Maximum number of entries to return. Values of zero or less
            yield an empty list.

    Returns:
        A list of at most ``top_k`` items taken from ``data``, ordered from
        the highest cosine similarity to the lowest.
    """
    # A negative top_k would otherwise slice "all but the last |k|" entries,
    # and an empty FAQ list leaves nothing to rank.
    if top_k <= 0 or len(data) == 0:
        return []

    query_embedding = model.encode([query])
    # cosine_similarity returns a (1, n_faqs) matrix; row 0 is this query.
    scores = cosine_similarity(query_embedding, embeddings)[0]
    # argsort is ascending, so reverse it to get the best matches first.
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [data[int(idx)] for idx in top_indices]


# ----------------------------------------------------------------------------
# Example usage (kept for reference; intended to live in a separate script).
#
# SECURITY: an API key used to be hard-coded in this example. Treat that key
# as compromised and revoke it; supply the key through the environment.
#
# import os
#
# from rephrase_with_mistral import rephrase_with_mistral
# from semantic_search import search_faq
#
# api_key = os.environ["OPENROUTER_API_KEY"]  # never commit the actual key
#
# query = "how do I update my KYC?"
# top_faq = search_faq(query)[0]
#
# print("🔎 FAQ Retrieved:")
# print(top_faq['question'])
# print(top_faq['answer'])
#
# # Now rephrase
# print("\n💬 Rephrased Answer:")
# # print(rephrase_with_mistral(top_faq['question'], top_faq['answer'], api_key))
# print(rephrase_with_mistral(query, [top_faq], api_key))  # use a list of one FAQ