Spaces:
Runtime error
Runtime error
Commit
·
5588cb0
1
Parent(s):
03ccb0f
vector DB
Browse files- StudybotAPI/backend/data/History_1.pdf +0 -3
- StudybotAPI/backend/ingestion/embeddings.py +5 -10
- StudybotAPI/backend/ingestion/streamer.py +1 -0
- StudybotAPI/backend/retriever/ops.py +1 -0
- StudybotAPI/backend/utils/chain_loader.py +2 -1
- StudybotAPI/requirements.txt +3 -2
- data/History_1.pdf +0 -0
- frontend/layouts/mainlayout.py +2 -2
- frontend/pages/file_streaming.py +41 -0
- frontend/π‘_Home.py +1 -1
StudybotAPI/backend/data/History_1.pdf
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:025a1c0395072fd9108f649e92bccc443c852b9e4b28943f465b61ccaecdcc75
|
| 3 |
-
size 814721
|
|
|
|
|
|
|
|
|
|
|
|
StudybotAPI/backend/ingestion/embeddings.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
from langchain.vectorstores import Qdrant
|
|
|
|
| 2 |
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
| 3 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 4 |
|
|
@@ -32,17 +33,11 @@ class Embeddings:
|
|
| 32 |
|
| 33 |
texts = self.split_docs(docs)
|
| 34 |
|
| 35 |
-
vector_store =
|
| 36 |
-
texts,
|
| 37 |
-
embeddings,
|
| 38 |
-
# path=self.cfg.VECTOR_DB,
|
| 39 |
-
location=":memory:",
|
| 40 |
-
# host="localhost",
|
| 41 |
-
# prefer_grpc=True,
|
| 42 |
-
collection_name=self.cfg.VECTOR_DB,
|
| 43 |
)
|
| 44 |
|
| 45 |
-
print(f"Vector store created
|
| 46 |
|
| 47 |
return vector_store
|
| 48 |
|
|
|
|
| 1 |
+
# from langchain.vectorstores import Qdrant
|
| 2 |
+
from langchain.vectorstores import DocArrayInMemorySearch
|
| 3 |
from langchain.embeddings import HuggingFaceBgeEmbeddings
|
| 4 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 5 |
|
|
|
|
| 33 |
|
| 34 |
texts = self.split_docs(docs)
|
| 35 |
|
| 36 |
+
vector_store = DocArrayInMemorySearch.from_documents(
|
| 37 |
+
texts, embeddings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
|
| 40 |
+
print(f"Vector store created.")
|
| 41 |
|
| 42 |
return vector_store
|
| 43 |
|
StudybotAPI/backend/ingestion/streamer.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
StudybotAPI/backend/retriever/ops.py
CHANGED
|
@@ -22,4 +22,5 @@ def ops_inference(response_result: FrontendResponseModel, question: str):
|
|
| 22 |
except Exception as e:
|
| 23 |
response_result["status"] = "error"
|
| 24 |
response_result["message"].append(str(e))
|
|
|
|
| 25 |
raise ModelDeploymentException(response_result)
|
|
|
|
| 22 |
except Exception as e:
|
| 23 |
response_result["status"] = "error"
|
| 24 |
response_result["message"].append(str(e))
|
| 25 |
+
print(response_result)
|
| 26 |
raise ModelDeploymentException(response_result)
|
StudybotAPI/backend/utils/chain_loader.py
CHANGED
|
@@ -44,7 +44,8 @@ async def llm_chain_loader(DATA_PATH: str):
|
|
| 44 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
| 45 |
llm=llm,
|
| 46 |
chain_type="stuff",
|
| 47 |
-
retriever=db.as_retriever(
|
|
|
|
| 48 |
# return_source_documents=True,
|
| 49 |
# chain_type_kwargs={"prompt": prompt},
|
| 50 |
condense_question_prompt=prompt,
|
|
|
|
| 44 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
| 45 |
llm=llm,
|
| 46 |
chain_type="stuff",
|
| 47 |
+
retriever=db.as_retriever(
|
| 48 |
+
search_type="mmr", search_kwargs={"k": 2, "fetch_k": 4}),
|
| 49 |
# return_source_documents=True,
|
| 50 |
# chain_type_kwargs={"prompt": prompt},
|
| 51 |
condense_question_prompt=prompt,
|
StudybotAPI/requirements.txt
CHANGED
|
@@ -5,7 +5,7 @@ langchain==0.0.346
|
|
| 5 |
pydantic==1.10.2
|
| 6 |
pypdf
|
| 7 |
python-box
|
| 8 |
-
qdrant-client
|
| 9 |
torch
|
| 10 |
transformers
|
| 11 |
sentence_transformers
|
|
@@ -13,4 +13,5 @@ clarifai
|
|
| 13 |
Pillow
|
| 14 |
tiktoken
|
| 15 |
python-multipart
|
| 16 |
-
urllib3==1.25.11
|
|
|
|
|
|
| 5 |
pydantic==1.10.2
|
| 6 |
pypdf
|
| 7 |
python-box
|
| 8 |
+
# qdrant-client
|
| 9 |
torch
|
| 10 |
transformers
|
| 11 |
sentence_transformers
|
|
|
|
| 13 |
Pillow
|
| 14 |
tiktoken
|
| 15 |
python-multipart
|
| 16 |
+
# urllib3==1.25.11
|
| 17 |
+
docarray
|
data/History_1.pdf
ADDED
|
Binary file (815 kB). View file
|
|
|
frontend/layouts/mainlayout.py
CHANGED
|
@@ -9,7 +9,7 @@ from components import authors, user_greetings, add_logo
|
|
| 9 |
|
| 10 |
def mainlayout(func: Callable):
|
| 11 |
def wrapper():
|
| 12 |
-
with open("
|
| 13 |
st_page_layouts = json.load(f)
|
| 14 |
|
| 15 |
st.set_page_config(
|
|
@@ -19,7 +19,7 @@ def mainlayout(func: Callable):
|
|
| 19 |
else "home"
|
| 20 |
]
|
| 21 |
)
|
| 22 |
-
add_logo("
|
| 23 |
st.markdown("# Studybot π")
|
| 24 |
user_greetings()
|
| 25 |
authors()
|
|
|
|
| 9 |
|
| 10 |
def mainlayout(func: Callable):
|
| 11 |
def wrapper():
|
| 12 |
+
with open("layouts/st_page_layouts.json", "r", encoding="utf-8") as f:
|
| 13 |
st_page_layouts = json.load(f)
|
| 14 |
|
| 15 |
st.set_page_config(
|
|
|
|
| 19 |
else "home"
|
| 20 |
]
|
| 21 |
)
|
| 22 |
+
add_logo("images/studybotlogo.svg", svg=True)
|
| 23 |
st.markdown("# Studybot π")
|
| 24 |
user_greetings()
|
| 25 |
authors()
|
frontend/pages/file_streaming.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
import streamlit as st
|
| 4 |
+
|
| 5 |
+
from langchain.callbacks.base import BaseCallbackHandler
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class StreamHandler(BaseCallbackHandler):
|
| 9 |
+
def __init__(
|
| 10 |
+
self, container: st.delta_generator.DeltaGenerator, initial_text: str = ""
|
| 11 |
+
):
|
| 12 |
+
self.container = container
|
| 13 |
+
self.text = initial_text
|
| 14 |
+
self.run_id_ignore_token = None
|
| 15 |
+
|
| 16 |
+
def on_llm_start(self, serialized: dict, prompts: list, **kwargs):
|
| 17 |
+
# Workaround to prevent showing the rephrased question as output
|
| 18 |
+
if prompts[0].startswith("Human"):
|
| 19 |
+
self.run_id_ignore_token = kwargs.get("run_id")
|
| 20 |
+
|
| 21 |
+
def on_llm_new_token(self, token: str, **kwargs) -> None:
|
| 22 |
+
if self.run_id_ignore_token == kwargs.get("run_id", False):
|
| 23 |
+
return
|
| 24 |
+
self.text += token
|
| 25 |
+
self.container.markdown(self.text)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class PrintRetrievalHandler(BaseCallbackHandler):
|
| 29 |
+
def __init__(self, container):
|
| 30 |
+
self.status = container.status("**Context Retrieval**")
|
| 31 |
+
|
| 32 |
+
def on_retriever_start(self, serialized: dict, query: str, **kwargs):
|
| 33 |
+
self.status.write(f"**Question:** {query}")
|
| 34 |
+
self.status.update(label=f"**Context Retrieval:** {query}")
|
| 35 |
+
|
| 36 |
+
def on_retriever_end(self, documents, **kwargs):
|
| 37 |
+
for idx, doc in enumerate(documents):
|
| 38 |
+
source = os.path.basename(doc.metadata["source"])
|
| 39 |
+
self.status.write(f"**Document {idx} from {source}**")
|
| 40 |
+
self.status.markdown(doc.page_content)
|
| 41 |
+
self.status.update(state="complete")
|
frontend/π‘_Home.py
CHANGED
|
@@ -63,7 +63,7 @@ def home():
|
|
| 63 |
"<h2 style='text-align: center; color: black;'>Studybot Architecture</h1>",
|
| 64 |
unsafe_allow_html=True,
|
| 65 |
)
|
| 66 |
-
st.image("
|
| 67 |
|
| 68 |
|
| 69 |
home()
|
|
|
|
| 63 |
"<h2 style='text-align: center; color: black;'>Studybot Architecture</h1>",
|
| 64 |
unsafe_allow_html=True,
|
| 65 |
)
|
| 66 |
+
st.image("images/architecture.png")
|
| 67 |
|
| 68 |
|
| 69 |
home()
|