Shuffled context code.
Browse files
app.py
CHANGED
|
@@ -113,18 +113,12 @@ def moderator(message):
|
|
| 113 |
|
| 114 |
|
| 115 |
# Retrieve context from the ChromaDB.
|
| 116 |
-
def get_context(message):
|
| 117 |
if not collection:
|
| 118 |
return ""
|
| 119 |
-
ctxsize = os.getenv("CTXSIZE")
|
| 120 |
-
if not ctxsize:
|
| 121 |
-
ctxsize = 3 # 9 seems to introduce rubbish...
|
| 122 |
-
else:
|
| 123 |
-
ctxsize = int(ctxsize)
|
| 124 |
-
DBG("get_context: " + str(ctxsize))
|
| 125 |
results = collection.query(
|
| 126 |
query_texts=[message],
|
| 127 |
-
n_results=
|
| 128 |
)
|
| 129 |
data = rerank(message, results)
|
| 130 |
# data = results["documents"] # [0][0]
|
|
@@ -136,8 +130,8 @@ def get_context(message):
|
|
| 136 |
|
| 137 |
|
| 138 |
# Hybrid retriever from hybrid, uses pufendorfstore.
|
| 139 |
-
def get_hybrid_context(message):
|
| 140 |
-
documents = retrieve(hybrid_retrieval, message, top_k=
|
| 141 |
return documents
|
| 142 |
|
| 143 |
|
|
@@ -348,8 +342,20 @@ with gr.Blocks(js=js_func, theme="Monochrome") as demo_blocks:
|
|
| 348 |
yield history, gr.update()
|
| 349 |
return
|
| 350 |
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
for hc in hybrid_context:
|
| 354 |
DBG(
|
| 355 |
str(hc.meta["file_path"])
|
|
@@ -363,28 +369,15 @@ with gr.Blocks(js=js_func, theme="Monochrome") as demo_blocks:
|
|
| 363 |
for x in context:
|
| 364 |
DBG(x)
|
| 365 |
# Take the top-3, they have already been reranked in the get_context(...) fn.
|
| 366 |
-
ctxkeep = os.getenv("CTXKEEP")
|
| 367 |
-
if not ctxkeep:
|
| 368 |
-
ctxkeep = 3
|
| 369 |
-
else:
|
| 370 |
-
ctxkeep = int(ctxkeep)
|
| 371 |
-
DBG("CONTEXT")
|
| 372 |
DBG("context keep: " + str(ctxkeep))
|
| 373 |
-
context = context[0:ctxkeep]
|
| 374 |
context_str = ""
|
| 375 |
if ctxkeep > 0:
|
| 376 |
for i, x in enumerate(context): # note different after reranking
|
| 377 |
DBG(x)
|
| 378 |
context_str += x + "\n\n"
|
| 379 |
# The hc is the new haystack contents.
|
| 380 |
-
hybridkeep = os.getenv("HYBRIDKEEP")
|
| 381 |
-
if not hybridkeep:
|
| 382 |
-
hybridkeep = 3
|
| 383 |
-
else:
|
| 384 |
-
hybridkeep = int(hybridkeep)
|
| 385 |
DBG("hybrid context keep: " + str(hybridkeep))
|
| 386 |
if hybridkeep > 0:
|
| 387 |
-
hybrid_context = hybrid_context[0:hybridkeep]
|
| 388 |
for i, x in enumerate(hybrid_context):
|
| 389 |
DBG(x)
|
| 390 |
context_str += x.content + "\n\n"
|
|
|
|
| 113 |
|
| 114 |
|
| 115 |
# Retrieve context from the ChromaDB.
|
| 116 |
+
def get_context(message, num):
|
| 117 |
if not collection:
|
| 118 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
results = collection.query(
|
| 120 |
query_texts=[message],
|
| 121 |
+
n_results=num,
|
| 122 |
)
|
| 123 |
data = rerank(message, results)
|
| 124 |
# data = results["documents"] # [0][0]
|
|
|
|
| 130 |
|
| 131 |
|
| 132 |
# Hybrid retriever from hybrid, uses pufendorfstore.
|
| 133 |
+
def get_hybrid_context(message, num):
|
| 134 |
+
documents = retrieve(hybrid_retrieval, message, top_k=num, scale=True)
|
| 135 |
return documents
|
| 136 |
|
| 137 |
|
|
|
|
| 342 |
yield history, gr.update()
|
| 343 |
return
|
| 344 |
|
| 345 |
+
ctxkeep = os.getenv("CTXKEEP")
|
| 346 |
+
if not ctxkeep:
|
| 347 |
+
ctxkeep = 3
|
| 348 |
+
else:
|
| 349 |
+
ctxkeep = int(ctxkeep)
|
| 350 |
+
hybridkeep = os.getenv("HYBRIDKEEP")
|
| 351 |
+
if not hybridkeep:
|
| 352 |
+
hybridkeep = 3
|
| 353 |
+
else:
|
| 354 |
+
hybridkeep = int(hybridkeep)
|
| 355 |
+
|
| 356 |
+
context = get_context(user_message, ctxkeep)
|
| 357 |
+
hybrid_context = get_hybrid_context(user_message, hybridkeep)
|
| 358 |
+
|
| 359 |
for hc in hybrid_context:
|
| 360 |
DBG(
|
| 361 |
str(hc.meta["file_path"])
|
|
|
|
| 369 |
for x in context:
|
| 370 |
DBG(x)
|
| 371 |
# Take the top-3, they have already been reranked in the get_context(...) fn.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
DBG("context keep: " + str(ctxkeep))
|
|
|
|
| 373 |
context_str = ""
|
| 374 |
if ctxkeep > 0:
|
| 375 |
for i, x in enumerate(context): # note different after reranking
|
| 376 |
DBG(x)
|
| 377 |
context_str += x + "\n\n"
|
| 378 |
# The hc is the new haystack contents.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
DBG("hybrid context keep: " + str(hybridkeep))
|
| 380 |
if hybridkeep > 0:
|
|
|
|
| 381 |
for i, x in enumerate(hybrid_context):
|
| 382 |
DBG(x)
|
| 383 |
context_str += x.content + "\n\n"
|
vector3_db/a1b2bf9f-4f30-46a6-a6c2-b6ca99effce9/data_level0.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16760000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33dca61881e04f1a2571b2ec84053c3c673fcb03982b223f8401454a5f0ec6c2
|
| 3 |
size 16760000
|
vector3_db/a1b2bf9f-4f30-46a6-a6c2-b6ca99effce9/length.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 40000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee2dfe0fac21ee43690bb2a35b3438cd9ac4872685ceecaaa55ac1641b188be0
|
| 3 |
size 40000
|
vector3_db/chroma.sqlite3
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 11452416
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b9dbe7b759b87c6de968679094d0211bc13eff736a7e926a61c5a38712489a8
|
| 3 |
size 11452416
|