pberck committed on
Commit
da413c8
·
1 Parent(s): 82a10b6

Shuffled context code.

Browse files
app.py CHANGED
@@ -113,18 +113,12 @@ def moderator(message):
113
 
114
 
115
  # Retrieve context from the ChromaDB.
116
- def get_context(message):
117
  if not collection:
118
  return ""
119
- ctxsize = os.getenv("CTXSIZE")
120
- if not ctxsize:
121
- ctxsize = 3 # 9 seems to introduce rubbish...
122
- else:
123
- ctxsize = int(ctxsize)
124
- DBG("get_context: " + str(ctxsize))
125
  results = collection.query(
126
  query_texts=[message],
127
- n_results=ctxsize,
128
  )
129
  data = rerank(message, results)
130
  # data = results["documents"] # [0][0]
@@ -136,8 +130,8 @@ def get_context(message):
136
 
137
 
138
  # Hybrid retriever from hybrid, uses pufendorfstore.
139
- def get_hybrid_context(message):
140
- documents = retrieve(hybrid_retrieval, message, top_k=3, scale=True)
141
  return documents
142
 
143
 
@@ -348,8 +342,20 @@ with gr.Blocks(js=js_func, theme="Monochrome") as demo_blocks:
348
  yield history, gr.update()
349
  return
350
 
351
- context = get_context(user_message)
352
- hybrid_context = get_hybrid_context(user_message)
 
 
 
 
 
 
 
 
 
 
 
 
353
  for hc in hybrid_context:
354
  DBG(
355
  str(hc.meta["file_path"])
@@ -363,28 +369,15 @@ with gr.Blocks(js=js_func, theme="Monochrome") as demo_blocks:
363
  for x in context:
364
  DBG(x)
365
  # Take the top-3, they have already been reranked in the get_context(...) fn.
366
- ctxkeep = os.getenv("CTXKEEP")
367
- if not ctxkeep:
368
- ctxkeep = 3
369
- else:
370
- ctxkeep = int(ctxkeep)
371
- DBG("CONTEXT")
372
  DBG("context keep: " + str(ctxkeep))
373
- context = context[0:ctxkeep]
374
  context_str = ""
375
  if ctxkeep > 0:
376
  for i, x in enumerate(context): # note different after reranking
377
  DBG(x)
378
  context_str += x + "\n\n"
379
  # The hc is the new haystack contents.
380
- hybridkeep = os.getenv("HYBRIDKEEP")
381
- if not hybridkeep:
382
- hybridkeep = 3
383
- else:
384
- hybridkeep = int(hybridkeep)
385
  DBG("hybrid context keep: " + str(hybridkeep))
386
  if hybridkeep > 0:
387
- hybrid_context = hybrid_context[0:hybridkeep]
388
  for i, x in enumerate(hybrid_context):
389
  DBG(x)
390
  context_str += x.content + "\n\n"
 
113
 
114
 
115
  # Retrieve context from the ChromaDB.
116
+ def get_context(message, num):
117
  if not collection:
118
  return ""
 
 
 
 
 
 
119
  results = collection.query(
120
  query_texts=[message],
121
+ n_results=num,
122
  )
123
  data = rerank(message, results)
124
  # data = results["documents"] # [0][0]
 
130
 
131
 
132
  # Hybrid retriever from hybrid, uses pufendorfstore.
133
+ def get_hybrid_context(message, num):
134
+ documents = retrieve(hybrid_retrieval, message, top_k=num, scale=True)
135
  return documents
136
 
137
 
 
342
  yield history, gr.update()
343
  return
344
 
345
+ ctxkeep = os.getenv("CTXKEEP")
346
+ if not ctxkeep:
347
+ ctxkeep = 3
348
+ else:
349
+ ctxkeep = int(ctxkeep)
350
+ hybridkeep = os.getenv("HYBRIDKEEP")
351
+ if not hybridkeep:
352
+ hybridkeep = 3
353
+ else:
354
+ hybridkeep = int(hybridkeep)
355
+
356
+ context = get_context(user_message, ctxkeep)
357
+ hybrid_context = get_hybrid_context(user_message, hybridkeep)
358
+
359
  for hc in hybrid_context:
360
  DBG(
361
  str(hc.meta["file_path"])
 
369
  for x in context:
370
  DBG(x)
371
  # Take the top-3, they have already been reranked in the get_context(...) fn.
 
 
 
 
 
 
372
  DBG("context keep: " + str(ctxkeep))
 
373
  context_str = ""
374
  if ctxkeep > 0:
375
  for i, x in enumerate(context): # note different after reranking
376
  DBG(x)
377
  context_str += x + "\n\n"
378
  # The hc is the new haystack contents.
 
 
 
 
 
379
  DBG("hybrid context keep: " + str(hybridkeep))
380
  if hybridkeep > 0:
 
381
  for i, x in enumerate(hybrid_context):
382
  DBG(x)
383
  context_str += x.content + "\n\n"
vector3_db/a1b2bf9f-4f30-46a6-a6c2-b6ca99effce9/data_level0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c9af344e120a5be64710368f5276638e6f52b1f06593604c5de84036a6f72b0
3
  size 16760000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33dca61881e04f1a2571b2ec84053c3c673fcb03982b223f8401454a5f0ec6c2
3
  size 16760000
vector3_db/a1b2bf9f-4f30-46a6-a6c2-b6ca99effce9/length.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:248727a598e2d668480e8705407c5f5c97986b7e407a2382bbf44f9120743b09
3
  size 40000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee2dfe0fac21ee43690bb2a35b3438cd9ac4872685ceecaaa55ac1641b188be0
3
  size 40000
vector3_db/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7485f035f1cb2d45b176f1cb03183a22a5522e9cb6d6f8e75e30af74cba13ef1
3
  size 11452416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b9dbe7b759b87c6de968679094d0211bc13eff736a7e926a61c5a38712489a8
3
  size 11452416