import gradio as gr
import download_model # Ensures model is downloaded
from llama_cpp import Llama
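# Quantized 4-bit (Q4_K_M) GGUF build of Mistral-7B-Instruct-v0.3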
MODEL_PATH = "model/Mistral-7b-instruct-v0.3.Q4_K_M.gguf"
# Load model
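# n_ctx sets the context window in tokens and n_threads the CPU threads used for inference;
# use_mlock pins the weights in RAM and use_mmap memory-maps the file instead of copying it.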
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=6,
    use_mlock=True,
    use_mmap=True,
)
# Wrap user input in Mistral's [INST] ... [/INST] instruction template
def format_prompt(user_input):
    return f"[INST] {user_input.strip()} [/INST]"
# Non-streaming response; capping max_tokens at 32 trades answer length for latency
def chat_fn(message, history=None):
    prompt = format_prompt(message)
    # llm(...) runs a completion; choices[0]["text"] holds the generated text
    output = llm(prompt, max_tokens=32, stop=["</s>"])
    return output["choices"][0]["text"].strip()
# Gradio interface
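# ChatInterface renders a chat UI around chat_fn; launch() starts the local web server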
gr.ChatInterface(
    fn=chat_fn,
    title="🦙 Mistral 7B v0.3 (Fast)",
    description="Chatbot using Mistral 7B with reduced token generation for faster responses.",
    theme="default",
).launch()