# mistral-7b-chat / app.py
import gradio as gr
import download_model  # Side-effect import: ensures the GGUF model file is downloaded
from llama_cpp import Llama
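
# download_model.py is not shown in this file; below is a minimal sketch of what
# it likely does on import (the repo_id is an assumption, not confirmed here):
#
#     from huggingface_hub import hf_hub_download
#
#     hf_hub_download(
#         repo_id="MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",  # assumed source repo
#         filename="Mistral-7b-instruct-v0.3.Q4_K_M.gguf",        # must match MODEL_PATH below
#         local_dir="model",
#     )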
MODEL_PATH = "model/Mistral-7b-instruct-v0.3.Q4_K_M.gguf"

# Load the quantized model with llama-cpp-python
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,       # context window size in tokens
    n_threads=6,      # CPU threads used for inference
    use_mlock=True,   # lock model weights in RAM to avoid swapping
    use_mmap=True,    # memory-map the model file for faster loading
)

# Wrap user input in Mistral's [INST] ... [/INST] instruction format
def format_prompt(user_input):
    return f"[INST] {user_input.strip()} [/INST]"

# Non-streaming response; max_tokens=32 keeps replies short for faster turnaround
def chat_fn(message, history=None):
    prompt = format_prompt(message)
    output = llm(prompt, max_tokens=32, stop=["</s>"])
    return output["choices"][0]["text"].strip()
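
# A streaming variant is possible but not used here. A sketch, assuming the
# same prompt format (llama-cpp-python supports stream=True, and
# gr.ChatInterface accepts generator functions that yield partial strings):
#
#     def chat_fn_streaming(message, history=None):
#         partial = ""
#         for chunk in llm(format_prompt(message), max_tokens=32,
#                          stop=["</s>"], stream=True):
#             partial += chunk["choices"][0]["text"]
#             yield partial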

# Gradio chat interface
gr.ChatInterface(
    fn=chat_fn,
    title="🦙 Mistral 7B v0.3 (Fast)",
    description="Chatbot using Mistral 7B with reduced token generation for faster responses.",
    theme="default",
).launch()
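
# Note: on Hugging Face Spaces, launch() is picked up automatically by the
# platform; when run locally, Gradio serves on http://127.0.0.1:7860 by default.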