import gradio as gr
import download_model  # side-effect import: fetches the GGUF file if it is missing
from llama_cpp import Llama

MODEL_PATH = "model/Mistral-7b-instruct-v0.3.Q4_K_M.gguf"
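
# download_model.py itself is not shown in this Space; the following is a
# minimal sketch of what it is assumed to do. The repo id placeholder is an
# illustrative guess, not confirmed by the source:
#
#   from huggingface_hub import hf_hub_download
#   hf_hub_download(
#       repo_id="<gguf-repo-id>",                         # hypothetical repo
#       filename="Mistral-7b-instruct-v0.3.Q4_K_M.gguf",  # matches MODEL_PATH
#       local_dir="model",
#   )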
# Load the model once at startup
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,      # context window in tokens
    n_threads=6,     # CPU threads used for inference
    use_mlock=True,  # lock model pages in RAM to avoid swapping
    use_mmap=True,   # memory-map the weights instead of copying them
)
# Format input prompt using Mistral's [INST] ... [/INST] instruction template
def format_prompt(user_input):
    return f"[INST] {user_input.strip()} [/INST]"
# Non-streaming response; max_tokens=32 keeps replies short and latency low
def chat_fn(message, history=None):
    prompt = format_prompt(message)
    output = llm(prompt, max_tokens=32, stop=["</s>"])
    return output["choices"][0]["text"].strip()
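
# A possible streaming variant (a sketch, not part of the original Space):
# llama-cpp-python accepts stream=True and yields completion chunks, and
# gr.ChatInterface can consume a generator that yields the growing reply.
def chat_fn_stream(message, history=None):
    prompt = format_prompt(message)
    partial = ""
    for chunk in llm(prompt, max_tokens=32, stop=["</s>"], stream=True):
        partial += chunk["choices"][0]["text"]
        yield partial  # each yield replaces the currently displayed reply
# To try it, pass fn=chat_fn_stream to gr.ChatInterface below instead of chat_fn.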
# Gradio chat interface
gr.ChatInterface(
    fn=chat_fn,
    title="🦙 Mistral 7B v0.3 (Fast)",
    description="Chatbot using Mistral 7B with reduced token generation for faster responses.",
    theme="default",
).launch()
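
# Likely contents of this Space's requirements.txt (an assumption; the file
# itself is not shown, and huggingface_hub is only needed if download_model
# works as sketched above):
#
#   gradio
#   llama-cpp-python
#   huggingface_hub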