import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load the GGUF model (CPU-only: gpu_layers=0). The path assumes the weights
# file sits in the working directory; from_pretrained also accepts a Hugging
# Face repo id together with a model_file= argument.
model = AutoModelForCausalLM.from_pretrained(
    "tinyllama-1.1b-chat-v1.0.Q4_0.gguf",
    model_type="llama",
    gpu_layers=0,
    threads=4,
)

def chat_fn(message, history):
    # history is ignored: each turn is answered statelessly.
    # TinyLlama-1.1B-Chat-v1.0 uses the Zephyr chat template, which closes each
    # turn with </s> rather than <|end|>. The system prompt is intentionally in
    # Spanish ("You are a helpful assistant specialized in paperwork and public
    # administration in Spain"), matching the app's target audience.
    prompt = (
        "<|system|>\nEres un asistente útil especializado en trámites y "
        "administración en España.</s>\n"
        f"<|user|>\n{message}</s>\n"
        "<|assistant|>\n"
    )
    output = model(
        prompt,
        max_new_tokens=200,
        temperature=0.7,
        stop=["</s>", "<|user|>"],  # keep generation from running into the next turn
    )
    # ctransformers normally returns str, but guard against bytes just in case.
    if isinstance(output, bytes):
        output = output.decode("utf-8", errors="ignore")
    return output.strip()

demo = gr.ChatInterface(
    fn=chat_fn,
    title="Lumen Admin Assistant",
    description="Spanish-language chat using Spanish-TinyLLaMA.",
)

demo.launch()
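
# --- Optional: fetching the GGUF weights ---
# A minimal sketch, kept as comments so the script above stays runnable as-is.
# The repo id and filename below are assumptions (TheBloke's GGUF conversion of
# TinyLlama-1.1B-Chat-v1.0); point them at wherever the weights actually live:
#
#   from huggingface_hub import hf_hub_download
#
#   model_path = hf_hub_download(
#       repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # assumed repo id
#       filename="tinyllama-1.1b-chat-v1.0.Q4_0.gguf",
#       local_dir=".",
#   )
#   # Pass model_path (or the repo id plus model_file=) to from_pretrained above.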