chenguittiMaroua committed
Commit 23e0e9b · verified · 1 parent: d69a8fd

Update main.py

Files changed (1): main.py (+54, -83)
main.py CHANGED
```diff
@@ -131,73 +131,45 @@ def get_summarizer():
 
-from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
-import torch
-
-# Model options (ordered by preference)
-QA_MODELS = [
-    {"name": "google/flan-t5-small", "max_length": 512},
-    {"name": "facebook/bart-large-cnn", "max_length": 1024}
+MODEL_CHOICES = [
+    "patrickvonplaten/t5-tiny-random",  # Tiny test model (always works)
+    "google/flan-t5-small",             # 300MB
+    "google/flan-t5-base",              # 900MB
+    "facebook/bart-large-cnn"           # 1.6GB
 ]
 
-class QASystem:
+class QAService:
     def __init__(self):
         self.model = None
-        self.tokenizer = None
-        self.current_model = None
+        self.model_name = None
         self.device = 0 if torch.cuda.is_available() else -1
 
-    def load_model(self):
-        for model_info in QA_MODELS:
+    def initialize(self):
+        """Try loading models until one succeeds"""
+        for model_name in MODEL_CHOICES:
             try:
-                logger.info(f"Loading model: {model_info['name']}")
-
-                self.tokenizer = AutoTokenizer.from_pretrained(model_info["name"])
-                self.model = AutoModelForSeq2SeqLM.from_pretrained(
-                    model_info["name"],
-                    device_map="auto",
-                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+                logger.info(f"Attempting to load {model_name}")
+
+                # Lightweight pipeline initialization
+                self.model = pipeline(
+                    "text2text-generation",
+                    model=model_name,
+                    device=self.device,
+                    torch_dtype=torch.float16 if self.device == 0 else torch.float32
                 )
-                self.current_model = model_info
-                logger.info(f"Successfully loaded {model_info['name']}")
+                self.model_name = model_name
+                logger.info(f"Successfully loaded {model_name}")
                 return True
 
             except Exception as e:
-                logger.warning(f"Failed to load {model_info['name']}: {str(e)}")
+                logger.warning(f"Failed to load {model_name}: {str(e)}")
                 continue
 
         logger.error("All model loading attempts failed")
         return False
 
-    def generate_answer(self, question: str, context: Optional[str] = None):
-        try:
-            if context:
-                input_text = f"question: {question} context: {context[:2000]}"
-            else:
-                input_text = f"question: {question}"
-
-            inputs = self.tokenizer(
-                input_text,
-                return_tensors="pt",
-                truncation=True,
-                max_length=self.current_model["max_length"]
-            ).to(self.device)
-
-            outputs = self.model.generate(
-                **inputs,
-                max_new_tokens=200,
-                num_beams=4,
-                early_stopping=True
-            )
-
-            return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-        except Exception as e:
-            logger.error(f"Generation failed: {str(e)}")
-            raise
-
-# Initialize QA system
-qa_system = QASystem()
+# Global service instance
+qa_service = QAService()
```
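The rewrite drops the explicit AutoModel/AutoTokenizer pair in favor of a single `text2text-generation` pipeline and walks `MODEL_CHOICES` smallest-first until one loads; `patrickvonplaten/t5-tiny-random` is there purely as an always-loadable smoke test, so only the later checkpoints give meaningful answers. A minimal standalone sketch of that fallback pattern (the model list mirrors the diff; the logging setup is scaffolding, since `main.py` configures its own `logger` elsewhere):

```python
# Standalone sketch of the "try each checkpoint until one loads" pattern.
import logging

import torch
from transformers import pipeline

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_CHOICES = ["patrickvonplaten/t5-tiny-random", "google/flan-t5-small"]

def load_first_available(choices):
    """Return (name, pipeline) for the first checkpoint that loads, else (None, None)."""
    device = 0 if torch.cuda.is_available() else -1
    for name in choices:
        try:
            return name, pipeline("text2text-generation", model=name, device=device)
        except Exception as exc:  # download/compatibility failure: try the next model
            logger.warning("Failed to load %s: %s", name, exc)
    return None, None

name, qa = load_first_available(MODEL_CHOICES)
if qa is not None:
    # text2text-generation returns a list of {"generated_text": ...} dicts.
    print(name, qa("question: What is the capital of France?")[0]["generated_text"])
```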
```diff
@@ -891,59 +863,58 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
 from typing import Optional
 
 @app.post("/qa")
-async def question_answering(
+async def handle_qa_request(
     question: str = Form(...),
-    file: Optional[UploadFile] = File(None),
-    language: str = Form("en")
+    file: Optional[UploadFile] = File(None)
 ):
-    # Initialize model if not loaded
-    if not qa_system.model:
-        if not qa_system.load_model():
+    # Initialize service if needed
+    if not qa_service.model:
+        if not qa_service.initialize():
             raise HTTPException(
-                500,
+                status_code=500,
                 detail={
-                    "error": "System initialization failed",
-                    "tried_models": [m["name"] for m in QA_MODELS],
-                    "suggestion": "Check logs for loading errors"
+                    "error": "System unavailable",
+                    "status": "Model initialization failed",
+                    "recovery_suggestion": "Retry in 30 seconds or contact support"
                 }
             )
 
     try:
-        # Process file if provided
+        # Process input
         context = None
         if file:
-            try:
-                file_ext, content = await process_uploaded_file(file)
-                context = extract_text(content, file_ext)
-                context = re.sub(r'\s+', ' ', context).strip()[:3000]
-            except Exception as e:
-                logger.error(f"File processing failed: {str(e)}")
-                raise HTTPException(422, detail=f"File processing error: {str(e)}")
-
-        # Generate answer
+            file_ext, content = await process_uploaded_file(file)
+            context = extract_text(content, file_ext)[:2000]  # Strict limit
+
+        # Generate response
         try:
-            answer = qa_system.generate_answer(question, context)
+            input_text = f"question: {question}" + (f" context: {context}" if context else "")
+            result = qa_service.model(
+                input_text,
+                max_length=150,
+                num_beams=2,
+                early_stopping=True
+            )
 
             return {
                 "question": question,
-                "answer": answer,
-                "model": qa_system.current_model["name"],
-                "source": "document" if context else "general",
-                "language": language
+                "answer": result[0]["generated_text"],
+                "model": qa_service.model_name,
+                "context_used": bool(context)
             }
 
         except Exception as e:
-            logger.error(f"Answer generation failed: {str(e)}")
+            logger.error(f"Generation failed: {str(e)}")
             raise HTTPException(
-                500,
+                status_code=500,
                 detail={
                     "error": "Answer generation failed",
-                    "model": qa_system.current_model["name"],
-                    "input_length": len(question) + (len(context) if context else 0),
-                    "suggestion": "Try simplifying your question or reducing document size"
+                    "model": qa_service.model_name,
+                    "input_size": len(input_text) if 'input_text' in locals() else None,
+                    "suggestion": "Simplify your question or reduce document size"
                 }
             )
 
     except HTTPException:
         raise
     except Exception as e:
```
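With the `language` form field gone, the endpoint now accepts just a required `question` and an optional `file`, both as multipart form data. A hedged client-side sketch; the host, port, and file name are placeholders, since the diff shows nothing about how the app is served:

```python
# Hypothetical calls to the updated /qa endpoint (assumes http://localhost:8000).
import requests

BASE = "http://localhost:8000"

# Question only: the handler answers from the model's general knowledge.
r = requests.post(f"{BASE}/qa", data={"question": "What is FastAPI?"})
print(r.json())  # expected keys: question, answer, model, context_used

# Question plus an uploaded document ("report.txt" is a placeholder path).
with open("report.txt", "rb") as fh:
    r = requests.post(
        f"{BASE}/qa",
        data={"question": "What does this report conclude?"},
        files={"file": ("report.txt", fh, "text/plain")},
    )
print(r.json())
```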
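Finally, a small smoke test for the rewritten handler using FastAPI's `TestClient`. It assumes `main.py` exposes a module-level `app` (implied by the `@app.post` decorators but never shown in the diff) and that the first fallback model can be downloaded on the initial call:

```python
# Hedged smoke test; `from main import app` assumes the usual FastAPI layout.
from fastapi.testclient import TestClient

from main import app

client = TestClient(app)

def test_qa_without_file():
    resp = client.post("/qa", data={"question": "What is 2 + 2?"})
    assert resp.status_code == 200
    body = resp.json()
    assert body["context_used"] is False        # no file was uploaded
    assert {"question", "answer", "model"} <= body.keys()
```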