Spaces:

chenguittiMaroua
/

asm-app

Sleeping

App Files Files Community

chenguittiMaroua committed on Apr 25

Commit

516cb8d

verified ·

1 Parent(s): c07a295

Update main.py

Browse files

Files changed (1) hide show

main.py +148 -51

main.py CHANGED Viewed

@@ -798,71 +798,168 @@ async def summarize_document(request: Request, file: UploadFile = File(...)):
         logger.error(f"Summarization failed: {str(e)}", exc_info=True)
         raise HTTPException(500, "Document summarization failed")
 @app.post("/qa")
 @limiter.limit("5/minute")
-async def question_answering(
     request: Request,
-    file: UploadFile = File(...),
     question: str = Form(...),
-    language: str = Form("fr")
 ):
     try:
-        file_ext, content = await process_uploaded_file(file)
-        text = extract_text(content, file_ext)
-        if not text.strip():
-            raise HTTPException(400, "No extractable text found")
-        # Clean and truncate text
-        text = re.sub(r'\s+', ' ', text).strip()[:5000]
-        # Theme detection
-        theme_keywords = ["thème", "sujet principal", "quoi le sujet", "theme", "main topic"]
-        if any(kw in question.lower() for kw in theme_keywords):
-            try:
-                summarizer = get_summarizer()
-                summary_output = summarizer(
-                    text,
-                    max_length=min(100, len(text)//4),
-                    min_length=30,
-                    do_sample=False,
-                    truncation=True
-                )
-                theme = summary_output[0].get("summary_text", text[:200] + "...")
-                return {
-                    "question": question,
-                    "answer": f"Le document traite principalement de : {theme}",
-                    "confidence": 0.95,
-                    "language": language
-                }
-            except Exception:
-                theme = text[:200] + ("..." if len(text) > 200 else "")
-                return {
-                    "question": question,
-                    "answer": f"D'après le document : {theme}",
-                    "confidence": 0.7,
-                    "language": language,
-                    "warning": "theme_summary_fallback"
-                }
-        # Standard QA
-        qa = get_qa_model()
-        result = qa(question=question, context=text[:3000])
-        return {
-            "question": question,
-            "answer": result["answer"],
-            "confidence": result["score"],
-            "language": language
-        }
     except HTTPException:
         raise
     except Exception as e:
-        logger.error(f"QA processing failed: {str(e)}")
         raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
 @app.post("/visualize/natural")
 async def natural_language_visualization(

         logger.error(f"Summarization failed: {str(e)}", exc_info=True)
         raise HTTPException(500, "Document summarization failed")
+from typing import Optional
+import re
+from fastapi import HTTPException
+from concurrent.futures import ThreadPoolExecutor
+# Shared worker pool: the QA helpers below hand blocking calls (text
+# extraction, summarizer and QA model invocations) to it via run_in_executor.
+executor = ThreadPoolExecutor(max_workers=4)
 @app.post("/qa")
 @limiter.limit("5/minute")
+async def universal_question_answering(
     request: Request,
+    file: UploadFile = File(None),
+    text_input: str = Form(None),
     question: str = Form(...),
+    language: str = Form("en")
 ):
+    """
+    Answer a question about an uploaded file or about raw text.
+
+    The content is obtained through extract_content(), the question is
+    labelled by classify_question(), and the matching handle_* coroutine
+    builds the response; any unrecognised label goes to handle_general().
+
+    Form fields:
+    - file: optional upload
+    - text_input: optional raw text (extract_content() answers 400 when
+      neither file nor text_input is supplied)
+    - question: required
+    - language: defaults to "en"
+
+    HTTPException raised by the helpers is re-raised unchanged; any other
+    exception is logged with its traceback and converted to a 500.
+
+    NOTE(review): handle_summary, handle_list and handle_comparison are not
+    defined in the visible part of this change - confirm they exist.
+    NOTE(review): the 500 response echoes str(e) to the client, whereas the
+    summarization endpoint returns a fixed message - confirm this is intended.
+    """
     try:
+        # Step 1: Extract and preprocess content
+        text = await extract_content(file, text_input)
+        # Step 2: Classify question type
+        question_type = classify_question(question, language)
+        # Step 3: Process based on question type
+        if question_type == "theme":
+            return await handle_theme(text, question, language)
+        elif question_type == "summary":
+            return await handle_summary(text, question, language)
+        elif question_type == "fact":
+            return await handle_factual(text, question, language)
+        elif question_type == "list":
+            return await handle_list(text, question, language)
+        elif question_type == "comparison":
+            return await handle_comparison(text, question, language)
+        else:
+            return await handle_general(text, question, language)
     except HTTPException:
         raise
     except Exception as e:
+        logger.error(f"QA failed: {str(e)}", exc_info=True)
         raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
+async def extract_content(file: Optional[UploadFile], text_input: Optional[str]) -> str:
+    """Return cleaned, length-limited text from an upload or from raw text.
+
+    An uploaded file takes precedence over ``text_input``. File parsing is
+    delegated to ``extract_text`` on the shared thread pool via
+    ``run_in_executor``.
+
+    Args:
+        file: Uploaded file, or None/falsy when the caller sent raw text.
+        text_input: Raw text used only when no file was supplied.
+
+    Returns:
+        The text with every whitespace run collapsed to one space, passed
+        through ``smart_truncate(text, 15000)``.
+
+    Raises:
+        HTTPException: 400 when neither source is supplied, or when the
+            chosen source yields no non-whitespace text.
+    """
+    if file:
+        file_ext, content = await process_uploaded_file(file)
+        # Inside a coroutine, get_running_loop() is the preferred accessor.
+        loop = asyncio.get_running_loop()
+        text = await loop.run_in_executor(executor, extract_text, content, file_ext)
+    elif text_input:
+        text = text_input
+    else:
+        raise HTTPException(400, "Either file or text_input must be provided")
+    # Guard against extract_text handing back None as well as blank text, so
+    # the caller gets a 400 instead of an AttributeError turned into a 500.
+    if not text or not text.strip():
+        raise HTTPException(400, "No extractable content found")
+    # Collapse every whitespace run (newlines included) into a single space.
+    text = re.sub(r'\s+', ' ', text).strip()
+    return smart_truncate(text, 15000)
+def classify_question(question: str, language: str) -> str:
+    """Label a question by plain keyword matching.
+
+    Checks run in a fixed order and the first match wins: "theme",
+    "summary", "fact", "list", "comparison"; anything else is "general".
+    Theme and summary keywords are chosen by language ("en", "fr", "es");
+    an unknown language falls back to the English keywords.
+
+    Args:
+        question: The user's question; surrounding whitespace and letter
+            case are ignored.
+        language: Language code. Case and any region suffix are ignored,
+            so "FR" and "fr-FR" both select the French keywords.
+
+    Returns:
+        One of "theme", "summary", "fact", "list", "comparison", "general".
+    """
+    # Strip first: the factual check below relies on startswith().
+    question_lower = question.strip().lower()
+    # Reduce codes such as "FR", "fr-FR" or "fr_FR" to their primary subtag.
+    lang = (language or "").strip().lower().replace("_", "-").split("-")[0]
+    # Theme detection
+    theme_keywords = {
+        "en": ["theme", "main topic", "about", "subject"],
+        "fr": ["thème", "sujet principal", "parle de"],
+        "es": ["tema", "asunto principal"]
+    }
+    if any(kw in question_lower for kw in theme_keywords.get(lang, theme_keywords["en"])):
+        return "theme"
+    # Summary detection
+    summary_keywords = {
+        "en": ["summarize", "overview", "brief"],
+        "fr": ["résumer", "aperçu"],
+        "es": ["resumir", "resumen"]
+    }
+    if any(kw in question_lower for kw in summary_keywords.get(lang, summary_keywords["en"])):
+        return "summary"
+    # Factual questions: matched on the opening word only.
+    factual_keywords = ("what", "when", "who", "which", "where", "quoi", "quand", "qui")
+    if question_lower.startswith(factual_keywords):
+        return "fact"
+    # List questions
+    list_keywords = ["list", "examples", "name all", "énumérer"]
+    if any(kw in question_lower for kw in list_keywords):
+        return "list"
+    # Comparison questions
+    comparison_keywords = ["compare", "difference", "contrast", "comparer"]
+    if any(kw in question_lower for kw in comparison_keywords):
+        return "comparison"
+    return "general"
+async def handle_theme(text: str, question: str, language: str) -> dict:
+    """Answer a theme/topic question with a short summary of the text.
+
+    The summarizer from get_summarizer() runs on the shared thread pool and
+    its summary is wrapped in a sentence in the requested language ("en",
+    "fr" or "es"; anything else falls back to English).
+
+    Returns:
+        The format_response() dict. Its confidence is a fixed 0.95, not a
+        score produced by the model.
+    """
+    summarizer = get_summarizer()
+
+    def _summarize() -> str:
+        # truncation=True asks the pipeline to clip over-long input rather
+        # than fail on it; callers may pass much more text than a
+        # summarization model accepts.
+        # Assumes get_summarizer() returns a Hugging Face pipeline - TODO confirm.
+        return summarizer(
+            text,
+            max_length=150,
+            min_length=50,
+            do_sample=False,
+            truncation=True,
+        )[0]["summary_text"]
+
+    # Inside a coroutine, get_running_loop() is the preferred accessor.
+    summary = await asyncio.get_running_loop().run_in_executor(executor, _summarize)
+    responses = {
+        "en": f"The main theme is: {summary}",
+        "fr": f"Le thème principal est : {summary}",
+        "es": f"El tema principal es: {summary}"
+    }
+    return format_response(question, responses.get(language, responses["en"]), 0.95)
+async def handle_factual(text: str, question: str, language: str) -> dict:
+    """Answer a factual question with the QA model from get_qa_model().
+
+    The context passed to the model is whatever select_relevant_context()
+    returns for this text and question; the model call itself runs on the
+    shared thread pool.
+
+    Args:
+        text: Preprocessed document text.
+        question: The user's question.
+        language: Accepted for signature parity with the other handlers;
+            not used here.
+
+    Returns:
+        The format_response() dict built from the model's "answer" and
+        "score" fields.
+    """
+    qa = get_qa_model()
+    context = select_relevant_context(text, question)
+
+    def _answer():
+        return qa(question=question, context=context)
+
+    # Inside a coroutine, get_running_loop() is the preferred accessor.
+    result = await asyncio.get_running_loop().run_in_executor(executor, _answer)
+    return format_response(question, result["answer"], result["score"])
+async def handle_general(text: str, question: str, language: str) -> dict:
+    """Answer a question that matched no specific category.
+
+    Tries handle_factual() first and returns its result when the confidence
+    is above 0.7. A lower confidence, or any exception raised by that
+    attempt, falls through to handle_theme().
+    """
+    try:
+        qa_result = await handle_factual(text, question, language)
+        if qa_result["confidence"] > 0.7:
+            return qa_result
+    except Exception:
+        # Deliberately best-effort: keep the fallback, but record the failure
+        # so a broken QA path does not go unnoticed.
+        logger.warning("Factual QA attempt failed; falling back to theme summary", exc_info=True)
+    # Fallback to summarization
+    return await handle_theme(text, question, language)
+def format_response(question: str, answer: str, confidence: float) -> dict:
+    """Build the common QA payload: question, answer, confidence and type.
+
+    The confidence is coerced to a built-in float and the type is always
+    "qa_response".
+    """
+    payload = dict(question=question, answer=answer)
+    payload["confidence"] = float(confidence)
+    payload["type"] = "qa_response"
+    return payload
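+# TODO: smart_truncate, select_relevant_context, handle_summary, handle_list and
+# handle_comparison are called above but are not part of this change; carry them
+# over from the previous implementation or define them here before deploying.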
 @app.post("/visualize/natural")
 async def natural_language_visualization(