Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -725,8 +725,30 @@ def generate_dynamic_visualization_code(df: pd.DataFrame, request: Visualization
|
|
| 725 |
@limiter.limit("5/minute")
|
| 726 |
async def summarize_document(request: Request, file: UploadFile = File(...)):
|
| 727 |
try:
|
| 728 |
-
|
| 729 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 730 |
|
| 731 |
if not text.strip():
|
| 732 |
raise HTTPException(400, "No extractable text found")
|
|
|
|
| 725 |
@limiter.limit("5/minute")
|
| 726 |
async def summarize_document(request: Request, file: UploadFile = File(...)):
|
| 727 |
try:
|
| 728 |
+
# Lowercase the filename so the extension checks below are case-insensitive
|
| 729 |
+
filename = file.filename.lower()
|
| 730 |
+
|
| 731 |
+
# Extract text using the method that matches the file extension
|
| 732 |
+
if filename.endswith(('.txt', '.md')):
|
| 733 |
+
text = (await file.read()).decode('utf-8')
|
| 734 |
+
elif filename.endswith('.docx'):
|
| 735 |
+
doc = Document(file.file)
|
| 736 |
+
text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
|
| 737 |
+
elif filename.endswith('.pptx'):
|
| 738 |
+
prs = Presentation(file.file)
|
| 739 |
+
text = []
|
| 740 |
+
for slide in prs.slides:
|
| 741 |
+
for shape in slide.shapes:
|
| 742 |
+
if hasattr(shape, "text"):
|
| 743 |
+
text.append(shape.text)
|
| 744 |
+
text = "\n".join(text)
|
| 745 |
+
elif filename.endswith('.pdf'):
|
| 746 |
+
content = await file.read()
|
| 747 |
+
text = extract_text(content, 'pdf') # Your existing PDF extraction
|
| 748 |
+
else:
|
| 749 |
+
# Fall back to textract for other formats (rtf, etc.)
|
| 750 |
+
content = await file.read()
|
| 751 |
+
text = textract.process(content).decode('utf-8')
|
| 752 |
|
| 753 |
if not text.strip():
|
| 754 |
raise HTTPException(400, "No extractable text found")
|