Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -546,6 +546,50 @@ def read_any_excel(content: bytes) -> pd.DataFrame:
|
|
| 546 |
logger.error(f"Excel reading failed: {str(e)}")
|
| 547 |
raise HTTPException(422, f"Could not process Excel file: {str(e)}")
|
| 548 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
def is_date_like(s: str) -> bool:
|
| 550 |
"""Helper to detect date-like strings"""
|
| 551 |
date_patterns = [
|
|
@@ -771,65 +815,6 @@ async def question_answering(
|
|
| 771 |
logger.error(f"QA processing failed: {str(e)}")
|
| 772 |
raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
|
| 773 |
|
| 774 |
-
@app.post("/visualize/code")
@limiter.limit("5/minute")
async def visualize_with_code(
    request: Request,
    file: UploadFile = File(...),
    chart_type: str = Form(...),
    x_column: Optional[str] = Form(None),
    y_column: Optional[str] = Form(None),
    hue_column: Optional[str] = Form(None),
    title: Optional[str] = Form(None),
    x_label: Optional[str] = Form(None),
    y_label: Optional[str] = Form(None),
    style: str = Form("seaborn-v0_8"),
    filters: Optional[str] = Form(None),
):
    """
    Generate visualization code for an uploaded Excel file.

    The upload is read into a DataFrame, the form fields are bundled into a
    ``VisualizationRequest``, and the result of
    ``generate_visualization_code`` is returned as ``{"code": ...}``.

    ``filters``, when provided, must be the string form of a Python dict
    literal; it is parsed with ``ast.literal_eval``.

    Raises:
        HTTPException(400): the file is not ``xlsx``/``xls``, the sheet is
            empty, or ``filters`` does not parse to a dict.
        HTTPException(500): any other failure while generating the code.
    """
    try:
        extension, payload = await process_uploaded_file(file)
        if extension not in {"xlsx", "xls"}:
            raise HTTPException(400, "Visualization is only supported for Excel files")

        frame = pd.read_excel(io.BytesIO(payload))
        if frame.empty:
            raise HTTPException(400, "The uploaded Excel file is empty")

        # Parse the optional filter string; anything that is not a dict
        # literal is rejected with the same 400 response.
        parsed_filters = None
        if filters:
            try:
                parsed_filters = ast.literal_eval(filters)
                if not isinstance(parsed_filters, dict):
                    raise ValueError()
            except Exception:
                raise HTTPException(400, "Invalid format for filters. Must be a valid dictionary string.")

        chart_options = {
            "chart_type": chart_type,
            "x_column": x_column,
            "y_column": y_column,
            "hue_column": hue_column,
            "title": title,
            "x_label": x_label,
            "y_label": y_label,
            "style": style,
            "filters": parsed_filters,
        }
        viz_request = VisualizationRequest(**chart_options)

        return {"code": generate_visualization_code(frame, viz_request)}

    except HTTPException:
        # Deliberate HTTP errors raised above pass through unchanged.
        raise
    except Exception as e:
        failure = f"Visualization code generation failed: {str(e)}"
        logger.error(failure)
        raise HTTPException(500, failure)
|
| 830 |
-
|
| 831 |
-
from fastapi.responses import FileResponse # Add this import at the top
|
| 832 |
-
|
| 833 |
|
| 834 |
|
| 835 |
# [Previous imports remain exactly the same...]
|
|
|
|
| 546 |
logger.error(f"Excel reading failed: {str(e)}")
|
| 547 |
raise HTTPException(422, f"Could not process Excel file: {str(e)}")
|
| 548 |
|
| 549 |
+
|
| 550 |
+
def clean_and_convert_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with each column coerced to a better-fitting type.

    Each column is handled independently, in this order:

    1. Columns that already have a datetime or timedelta dtype are kept
       as they are.
    2. Numeric conversion is attempted with ``pd.to_numeric``.
    3. Datetime conversion is attempted with ``pd.to_datetime``, first with
       ``format='ISO8601'`` and then with ``format='mixed'``.
    4. If neither conversion succeeds and the column has ``object`` dtype,
       its values are cast to ``str``, stripped of surrounding whitespace,
       and the placeholder strings ``'nan'``, ``'None'``, ``'NaT'`` and
       ``''`` are replaced with ``pd.NA``.

    Args:
        df: Input frame. It is copied first and never modified.

    Returns:
        A new DataFrame with the converted/cleaned columns.
    """
    df_clean = df.copy()
    dtype_checks = pd.api.types

    for col in df_clean.columns:
        column = df_clean[col]

        # Already-typed temporal columns must be skipped: pd.to_numeric
        # would "succeed" on them by returning their raw integer
        # representation, silently destroying the dates.
        if dtype_checks.is_datetime64_any_dtype(column) or dtype_checks.is_timedelta64_dtype(column):
            continue

        # Numeric conversion; a failure just means "not a numeric column".
        try:
            df_clean[col] = pd.to_numeric(column)
            continue
        except (ValueError, TypeError):
            pass

        # Datetime conversion: strict ISO 8601 first, then per-element
        # format inference as a fallback.
        converted = False
        for date_format in ("ISO8601", "mixed"):
            try:
                df_clean[col] = pd.to_datetime(column, format=date_format)
            except (ValueError, TypeError):
                continue
            converted = True
            break
        if converted:
            continue

        # Plain text (or mixed) column: normalise whitespace and map the
        # string spellings of missing values back to a real NA.
        if column.dtype == object:
            df_clean[col] = (
                column
                .astype(str)
                .str.strip()
                .replace(['nan', 'None', 'NaT', ''], pd.NA)
            )

    return df_clean
|
| 590 |
+
|
| 591 |
+
|
| 592 |
+
|
| 593 |
def is_date_like(s: str) -> bool:
|
| 594 |
"""Helper to detect date-like strings"""
|
| 595 |
date_patterns = [
|
|
|
|
| 815 |
logger.error(f"QA processing failed: {str(e)}")
|
| 816 |
raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
|
| 817 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
|
| 819 |
|
| 820 |
# [Previous imports remain exactly the same...]
|