chenguittiMaroua committed on
Commit
31a4493
·
verified ·
1 Parent(s): 3918290

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +44 -59
main.py CHANGED
@@ -546,6 +546,50 @@ def read_any_excel(content: bytes) -> pd.DataFrame:
546
  logger.error(f"Excel reading failed: {str(e)}")
547
  raise HTTPException(422, f"Could not process Excel file: {str(e)}")
548
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549
  def is_date_like(s: str) -> bool:
550
  """Helper to detect date-like strings"""
551
  date_patterns = [
@@ -771,65 +815,6 @@ async def question_answering(
771
  logger.error(f"QA processing failed: {str(e)}")
772
  raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
773
 
774
@app.post("/visualize/code")
@limiter.limit("5/minute")
async def visualize_with_code(
    request: Request,
    file: UploadFile = File(...),
    chart_type: str = Form(...),
    x_column: Optional[str] = Form(None),
    y_column: Optional[str] = Form(None),
    hue_column: Optional[str] = Form(None),
    title: Optional[str] = Form(None),
    x_label: Optional[str] = Form(None),
    y_label: Optional[str] = Form(None),
    style: str = Form("seaborn-v0_8"),  # Updated default
    filters: Optional[str] = Form(None)
):
    # Endpoint: turn an uploaded Excel workbook plus chart options into
    # visualization source code, returned as {"code": ...}.
    # HTTPExceptions raised below (400s) pass through untouched; any other
    # failure is logged and reported as a 500.
    try:
        extension, raw_bytes = await process_uploaded_file(file)
        if extension not in {"xlsx", "xls"}:
            raise HTTPException(400, "Visualization is only supported for Excel files")

        frame = pd.read_excel(io.BytesIO(raw_bytes))
        if frame.empty:
            raise HTTPException(400, "The uploaded Excel file is empty")

        # The filters form field arrives as text; literal_eval parses it
        # without executing code, and anything that is not a dict is rejected.
        parsed_filters = None
        if filters:
            try:
                parsed_filters = ast.literal_eval(filters)
                if not isinstance(parsed_filters, dict):
                    raise ValueError()
            except Exception:
                raise HTTPException(400, "Invalid format for filters. Must be a valid dictionary string.")

        spec = VisualizationRequest(
            chart_type=chart_type,
            x_column=x_column,
            y_column=y_column,
            hue_column=hue_column,
            title=title,
            x_label=x_label,
            y_label=y_label,
            style=style,
            filters=parsed_filters,
        )
        return {"code": generate_visualization_code(frame, spec)}

    except HTTPException:
        # Already a well-formed client-facing error; do not wrap it.
        raise
    except Exception as e:
        failure = f"Visualization code generation failed: {str(e)}"
        logger.error(failure)
        raise HTTPException(500, failure)
830
-
831
- from fastapi.responses import FileResponse # Add this import at the top
832
-
833
 
834
 
835
  # [Previous imports remain exactly the same...]
 
546
  logger.error(f"Excel reading failed: {str(e)}")
547
  raise HTTPException(422, f"Could not process Excel file: {str(e)}")
548
 
549
+
550
def _coerce_column(series):
    """Return *series* parsed as numbers or datetimes, or None if neither parse succeeds."""
    try:
        return pd.to_numeric(series)
    except (ValueError, TypeError):
        pass

    # Strict ISO 8601 first; 'mixed' then infers the format per element.
    for fmt in ("ISO8601", "mixed"):
        try:
            return pd.to_datetime(series, format=fmt)
        except (ValueError, TypeError):
            continue
    return None


def clean_and_convert_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Clean and convert data types in a DataFrame with proper error handling.

    Works on a copy; the input frame is not modified. For each column:

    * columns that already have a numeric, datetime or timedelta dtype are
      left untouched;
    * otherwise numeric conversion is tried, then datetime conversion
      (ISO 8601, then mixed formats);
    * object columns that match neither are converted to stripped strings,
      with 'nan', 'None', 'NaT' and empty strings replaced by ``pd.NA``.

    Args:
        df: The DataFrame to clean.

    Returns:
        A new DataFrame with converted columns.
    """
    df_clean = df.copy()
    dtype_checks = pd.api.types

    for col in df_clean.columns:
        series = df_clean[col]

        # Already-typed columns must not go through pd.to_numeric: for
        # datetime64/timedelta64 data it returns the raw int64 nanosecond
        # values, which would destroy dates parsed when the file was read.
        if (
            dtype_checks.is_numeric_dtype(series)
            or dtype_checks.is_datetime64_any_dtype(series)
            or dtype_checks.is_timedelta64_dtype(series)
        ):
            continue

        converted = _coerce_column(series)
        if converted is not None:
            df_clean[col] = converted
            continue

        # Clean string columns: normalise whitespace and map textual
        # missing-value markers to a real missing value.
        if series.dtype == object:
            df_clean[col] = (
                series
                .astype(str)
                .str.strip()
                .replace(['nan', 'None', 'NaT', ''], pd.NA)
            )

    return df_clean
590
+
591
+
592
+
593
  def is_date_like(s: str) -> bool:
594
  """Helper to detect date-like strings"""
595
  date_patterns = [
 
815
  logger.error(f"QA processing failed: {str(e)}")
816
  raise HTTPException(500, detail=f"Analysis failed: {str(e)}")
817
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
 
819
 
820
  # [Previous imports remain exactly the same...]