Spaces:

Adilbai
/

Kepler-automated-detection

Sleeping

App Files Community

baiganinn committed on Oct 5

Commit

3fdaf15

1 Parent(s): a9b3cfe

fix: bugs in workflow

Browse files

Files changed (2) hide show

app.py +43 -11
mapping.py +51 -2

app.py CHANGED Viewed

@@ -43,8 +43,24 @@ def predict_exoplanets(uploaded_file):
         if uploaded_file is None:
             return None, "Error: Please upload a CSV file", None
-        # Read uploaded file
-        df_uploaded = pd.read_csv(uploaded_file.name, comment='#')
         info_msg = f"Loaded rows: {len(df_uploaded)}\n"
         info_msg += f"Columns in uploaded dataset: {len(df_uploaded.columns)}\n\n"
@@ -72,17 +88,29 @@ def predict_exoplanets(uploaded_file):
         # Prepare X with correct columns
         info_msg += f"Creating DataFrame with {len(expected_features)} columns...\n"
-        # Create empty DataFrame with correct columns
-        X = pd.DataFrame(index=df_mapped.index, columns=expected_features)
         # Fill columns that exist in df_mapped
         for col in expected_features:
             if col in df_mapped.columns:
-                X[col] = df_mapped[col].values
-            else:
-                X[col] = 0.0  # Fill missing columns with zeros
-        # Convert all columns to numeric format
         X = X.apply(pd.to_numeric, errors='coerce')
         # Calculate statistics
@@ -94,8 +122,12 @@ def predict_exoplanets(uploaded_file):
         info_msg += f"DEBUG: X.shape = {X.shape}, expected: ({len(df_mapped)}, {len(expected_features)})\n"
-        # Fill NaN with mean values
-        X = X.fillna(X.mean().fillna(0))
         info_msg += f"DEBUG: After fillna X.shape = {X.shape}\n"

         if uploaded_file is None:
             return None, "Error: Please upload a CSV file", None
+        # Read uploaded file with robust error handling
+        try:
+            df_uploaded = pd.read_csv(uploaded_file.name, comment='#', low_memory=False)
+        except Exception as e:
+            try:
+                # Try with different encoding
+                df_uploaded = pd.read_csv(uploaded_file.name, comment='#', encoding='latin1', low_memory=False)
+            except Exception as e2:
+                return None, f"Error reading CSV file: {str(e)}\nAlternative attempt: {str(e2)}", None
+        # Ensure all columns are properly formatted (no multi-dimensional data)
+        for col in df_uploaded.columns:
+            # Check if column contains lists or arrays
+            if df_uploaded[col].dtype == 'object':
+                first_val = df_uploaded[col].dropna().iloc[0] if len(df_uploaded[col].dropna()) > 0 else None
+                if isinstance(first_val, (list, tuple)):
+                    # Flatten lists/tuples - take first element
+                    df_uploaded[col] = df_uploaded[col].apply(lambda x: x[0] if isinstance(x, (list, tuple)) and len(x) > 0 else x)
         info_msg = f"Loaded rows: {len(df_uploaded)}\n"
         info_msg += f"Columns in uploaded dataset: {len(df_uploaded.columns)}\n\n"
         # Prepare X with correct columns
         info_msg += f"Creating DataFrame with {len(expected_features)} columns...\n"
+        # Create empty DataFrame with correct columns filled with zeros
+        X = pd.DataFrame(0.0, index=df_mapped.index, columns=expected_features)
         # Fill columns that exist in df_mapped
         for col in expected_features:
             if col in df_mapped.columns:
+                try:
+                    # Convert to numeric, handling any data type
+                    col_data = pd.to_numeric(df_mapped[col], errors='coerce')
+                    # Ensure we have a 1D Series, flatten if needed
+                    if hasattr(col_data, 'values'):
+                        col_values = col_data.values
+                        if len(col_values.shape) > 1:
+                            info_msg += f"Warning: Column '{col}' has shape {col_values.shape}, flattening...\n"
+                            col_values = col_values.flatten()[:len(X)]  # Take only first N values
+                        X[col] = col_values
+                    else:
+                        X[col] = col_data
+                except Exception as e:
+                    info_msg += f"Warning: Could not convert column '{col}': {str(e)}\n"
+                    X[col] = 0.0
+        # Ensure all columns are numeric
         X = X.apply(pd.to_numeric, errors='coerce')
         # Calculate statistics
         info_msg += f"DEBUG: X.shape = {X.shape}, expected: ({len(df_mapped)}, {len(expected_features)})\n"
+        # Fill NaN with column means, then with 0 for any remaining NaN
+        X = X.fillna(X.mean())
+        X = X.fillna(0)
+        # Ensure no infinite values
+        X = X.replace([float('inf'), float('-inf')], 0)
         info_msg += f"DEBUG: After fillna X.shape = {X.shape}\n"

mapping.py CHANGED Viewed

@@ -313,6 +313,34 @@ Mapping:"""
         if valid_mapping:
             df_mapped = df_mapped.rename(columns=valid_mapping)
         return df_mapped
@@ -330,9 +358,30 @@ Mapping:"""
         # Копируем датафрейм чтобы не изменять оригинал
         df_work = uploaded_df.copy()
         # Конвертируем координаты в градусы если они в текстовом формате
         coord_columns = [col for col in df_work.columns if any(
-            keyword in col.lower() for keyword in ['ra', 'dec', 'coord', 'right_ascension', 'declination']
         )]
         for col in coord_columns:
@@ -362,7 +411,7 @@ Mapping:"""
         # Check which target columns are missing
         missing_cols = set(target_columns) - set(mapped_df.columns)
         if missing_cols:
-            info_msg += f"\nWarning: Missing {len(missing_cols)} target columns (will be filled with NaN)\n"
         return mapped_df, mapping, info_msg

         if valid_mapping:
             df_mapped = df_mapped.rename(columns=valid_mapping)
+        # Ensure all columns are properly flattened and converted to numeric where possible
+        for col in df_mapped.columns:
+            try:
+                # Get the column as a Series
+                col_data = df_mapped[col]
+                # Check if it's actually a Series (not a DataFrame)
+                if not isinstance(col_data, pd.Series):
+                    continue
+                # Check if column has object dtype or might contain complex data
+                if col_data.dtype == 'object':
+                    try:
+                        # Try to convert to numeric
+                        df_mapped[col] = pd.to_numeric(col_data, errors='coerce')
+                    except:
+                        pass
+                # Ensure column is 1D
+                if hasattr(col_data, 'values'):
+                    col_values = col_data.values
+                    if len(col_values.shape) > 1:
+                        # Flatten multi-dimensional arrays
+                        df_mapped[col] = col_values.flatten()[:len(df_mapped)]
+            except Exception as e:
+                # Skip problematic columns
+                continue
         return df_mapped
         # Копируем датафрейм чтобы не изменять оригинал
         df_work = uploaded_df.copy()
+        # Clean up column names - strip leading/trailing whitespace
+        df_work.columns = df_work.columns.str.strip()
+        # Handle any multi-dimensional columns before mapping
+        for col in df_work.columns:
+            if df_work[col].dtype == 'object':
+                # Check if column contains complex structures
+                first_val = df_work[col].dropna().iloc[0] if len(df_work[col].dropna()) > 0 else None
+                if isinstance(first_val, (list, tuple)):
+                    # Flatten lists/tuples - take first element
+                    df_work[col] = df_work[col].apply(
+                        lambda x: x[0] if isinstance(x, (list, tuple)) and len(x) > 0 else (x if not isinstance(x, (list, tuple)) else None)
+                    )
+                elif isinstance(first_val, str):
+                    # Try to convert string representations of numbers
+                    try:
+                        df_work[col] = pd.to_numeric(df_work[col], errors='ignore')
+                    except:
+                        pass
         # Конвертируем координаты в градусы если они в текстовом формате
         coord_columns = [col for col in df_work.columns if any(
+            keyword in col.lower() for keyword in ['ra', 'dec', 'coord', 'right_ascension', 'declination', 'rastr', 'decstr']
         )]
         for col in coord_columns:
         # Check which target columns are missing
         missing_cols = set(target_columns) - set(mapped_df.columns)
         if missing_cols:
+            info_msg += f"\nWarning: Missing {len(missing_cols)} target columns (will be filled with zeros)\n"
         return mapped_df, mapping, info_msg