Spaces:
Sleeping
Sleeping
import joblib
import numpy as np
import streamlit as st
from sklearn.neighbors import NearestCentroid
# Page layout: one tab for the interactive predictor, one for the static
# evaluation report.
_tab_titles = ["Application", "Model Evaluation"]
app, model_eval = st.tabs(_tab_titles)

# Registry of selectable clustering models: display name -> pickle file path.
# Only the paths are recorded here; no estimator is deserialized at this point.
models = {
    "K-Means": "kmeans_model.pkl",
    "Gaussian Mixture": "gaussianMixture_model.pkl",
    "Hierarchical": "hierarchical_model.pkl",
}

# Persisted scaler object, applied to user input before clustering.
_scaler_path = "scaler.pkl"
scaler = joblib.load(_scaler_path)
with app:
    # Sidebar control: which persisted clustering model to use.
    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models.keys()))

    # joblib.load accepts a path directly, so no explicit open() is needed.
    model = joblib.load(models[selected_model])

    # Human-readable segment name for each cluster id, per model. The ids are
    # arbitrary per algorithm, which is why the Hierarchical mapping differs.
    cluster_labels = {
        "K-Means": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Hierarchical": {
            0: "Cautious Buyer",
            1: "Premium Customer",
            2: "Balanced Consumer",
            3: "Impulsive Buyer",
            4: "Budget-Conscious Customer",
        },
        "Gaussian Mixture": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
    }

    # User input.
    st.title("Mall Customer Segmentation")
    # NOTE(review): the label asks for dollars, but the clustered data column
    # used below is named "Annual Income (k$)". If the scaler was fitted on
    # thousands of dollars, this input is off by a factor of 1000 - confirm.
    income = st.number_input("Annual Income ($)", min_value=0, step=1)
    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)

    if st.button("Predict"):
        scaled_input = scaler.transform([[income, spending_score]])

        if selected_model in ("K-Means", "Gaussian Mixture"):
            cluster = model.predict(scaled_input)[0]
        elif selected_model == "Hierarchical":
            # The hierarchical model is not asked to predict; instead the new
            # point is assigned to the nearest centroid of the precomputed
            # hierarchical clusters.
            df_clustered = joblib.load("clustered_data.pkl")  # Ensure this file exists
            feature_columns = ["Annual Income (k$)", "Spending Score (1-100)"]
            centroids = df_clustered.groupby("Cluster_Hierarchical")[feature_columns].mean()

            # NOTE(review): the centroids are computed from the columns as
            # stored in clustered_data.pkl, while scaled_input has been passed
            # through scaler.transform. If that file holds unscaled values the
            # two are in different units and the nearest-centroid result is
            # not meaningful - confirm, and if so scale the centroids first.
            clf = NearestCentroid()
            # Fit on plain arrays: fitting on a DataFrame and predicting on an
            # ndarray makes scikit-learn emit a feature-name mismatch warning.
            clf.fit(centroids.to_numpy(), centroids.index.to_numpy())
            cluster = clf.predict(scaled_input)[0]
        else:
            # Guard against a model being added to `models` without a
            # prediction path here; otherwise `cluster` would be undefined.
            st.error(f"No prediction routine is defined for model '{selected_model}'.")
            st.stop()

        # Display prediction.
        st.subheader("Customer Classification:")
        label = cluster_labels[selected_model].get(int(cluster))
        if label is None:
            # A cluster id with no label means the model and the label table
            # are out of sync; report it instead of crashing with a KeyError.
            st.error(f"Model '{selected_model}' returned an unlabelled cluster ({cluster}).")
        else:
            st.success(f"You are a: **{label}**")
with model_eval:
    # Static report: dataset attribution, one segmentation plot per algorithm,
    # then one metrics image per algorithm and a closing comparison.
    # NOTE(review): the leading "π" in the two headings below looks like a
    # mis-decoded emoji; the intended glyph is unknown - confirm and restore.
    st.header("π Model Evaluation")
    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")

    # (heading, image file) for each segmentation plot, in display order.
    segmentation_plots = (
        ("K Means Clustering ", "KMeansClusteringSegmentation.png"),
        ("Hierarchical Clustering ", "HierarchicalClusteringSegmentation.png"),
        ("Gaussian Mixture ", "GaussianMixtureSegmentation.png"),
    )
    for heading, image_file in segmentation_plots:
        st.header(heading)
        st.image(image_file)

    # EVALUATION METRICS
    st.subheader("π Evaluation Metrics")
    st.write("Silhouette and Davies-Bouldin Scores")

    # (heading prefix, model name used in the sentence, image file).
    # The heading and sentence wording differ for the Gaussian Mixture entry,
    # so both are listed explicitly.
    metric_reports = (
        ("K Means Clustering", "K Means Clustering", "kmeans_clustering_metrics.png"),
        ("Hierarchical Clustering", "Hierarchical Clustering", "hierarchical_clustering_metrics.png"),
        ("Gaussian Mixture", "Gaussian Mixture Clustering", "gmm_evaluation_metrics.png"),
    )
    for heading_prefix, model_name, image_file in metric_reports:
        st.header(f"{heading_prefix} Evaluation Metrics")
        st.write(f"The image below represents the **Silhouette and Davies-Bouldin Scores** of the {model_name} model.")
        st.image(image_file)

    st.header("Comparison")
    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")