"""Streamlit app for mall customer segmentation.

The "Application" tab assigns a customer (annual income + spending score)
to a segment using one of three pre-trained clustering models loaded from
disk. The "Model Evaluation" tab shows static plots and metric images for
each model.
"""

import streamlit as st
import joblib
import numpy as np
from sklearn.neighbors import NearestCentroid

# STREAMLIT TABS
app, model_eval = st.tabs(["Application", "Model Evaluation"])

# Display name of each clustering model -> file holding the pickled model.
models = {
    "K-Means": "kmeans_model.pkl",
    "Gaussian Mixture": "gaussianMixture_model.pkl",
    "Hierarchical": "hierarchical_model.pkl",
}

# Scaler applied to the user's input before it is handed to a model.
scaler = joblib.load("scaler.pkl")

with app:
    # Sidebar model selection
    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models))

    # Load the selected model. For "Hierarchical" the loaded object is not
    # used for prediction below (a nearest-centroid classifier is used instead).
    with open(models[selected_model], "rb") as file:
        model = joblib.load(file)

    # Human-readable segment name for each cluster id, per model. The ids
    # differ between models, so every model carries its own mapping.
    cluster_labels = {
        "K-Means": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Hierarchical": {
            2: "Balanced Consumer",
            1: "Premium Customer",
            3: "Impulsive Buyer",
            0: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Gaussian Mixture": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
    }

    # User input
    st.title("Mall Customer Segmentation")
    # The label uses k$ to match the "Annual Income (k$)" column read from the
    # clustered data below (assumes the scaler was fitted on that column too).
    income = st.number_input("Annual Income (k$)", min_value=0, step=1)
    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)

    if st.button("Predict"):
        scaled_input = scaler.transform([[income, spending_score]])

        if selected_model in ["K-Means", "Gaussian Mixture"]:
            cluster = model.predict(scaled_input)[0]
        elif selected_model == "Hierarchical":
            # Load the dataset with precomputed hierarchical cluster assignments.
            df_clustered = joblib.load("clustered_data.pkl")  # Ensure this file exists

            # Centroid (per-feature mean) of every hierarchical cluster.
            # NOTE(review): the input is scaled with `scaler`, while these
            # centroids are taken from the stored columns as-is. If
            # clustered_data.pkl holds raw (unscaled) values, the two live in
            # different feature spaces and the centroids should be passed
            # through `scaler.transform` first -- confirm how the file was built.
            centroids = df_clustered.groupby("Cluster_Hierarchical")[
                ["Annual Income (k$)", "Spending Score (1-100)"]
            ].mean()

            # Assign the input to the cluster with the nearest centroid. Fit on
            # a plain array so that predicting on the (array) input does not
            # trigger scikit-learn's feature-name mismatch warning.
            clf = NearestCentroid()
            clf.fit(centroids.to_numpy(), centroids.index)
            cluster = clf.predict(scaled_input)[0]

        # Display prediction
        st.subheader("Customer Classification:")
        st.success(f"You are a: **{cluster_labels[selected_model][cluster]}**")

with model_eval:
    st.header("📊 Model Evaluation")
    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")

    # Cluster visualisations, one image per model.
    st.header("K Means Clustering ")
    st.image("KMeansClusteringSegmentation.png")

    st.header("Hierarchical Clustering ")
    st.image("HierarchicalClusteringSegmentation.png")

    st.header("Gaussian Mixture ")
    st.image("GaussianMixtureSegmentation.png")

    # EVALUATION METRICS
    st.subheader("📌 Evaluation Metrics")
    st.write("Silhouette and Davies-Bouldin Scores")

    st.header("K Means Clustering Evaluation Metrics")
    st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the K Means Clustering model.")
    st.image("kmeans_clustering_metrics.png")

    st.header("Hierarchical Clustering Evaluation Metrics")
    st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the Hierarchical Clustering model.")
    st.image("hierarchical_clustering_metrics.png")

    st.header("Gaussian Mixture Evaluation Metrics")
    st.write("The image below represents the **Silhouette and Davies-Bouldin Scores** of the Gaussian Mixture Clustering model.")
    st.image("gmm_evaluation_metrics.png")

    st.header("Comparison")
    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")