# Source revision f5b702a ("Added more Models"), committed by CristopherWVSU.
import streamlit as st
import joblib
import numpy as np
from sklearn.neighbors import NearestCentroid
# --- Streamlit layout: two top-level tabs -----------------------------------
tab_titles = ["Application", "Model Evaluation"]
app, model_eval = st.tabs(tab_titles)

# Display name -> pickle file for every selectable clustering model.
# Insertion order is the order in which the names are offered to the user.
models = dict(
    [
        ("K-Means", "kmeans_model.pkl"),
        ("Gaussian Mixture", "gaussianMixture_model.pkl"),
        ("Hierarchical", "hierarchical_model.pkl"),
    ]
)

# Scaler object deserialized once per script run from "scaler.pkl".
scaler = joblib.load("scaler.pkl")
with app:
    # Sidebar model selection; the chosen name keys both `models` (pickle
    # path) and `cluster_labels` (human-readable segment names).
    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models))

    # Load the selected model from its pickle file.
    with open(models[selected_model], "rb") as file:
        model = joblib.load(file)

    # Cluster id -> segment name, per model. Each model numbers its clusters
    # independently, hence one mapping per model.
    cluster_labels = {
        "K-Means": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Hierarchical": {
            2: "Balanced Consumer",
            1: "Premium Customer",
            3: "Impulsive Buyer",
            0: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
        "Gaussian Mixture": {
            0: "Balanced Consumer",
            1: "Premium Customer",
            2: "Impulsive Buyer",
            3: "Cautious Buyer",
            4: "Budget-Conscious Customer",
        },
    }

    # User input
    st.title("Mall Customer Segmentation")
    # NOTE(review): the label says "$" while the clustered-data column used
    # below is named "Annual Income (k$)" -- confirm which unit the scaler
    # was fitted on.
    income = st.number_input("Annual Income ($)", min_value=0, step=1)
    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)

    if st.button("Predict"):
        # One row, two features: [income, spending score].
        scaled_input = scaler.transform([[income, spending_score]])

        if selected_model in ["K-Means", "Gaussian Mixture"]:
            cluster = model.predict(scaled_input)[0]
        elif selected_model == "Hierarchical":
            # The loaded hierarchical model is not asked to predict here
            # (presumably it cannot label unseen points -- confirm). Instead
            # the new point is assigned to the nearest centroid of the
            # precomputed hierarchical clusters.
            df_clustered = joblib.load("clustered_data.pkl")  # Ensure this file exists

            # Mean of both features for every hierarchical cluster.
            centroids = df_clustered.groupby("Cluster_Hierarchical")[
                ["Annual Income (k$)", "Spending Score (1-100)"]
            ].mean()

            # NOTE(review): the query point is scaler-transformed, whereas the
            # centroids are averaged from the columns as stored in
            # clustered_data.pkl. If those columns hold raw (unscaled) values
            # the distance comparison mixes units -- confirm, and scale the
            # centroids if so.
            clf = NearestCentroid()
            # Fit on plain arrays: fitting on a DataFrame and predicting on an
            # array makes scikit-learn warn about missing feature names.
            clf.fit(centroids.to_numpy(), centroids.index.to_numpy())
            cluster = clf.predict(scaled_input)[0]

        # Display prediction
        st.subheader("Customer Classification:")
        st.success(f"You are a: **{cluster_labels[selected_model][cluster]}**")
with model_eval:
    # Static evaluation page: explanatory text plus pre-rendered PNG figures.
    # Header emoji were mis-encoded in the original strings and the
    # Davies-Bouldin index was misspelled; both are corrected here.
    st.header("📊 Model Evaluation")
    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
    st.write("Dataset by **Vijay Choudhary**. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")

    # Segmentation plot for each algorithm, in display order.
    for title, image_path in (
        ("K Means Clustering ", "KMeansClusteringSegmentation.png"),
        ("Hierarchical Clustering ", "HierarchicalClusteringSegmentation.png"),
        ("Gaussian Mixture ", "GaussianMixtureSegmentation.png"),
    ):
        st.header(title)
        st.image(image_path)

    # EVALUATION METRICS
    st.subheader("📌 Evaluation Metrics")
    st.write("Silhouette and Davies-Bouldin Scores")

    # (header name, name used in the sentence, metrics image) per algorithm.
    for heading, model_name, image_path in (
        ("K Means Clustering", "K Means Clustering", "kmeans_clustering_metrics.png"),
        ("Hierarchical Clustering", "Hierarchical Clustering", "hierarchical_clustering_metrics.png"),
        ("Gaussian Mixture", "Gaussian Mixture Clustering", "gmm_evaluation_metrics.png"),
    ):
        st.header(f"{heading} Evaluation Metrics")
        st.write(
            "The image below represents the **Silhouette and Davies-Bouldin Scores** "
            f"of the {model_name} model."
        )
        st.image(image_path)

    st.header("Comparison")
    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")