Spaces:

CristopherWVSU
/

CustomerClustering

Sleeping

App Files Files Community

CustomerClustering / app.py

CristopherWVSU

Added more Models

f5b702a 10 months ago

raw

history blame contribute delete

4.32 kB

	import streamlit as st
	import joblib
	import numpy as np
	from sklearn.neighbors import NearestCentroid

	# Page layout: one tab for the interactive predictor, one for the
	# static evaluation report.
	app, model_eval = st.tabs(["Application", "Model Evaluation"])

	# Registry of selectable clustering models: display name -> joblib file.
	# The key order is the order in which the options are offered to the user.
	models = {
	    "K-Means": "kmeans_model.pkl",
	    "Gaussian Mixture": "gaussianMixture_model.pkl",
	    "Hierarchical": "hierarchical_model.pkl",
	}

	# Transformer applied to the raw user input before any prediction.
	scaler = joblib.load("scaler.pkl")


	with app:
	    # Which clustering model to use; the widget lives in the sidebar.
	    selected_model = st.sidebar.selectbox("Select Clustering Model", list(models))

	    # Load the chosen model. joblib accepts a path directly, which matches
	    # how the scaler is loaded. The loaded object is only queried in the
	    # K-Means / Gaussian Mixture branch below.
	    model = joblib.load(models[selected_model])

	    # Human-readable segment name for each cluster id, per model. The ids
	    # are model-specific, so the same segment may carry a different number
	    # under a different model.
	    cluster_labels = {
	        "K-Means": {
	            0: "Balanced Consumer",
	            1: "Premium Customer",
	            2: "Impulsive Buyer",
	            3: "Cautious Buyer",
	            4: "Budget-Conscious Customer",
	        },
	        "Hierarchical": {
	            2: "Balanced Consumer",
	            1: "Premium Customer",
	            3: "Impulsive Buyer",
	            0: "Cautious Buyer",
	            4: "Budget-Conscious Customer",
	        },
	        "Gaussian Mixture": {
	            0: "Balanced Consumer",
	            1: "Premium Customer",
	            2: "Impulsive Buyer",
	            3: "Cautious Buyer",
	            4: "Budget-Conscious Customer",
	        },
	    }

	    # User input
	    st.title("Mall Customer Segmentation")
	    # NOTE(review): the label says "$" while the clustered data column is
	    # named "Annual Income (k$)" - confirm which unit the scaler expects.
	    income = st.number_input("Annual Income ($)", min_value=0, step=1)
	    spending_score = st.slider("Spending Score (1-100)", min_value=1, max_value=100)

	    if st.button("Predict"):
	        # Single-row feature matrix: [income, spending score].
	        scaled_input = scaler.transform([[income, spending_score]])

	        if selected_model in ["K-Means", "Gaussian Mixture"]:
	            cluster = model.predict(scaled_input)[0]

	        elif selected_model == "Hierarchical":
	            # This branch does not call predict() on the loaded model.
	            # Instead the new point is assigned to the nearest centroid of
	            # the precomputed cluster assignments.
	            df_clustered = joblib.load("clustered_data.pkl")  # file must exist

	            # One centroid (feature mean) per hierarchical cluster id.
	            centroids = df_clustered.groupby("Cluster_Hierarchical")[
	                ["Annual Income (k$)", "Spending Score (1-100)"]
	            ].mean()

	            # NOTE(review): the query point is scaler-transformed, while the
	            # centroids use the columns as stored in clustered_data.pkl. If
	            # that file holds unscaled values, the two are on different
	            # scales and the nearest-centroid result is unreliable - confirm.
	            #
	            # Fit on plain arrays: fitting on a DataFrame and predicting on
	            # a bare array makes scikit-learn warn about missing feature
	            # names.
	            clf = NearestCentroid()
	            clf.fit(centroids.to_numpy(), centroids.index.to_numpy())
	            cluster = clf.predict(scaled_input)[0]

	        # Cast the NumPy scalar to a plain int before the label lookup.
	        cluster = int(cluster)

	        # Display prediction
	        st.subheader("Customer Classification:")
	        st.success(f"You are a: {cluster_labels[selected_model][cluster]}")
	with model_eval:
	    # Static report: dataset credit, one segmentation plot per model, the
	    # per-model metric images, and a closing comparison.
	    st.header("📊 Model Evaluation")
	    st.write("The Customer Segmentation models were trained to classify customer classes based on spending power and income. The dataset was sourced from Kaggle.")
	    st.write("Dataset by Vijay Choudhary. [Link to dataset](https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data)")

	    # Segmentation plots: (section header, image file), in display order.
	    # Header spelling fixed: "Huerarchical" -> "Hierarchical".
	    for plot_header, plot_image in (
	        ("K Means Clustering ", "KMeansClusteringSegmentation.png"),
	        ("Hierarchical Clustering ", "HierarchicalClusteringSegmentation.png"),
	        ("Gaussian Mixture ", "GaussianMixtureSegmentation.png"),
	    ):
	        st.header(plot_header)
	        st.image(plot_image)

	    # EVALUATION METRICS
	    # Metric name corrected to "Davies-Bouldin".
	    st.subheader("📌 Evaluation Metrics")
	    st.write("Silhouette and Davies-Bouldin Scores")

	    # (name used in the header, name used in the sentence, image file)
	    for header_name, model_name, metrics_image in (
	        ("K Means Clustering", "K Means Clustering", "kmeans_clustering_metrics.png"),
	        ("Hierarchical Clustering", "Hierarchical Clustering", "hierarchical_clustering_metrics.png"),
	        ("Gaussian Mixture", "Gaussian Mixture Clustering", "gmm_evaluation_metrics.png"),
	    ):
	        st.header(f"{header_name} Evaluation Metrics")
	        st.write(f"The image below represents the Silhouette and Davies-Bouldin Scores of the {model_name} model.")
	        st.image(metrics_image)

	    st.header("Comparison")
	    st.write("Based on the evaluation metrics, we can assume that out of the three clustering algorithms chosen, K Means Clustering performs the best using this dataset")