Back to Projects
Analytics
Streaming
CLV
Dashboards
Real-time Customer Analytics Dashboard
Interactive analytics platform processing 10M+ events daily with predictive customer lifetime value modeling.
Overview
Built a comprehensive customer analytics platform that processes streaming data to provide real-time insights into customer behavior, churn prediction, and lifetime value calculation. Features interactive dashboards, automated reporting, and predictive models for customer segmentation and retention strategies.
Code Highlight
Customer Lifetime Value Prediction
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split


class CustomerAnalytics:
    """Customer lifetime value (CLV) prediction and RFM-style segmentation.

    The class wraps a ``RandomForestRegressor`` (``clv_model``) and offers
    three steps: feature derivation, CLV prediction with a per-tree spread,
    and rule-based segmentation from recency/frequency scores.

    No fitting code lives in this class: ``clv_model`` must be fitted
    elsewhere on the columns in ``feature_columns`` before
    ``predict_customer_lifetime_value`` is called.
    """

    def __init__(self) -> None:
        self.clv_model = RandomForestRegressor(n_estimators=100, random_state=42)
        # Placeholder: nothing in this class builds or uses a churn model yet.
        self.churn_model = None
        # Columns handed to ``clv_model``, in this order.
        # NOTE(review): 'subscription_tier' is passed to the regressor as-is,
        # so it presumably has to be numerically encoded upstream - confirm.
        self.feature_columns = [
            'total_purchases', 'avg_order_value', 'days_since_last_purchase',
            'purchase_frequency', 'total_sessions', 'support_tickets',
            'referral_count', 'subscription_tier',
        ]

    @staticmethod
    def _safe_ratio(numerator: pd.Series, denominator: pd.Series) -> pd.Series:
        """Return ``numerator / denominator`` with 0.0 where the denominator is 0.

        A plain division would produce ``inf`` (or ``NaN`` for 0/0) for
        customers with no purchases or a zero-day age, and those values make
        the regressor reject the whole batch. Missing denominators still
        propagate as ``NaN`` so genuinely absent data is not disguised.
        """
        zero_denominator = denominator == 0
        ratio = numerator / denominator.mask(zero_denominator)
        return ratio.mask(zero_denominator, 0.0)

    def calculate_clv_features(self, customer_data: pd.DataFrame) -> pd.DataFrame:
        """Derive the model's feature columns from raw customer data.

        Args:
            customer_data: One row per customer. Must contain
                ``last_purchase_date`` (datetime-like), ``total_purchases``,
                ``total_revenue``, ``customer_age_days``, ``total_sessions``
                and every entry of ``feature_columns`` that is not derived
                here. The input is not modified.

        Returns:
            A new DataFrame restricted to ``feature_columns``, in that order.
        """
        features = customer_data.copy()

        # Recency: whole days elapsed since the last purchase.
        # NOTE(review): ``datetime.now()`` is naive local time, so
        # ``last_purchase_date`` is presumably naive as well - confirm.
        features['days_since_last_purchase'] = (
            datetime.now() - features['last_purchase_date']
        ).dt.days

        # Frequency: purchases per day of customer age.
        features['purchase_frequency'] = self._safe_ratio(
            features['total_purchases'], features['customer_age_days']
        )

        # Monetary: revenue per purchase.
        features['avg_order_value'] = self._safe_ratio(
            features['total_revenue'], features['total_purchases']
        )

        # Engagement: sessions per day of customer age.
        # NOTE(review): 'sessions_per_day' is not listed in
        # ``feature_columns``, so it is dropped by the selection below.
        features['sessions_per_day'] = self._safe_ratio(
            features['total_sessions'], features['customer_age_days']
        )

        return features[self.feature_columns]

    def predict_customer_lifetime_value(self, customer_data: pd.DataFrame) -> pd.DataFrame:
        """Predict CLV per customer together with a per-tree spread.

        The horizon of the prediction is whatever target ``clv_model`` was
        trained on (the original description said "next 12 months" - TODO
        confirm against the training code).

        Args:
            customer_data: Raw customer rows as accepted by
                ``calculate_clv_features``. The input is not modified.

        Returns:
            A copy of ``customer_data`` with four added columns:
            ``predicted_clv`` (forest prediction), ``clv_lower_bound`` and
            ``clv_upper_bound`` (10th and 90th percentile of the individual
            trees' predictions - an ensemble spread, not a calibrated
            confidence interval) and ``prediction_date``.
        """
        features = self.calculate_clv_features(customer_data)
        predicted_clv = self.clv_model.predict(features)

        # The trees inside a forest are fitted on plain arrays, so hand them
        # an array too; passing the DataFrame makes scikit-learn warn about
        # unexpected feature names for every tree.
        feature_matrix = features.to_numpy()
        per_tree_predictions = np.array(
            [tree.predict(feature_matrix) for tree in self.clv_model.estimators_]
        )
        clv_lower = np.percentile(per_tree_predictions, 10, axis=0)
        clv_upper = np.percentile(per_tree_predictions, 90, axis=0)

        results = customer_data.copy()
        results['predicted_clv'] = predicted_clv
        results['clv_lower_bound'] = clv_lower
        results['clv_upper_bound'] = clv_upper
        results['prediction_date'] = datetime.now()
        return results

    @staticmethod
    def _bin_score(values: pd.Series, labels: list) -> pd.Series:
        """Score ``values`` into five equal-width bins labelled by ``labels``.

        ``pd.cut`` raises on an empty input, so an empty Series is answered
        with an empty ordered categorical carrying the same categories.
        """
        if values.empty:
            empty_scores = pd.Categorical([], categories=labels, ordered=True)
            return pd.Series(empty_scores, index=values.index)
        return pd.cut(values, bins=5, labels=labels)

    def generate_customer_segments(self, customers: pd.DataFrame) -> pd.DataFrame:
        """Segment customers based on RFM analysis.

        Args:
            customers: Must contain ``days_since_last_purchase``,
                ``purchase_frequency`` and ``avg_order_value``. The input is
                not modified; scoring happens on a copy, as in the other
                methods of this class.

        Returns:
            A copy of ``customers`` with ``recency_score``,
            ``frequency_score``, ``monetary_score`` (1-5, equal-width bins;
            recency is reversed so that recent buyers score high) and a
            ``segment`` label.
        """
        segmented = customers.copy()

        # Calculate RFM scores.
        segmented['recency_score'] = self._bin_score(
            segmented['days_since_last_purchase'], [5, 4, 3, 2, 1]
        )
        segmented['frequency_score'] = self._bin_score(
            segmented['purchase_frequency'], [1, 2, 3, 4, 5]
        )
        # The monetary score is reported but does not influence the segment.
        segmented['monetary_score'] = self._bin_score(
            segmented['avg_order_value'], [1, 2, 3, 4, 5]
        )

        # Numeric views of the categorical scores; rows without a score
        # become NaN, fail every comparison and fall through to the default.
        recency = segmented['recency_score'].astype(float).to_numpy()
        frequency = segmented['frequency_score'].astype(float).to_numpy()

        # Rules are evaluated in order and the first match wins.
        segment_rules = [
            ((recency >= 4) & (frequency >= 4), 'Champions'),
            ((recency >= 3) & (frequency >= 3), 'Loyal Customers'),
            ((recency >= 3) & (frequency <= 2), 'Potential Loyalists'),
            ((recency <= 2) & (frequency >= 3), 'At Risk'),
        ]
        segmented['segment'] = np.select(
            [condition for condition, _ in segment_rules],
            [segment for _, segment in segment_rules],
            default='New Customers',
        )
        return segmented
Key Results
10M+ events processed daily
Real-time customer segmentation
85% churn prediction accuracy
25% improvement in retention rate
Technologies Used
Python
Apache Kafka
ClickHouse
Grafana
Plotly Dash
Scikit-learn
Docker
Project Category
data analytics