WITH monthly_engagement AS (
SELECT
c.id, c.name,
YEAR_MONTH(o.order_date) as month,
COUNT(*) as orders_in_month,
SUM(o.amount) as spending
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
AND o.order_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH)
GROUP BY c.id, month
)
SELECT DISTINCT c.id, c.name
FROM customers c
WHERE c.id IN (
SELECT id FROM monthly_engagement
WHERE orders_in_month < 2 AND spending < 100
GROUP BY id HAVING COUNT(*) >= 3
);
WITH customer_purchases AS (
SELECT o.customer_id, p.category
FROM orders o
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
GROUP BY o.customer_id, p.category
)
SELECT DISTINCT p.id, p.name, p.category
FROM products p
LEFT JOIN customer_purchases cp ON cp.customer_id = ?
AND cp.category = p.category
WHERE cp.customer_id IS NULL
LIMIT 10;
WITH order_history AS (
SELECT
c.id, c.created_at as customer_since,
COUNT(DISTINCT YEAR_MONTH(o.order_date)) as purchase_months,
COUNT(DISTINCT o.id) as total_orders,
AVG(o.amount) as avg_order_value,
MAX(o.order_date) as last_order_date,
DATEDIFF(CURDATE(), MAX(o.order_date)) as days_since_last,
SUM(o.amount) as lifetime_value
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id
)
SELECT
id, lifetime_value, total_orders,
CASE
WHEN days_since_last <= 30 THEN 'Active'
WHEN days_since_last <= 90 THEN 'At Risk'
ELSE 'Inactive'
END as status
FROM order_history;
WITH product_pairs AS (
SELECT
oi1.product_id as product1,
oi2.product_id as product2,
COUNT(DISTINCT oi1.order_id) as co_purchase_count
FROM order_items oi1
JOIN order_items oi2 ON oi1.order_id = oi2.order_id
AND oi1.product_id < oi2.product_id
GROUP BY oi1.product_id, oi2.product_id
HAVING co_purchase_count > 5
)
SELECT
p1.name as product,
p2.name as cross_sell,
co_purchase_count
FROM product_pairs pp
JOIN products p1 ON pp.product1 = p1.id
JOIN products p2 ON pp.product2 = p2.id
ORDER BY co_purchase_count DESC;
WITH customer_sequence AS (
SELECT
o.customer_id,
oi.product_id,
o.order_date,
LAG(oi.product_id) OVER (PARTITION BY o.customer_id ORDER BY o.order_date) as prev_product,
LEAD(oi.product_id) OVER (PARTITION BY o.customer_id ORDER BY o.order_date) as next_product
FROM orders o
JOIN order_items oi ON o.id = oi.order_id
)
SELECT
p1.name as first_product,
p2.name as second_product,
COUNT(*) as transition_count
FROM customer_sequence cs
LEFT JOIN products p1 ON cs.product_id = p1.id
LEFT JOIN products p2 ON cs.next_product = p2.id
WHERE cs.next_product IS NOT NULL
GROUP BY cs.product_id, cs.next_product
ORDER BY transition_count DESC;
WITH rfm_data AS (
SELECT
c.id, c.name,
DATEDIFF(CURDATE(), MAX(o.order_date)) as recency,
COUNT(o.id) as frequency,
SUM(o.amount) as monetary
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id
),
rfm_quartiles AS (
SELECT
id, name, recency, frequency, monetary,
NTILE(4) OVER (ORDER BY recency DESC) as r_score,
NTILE(4) OVER (ORDER BY frequency DESC) as f_score,
NTILE(4) OVER (ORDER BY monetary DESC) as m_score
FROM rfm_data
)
SELECT
id, name, rfm_code,
CASE
WHEN r_score = 4 AND f_score = 4 AND m_score = 4 THEN 'Champions'
WHEN r_score >= 3 AND f_score >= 3 THEN 'Loyal'
ELSE 'Others'
END as segment
FROM (
SELECT *, CONCAT(r_score, f_score, m_score) as rfm_code
FROM rfm_quartiles
) t;
SELECT
o.id, o.customer_id, o.amount,
(SELECT AVG(amount) FROM orders) as mean_amount,
(SELECT STDDEV(amount) FROM orders) as std_amount,
CASE
WHEN ABS(o.amount - (SELECT AVG(amount) FROM orders)) > 3 * (SELECT STDDEV(amount) FROM orders)
THEN 'Extreme Outlier'
WHEN ABS(o.amount - (SELECT AVG(amount) FROM orders)) > 2 * (SELECT STDDEV(amount) FROM orders)
THEN 'Outlier'
ELSE 'Normal'
END as anomaly_type
FROM orders o
WHERE ABS(o.amount - (SELECT AVG(amount) FROM orders)) > 2 * (SELECT STDDEV(amount) FROM orders);
SELECT
p.id, p.name, p.category, p.price,
PERCENT_RANK() OVER (PARTITION BY p.category ORDER BY p.price) as price_percentile,
NTILE(10) OVER (PARTITION BY p.category ORDER BY p.price) as price_decile,
AVG(p.price) OVER (PARTITION BY p.category) as category_avg,
CASE
WHEN p.price < AVG(p.price) OVER (PARTITION BY p.category) THEN 'Budget'
WHEN p.price = AVG(p.price) OVER (PARTITION BY p.category) THEN 'Mid-range'
ELSE 'Premium'
END as price_segment
FROM products p
ORDER BY p.category, p.price;
SELECT
QUARTER(order_date) as quarter,
YEAR(order_date) as year,
SUM(amount) as quarterly_sales,
AVG(SUM(amount)) OVER (PARTITION BY QUARTER(order_date)) as avg_quarterly,
ROUND(100.0 * (SUM(amount) - AVG(SUM(amount)) OVER (PARTITION BY QUARTER(order_date))) /
AVG(SUM(amount)) OVER (PARTITION BY QUARTER(order_date)), 2) as variance_pct
FROM orders
GROUP BY YEAR(order_date), QUARTER(order_date)
ORDER BY YEAR(order_date), QUARTER(order_date);
WITH monthly_sales AS (
SELECT
p.category,
DATE_FORMAT(o.order_date, '%Y-%m') as month,
SUM(oi.quantity) as units_sold,
SUM(oi.price * oi.quantity) as revenue
FROM order_items oi
JOIN orders o ON oi.order_id = o.id
JOIN products p ON oi.product_id = p.id
GROUP BY p.category, month
)
SELECT
category,
AVG(units_sold) as avg_monthly_sales,
SUM(units_sold) as total_units,
COUNT(DISTINCT month) as months_tracked,
ROUND(SUM(units_sold) / COUNT(DISTINCT month), 2) as turnover_rate
FROM monthly_sales
GROUP BY category;
WITH cohort_data AS (
SELECT
c.id,
DATE_FORMAT(MIN(o.order_date), '%Y-%m') as cohort_month,
DATE_FORMAT(o.order_date, '%Y-%m') as active_month
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id, active_month
)
SELECT
cohort_month,
COUNT(DISTINCT CASE WHEN active_month = cohort_month THEN id END) as month_0,
COUNT(DISTINCT CASE WHEN DATE_ADD(DATE_FORMAT(STR_TO_DATE(CONCAT(cohort_month, '-01'), '%Y-%m-%d'), '%Y-%m'), INTERVAL 1 MONTH) = active_month THEN id END) as month_1,
COUNT(DISTINCT CASE WHEN DATE_ADD(DATE_FORMAT(STR_TO_DATE(CONCAT(cohort_month, '-01'), '%Y-%m-%d'), '%Y-%m'), INTERVAL 2 MONTH) = active_month THEN id END) as month_2
FROM cohort_data
GROUP BY cohort_month
ORDER BY cohort_month;
SELECT
customer_id,
MIN(amount) as min_order,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY amount) as q1,
PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY amount) as median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY amount) as q3,
MAX(amount) as max_order,
AVG(amount) as avg_order
FROM orders
GROUP BY customer_id
HAVING COUNT(*) >= 5
ORDER BY avg_order DESC;
SELECT
p.id, p.name, p.category,
SUM(oi.quantity) as units_sold,
SUM(oi.price * oi.quantity) as revenue,
(SELECT cost FROM products WHERE id = p.id) as unit_cost,
SUM(oi.quantity) * (SELECT cost FROM products WHERE id = p.id) as total_cost,
SUM(oi.price * oi.quantity) - SUM(oi.quantity) * (SELECT cost FROM products WHERE id = p.id) as profit,
ROUND(100.0 * (SUM(oi.price * oi.quantity) - SUM(oi.quantity) * (SELECT cost FROM products WHERE id = p.id)) /
SUM(oi.price * oi.quantity), 2) as margin_pct
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id
ORDER BY profit DESC;
WITH order_gaps AS (
SELECT
c.id, c.name,
o.order_date,
LAG(o.order_date) OVER (PARTITION BY c.id ORDER BY o.order_date) as prev_order_date,
DATEDIFF(o.order_date, LAG(o.order_date) OVER (PARTITION BY c.id ORDER BY o.order_date)) as days_since_last
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
)
SELECT DISTINCT id, name
FROM order_gaps
WHERE days_since_last > 60
ORDER BY id;
WITH order_intervals AS (
SELECT
c.id, c.name,
MAX(o.order_date) as last_order,
AVG(DATEDIFF(o.order_date, LAG(o.order_date) OVER (PARTITION BY c.id ORDER BY o.order_date))) as avg_days_between_orders
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id
)
SELECT
id, name, last_order,
ROUND(avg_days_between_orders, 0) as avg_days,
DATE_ADD(last_order, INTERVAL ROUND(avg_days_between_orders, 0) DAY) as predicted_next_order
FROM order_intervals
WHERE last_order IS NOT NULL
ORDER BY predicted_next_order;
WITH customer_avg AS (
SELECT
c.id, c.name,
AVG(o.amount) as avg_order_value,
COUNT(o.id) as order_count
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
GROUP BY c.id
)
SELECT
id, name, order_count, avg_order_value,
NTILE(4) OVER (ORDER BY avg_order_value) as aov_quartile,
CASE
WHEN NTILE(4) OVER (ORDER BY avg_order_value) = 1 THEN 'Budget'
WHEN NTILE(4) OVER (ORDER BY avg_order_value) = 2 THEN 'Standard'
WHEN NTILE(4) OVER (ORDER BY avg_order_value) = 3 THEN 'Premium'
ELSE 'Elite'
END as segment
FROM customer_avg
WHERE order_count > 0;
WITH order_products AS (
SELECT
oi.order_id,
GROUP_CONCAT(p.name ORDER BY p.name) as product_bundle
FROM order_items oi
JOIN products p ON oi.product_id = p.id
GROUP BY oi.order_id
)
SELECT
product_bundle,
COUNT(*) as bundle_frequency,
RANK() OVER (ORDER BY COUNT(*) DESC) as bundle_rank
FROM order_products
GROUP BY product_bundle
HAVING COUNT(*) > 3
LIMIT 20;
SELECT
p.category,
SUM(oi.price * oi.quantity) as revenue,
SUM(oi.quantity * COALESCE((SELECT cost FROM products WHERE id = oi.product_id), 0)) as cost,
SUM(oi.price * oi.quantity) - SUM(oi.quantity * COALESCE((SELECT cost FROM products WHERE id = oi.product_id), 0)) as profit,
ROUND(100.0 * (SUM(oi.price * oi.quantity) - SUM(oi.quantity * COALESCE((SELECT cost FROM products WHERE id = oi.product_id), 0))) /
SUM(oi.price * oi.quantity), 2) as profit_margin_pct,
ROUND(100.0 * SUM(oi.price * oi.quantity) / (SELECT SUM(price * quantity) FROM order_items), 2) as revenue_contribution
FROM order_items oi
JOIN products p ON oi.product_id = p.id
GROUP BY p.category
ORDER BY profit DESC;
WITH first_30_days AS (
SELECT
c.id, c.name, c.created_at,
SUM(o.amount) as spend_in_first_30,
COUNT(o.id) as orders_in_first_30,
MAX(o.order_date) as last_order_date
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
AND o.order_date <= DATE_ADD(c.created_at, INTERVAL 30 DAY)
WHERE c.created_at >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
GROUP BY c.id
)
SELECT
id, name, created_at,
spend_in_first_30,
orders_in_first_30,
ROUND(spend_in_first_30 / orders_in_first_30, 2) as avg_order_value
FROM first_30_days
WHERE spend_in_first_30 > (SELECT AVG(amount) FROM orders) * 2
ORDER BY spend_in_first_30 DESC;
WITH test_groups AS (
SELECT
o.id,
CASE WHEN MOD(c.id, 2) = 0 THEN 'Control' ELSE 'Test' END as group_name,
o.amount,
o.order_date
FROM orders o
JOIN customers c ON o.customer_id = c.id
WHERE o.order_date BETWEEN '2024-01-01' AND '2024-01-31'
)
SELECT
group_name,
COUNT(*) as order_count,
AVG(amount) as avg_order_value,
STDDEV(amount) as std_dev,
SQRT(COUNT(*)) as sample_size,
ROUND(AVG(amount), 2) as mean_value
FROM test_groups
GROUP BY group_name;