MySQL

MySQL Interview Queries 2 — Advanced SQL

20 Questions

WITH monthly_engagement AS (
  SELECT 
    c.id, c.name,
    YEAR_MONTH(o.order_date) as month,
    COUNT(*) as orders_in_month,
    SUM(o.amount) as spending
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id 
    AND o.order_date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH)
  GROUP BY c.id, month
)
SELECT DISTINCT c.id, c.name
FROM customers c
WHERE c.id IN (
  SELECT id FROM monthly_engagement 
  WHERE orders_in_month < 2 AND spending < 100
  GROUP BY id HAVING COUNT(*) >= 3
);

WITH customer_purchases AS (
  SELECT o.customer_id, p.category
  FROM orders o
  JOIN order_items oi ON o.id = oi.order_id
  JOIN products p ON oi.product_id = p.id
  GROUP BY o.customer_id, p.category
)
SELECT DISTINCT p.id, p.name, p.category
FROM products p
LEFT JOIN customer_purchases cp ON cp.customer_id = ? 
  AND cp.category = p.category
WHERE cp.customer_id IS NULL
LIMIT 10;

WITH order_history AS (
  SELECT 
    c.id, c.created_at as customer_since,
    COUNT(DISTINCT YEAR_MONTH(o.order_date)) as purchase_months,
    COUNT(DISTINCT o.id) as total_orders,
    AVG(o.amount) as avg_order_value,
    MAX(o.order_date) as last_order_date,
    DATEDIFF(CURDATE(), MAX(o.order_date)) as days_since_last,
    SUM(o.amount) as lifetime_value
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id
  GROUP BY c.id
)
SELECT 
  id, lifetime_value, total_orders,
  CASE 
    WHEN days_since_last <= 30 THEN 'Active'
    WHEN days_since_last <= 90 THEN 'At Risk'
    ELSE 'Inactive'
  END as status
FROM order_history;

WITH product_pairs AS (
  SELECT 
    oi1.product_id as product1,
    oi2.product_id as product2,
    COUNT(DISTINCT oi1.order_id) as co_purchase_count
  FROM order_items oi1
  JOIN order_items oi2 ON oi1.order_id = oi2.order_id 
    AND oi1.product_id < oi2.product_id
  GROUP BY oi1.product_id, oi2.product_id
  HAVING co_purchase_count > 5
)
SELECT 
  p1.name as product, 
  p2.name as cross_sell, 
  co_purchase_count
FROM product_pairs pp
JOIN products p1 ON pp.product1 = p1.id
JOIN products p2 ON pp.product2 = p2.id
ORDER BY co_purchase_count DESC;

WITH customer_sequence AS (
  SELECT 
    o.customer_id,
    oi.product_id,
    o.order_date,
    LAG(oi.product_id) OVER (PARTITION BY o.customer_id ORDER BY o.order_date) as prev_product,
    LEAD(oi.product_id) OVER (PARTITION BY o.customer_id ORDER BY o.order_date) as next_product
  FROM orders o
  JOIN order_items oi ON o.id = oi.order_id
)
SELECT 
  p1.name as first_product,
  p2.name as second_product,
  COUNT(*) as transition_count
FROM customer_sequence cs
LEFT JOIN products p1 ON cs.product_id = p1.id
LEFT JOIN products p2 ON cs.next_product = p2.id
WHERE cs.next_product IS NOT NULL
GROUP BY cs.product_id, cs.next_product
ORDER BY transition_count DESC;

WITH rfm_data AS (
  SELECT 
    c.id, c.name,
    DATEDIFF(CURDATE(), MAX(o.order_date)) as recency,
    COUNT(o.id) as frequency,
    SUM(o.amount) as monetary
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id
  GROUP BY c.id
),
rfm_quartiles AS (
  SELECT 
    id, name, recency, frequency, monetary,
    NTILE(4) OVER (ORDER BY recency DESC) as r_score,
    NTILE(4) OVER (ORDER BY frequency DESC) as f_score,
    NTILE(4) OVER (ORDER BY monetary DESC) as m_score
  FROM rfm_data
)
SELECT 
  id, name, rfm_code,
  CASE 
    WHEN r_score = 4 AND f_score = 4 AND m_score = 4 THEN 'Champions'
    WHEN r_score >= 3 AND f_score >= 3 THEN 'Loyal'
    ELSE 'Others'
  END as segment
FROM (
  SELECT *, CONCAT(r_score, f_score, m_score) as rfm_code
  FROM rfm_quartiles
) t;

SELECT 
  o.id, o.customer_id, o.amount,
  (SELECT AVG(amount) FROM orders) as mean_amount,
  (SELECT STDDEV(amount) FROM orders) as std_amount,
  CASE 
    WHEN ABS(o.amount - (SELECT AVG(amount) FROM orders)) > 3 * (SELECT STDDEV(amount) FROM orders) 
      THEN 'Extreme Outlier'
    WHEN ABS(o.amount - (SELECT AVG(amount) FROM orders)) > 2 * (SELECT STDDEV(amount) FROM orders) 
      THEN 'Outlier'
    ELSE 'Normal'
  END as anomaly_type
FROM orders o
WHERE ABS(o.amount - (SELECT AVG(amount) FROM orders)) > 2 * (SELECT STDDEV(amount) FROM orders);

SELECT 
  p.id, p.name, p.category, p.price,
  PERCENT_RANK() OVER (PARTITION BY p.category ORDER BY p.price) as price_percentile,
  NTILE(10) OVER (PARTITION BY p.category ORDER BY p.price) as price_decile,
  AVG(p.price) OVER (PARTITION BY p.category) as category_avg,
  CASE 
    WHEN p.price < AVG(p.price) OVER (PARTITION BY p.category) THEN 'Budget'
    WHEN p.price = AVG(p.price) OVER (PARTITION BY p.category) THEN 'Mid-range'
    ELSE 'Premium'
  END as price_segment
FROM products p
ORDER BY p.category, p.price;

SELECT 
  QUARTER(order_date) as quarter,
  YEAR(order_date) as year,
  SUM(amount) as quarterly_sales,
  AVG(SUM(amount)) OVER (PARTITION BY QUARTER(order_date)) as avg_quarterly,
  ROUND(100.0 * (SUM(amount) - AVG(SUM(amount)) OVER (PARTITION BY QUARTER(order_date))) / 
    AVG(SUM(amount)) OVER (PARTITION BY QUARTER(order_date)), 2) as variance_pct
FROM orders
GROUP BY YEAR(order_date), QUARTER(order_date)
ORDER BY YEAR(order_date), QUARTER(order_date);

WITH monthly_sales AS (
  SELECT 
    p.category,
    DATE_FORMAT(o.order_date, '%Y-%m') as month,
    SUM(oi.quantity) as units_sold,
    SUM(oi.price * oi.quantity) as revenue
  FROM order_items oi
  JOIN orders o ON oi.order_id = o.id
  JOIN products p ON oi.product_id = p.id
  GROUP BY p.category, month
)
SELECT 
  category,
  AVG(units_sold) as avg_monthly_sales,
  SUM(units_sold) as total_units,
  COUNT(DISTINCT month) as months_tracked,
  ROUND(SUM(units_sold) / COUNT(DISTINCT month), 2) as turnover_rate
FROM monthly_sales
GROUP BY category;

WITH cohort_data AS (
  SELECT 
    c.id,
    DATE_FORMAT(MIN(o.order_date), '%Y-%m') as cohort_month,
    DATE_FORMAT(o.order_date, '%Y-%m') as active_month
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id
  GROUP BY c.id, active_month
)
SELECT 
  cohort_month,
  COUNT(DISTINCT CASE WHEN active_month = cohort_month THEN id END) as month_0,
  COUNT(DISTINCT CASE WHEN DATE_ADD(DATE_FORMAT(STR_TO_DATE(CONCAT(cohort_month, '-01'), '%Y-%m-%d'), '%Y-%m'), INTERVAL 1 MONTH) = active_month THEN id END) as month_1,
  COUNT(DISTINCT CASE WHEN DATE_ADD(DATE_FORMAT(STR_TO_DATE(CONCAT(cohort_month, '-01'), '%Y-%m-%d'), '%Y-%m'), INTERVAL 2 MONTH) = active_month THEN id END) as month_2
FROM cohort_data
GROUP BY cohort_month
ORDER BY cohort_month;

SELECT 
  customer_id,
  MIN(amount) as min_order,
  PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY amount) as q1,
  PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY amount) as median,
  PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY amount) as q3,
  MAX(amount) as max_order,
  AVG(amount) as avg_order
FROM orders
GROUP BY customer_id
HAVING COUNT(*) >= 5
ORDER BY avg_order DESC;

SELECT 
  p.id, p.name, p.category,
  SUM(oi.quantity) as units_sold,
  SUM(oi.price * oi.quantity) as revenue,
  (SELECT cost FROM products WHERE id = p.id) as unit_cost,
  SUM(oi.quantity) * (SELECT cost FROM products WHERE id = p.id) as total_cost,
  SUM(oi.price * oi.quantity) - SUM(oi.quantity) * (SELECT cost FROM products WHERE id = p.id) as profit,
  ROUND(100.0 * (SUM(oi.price * oi.quantity) - SUM(oi.quantity) * (SELECT cost FROM products WHERE id = p.id)) / 
    SUM(oi.price * oi.quantity), 2) as margin_pct
FROM products p
LEFT JOIN order_items oi ON p.id = oi.product_id
GROUP BY p.id
ORDER BY profit DESC;

WITH order_gaps AS (
  SELECT 
    c.id, c.name,
    o.order_date,
    LAG(o.order_date) OVER (PARTITION BY c.id ORDER BY o.order_date) as prev_order_date,
    DATEDIFF(o.order_date, LAG(o.order_date) OVER (PARTITION BY c.id ORDER BY o.order_date)) as days_since_last
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id
)
SELECT DISTINCT id, name
FROM order_gaps
WHERE days_since_last > 60
ORDER BY id;

WITH order_intervals AS (
  SELECT 
    c.id, c.name,
    MAX(o.order_date) as last_order,
    AVG(DATEDIFF(o.order_date, LAG(o.order_date) OVER (PARTITION BY c.id ORDER BY o.order_date))) as avg_days_between_orders
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id
  GROUP BY c.id
)
SELECT 
  id, name, last_order,
  ROUND(avg_days_between_orders, 0) as avg_days,
  DATE_ADD(last_order, INTERVAL ROUND(avg_days_between_orders, 0) DAY) as predicted_next_order
FROM order_intervals
WHERE last_order IS NOT NULL
ORDER BY predicted_next_order;

WITH customer_avg AS (
  SELECT 
    c.id, c.name,
    AVG(o.amount) as avg_order_value,
    COUNT(o.id) as order_count
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id
  GROUP BY c.id
)
SELECT 
  id, name, order_count, avg_order_value,
  NTILE(4) OVER (ORDER BY avg_order_value) as aov_quartile,
  CASE 
    WHEN NTILE(4) OVER (ORDER BY avg_order_value) = 1 THEN 'Budget'
    WHEN NTILE(4) OVER (ORDER BY avg_order_value) = 2 THEN 'Standard'
    WHEN NTILE(4) OVER (ORDER BY avg_order_value) = 3 THEN 'Premium'
    ELSE 'Elite'
  END as segment
FROM customer_avg
WHERE order_count > 0;

WITH order_products AS (
  SELECT 
    oi.order_id,
    GROUP_CONCAT(p.name ORDER BY p.name) as product_bundle
  FROM order_items oi
  JOIN products p ON oi.product_id = p.id
  GROUP BY oi.order_id
)
SELECT 
  product_bundle,
  COUNT(*) as bundle_frequency,
  RANK() OVER (ORDER BY COUNT(*) DESC) as bundle_rank
FROM order_products
GROUP BY product_bundle
HAVING COUNT(*) > 3
LIMIT 20;

SELECT 
  p.category,
  SUM(oi.price * oi.quantity) as revenue,
  SUM(oi.quantity * COALESCE((SELECT cost FROM products WHERE id = oi.product_id), 0)) as cost,
  SUM(oi.price * oi.quantity) - SUM(oi.quantity * COALESCE((SELECT cost FROM products WHERE id = oi.product_id), 0)) as profit,
  ROUND(100.0 * (SUM(oi.price * oi.quantity) - SUM(oi.quantity * COALESCE((SELECT cost FROM products WHERE id = oi.product_id), 0))) / 
    SUM(oi.price * oi.quantity), 2) as profit_margin_pct,
  ROUND(100.0 * SUM(oi.price * oi.quantity) / (SELECT SUM(price * quantity) FROM order_items), 2) as revenue_contribution
FROM order_items oi
JOIN products p ON oi.product_id = p.id
GROUP BY p.category
ORDER BY profit DESC;

WITH first_30_days AS (
  SELECT 
    c.id, c.name, c.created_at,
    SUM(o.amount) as spend_in_first_30,
    COUNT(o.id) as orders_in_first_30,
    MAX(o.order_date) as last_order_date
  FROM customers c
  LEFT JOIN orders o ON c.id = o.customer_id 
    AND o.order_date <= DATE_ADD(c.created_at, INTERVAL 30 DAY)
  WHERE c.created_at >= DATE_SUB(CURDATE(), INTERVAL 90 DAY)
  GROUP BY c.id
)
SELECT 
  id, name, created_at,
  spend_in_first_30,
  orders_in_first_30,
  ROUND(spend_in_first_30 / orders_in_first_30, 2) as avg_order_value
FROM first_30_days
WHERE spend_in_first_30 > (SELECT AVG(amount) FROM orders) * 2
ORDER BY spend_in_first_30 DESC;

WITH test_groups AS (
  SELECT 
    o.id,
    CASE WHEN MOD(c.id, 2) = 0 THEN 'Control' ELSE 'Test' END as group_name,
    o.amount,
    o.order_date
  FROM orders o
  JOIN customers c ON o.customer_id = c.id
  WHERE o.order_date BETWEEN '2024-01-01' AND '2024-01-31'
)
SELECT 
  group_name,
  COUNT(*) as order_count,
  AVG(amount) as avg_order_value,
  STDDEV(amount) as std_dev,
  SQRT(COUNT(*)) as sample_size,
  ROUND(AVG(amount), 2) as mean_value
FROM test_groups
GROUP BY group_name;