-- =============================================================================
-- RFM(T) Segmentation Template  ·  by Gökhan Ağarer  ·  gokhanagarer.com
-- =============================================================================
-- A reusable, profit-weighted RFM(T) customer segmentation in plain SQL.
-- Generic table/column names: swap them for your own. No vendor lock-in.
--
-- What you get:
--   1) One clean row per customer (Recency, Frequency, Monetary, Tenure, Profit)
--   2) Quintile (1-5) scores, with recency reversed so 5 always means "best"
--   3) A PROFIT-WEIGHTED monetary score (the upgrade most tutorials skip)
--   4) Plain-language named tiers your team can actually act on
--
-- Tested in standard ANSI SQL with window functions (works on BigQuery,
-- PostgreSQL, Snowflake, Redshift; minor date-function tweaks per engine).
--
-- HOW TO ADAPT (3 things):
--   a) Point `source_transactions` at your transactions table.
--   b) Map the columns: customer id, transaction date, amount, and (optional)
--      margin/profit. If you have no profit column, see the note at step 2.
--   c) Set the analysis window (default: rolling 12 months) and "as of" date.
-- =============================================================================

-- ---- 0) Parameters you control -------------------------------------------------
-- Replace CURRENT_DATE with your reporting snapshot date if not "today".
-- Replace the 12-month window if your cycle is different.

WITH params AS (
  SELECT
    CURRENT_DATE                         AS as_of_date,      -- reporting snapshot
    DATE_SUB(CURRENT_DATE, INTERVAL 12 MONTH) AS window_start -- rolling window start
),

-- ---- 1) One clean row per customer --------------------------------------------
-- Aggregate the raw transactions into the four RFM(T) inputs + profit.
customer_base AS (
  SELECT
    t.customer_id,
    MAX(t.transaction_date)                          AS last_txn_date,
    COUNT(*)                                          AS frequency,        -- F: how often
    SUM(t.amount)                                     AS monetary,         -- M: total value
    SUM(COALESCE(t.margin, t.amount * 0.0))           AS total_profit,     -- profit (see note)
    MIN(t.signup_date)                                AS signup_date
  FROM source_transactions t                          -- <-- YOUR transactions table
  CROSS JOIN params p
  WHERE t.transaction_date >= p.window_start
  GROUP BY t.customer_id
),

-- ---- 2) Derive Recency (days) and Tenure (days), keep profit ------------------
-- Recency: smaller "days since last txn" is better. We reverse it when scoring.
-- Profit note: if you have NO margin column, delete `total_profit` everywhere and
-- the profit-weighting blends to plain monetary automatically (see step 4 comment).
customer_metrics AS (
  SELECT
    cb.customer_id,
    DATE_DIFF(p.as_of_date, cb.last_txn_date, DAY)    AS recency_days,     -- R: smaller = better
    cb.frequency,                                                          -- F
    cb.monetary,                                                           -- M
    cb.total_profit,                                                       -- profit signal
    DATE_DIFF(p.as_of_date, cb.signup_date, DAY)      AS tenure_days       -- T: longer = more loyal
  FROM customer_base cb
  CROSS JOIN params p
),

-- ---- 3) Quintile scores (1-5) -------------------------------------------------
-- NTILE(5) splits the base into five equal-sized buckets per dimension.
-- Quintiles adapt to your data; hardcoded thresholds rot when the business shifts.
scored AS (
  SELECT
    customer_id,
    recency_days, frequency, monetary, total_profit, tenure_days,
    -- Recency reversed: NTILE gives 1 to the smallest recency_days (best),
    -- so we do (6 - ntile) to make 5 = most recent = best.
    6 - NTILE(5) OVER (ORDER BY recency_days ASC)     AS r_score,
    NTILE(5) OVER (ORDER BY frequency ASC)            AS f_score,
    NTILE(5) OVER (ORDER BY monetary ASC)             AS m_score,
    NTILE(5) OVER (ORDER BY total_profit ASC)         AS p_score,  -- profit quintile
    NTILE(5) OVER (ORDER BY tenure_days ASC)          AS t_score
  FROM customer_metrics
),

-- ---- 4) The upgrade: a PROFIT-WEIGHTED monetary score -------------------------
-- Classic RFM scores value on revenue alone, so a high-volume / thin-margin
-- customer scores the same as a genuinely profitable one. We blend the two.
-- Weight: 60% profit, 40% revenue. Tune to your business. If you have no profit
-- column, set the profit weight to 0 and this collapses to plain RFM cleanly.
weighted AS (
  SELECT
    *,
    CAST(ROUND(0.6 * p_score + 0.4 * m_score) AS INT) AS value_score  -- profit-weighted "M"
  FROM scored
),

-- ---- 5) Map scores to plain-language tiers ------------------------------------
-- Five to seven named tiers, each with one obvious action. The OVERRIDE on the
-- first line is what stops a genuinely high-profit customer from being buried
-- in a low tier just because their raw frequency is average.
tiered AS (
  SELECT
    customer_id,
    r_score, f_score, m_score, p_score, t_score, value_score,
    recency_days, frequency, monetary, total_profit, tenure_days,
    CASE
      -- Override: high profit always earns at least a high-value seat
      WHEN p_score = 5 AND r_score >= 3                         THEN 'Strategic'
      WHEN r_score >= 4 AND f_score >= 4 AND value_score >= 4   THEN 'Strategic'
      WHEN f_score >= 4 AND value_score >= 3                    THEN 'High-Value'
      WHEN r_score >= 4 AND f_score <= 2 AND t_score <= 2       THEN 'Promising / New'
      WHEN r_score <= 2 AND f_score >= 3                        THEN 'At-Risk'        -- was good, slipping
      WHEN r_score <= 2 AND f_score <= 2                        THEN 'Dormant'
      ELSE 'Developing'
    END AS tier
  FROM weighted
)

-- ---- 6) Output ----------------------------------------------------------------
SELECT
  customer_id,
  tier,
  r_score, f_score, m_score, p_score, t_score,
  value_score,
  recency_days, frequency, monetary, total_profit, tenure_days
FROM tiered
ORDER BY value_score DESC, r_score DESC;

-- =============================================================================
-- SEGMENT-MAP  ·  what each tier means and what to do
-- =============================================================================
-- Tier             Looks like                         Action
-- ---------------  ---------------------------------  -------------------------------------------
-- Strategic        High R/F + top profit              Protect & reward. No discounts; ask for referrals.
-- High-Value       Frequent + profitable              Upsell / cross-sell. The most reliable revenue lift.
-- Promising / New  Recent, low frequency, short tenure Onboard hard. Next 30 days decide if they stay.
-- At-Risk          Was active, recency falling         Win-back, urgently. Your most expensive losses.
-- Dormant          Low recency, low frequency          One cheap reactivation, then stop spending.
-- Developing       Everyone in the middle              Nudge toward the next tier; watch the trend.
--
-- TIPS
--  - Re-run this on a schedule (weekly/monthly). RFM is a snapshot; customers move.
--  - Tie each tier to a campaign, then back to revenue, so you know it works.
--  - Keep it to 5-7 tiers. If your team can't recite each tier's action, you have too many.
--
-- Built from real production experience (a fintech merchant portfolio), generalised
-- here with no proprietary data. Questions? gokhanagarer@gmail.com
-- =============================================================================
