-- drop the null rows:
DELETE FROM file WHERE CustomerID IS NULL


-- overview some details
select 
	Transaction_Date
FROM
	file
group by 
	Transaction_Date
order by 
	Transaction_Date


select 
	distinct * 
from 
	file 
where 
	CustomerID is not null


SELECT
	CustomerID, 
	Transaction_Date, 
-- 	Offline_Spend, 
-- 	Online_Spend,
	SUM(Avg_Price * Quantity) as Sales_Amount
FROM
	file 
WHERE
	CustomerID = "17850.0"
GROUP BY
	Transaction_Date;


SELECT
	CustomerID, 
	Transaction_Date, 
	Offline_Spend, 
	Online_Spend,
	(Avg_Price * Quantity) as Sales_Amount
FROM
	file 
WHERE
	CustomerID = "17850.0"



-- RFM model
SELECT
	CustomerID,
	R,
	F,
	M,
IF
	( R >= avg_R, 1, 0 ) AS R_cal,
IF
	( F >= avg_F, 1, 0 ) AS F_cal,
IF
	( M >= avg_M, 1, 0 ) AS M_cal 
FROM
	(
	SELECT
		CustomerID,
		ABS(
		DATEDIFF( MAX( Transaction_Date ), '2023-12-31' )) AS R,
		COUNT(*) AS F,
		SUM( Avg_Price * Quantity ) AS M,
		AVG(
			ABS(
			DATEDIFF( MAX( Transaction_Date ), '2023-12-31' ))) OVER () AS avg_R,
		AVG(
		COUNT(*)) OVER () AS avg_F,
		AVG(
		SUM( Avg_Price * Quantity )) OVER () AS avg_M 
	FROM
		( SELECT DISTINCT * FROM file WHERE CustomerID IS NOT NULL ) AS a 
	GROUP BY
		CustomerID 
	) AS a;



WITH test AS (
	SELECT
		CustomerID,
		ABS(
		DATEDIFF( MAX( Transaction_Date ), '2023-12-31' )) AS R,
		COUNT(*) AS F,
		SUM( Avg_Price * Quantity ) AS M 
	FROM
		( SELECT DISTINCT * FROM file WHERE CustomerID IS NOT NULL ) AS a 
	GROUP BY
	CustomerID 
	)
	
select F, count(*) from test group by F ORDER BY F


-- retention rate
WITH FirstPurchase AS (
	SELECT
		T.CustomerID,
		T.Transaction_Date,
		TIMESTAMPDIFF(MONTH, F.FirstPurchaseDate, T.Transaction_Date) AS MonthsAfterFirstPurchase
	FROM file T
	JOIN (SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDate FROM file GROUP BY CustomerID) F 
	ON T.CustomerID = F.CustomerID
)

select * from FirstPurchase

select MonthsAfterFirstPurchase from test2 group by MonthsAfterFirstPurchase ORDER BY MonthsAfterFirstPurchase

-- step1: find the first purchase date for all the cx
WITH FirstPurchase AS (
  SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDate
  FROM file
  GROUP BY CustomerID
),
-- step2: mark the month difference between each transaction and the first purchase 
Cohorts AS (
  SELECT 
    f1.CustomerID,
    f2.FirstPurchaseDate,
    TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchase
  FROM file f1
  JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID
)
-- step3: calculate retention rate after first purchase in each month
SELECT 
  MonthsAfterFirstPurchase,
  COUNT(DISTINCT CustomerID) AS ReturningCustomers,
  (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase) AS TotalFirstTimeCustomers,
  COUNT(DISTINCT CustomerID) / (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase) AS RetentionRate
FROM Cohorts
WHERE MonthsAfterFirstPurchase > 0
GROUP BY MonthsAfterFirstPurchase


-- month repurchase rate

WITH MonthlyPurchases AS (
    SELECT
        CustomerID,
        Month,
        COUNT(*) AS PurchaseCount
    FROM file
    GROUP BY CustomerID, Month
),
RepeatPurchases AS (
    SELECT
        Month,
        COUNT(*) AS RepeatCustomerCount
    FROM (
        SELECT
            Month
        FROM MonthlyPurchases
        WHERE PurchaseCount > 1
    ) AS RepeatPurchasesGroup
    GROUP BY Month
),
TotalPurchases AS (
    SELECT
        Month,
        COUNT(DISTINCT CustomerID) AS TotalCustomerCount
    FROM file
    GROUP BY Month
)
SELECT
    RP.Month,
    RP.RepeatCustomerCount,
    TP.TotalCustomerCount,
    (RP.RepeatCustomerCount / TP.TotalCustomerCount) AS RepurchaseRate
FROM RepeatPurchases RP
JOIN TotalPurchases TP ON RP.Month = TP.Month
ORDER BY RP.Month;


-- life cycle:
SELECT
	CustomerID, 
	DATEDIFF(MAX(Transaction_Date), MIN(Transaction_Date)) as LifeCycle
FROM
	file 
GROUP BY
	CustomerID


-- analysis with other dimensions:
WITH FirstPurchase AS (
  SELECT 
    CustomerID, 
    MIN(Transaction_Date) AS FirstPurchaseDate,
    Location  -- location info
  FROM file
  GROUP BY CustomerID, Location
),

Cohorts AS (
  SELECT 
    f1.CustomerID,
    f2.FirstPurchaseDate,
    f2.Location,
    TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchase
  FROM file f1
  JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID AND f1.Location = f2.Location
)

SELECT 
  Location,
  MonthsAfterFirstPurchase,
  COUNT(DISTINCT CustomerID) AS ReturningCustomers,
  (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase WHERE Location = Cohorts.Location) AS TotalFirstTimeCustomers,  -- 计算每个地区的首次购买客户数
  COUNT(DISTINCT CustomerID) / (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase WHERE Location = Cohorts.Location) AS RetentionRate  -- 计算留存率时考虑地区
FROM Cohorts
WHERE MonthsAfterFirstPurchase > 0
GROUP BY Location, MonthsAfterFirstPurchase
ORDER BY Location, MonthsAfterFirstPurchase;


WITH FirstPurchase AS (
  SELECT 
    CustomerID, 
    MIN(Transaction_Date) AS FirstPurchaseDate
  FROM file
  GROUP BY CustomerID
),

Cohorts AS (
  SELECT 
    f1.CustomerID,
    f2.FirstPurchaseDate,
    TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchase,
    f1.Coupon_Status AS CurrentPurchaseCouponStatus
  FROM file f1
  JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID
)

SELECT 
  MonthsAfterFirstPurchase,
  CurrentPurchaseCouponStatus,
  COUNT(DISTINCT CustomerID) AS ReturningCustomers,
  (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase) AS TotalFirstTimeCustomers,
  COUNT(DISTINCT CustomerID) / (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase) AS RetentionRate
FROM Cohorts
WHERE MonthsAfterFirstPurchase > 0
GROUP BY MonthsAfterFirstPurchase, CurrentPurchaseCouponStatus


WITH FirstPurchase AS (
  SELECT 
    CustomerID, 
    MIN(Transaction_Date) AS FirstPurchaseDate
  FROM file
  WHERE Location = "Washington DC"
  GROUP BY CustomerID
),

Cohorts AS (
  SELECT 
    f1.CustomerID,
    f2.FirstPurchaseDate,
    TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchase,
    f1.Coupon_Status AS CurrentPurchaseCouponStatus
  FROM file f1
  JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID
)

SELECT 
  MonthsAfterFirstPurchase,
  CurrentPurchaseCouponStatus,
  COUNT(DISTINCT CustomerID) AS ReturningCustomers,
  (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase) AS TotalFirstTimeCustomers,
  COUNT(DISTINCT CustomerID) / (SELECT COUNT(DISTINCT CustomerID) FROM FirstPurchase) AS RetentionRate
FROM Cohorts
WHERE MonthsAfterFirstPurchase > 0
GROUP BY MonthsAfterFirstPurchase, CurrentPurchaseCouponStatus