-- drop the null rows:DELETEFROMfileWHERE CustomerID ISNULL-- overview some detailsselect Transaction_DateFROMfilegroup by Transaction_Dateorder by Transaction_Dateselectdistinct*fromfilewhere CustomerID is not nullSELECT CustomerID, Transaction_Date, -- Offline_Spend, -- Online_Spend,SUM(Avg_Price * Quantity) as Sales_AmountFROMfileWHERE CustomerID ="17850.0"GROUP BY Transaction_Date;SELECT CustomerID, Transaction_Date, Offline_Spend, Online_Spend, (Avg_Price * Quantity) as Sales_AmountFROMfileWHERE CustomerID ="17850.0"-- RFM modelSELECT CustomerID, R, F, M,IF ( R >= avg_R, 1, 0 ) AS R_cal,IF ( F >= avg_F, 1, 0 ) AS F_cal,IF ( M >= avg_M, 1, 0 ) AS M_cal FROM (SELECT CustomerID,ABS(DATEDIFF( MAX( Transaction_Date ), '2023-12-31' )) AS R,COUNT(*) AS F,SUM( Avg_Price * Quantity ) AS M,AVG(ABS(DATEDIFF( MAX( Transaction_Date ), '2023-12-31' ))) OVER()AS avg_R,AVG(COUNT(*)) OVER()AS avg_F,AVG(SUM( Avg_Price * Quantity )) OVER()AS avg_M FROM ( SELECT DISTINCT*FROMfileWHERE CustomerID IS NOT NULL ) AS a GROUP BY CustomerID ) AS a;WITH test AS (SELECT CustomerID,ABS(DATEDIFF( MAX( Transaction_Date ), '2023-12-31' )) AS R,COUNT(*) AS F,SUM( Avg_Price * Quantity ) AS M FROM ( SELECT DISTINCT*FROMfileWHERE CustomerID IS NOT NULL ) AS a GROUP BY CustomerID )select F, count(*) from test group by F ORDER BY F-- retention rateWITH FirstPurchase AS (SELECT T.CustomerID, T.Transaction_Date, TIMESTAMPDIFF(MONTH, F.FirstPurchaseDate, T.Transaction_Date) AS MonthsAfterFirstPurchaseFROMfile TJOIN (SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDate FROMfileGROUP BY CustomerID) F ON T.CustomerID = F.CustomerID)select*from FirstPurchaseselect MonthsAfterFirstPurchase from test2 group by MonthsAfterFirstPurchase ORDER BY MonthsAfterFirstPurchase-- step1: find the first purchase date for all the cxWITH FirstPurchase AS (SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDateFROMfileGROUP BY CustomerID),-- step2: mark the month difference between each transaction and the first purchase Cohorts AS (SELECT f1.CustomerID, f2.FirstPurchaseDate, TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchaseFROMfile f1JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID)-- step3: calculate retention rate after first purchase in each monthSELECT MonthsAfterFirstPurchase,COUNT(DISTINCT CustomerID) AS ReturningCustomers, (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase) AS TotalFirstTimeCustomers,COUNT(DISTINCT CustomerID) / (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase) AS RetentionRateFROM CohortsWHERE MonthsAfterFirstPurchase >0GROUP BY MonthsAfterFirstPurchase-- month repurchase rateWITH MonthlyPurchases AS (SELECT CustomerID,Month,COUNT(*) AS PurchaseCountFROMfileGROUP BY CustomerID, Month),RepeatPurchases AS (SELECTMonth,COUNT(*) AS RepeatCustomerCountFROM (SELECTMonthFROM MonthlyPurchasesWHERE PurchaseCount >1 ) AS RepeatPurchasesGroupGROUP BYMonth),TotalPurchases AS (SELECTMonth,COUNT(DISTINCT CustomerID) AS TotalCustomerCountFROMfileGROUP BYMonth)SELECT RP.Month, RP.RepeatCustomerCount, TP.TotalCustomerCount, (RP.RepeatCustomerCount / TP.TotalCustomerCount) AS RepurchaseRateFROM RepeatPurchases RPJOIN TotalPurchases TP ON RP.Month = TP.MonthORDER BY RP.Month;-- life cycle:SELECT CustomerID, DATEDIFF(MAX(Transaction_Date), MIN(Transaction_Date)) as LifeCycleFROMfileGROUP BY CustomerID-- analysis with other dimensions:WITH FirstPurchase AS (SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDate,Location-- location infoFROMfileGROUP BY CustomerID, Location),Cohorts AS (SELECT f1.CustomerID, f2.FirstPurchaseDate, f2.Location, TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchaseFROMfile f1JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID AND f1.Location = f2.Location)SELECTLocation, MonthsAfterFirstPurchase,COUNT(DISTINCT CustomerID) AS ReturningCustomers, (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase WHERELocation= Cohorts.Location) AS TotalFirstTimeCustomers, -- 计算每个地区的首次购买客户数COUNT(DISTINCT CustomerID) / (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase WHERELocation= Cohorts.Location) AS RetentionRate -- 计算留存率时考虑地区FROM CohortsWHERE MonthsAfterFirstPurchase >0GROUP BYLocation, MonthsAfterFirstPurchaseORDER BYLocation, MonthsAfterFirstPurchase;WITH FirstPurchase AS (SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDateFROMfileGROUP BY CustomerID),Cohorts AS (SELECT f1.CustomerID, f2.FirstPurchaseDate, TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchase, f1.Coupon_Status AS CurrentPurchaseCouponStatusFROMfile f1JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID)SELECT MonthsAfterFirstPurchase, CurrentPurchaseCouponStatus,COUNT(DISTINCT CustomerID) AS ReturningCustomers, (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase) AS TotalFirstTimeCustomers,COUNT(DISTINCT CustomerID) / (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase) AS RetentionRateFROM CohortsWHERE MonthsAfterFirstPurchase >0GROUP BY MonthsAfterFirstPurchase, CurrentPurchaseCouponStatusWITH FirstPurchase AS (SELECT CustomerID, MIN(Transaction_Date) AS FirstPurchaseDateFROMfileWHERELocation="Washington DC"GROUP BY CustomerID),Cohorts AS (SELECT f1.CustomerID, f2.FirstPurchaseDate, TIMESTAMPDIFF(MONTH, f2.FirstPurchaseDate, f1.Transaction_Date) AS MonthsAfterFirstPurchase, f1.Coupon_Status AS CurrentPurchaseCouponStatusFROMfile f1JOIN FirstPurchase f2 ON f1.CustomerID = f2.CustomerID)SELECT MonthsAfterFirstPurchase, CurrentPurchaseCouponStatus,COUNT(DISTINCT CustomerID) AS ReturningCustomers, (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase) AS TotalFirstTimeCustomers,COUNT(DISTINCT CustomerID) / (SELECTCOUNT(DISTINCT CustomerID) FROM FirstPurchase) AS RetentionRateFROM CohortsWHERE MonthsAfterFirstPurchase >0GROUP BY MonthsAfterFirstPurchase, CurrentPurchaseCouponStatus