24 June 2010 13:25 tonyrogerson

Recency Frequency talk from last nights user group - the SQL to do the Recency/Frequency grouping on Adventureworks - examples of OVER and PARTITION BY, using CTE's, CROSS APPLY

Code below is from the Recency Frequency of {} (RFM and RFI) talk I did at the user group last night, like I said its the first part - it doesn't do the category flow rates between temporal windows.

I'm writing a full day talk on how to do this using Inmon (normalised route) and Kimball (denormalised star/snowflake and dimensional database routes) - if you have any questions, ideas or suggestions then please do email me tonyrogerson@torver.net - I'm very keen on feedback.

USE AdventureWorks2008R2

go


 

--

--  Self Join and the Window of rows

--


 

select winstart = case when soh2.SalesOrderID = soh4.SalesOrderID then '***' else '' end,

       control_input_SalesOrderID = soh2.SalesOrderID,

       control_input_OrderDate    = soh2.OrderDate,

       lookup_input_SalesOrderID  = soh4.SalesOrderID,

       lookup_input_OrderDate     = soh4.OrderDate,

       recency_days = DATEDIFF( day, soh4.OrderDate, soh2.OrderDate )

from Sales.SalesOrderHeader soh2

    left outer join Sales.SalesOrderHeader soh4

            on soh4.CustomerID = soh2.CustomerID

           AND soh4.OrderDate <= soh2.OrderDate

--           AND soh4.SalesOrderID <> soh2.SalesOrderID

                                               

where soh2.CustomerID = 11091

order by control_input_SalesOrderID,

         lookup_input_SalesOrderID desc


 


 


 


 


 


 

--

--  What about efficiency? The above a good way to do this?

--


 

select soh2.CustomerID,

       curr_order   = soh2.OrderDate,

       prev_order   = soh3.OrderDate,

       recency_days = DATEDIFF( day, soh3.OrderDate, soh2.OrderDate )


 

from Sales.SalesOrderHeader soh2

    left outer join Sales.SalesOrderHeader soh3

            on soh3.SalesOrderID = (    SELECT MAX( soh4.SalesOrderID )

                                        FROM Sales.SalesOrderHeader soh4

                                        WHERE soh4.CustomerID = soh2.CustomerID

                                          AND soh4.OrderDate <= soh2.OrderDate

                                          AND soh4.SalesOrderID <> soh2.SalesOrderID )

order by soh2.CustomerID, recency_days

go                                              


 

--  Rob Farley....

select soh.CustomerID,

       curr_order   = soh.OrderDate,

       prev_order   = prev.PrevOrderDate,

       recency_days = DATEDIFF(day, prev.PrevOrderDate, soh.OrderDate )

from Sales.SalesOrderHeader as soh

    cross apply (   select MAX(prev.OrderDate) as PrevOrderDate

                    from Sales.SalesOrderHeader as prev

                    where prev.CustomerID = soh.CustomerID

                      and prev.SalesOrderID <> soh.SalesOrderID

                      and prev.OrderDate <= soh.OrderDate) prev

order by soh.CustomerID, recency_days

go


 

--  Itzik Ben Gan....

with soh_rn as

(

  select CustomerID,

         OrderDate,

         n = row_number() over(order by CustomerID, OrderDate, SalesOrderID)

  from Sales.SalesOrderHeader


 

)

select s1.CustomerID,

       curr_order   = s1.OrderDate,

       prev_order   = s2.OrderDate,

       recency_days = DATEDIFF( day, s2.OrderDate, s1.OrderDate )

  from soh_rn as s1

    left outer join soh_rn as s2

      on s1.CustomerID = s2.CustomerID

      and s1.n = s2.n + 1

order by s1.CustomerID, recency_days


 


 


 


 


 


 


 

--

--  OVER clause

--


 

select CustomerID,

       SalesOrderID,

       SubTotal,

       SUM( SubTotal ) OVER( PARTITION BY CustomerID ) AS GrandTotal,

       ( SubTotal /

            SUM( SubTotal ) OVER( PARTITION BY CustomerID )) * 100 AS pct_GrandTotal

      

from Sales.SalesOrderHeader

where CustomerID in ( 11091

                    , 11176

                    , 11185

                    , 11200

                    , 11223 )


 


 

select CustomerID,

       SalesOrderID,

       SubTotal,

       LEFT( CONVERT( Char(8), OrderDate, 112 ), 6 ) AS YYYYMM,

       SUM( SubTotal ) OVER( PARTITION BY CustomerID, LEFT( CONVERT( Char(8), OrderDate, 112 ), 6 ) ) AS YYYYMMGrandTotal,

       SUM( SubTotal ) OVER( PARTITION BY CustomerID ) AS GrandTotal,

       ( SubTotal / SUM( SubTotal ) OVER( PARTITION BY CustomerID )) * 100

            AS pct_GrandTotal,

       ( SubTotal / SUM( SubTotal ) OVER( PARTITION BY CustomerID, LEFT( CONVERT( Char(8), OrderDate, 112 ), 6 ) )) * 100

            AS pct_YYYYMMGrandTotal

      

from Sales.SalesOrderHeader

where CustomerID in (    11091

                       , 11176

                       , 11185

                       , 11200

                       , 11223 )


 


 


 


 

--

--  CROSS APPLY

--


 

select soh.CustomerID,

       curr_order   = soh.OrderDate,

       prev_order   = prev.PrevOrderDate,

       recency_days = DATEDIFF(day, prev.PrevOrderDate, soh.OrderDate )

from Sales.SalesOrderHeader as soh

    cross apply (   select MAX(prev.OrderDate) as PrevOrderDate

                    from Sales.SalesOrderHeader as prev

                    where prev.CustomerID = soh.CustomerID

                      and prev.SalesOrderID <> soh.SalesOrderID

                      and prev.OrderDate <= soh.OrderDate) prev

order by soh.CustomerID, recency_days

go


 


 


 


 

--

--  Common Table Expression

--


 

; with recency ( curr_order, prev_order, recency_days )

as (


 

    select curr_order   = soh2.OrderDate,

           prev_order   = soh3.OrderDate,

           recency_days = DATEDIFF( day, soh3.OrderDate, soh2.OrderDate )

    from Sales.SalesOrderHeader soh2

        left outer join Sales.SalesOrderHeader soh3

                on soh3.SalesOrderID = (    SELECT MAX( soh4.SalesOrderID )

                                            FROM Sales.SalesOrderHeader soh4

                                            WHERE soh4.CustomerID = soh2.CustomerID

                                              AND soh4.OrderDate <= soh2.OrderDate

                                              AND soh4.SalesOrderID <> soh2.SalesOrderID )

                                                  

    where soh2.CustomerID = 11091

    )

select  curr_order,

        prev_order,

        recency_days,

        average_recency = ( select AVG( recency_days )

                            from recency ),

        stddev_recency  = ( select STDEVP( recency_days )

                            from recency ),

        dist_within_1std= ( select COUNT(*)

                            from recency r

                                cross join ( select stddev_recency = STDEVP( recency_days )

                                             from recency ) as st

                            where r.recency_days between -stddev_recency and stddev_recency ),

        dist_within_2std= ( select COUNT(*)

                            from recency r

                                cross join ( select stddev_recency = STDEVP( recency_days )

                                             from recency ) as st

                            where r.recency_days between -stddev_recency * 2 and stddev_recency * 2 ),

        dist_over_2std=   ( select COUNT(*)

                            from recency r

                                cross join ( select stddev_recency = STDEVP( recency_days )

                                             from recency ) as st

                            where r.recency_days < -stddev_recency * 2

                               or r.recency_days > stddev_recency * 2 ),

        population_Size=  ( select COUNT(*)

                            from recency )

from recency

order by curr_order


 


 


 


 


 


 


 


 

--

--  Pulling the data for the recency frequency analysis

--


 

; with soh_rn

          as   (

       select CustomerID,

               OrderDate,

               SubTotal,

               rn = row_number() over(order by CustomerID, OrderDate, SalesOrderID)

       from Sales.SalesOrderHeader


 

     )

 , recency ( CustomerID, curr_order, prev_order, recency_days, SubTotal, rn )

          as (


 

     select s1.CustomerID,

             curr_order   = s1.OrderDate,

             prev_order   = s2.OrderDate,

             recency_days = DATEDIFF( day, s2.OrderDate, s1.OrderDate ),

             s1.subTotal,

             s1.rn

       from soh_rn as s1

          left outer join soh_rn as s2

            on s1.CustomerID = s2.CustomerID

            and s1.rn = s2.rn + 1 )


 

select  c1.*,

          median = AVG( cast( case when population_size % 2 = 0 then

                        case when position_in_dataset between population_size / 2     --   even number - average the middle two

                                                      and ( population_size / 2 ) + 1 then c1.recency_days else null end

                                 else case when position_in_dataset = ( population_size / 2 ) + 1 then c1.recency_days else null end       --     odd number - take the middle

                            end as decimal( 28, 5 ) ) )

                           

                                      OVER( PARTITION BY c1.CustomerID )

into #t

from (   

    select  r.CustomerID,

            curr_order,

            prev_order,

            recency_days,

            population_size = COUNT(recency_days) OVER( PARTITION BY r.CustomerID ),

            position_in_dataset = ROW_NUMBER() OVER( PARTITION BY r.CustomerID ORDER BY recency_days),

            average_recency = AVG( cast( recency_days as decimal( 28, 5 ) ) ) OVER( PARTITION BY r.CustomerID ),

            stddev_recency  = STDEV( recency_days ) OVER( PARTITION BY r.CustomerID ),

            SubTotal,

            rn

    from recency r

    ) as c1


 


 

--

--   Frequency worked out...

--


 

select *

from #t

where CustomerID = 11176

order by position_in_dataset

order by CustomerID

       , curr_order desc

      

select customerid from #t group by CustomerID order by COUNT(*) desc      


 

--

--   Work out recency by frequency using the most recent order...

--   to current date, we are using the current date of 20080803 because

--   the data is static

--


 

select recency_days = DATEDIFF( day, curr_order, '20080803' ),

        frequency    = COALESCE( median, 1 ),

        intensity    = COUNT( * ),

        monetary     = SUM( SubTotal )

into #t2

from #t t

where rn = (  SELECT MAX( t2.rn )

                  FROM #t t2

                  WHERE t2.CustomerID = t.CustomerID

                  )

group by DATEDIFF( day, curr_order, '20080803' ),

           COALESCE( median, 1 )

order by recency_days,

           frequency


 

--

--   Group by category

--


 

declare @recency_categories table (

     cat_name varchar(50) not null primary key clustered,

     recency_from  smallint not null,

     recency_to         smallint not null,

          check( recency_from <= recency_to )

     )

declare @frequency_categories table (

     cat_name varchar(50) not null primary key clustered,

     frequency_from smallint not null,

     frequency_to  smallint not null,

          check( frequency_from <= frequency_to )

     )

insert @recency_categories ( cat_name, recency_from, recency_to )

     values ( 'past 15 days', 0, 15 ),

             ( '16 - 30 days', 16, 30 ),

            ( '31 - 45 days', 31, 45 ),

            ( '46 - 60 days', 46, 60 ),

            ( '61 - 75 days', 61, 75 ),

            ( '76 - 90 days', 76, 90 ),

            ( 'over 90 days', 91, 32767 )

insert @frequency_categories ( cat_name, frequency_from, frequency_to )

     values ( 'single', 1, 1 ),

             ( '2 - 5', 2, 5 ),

            ( '6 - 10', 6, 10 ),

            ( 'over 10', 11, 32767 )


 

select recency_cat   = rc.cat_name,

        recency_ord   = rc.recency_from,

        frequency_cat = fq.cat_name,

        frequency_ord = fq.frequency_from,

        intensity = SUM( t.intensity ),

        monetary  = SUM( t.monetary )

from #t2 t

     inner join @recency_categories rc on t.recency_days between rc.recency_from

                                                                        and rc.recency_to

     inner join @frequency_categories fq on t.frequency between fq.frequency_from

                                                                      and fq.frequency_to

group by rc.cat_name,

           rc.recency_from,

           fq.cat_name,

           fq.frequency_from

order by recency_ord,

          frequency_ord


 

--

-- Review recency

--


 

select recency_cat   = rc.cat_name,

        recency_ord   = rc.recency_from,

        intensity = SUM( t.intensity ),

        monetary  = SUM( t.monetary )

from #t2 t

     inner join @recency_categories rc on t.recency_days between rc.recency_from

                                                                        and rc.recency_to

     inner join @frequency_categories fq on t.frequency between fq.frequency_from

                                                                      and fq.frequency_to

group by rc.cat_name,

           rc.recency_from

order by recency_ord


 


 


 

--

--   Check

--

select COUNT( DISTINCT CustomerID )

from #t

where curr_order >= dateadd( day, -15, '20080803' )


 

select SUM( intensity )

from #t2

where recency_days <= 15

         

select COUNT( distinct customerid )

from Sales.SalesOrderHeader

where OrderDate >= dateadd( day, -15, '20080803' )


 

select *

from #t2

where recency_days <= 15


 


 

 

Filed under: ,

Comments

# Twitter Trackbacks for Recency Frequency talk from last nights user group - the SQL to do the Recency/Frequency grouping on Adventureworks - examples of OVER and PARTITION BY, using CTE's, CROSS APPLY - Tony Rogerson's [sqlblogcasts

Pingback from  Twitter Trackbacks for                 Recency Frequency talk from last nights user group - the SQL to do the Recency/Frequency grouping on Adventureworks - examples of OVER and PARTITION BY, using CTE's, CROSS APPLY - Tony Rogerson's         [sqlblogcasts.com]        on Topsy.com

# Lime Review | Independent Expert Analysis &raquo; Microwave Oven and Spare Parts

Pingback from  Lime Review | Independent Expert Analysis &raquo; Microwave Oven and Spare Parts