Friday, May 21, 2010 1:12 PM tonyrogerson

SQL Windowing screencast session for Cuppa Corner - rolling totals, data cleansing

In this 10 minute screencast I go through the basics of what I term windowing, which is basically the technique of filtering to a set of rows given a specific value, for instance a Sub-Query that aggregates or a join that returns more than just one row (for instance on a one to one relationship).

http://sqlserverfaq.com/content/SQL-Basic-Windowing-using-Joins.aspx

SQL below...

USE tempdb

go

   

CREATE TABLE RollingTotals_Nesting (

 

    client_id          int not null,

    transaction_date   date not null,

    transaction_amount decimal( 28, 2 ) not null,

   

        constraint pk_RollingTotal_Nesting

            primary key clustered( client_id, transaction_date )

);

 

INSERT INTO RollingTotals_Nesting ( client_id, transaction_date, transaction_amount )

    VALUES ( 1, '20100410', 10 ),

           ( 1, '20100411', 10 ),

           ( 1, '20100412', 10 ),

           ( 1, '20100413', 10 ),

           ( 1, '20100414', 10 );

          

          

 

--

--  Single level of nesting (the sub-query)

--

BEGIN

 

    SELECT client_id, transaction_date, transaction_amount,

           (    SELECT SUM( rt2.transaction_amount )

                FROM RollingTotals_Nesting AS rt2

                WHERE rt2.client_id = rt1.client_id

                  AND rt2.transaction_date <= rt1.transaction_date ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt1;

 

END

   

--

--  What we really doing?

--

BEGIN

 

    --  For each row in RollingTotals_Nesting do this...

 

    SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt2

    WHERE rt2.client_id = 1

      AND rt2.transaction_date <= '20100410';

    SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt2

    WHERE rt2.client_id = 1

      AND rt2.transaction_date <= '20100411';

    SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt2

    WHERE rt2.client_id = 1

      AND rt2.transaction_date <= '20100412';

    SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt2

    WHERE rt2.client_id = 1

      AND rt2.transaction_date <= '20100413';

    SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt2

    WHERE rt2.client_id = 1

      AND rt2.transaction_date <= '20100414';

     

END

GO

   

--

--  Order of processing the rows in RollingTotals_Nesting is imaterial.

--  Our restriction is on the columns {client_id, transaction_date}

--

 

BEGIN

 

    DROP TABLE RollingTotals_Nesting

 

    CREATE TABLE RollingTotals_Nesting (

 

        client_id   int not null,

        transaction_date date not null,

        transaction_amount decimal( 28, 2 ) not null,

       

            constraint pk_RollingTotal_Nesting

                primary key NONclustered( client_id, transaction_date )

    );

 

    INSERT INTO RollingTotals_Nesting ( client_id, transaction_date, transaction_amount )

        VALUES ( 1, '20100412', 10 ),

               ( 1, '20100410', 10 ),

               ( 1, '20100413', 10 ),

               ( 1, '20100411', 10 ),

               ( 1, '20100414', 10 );

              

    SELECT client_id, transaction_date, transaction_amount

    FROM RollingTotals_Nesting;

 

    SELECT client_id, transaction_date, transaction_amount,

           (    SELECT SUM( rt2.transaction_amount )

                FROM RollingTotals_Nesting AS rt2

                WHERE rt2.client_id = rt1.client_id

                  AND rt2.transaction_date <= rt1.transaction_date ) AS transaction_amount_rolling_total

    FROM RollingTotals_Nesting AS rt1;

 

END

  

---

--- Using a "Window" over your data

---

 

BEGIN

 

    --  Source data

 

    select top 100 rownumber, F1, F2, F3, F4, F5

    from imp_MOTXLS

    order by rownumber

  

    --  Refine #1 - get closer to what we want

 

    select top 10000 rownumber, F1, F2, F3, F4, F5

    from imp_MOTXLS

    where F3 is not null

    order by rownumber

   

    --

    --      Find windows for maker/model so we can fill in the NULL's

    --

 

    select i1.F1 as maker,

           i1.F2 as model,         --  Model names sometimes wrap onto the next "line"

           MIN( i1.rownumber ) as range_start,

           MIN( i2.rownumber ) as range_end

    into #MakerModelWindowRanges

    from dbo.imp_MOTXLS as i1

          inner join dbo.imp_MOTXLS as i2 on i2.rownumber > i1.rownumber

    where i1.F3 is not null     -- data quality restriction

      and i2.F3 is not null     -- data quality restriction

      and i2.F1 is not null     -- data quality restriction

      and i2.F2 is not null     -- data quality restriction

      and i2.F1 <> i1.F1    --  Make change

      and i2.F2 <> i2.F1    --  Model change

    group by i1.F1,

             i1.F2

    order by maker, model

  

    --  Add the key...

 

    alter table #MakerModelWindowRanges alter column maker varchar(100) not null

    alter table #MakerModelWindowRanges alter column model varchar(100) not null

 

    alter table #MakerModelWindowRanges add primary key clustered( maker, model )

  

    select maker, model, range_start, range_end

    from #MakerModelWindowRanges

    order by maker, model

   

    --  Check a window

 

    select rownumber, F1, F2, F3, F4, F5

    from imp_MOTXLS

    where rownumber between 96 and 120

      and F3 is not null

   

     

    --  Remember we are after when F1 changes OR F2 changes

    --  F2 changes on rownumber 121

    --  hence our right most range in the window is 120

    --      and i2.F1 <> i1.F1

    --      and i2.F2 <> i2.F1

 

    select rownumber, F1, F2, F3, F4, F5

    from imp_MOTXLS

    where rownumber between 96 - 2 and 120 + 2

      and F3 is not null

   

    --  Now we have our Lookup Windowing table we can work the actual gubbings out

 

    select wrng.maker,

           wrng.model,

           m.F3 as YearOfFirstUse,

           m.F4 as TestPasses,

           m.F5 as TestFails

    from #MakerModelWindowRanges AS wrng

        inner join imp_MOTXLS AS m ON m.rownumber BETWEEN wrng.range_start AND wrng.range_end

    where m.F3 not in ( 'First Use', 'Total', 'Year of' )

    order by wrng.maker, wrng.model, YearOfFirstUse

 

       

       

    DROP TABLE #MakerModelWindowRanges

    DROP TABLE RollingTotals_Nesting

  

END

 

Filed under: , ,

Comments

No Comments