Friday, May 21, 2010 1:12 PM
tonyrogerson
SQL Windowing screencast session for Cuppa Corner - rolling totals, data cleansing
In this 10 minute screencast I go through the basics of what I term windowing, which is basically the technique of filtering to a set of rows given a specific value, for instance a Sub-Query that aggregates or a join that returns more than just one row (for instance on a one to one relationship).
http://sqlserverfaq.com/content/SQL-Basic-Windowing-using-Joins.aspx
SQL below...
USE tempdb
go
CREATE TABLE RollingTotals_Nesting (
client_id int not null,
transaction_date date not null,
transaction_amount decimal( 28, 2 ) not null,
constraint pk_RollingTotal_Nesting
primary key clustered( client_id, transaction_date )
);
INSERT INTO RollingTotals_Nesting ( client_id, transaction_date, transaction_amount )
VALUES ( 1, '20100410', 10 ),
( 1, '20100411', 10 ),
( 1, '20100412', 10 ),
( 1, '20100413', 10 ),
( 1, '20100414', 10 );
--
-- Single level of nesting (the sub-query)
--
BEGIN
SELECT client_id, transaction_date, transaction_amount,
( SELECT SUM( rt2.transaction_amount )
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = rt1.client_id
AND rt2.transaction_date <= rt1.transaction_date ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt1;
END
--
-- What we really doing?
--
BEGIN
-- For each row in RollingTotals_Nesting do this...
SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = 1
AND rt2.transaction_date <= '20100410';
SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = 1
AND rt2.transaction_date <= '20100411';
SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = 1
AND rt2.transaction_date <= '20100412';
SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = 1
AND rt2.transaction_date <= '20100413';
SELECT SUM( rt2.transaction_amount ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = 1
AND rt2.transaction_date <= '20100414';
END
GO
--
-- Order of processing the rows in RollingTotals_Nesting is imaterial.
-- Our restriction is on the columns {client_id, transaction_date}
--
BEGIN
DROP TABLE RollingTotals_Nesting
CREATE TABLE RollingTotals_Nesting (
client_id int not null,
transaction_date date not null,
transaction_amount decimal( 28, 2 ) not null,
constraint pk_RollingTotal_Nesting
primary key NONclustered( client_id, transaction_date )
);
INSERT INTO RollingTotals_Nesting ( client_id, transaction_date, transaction_amount )
VALUES ( 1, '20100412', 10 ),
( 1, '20100410', 10 ),
( 1, '20100413', 10 ),
( 1, '20100411', 10 ),
( 1, '20100414', 10 );
SELECT client_id, transaction_date, transaction_amount
FROM RollingTotals_Nesting;
SELECT client_id, transaction_date, transaction_amount,
( SELECT SUM( rt2.transaction_amount )
FROM RollingTotals_Nesting AS rt2
WHERE rt2.client_id = rt1.client_id
AND rt2.transaction_date <= rt1.transaction_date ) AS transaction_amount_rolling_total
FROM RollingTotals_Nesting AS rt1;
END
---
--- Using a "Window" over your data
---
BEGIN
-- Source data
select top 100 rownumber, F1, F2, F3, F4, F5
from imp_MOTXLS
order by rownumber
-- Refine #1 - get closer to what we want
select top 10000 rownumber, F1, F2, F3, F4, F5
from imp_MOTXLS
where F3 is not null
order by rownumber
--
-- Find windows for maker/model so we can fill in the NULL's
--
select i1.F1 as maker,
i1.F2 as model, -- Model names sometimes wrap onto the next "line"
MIN( i1.rownumber ) as range_start,
MIN( i2.rownumber ) as range_end
into #MakerModelWindowRanges
from dbo.imp_MOTXLS as i1
inner join dbo.imp_MOTXLS as i2 on i2.rownumber > i1.rownumber
where i1.F3 is not null -- data quality restriction
and i2.F3 is not null -- data quality restriction
and i2.F1 is not null -- data quality restriction
and i2.F2 is not null -- data quality restriction
and i2.F1 <> i1.F1 -- Make change
and i2.F2 <> i2.F1 -- Model change
group by i1.F1,
i1.F2
order by maker, model
-- Add the key...
alter table #MakerModelWindowRanges alter column maker varchar(100) not null
alter table #MakerModelWindowRanges alter column model varchar(100) not null
alter table #MakerModelWindowRanges add primary key clustered( maker, model )
select maker, model, range_start, range_end
from #MakerModelWindowRanges
order by maker, model
-- Check a window
select rownumber, F1, F2, F3, F4, F5
from imp_MOTXLS
where rownumber between 96 and 120
and F3 is not null
-- Remember we are after when F1 changes OR F2 changes
-- F2 changes on rownumber 121
-- hence our right most range in the window is 120
-- and i2.F1 <> i1.F1
-- and i2.F2 <> i2.F1
select rownumber, F1, F2, F3, F4, F5
from imp_MOTXLS
where rownumber between 96 - 2 and 120 + 2
and F3 is not null
-- Now we have our Lookup Windowing table we can work the actual gubbings out
select wrng.maker,
wrng.model,
m.F3 as YearOfFirstUse,
m.F4 as TestPasses,
m.F5 as TestFails
from #MakerModelWindowRanges AS wrng
inner join imp_MOTXLS AS m ON m.rownumber BETWEEN wrng.range_start AND wrng.range_end
where m.F3 not in ( 'First Use', 'Total', 'Year of' )
order by wrng.maker, wrng.model, YearOfFirstUse
DROP TABLE #MakerModelWindowRanges
DROP TABLE RollingTotals_Nesting
END
Filed under: SQL, Windowing, Cuppa Corner