I must admit that CTP3 Denaliwill bring some usefull T-SQL features. which i'm looking forward very much and just can't wait for the final release.
Itzik Ben-Gan posted this game on SQL magazine previous week, stating:
The challenge is to write a query that works with two input arguments @from and @to holding
the start and end dates of a date range, and calculates, for each day, various statistics in
respect to the previous day. Specifically, how many visitors visited the site that day, how
many new visitors were added compared to the previous day, how many visitors were removed
compared to the previous day, and how many remained. For the given sample data, the desired
result should look like this:
dt numvisits added removed remained
---------- ----------- ----------- ----------- -----------
2011-06-01 3 3 0 0
2011-06-02 2 0 1 2
2011-06-03 2 1 1 1
2011-06-04 0 0 2 0
2011-06-05 0 0 0 0
2011-06-06 0 0 0 0
2011-06-07 2 2 0 0
2011-06-08 3 2 1 1
Observe that there’s a key defined on dt and visitor, meaning that you store only one
occurrence at most per visitor and day. Also note that there is a possibility that
during some days there would be no visitors.
-- DDL and sample data for DailyVisits table
SET NOCOUNT ON;
USE TC;
IF OBJECT_ID('dbo.DailyVisits', 'U') IS NOT NULL
DROP TABLE dbo.DailyVisits;
GO
CREATE TABLE dbo.DailyVisits
(
dt DATEtime NOT NULL,
visitor VARCHAR(10) NOT NULL,
CONSTRAINT PK_DailyVisits PRIMARY KEY(dt, visitor)
);
INSERT INTO dbo.DailyVisits(dt, visitor) VALUES
('20110601', 'A'),
('20110601', 'B'),
('20110601', 'C'),
('20110602', 'A'),
('20110602', 'C'),
('20110603', 'A'),
('20110603', 'D'),
('20110607', 'A'),
('20110607', 'D'),
('20110608', 'D'),
('20110608', 'E'),
('20110608', 'F');
two simple solutions:
-- SOLUTION #1
;with dt_cte as
-- insert empty/missing dates
(select
min(dt) as dt_
,max(dt) as dt_m
from dailyvisits
union all
select
dt_+1 as dt_
,dt_m
from dt_cte
where
dt_+1 <= dt_m)
-- calculating added, removed and remained users
select
date_.dt_ as date
,count(distinct dv.visitor) as numvisits
,count(distinct dv.visitor)-(sum( case when (dv1.visitor = dv.visitor and dv.visitor is not null and dv1.visitor is not null) then 1 else 0 end)) as added
,count(distinct dv1.visitor) - sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as removed
,sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as remained
from
dt_cte as date_
left hash join dailyvisits as dv
on date_.dt_ = dv.dt
left hash join dailyvisits as dv1
on date_.dt_ = dateadd(day,1,dv1.dt)
group by date_.dt_
-- SOLUTION #2
-- calculating added, removed and remained users
select
date_.dt_ as date
,count(distinct dv.visitor) as numvisits
,count(distinct dv.visitor)-(sum( case when (dv1.visitor = dv.visitor and dv.visitor is not null and dv1.visitor is not null) then 1 else 0 end)) as added
,count(distinct dv1.visitor)-sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as removed
,sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as remained
from
-- insert empty/missing dates
(
select
(select min(dt) from dailyvisits)+number as dt_
from master..spt_values as b with (nolock readuncommitted)
where
[type] = 'P'
and number between 1 and (select datediff(day,(select min(dt) from dailyvisits), (select max(dt) from dailyvisits)))
) as date_
left hash join dailyvisits as dv
on date_.dt_ = dv.dt
left hash join dailyvisits as dv1
on date_.dt_ = dateadd(day,1,dv1.dt)
group by date_.dt_
Five ways to generate Date Tally Table:
-- 1. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
insert into dt(dt)
select
a.dt+b.number as dt
from
(select cast('2010/12/31' as datetime) as dt
) as a
cross join master..spt_values as b
where
type = 'P' and number between 1 and 365
-- 2. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
;with tab as
(
select cast('2011-1-1' as datetime) as dt, 1 as rn
union all
select dateadd(day,1,dt) as dt,rn + 1 as rn
from tab
where rn < 365
)
insert into dt(dt)
select dt
from tab
option (maxrecursion 365)
-- 3. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
insert into dt(dt)
;select
cast('2010/12/31' as datetime)+number as dt
from master..spt_values as b
where
type = 'P' and number between 1 and 365
-- 4. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
insert into dt(dt)
select
dateadd(day, isnull((select scope_identity()),2),'2010/12/31')
go 365
-- 5. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
while isnull(@@identity,1) <= 365
begin
insert into dt(dt)
select
dateadd(day, isnull(@@identity,1),'2010/12/31') as dt
end