Based on list of words, one can randomize the letters in the word using simple cross apply.
;with dictionary as
(
select 'soundgarden' as word union all
select 'impossible' as word union all
select 'apple' as word union all
select 'microsoft' as word union all
select 'mices' as word union all
select 'ITCrowds' as word
)
select
t1.word
,x.n
from dictionary as t1
cross apply (
select
substring(t1.word,sv.number,1)
from
master..spt_values as sv
where
sv.type = 'P'
and sv.number between 1 and 30 -- max word length
order by (select (abs(checksum(newid())))%1000) desc
for xml path('')
) as x(n)
I must admit that CTP3 Denaliwill bring some usefull T-SQL features. which i'm looking forward very much and just can't wait for the final release.
Itzik Ben-Gan posted this game on SQL magazine previous week, stating:
The challenge is to write a query that works with two input arguments @from and @to holding
the start and end dates of a date range, and calculates, for each day, various statistics in
respect to the previous day. Specifically, how many visitors visited the site that day, how
many new visitors were added compared to the previous day, how many visitors were removed
compared to the previous day, and how many remained. For the given sample data, the desired
result should look like this:
dt numvisits added removed remained
---------- ----------- ----------- ----------- -----------
2011-06-01 3 3 0 0
2011-06-02 2 0 1 2
2011-06-03 2 1 1 1
2011-06-04 0 0 2 0
2011-06-05 0 0 0 0
2011-06-06 0 0 0 0
2011-06-07 2 2 0 0
2011-06-08 3 2 1 1
Observe that there’s a key defined on dt and visitor, meaning that you store only one
occurrence at most per visitor and day. Also note that there is a possibility that
during some days there would be no visitors.
-- DDL and sample data for DailyVisits table
SET NOCOUNT ON;
USE TC;
IF OBJECT_ID('dbo.DailyVisits', 'U') IS NOT NULL
DROP TABLE dbo.DailyVisits;
GO
CREATE TABLE dbo.DailyVisits
(
dt DATEtime NOT NULL,
visitor VARCHAR(10) NOT NULL,
CONSTRAINT PK_DailyVisits PRIMARY KEY(dt, visitor)
);
INSERT INTO dbo.DailyVisits(dt, visitor) VALUES
('20110601', 'A'),
('20110601', 'B'),
('20110601', 'C'),
('20110602', 'A'),
('20110602', 'C'),
('20110603', 'A'),
('20110603', 'D'),
('20110607', 'A'),
('20110607', 'D'),
('20110608', 'D'),
('20110608', 'E'),
('20110608', 'F');
two simple solutions:
-- SOLUTION #1
;with dt_cte as
-- insert empty/missing dates
(select
min(dt) as dt_
,max(dt) as dt_m
from dailyvisits
union all
select
dt_+1 as dt_
,dt_m
from dt_cte
where
dt_+1 <= dt_m)
-- calculating added, removed and remained users
select
date_.dt_ as date
,count(distinct dv.visitor) as numvisits
,count(distinct dv.visitor)-(sum( case when (dv1.visitor = dv.visitor and dv.visitor is not null and dv1.visitor is not null) then 1 else 0 end)) as added
,count(distinct dv1.visitor) - sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as removed
,sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as remained
from
dt_cte as date_
left hash join dailyvisits as dv
on date_.dt_ = dv.dt
left hash join dailyvisits as dv1
on date_.dt_ = dateadd(day,1,dv1.dt)
group by date_.dt_
-- SOLUTION #2
-- calculating added, removed and remained users
select
date_.dt_ as date
,count(distinct dv.visitor) as numvisits
,count(distinct dv.visitor)-(sum( case when (dv1.visitor = dv.visitor and dv.visitor is not null and dv1.visitor is not null) then 1 else 0 end)) as added
,count(distinct dv1.visitor)-sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as removed
,sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as remained
from
-- insert empty/missing dates
(
select
(select min(dt) from dailyvisits)+number as dt_
from master..spt_values as b with (nolock readuncommitted)
where
[type] = 'P'
and number between 1 and (select datediff(day,(select min(dt) from dailyvisits), (select max(dt) from dailyvisits)))
) as date_
left hash join dailyvisits as dv
on date_.dt_ = dv.dt
left hash join dailyvisits as dv1
on date_.dt_ = dateadd(day,1,dv1.dt)
group by date_.dt_
Five ways to generate Date Tally Table:
-- 1. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
insert into dt(dt)
select
a.dt+b.number as dt
from
(select cast('2010/12/31' as datetime) as dt
) as a
cross join master..spt_values as b
where
type = 'P' and number between 1 and 365
-- 2. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
;with tab as
(
select cast('2011-1-1' as datetime) as dt, 1 as rn
union all
select dateadd(day,1,dt) as dt,rn + 1 as rn
from tab
where rn < 365
)
insert into dt(dt)
select dt
from tab
option (maxrecursion 365)
-- 3. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
insert into dt(dt)
;select
cast('2010/12/31' as datetime)+number as dt
from master..spt_values as b
where
type = 'P' and number between 1 and 365
-- 4. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
insert into dt(dt)
select
dateadd(day, isnull((select scope_identity()),2),'2010/12/31')
go 365
-- 5. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)
while isnull(@@identity,1) <= 365
begin
insert into dt(dt)
select
dateadd(day, isnull(@@identity,1),'2010/12/31') as dt
end
I was very happy to dive in into SQL Bits VII. Several interesting presentations, interesting concepts, advices and most of all interesting people.
Some things actually disturb me.
1. the outlook of all presentations. I would recommend to have standardized outlook of all presentations for each of the presenter. Meaning, to have a general rules of PPT design (fonts, views, etc.), general diagrams, pictures that can be used by everyone (something like techEd introduced as well as SQL PASS).
2. access to presentations and code / samples online (if presenters agree with this as well as SQL Bits agree with this)
3. try to avoid personalized computers settings (online skype messages, email messages, windows settings, windows sizes, etc.). Some of best presenters and presentations by experts (Simon Sabin, Ramesh Meyyappan, Chriss Webb) were having from what i've seen some problems with that. :)
4. maybe some additional reviewing would be welcome.
5. all presenters use same database (when possible). It can be bl***y AdventureWorks2008 or it can be SQLBitsWorks :)
6. I like interactive presentation. one things, for the video or the track, it would be nice when listeners in audiance ask questions, to either (1) to pass them mic or (2) repeat the question for the video.
7. nice polo shirts. would love to have one :)
Otherwise i hope i will be able to prepare something from my side for next SqlBits to see it in reality. I trully love SqlBits, it's really down-to-earth, case-study presentations and everything is based on experience, milage and long hours of code typing.
Can't wait for SqlBits VIII.
SQL Table walks to a psychiatrist dr. Index
Table: "Doctor, I have a problem"
Dr: "what kind a problem?"
Table: "I'm a mess. I have things all over the place, i always look for my stuff"
Dr. "No problem. I will get you in order".
Index and table are reading a book "index-sutra"
Table: Oh, baby tonight we can try a clustered position"
Index: "yeah baby, we can also try covered position"
Table: "or maybe multiple clustered position"
Index: "baby, yes, that's the one. i'm just gonna call my friends"
Indexianina philosophy on Fill factor 50 = "Half empty or Half full?"
What does an execution plan say to t-sql query? "Go f**k yourself, if you are not happy with me"
Execution plan to t-sql query is like alter-ego to self.
Short example how to use Timestamp for a mini ETL process of your data.
example below is following:
Table_1 is production table on server1
Table_2 is datawarehouse table on server2 where datawarehouse is located
Every day data are extracted, transformed and loaded to dataware house for further off-line usage and data analysis and business decision support.
1. Creating the environment
if object_id ('table_1') is not null
drop table table_1;
go
if object_id ('table_2') is not null
drop table table_2;
go
if object_id ('delete_logs') is not null
drop table delete_logs;
go
create table table_1
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TS timestamp not null
,TSR binary(8) null
,check_sum as checksum(class1, class2, num1))
create table table_2
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TSR binary(8) null
,check_sum bigint not null)
create table delete_logs
(id int not null
,TSR binary(8)
,delete_date datetime
,flag_delete tinyint default(0))
if object_id ('table1.trigger_delete','TR') is not null
drop trigger table1.trigger_delete;
go
create trigger trigger_delete
on table_1
after delete
as
delete from table_1
where id in (select id from deleted)
insert into delete_logs
select id,TS, getdate(),0 from deleted
go
2. populating the production table:
insert into table_1 (class1, class2, num1, TSR) values ('seattle','portland',251,null)
insert into table_1 (class1, class2, num1, TSR) values ('portland','portland',257,null)
insert into table_1 (class1, class2, num1, TSR) values ('seattle','san diego',254,null)
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seatlle',252,null)
-- Dataware house scenario
-- 0. scenario
-- table full load
insert into table_2 (class1, class2, num1, TSR,check_sum)
select
class1
,class2
,num1
,TS as TSR
,check_sum
from table_1
--1.scenario
-- row is delete from original table
-- and stored in table to delete the row from dataware house copy table (table_2)
delete from table_1 where id = 2
;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
delete from table_2 where id in (select id from cte_delete)
;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
update delete_logs
set flag_delete = 1
where id in (select id from cte_delete)
--2. scenario
-- row is updated in original table
;update table_1
set num1 = 300
where id = 1
;with cte_update (id, class1, class2, num1, ts)
as
(
select t1.id, t1.class1, t1.class2, t1.num1, t1.ts
from table_1 as t1
join table_2 as t2
on t1.id = t2.id
where
t1.ts <> t2.tsr
)
update t2
set t2.class1 = c.class1
,t2.class2 = c.class2
,t2.num1 = c.num1
,t2.tsr = c.ts
from table_2 as t2
join cte_update as c
on t2.id = c.id
-- 3. scenario
-- row is inserted in original table
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seattle',259,null)
;with cte_insert (id, class1, class2, num1, TS, check_sum)
as
(select
id, class1, class2, num1, ts, check_sum
from table_1 as t1
where
not exists (select id from table_2 as t2
where t2.id = t1.id)
)
insert into table_2
select class1, class2, num1, ts as tsr, check_sum from cte_insert
-- check equality
select * from table_1
select * from table_2
select * from delete_logs
i did comparison also with timestamp and checksum and timestamp is faster in terms of finding records for UPSERT and DELETE.
UPDATE: Please note that table TRUNCATE do not initiate DELETE TRIGGER, because it is not working row by row.
Only for lazy people -> how to order our output by defining numbers instead of column names:
select
*
from
(
select 1 as id, 'test' as text_name, 32 as seq union all
select 3 as id, 'best' as text_name, 61 as seq union all
select 4 as id, 'best' as text_name, 12 as seq union all
select 4 as id, 'best' as text_name, 6 as seq union all
select 2 as id, 'hest' as text_name, 21 as seq
) as x
order by 2,3
--order by 1,2,3
you can specify the select list or you can leave it blank as I did, i will return the natural order of select list, when using numbers in order by.
Quick and neat. just don't get to lazy with MSSQL :)
Our partner was explaining to me that they are using GUID as primary key on all the tables.
My immediate reaction was - why? and couple of basic doubts were:
- since I can read uniqueidentifier, it does not tell me absolutely anything
- if I will use my relational table, i sure will use other columns to get the information out
- SQL is terrible when setting up clustered index on GUID columns (and hence performance problems)
- why not use INT? it will save you space on disk, optimizer will be able to read data faster (instead of creating varchar field for uniqueidentifier) and
- since it's just a surrogate key, any of the numbers used as index do not tell me anything, so why not save on space and use something more "economic".
and short example:-- create GUID
select
newid() as id
,number
,type
,status
into Test1
from spt_values
--(2506 row(s) affected)
-- create normal identity
create table test2
(id int identity(1,1)
,number int
,type nchar(3)
,status int)
insert into test2 (number, type, status)
select number, type, status from test1
--(2506 row(s) affected)
CREATE CLUSTERED INDEX [CL_IX_test1_ID] ON [dbo].[Test1]
([id] ASC)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
CREATE CLUSTERED INDEX [CL_IX_test2_ID] ON [dbo].[Test2]
([id] ASC)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
-- run the execution plan
-- and compere both queries
-- you can also run the i/o statistics and time
select * from test1
select * from test2
More Posts
Next page »