Tomaz.tsql

simple word randomization based on dictionary

Based on list of words, one can randomize the letters in the word using simple cross apply.

 

;with dictionary as
(
    select 'soundgarden' as  word union all
    select 'impossible' as word union all
    select 'apple' as word union all
    select 'microsoft' as word union all
    select 'mices' as word union all
    select 'ITCrowds' as word
)
select
     t1.word
    ,x.n
        from dictionary as t1
    cross apply (
                    select
                            substring(t1.word,sv.number,1)
                        from   
                                master..spt_values as sv
                        where
                            sv.type = 'P'
                        and sv.number between 1 and 30 -- max word length
                        order by (select (abs(checksum(newid())))%1000) desc
                        for xml path('')
                ) as x(n)

 

CTP3

I must admit that CTP3 Denaliwill bring some usefull T-SQL features. which i'm looking forward very much and just can't wait for the final release.

Reoccuring visits

Itzik Ben-Gan posted this game on SQL magazine previous week, stating:

 

The challenge is to write a query that works with two input arguments @from and @to holding
the start and end dates of a date range, and calculates, for each day, various statistics in
respect to the previous day. Specifically, how many visitors visited the site that day, how
 many new visitors were added compared to the previous day, how many visitors were removed
compared to the previous day, and how many remained. For the given sample data, the desired
result should look like this:

dt         numvisits   added       removed     remained
---------- ----------- ----------- ----------- -----------
2011-06-01 3           3           0           0
2011-06-02 2           0           1           2
2011-06-03 2           1           1           1
2011-06-04 0           0           2           0
2011-06-05 0           0           0           0
2011-06-06 0           0           0           0
2011-06-07 2           2           0           0
2011-06-08 3           2           1           1

Observe that there’s a key defined on dt and visitor, meaning that you store only one
occurrence at most per visitor and day. Also note that there is a possibility that
during some days there would be no visitors.

 

-- DDL and sample data for DailyVisits table
SET NOCOUNT ON;
USE TC;
IF OBJECT_ID('dbo.DailyVisits', 'U') IS NOT NULL
  DROP TABLE dbo.DailyVisits;
GO

CREATE TABLE dbo.DailyVisits
(
  dt      DATEtime        NOT NULL,
  visitor VARCHAR(10) NOT NULL,
  CONSTRAINT PK_DailyVisits PRIMARY KEY(dt, visitor)
);



INSERT INTO dbo.DailyVisits(dt, visitor) VALUES
     ('20110601', 'A'),
     ('20110601', 'B'),
     ('20110601', 'C'),
     ('20110602', 'A'),
     ('20110602', 'C'),
     ('20110603', 'A'),
     ('20110603', 'D'),
     ('20110607', 'A'),
     ('20110607', 'D'),
     ('20110608', 'D'),
     ('20110608', 'E'),
     ('20110608', 'F');

 

two simple solutions:


-- SOLUTION #1


;with dt_cte as
-- insert empty/missing dates
(select
    min(dt) as dt_
    ,max(dt) as dt_m
from dailyvisits
union all
select
     dt_+1 as dt_
    ,dt_m
from dt_cte
where
    dt_+1 <= dt_m)
-- calculating added, removed and remained users
select
date_.dt_ as date
,count(distinct dv.visitor) as numvisits
,count(distinct dv.visitor)-(sum( case when (dv1.visitor = dv.visitor and dv.visitor is not null and dv1.visitor is not null) then 1 else 0 end)) as added
,count(distinct dv1.visitor) - sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as removed
,sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as remained

from
dt_cte as date_
left hash join dailyvisits as dv
on date_.dt_ = dv.dt
left hash join dailyvisits as dv1
on date_.dt_ = dateadd(day,1,dv1.dt)
group by date_.dt_



-- SOLUTION #2

-- calculating added, removed and remained users
select
date_.dt_ as date
,count(distinct dv.visitor) as numvisits
,count(distinct dv.visitor)-(sum( case when (dv1.visitor = dv.visitor and dv.visitor is not null and dv1.visitor is not null) then 1 else 0 end)) as added
,count(distinct dv1.visitor)-sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as removed
,sum(case when dv.visitor = dv1.visitor then 1 else 0 end) as remained

from
-- insert empty/missing dates
(
    select
         (select min(dt) from dailyvisits)+number as dt_
    from master..spt_values as b with (nolock readuncommitted)
    where
        [type] = 'P'
    and number between 1 and (select datediff(day,(select min(dt) from dailyvisits), (select max(dt) from dailyvisits)))
) as date_
left hash join dailyvisits as dv
on date_.dt_ = dv.dt
left hash join dailyvisits as dv1
on date_.dt_ = dateadd(day,1,dv1.dt)
group by date_.dt_
 

 

Posted: Jul 05 2011, 07:23 PM by Tomaz.tsql | with no comments
Filed under: , , ,
DateTime Tally Table

 Five ways to generate Date Tally Table:

-- 1. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)

insert into dt(dt)
select
a.dt+b.number as dt
from
(select cast('2010/12/31' as datetime) as dt
) as a
cross join master..spt_values as b
where
 type = 'P' and number between 1 and 365


-- 2. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)


;with tab as
(
select cast('2011-1-1' as datetime) as dt, 1 as rn
union all
select dateadd(day,1,dt) as dt,rn + 1 as rn
from tab
where rn < 365
)
insert into dt(dt)
select dt
from tab
option (maxrecursion 365)


-- 3. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)

insert into dt(dt)
;select
    cast('2010/12/31' as datetime)+number as dt
from master..spt_values as b
where
 type = 'P' and number between 1 and 365


-- 4. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)

insert into dt(dt)
select
dateadd(day, isnull((select scope_identity()),2),'2010/12/31')
go 365


-- 5. way
;create table tc..dt
(id int identity(1,1)
,dt datetime)

while  isnull(@@identity,1) <= 365
begin
    insert into dt(dt)
    select
        dateadd(day, isnull(@@identity,1),'2010/12/31') as dt
end

 

SQL Bits VII

I was very happy to dive in into SQL Bits VII.  Several interesting presentations, interesting concepts, advices and most of all interesting people.

Some things actually disturb me. 

1. the outlook of all presentations. I would recommend to have standardized outlook of all presentations for each of the presenter. Meaning, to have a general rules of PPT design (fonts, views, etc.), general diagrams, pictures that can be used by everyone (something like techEd introduced as well as SQL PASS). 

2. access to presentations and code / samples online (if presenters agree with this as well as SQL Bits agree with this)

3. try to avoid personalized computers settings (online skype messages, email messages, windows settings, windows sizes,  etc.). Some of best presenters and presentations by experts (Simon Sabin, Ramesh Meyyappan, Chriss Webb) were having from what  i've seen some problems with that. :)

4. maybe some additional reviewing would be welcome.

5. all presenters use same database (when possible). It can be bl***y AdventureWorks2008 or it can be SQLBitsWorks :) 

6. I like interactive presentation. one things, for the video or the track, it would be nice when listeners in audiance ask questions, to either (1) to pass them mic or (2) repeat the question for the video.

7. nice polo shirts. would love to have one :) 

 

Otherwise i hope i will be able to  prepare something from my side for next  SqlBits to see it in reality. I trully love SqlBits, it's really down-to-earth, case-study presentations and everything is based on experience, milage and long hours of code typing.

 

Can't wait for SqlBits VIII.


 

Posted: Feb 20 2011, 09:40 AM by Tomaz.tsql | with no comments
Filed under:
T-SQL Jokes

 SQL Table walks to a psychiatrist dr. Index
 Table: "Doctor, I have a problem"
 Dr: "what kind a problem?"
 Table: "I'm a mess. I have things all over the place, i always look for my stuff"
 Dr. "No problem. I will get you in order".
 

 Index and table are reading a book "index-sutra"
 Table: Oh, baby tonight we can try a clustered position"
 Index: "yeah baby, we can also try covered position"
 Table: "or maybe multiple clustered position"
 Index: "baby, yes, that's the one. i'm just gonna call my friends"

 Indexianina philosophy on Fill factor 50 = "Half empty or Half full?"

 What does an execution plan say to t-sql query? "Go f**k yourself, if you are not happy with me"

 Execution plan to t-sql query is like alter-ego to self.

 

Posted: Jan 30 2011, 10:59 PM by Tomaz.tsql | with no comments
Filed under:
insert into script

My contribution to community Smile

http://www.sqlmag.com/article/sql-server/Transfer-small-amount-of-data-by-generating-the-INSERT-INTO-statement.aspx

 

Enjoy!

TimeStamp and mini-ETL (extract, transform, load)

Short example  how to use Timestamp for a mini ETL process of your data.

example below is following:

Table_1 is production table on server1

Table_2 is datawarehouse table on server2 where datawarehouse is located

Every day data are extracted, transformed and loaded to dataware house for further off-line usage and data analysis and business decision support.

 

1. Creating the environment

if object_id ('table_1') is not null
drop table table_1;
go

if object_id ('table_2') is not null
drop table table_2;
go

if object_id ('delete_logs') is not null
drop table delete_logs;
go


create table table_1
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TS timestamp not null
,TSR binary(8) null
,check_sum as checksum(class1, class2, num1))

create table table_2
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TSR binary(8) null
,check_sum bigint not null)

create table delete_logs
(id int not null
,TSR binary(8)
,delete_date datetime
,flag_delete tinyint default(0))

if object_id ('table1.trigger_delete','TR') is not null
drop trigger table1.trigger_delete;
go

create trigger trigger_delete
on table_1
after delete
as

delete from table_1
where id in (select id from deleted)

insert into delete_logs
select id,TS, getdate(),0 from deleted

go

 

2. populating the production table:

insert into table_1 (class1, class2, num1, TSR) values ('seattle','portland',251,null)
insert into table_1 (class1, class2, num1, TSR) values ('portland','portland',257,null)
insert into table_1 (class1, class2, num1, TSR) values ('seattle','san diego',254,null)
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seatlle',252,null)

 

-- Dataware house scenario

-- 0. scenario
-- table full load
insert into table_2 (class1, class2, num1, TSR,check_sum)
select
    class1
    ,class2
    ,num1
    ,TS as TSR
    ,check_sum
from table_1

 


--1.scenario
-- row is delete from original table
-- and stored in table to delete the row from dataware house copy table (table_2)
delete from table_1 where id = 2


;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
delete from table_2 where id in (select id from cte_delete)

;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
update delete_logs
    set flag_delete = 1
where id in (select id from cte_delete)


--2. scenario
-- row is updated in original table
;update table_1
set num1 = 300
where id = 1

;with cte_update (id, class1, class2, num1, ts)
as
(
select t1.id, t1.class1, t1.class2, t1.num1, t1.ts
    from table_1 as t1
    join table_2 as t2
    on t1.id = t2.id
where
    t1.ts <> t2.tsr
)

update t2
set t2.class1 = c.class1
,t2.class2 = c.class2
,t2.num1 = c.num1
,t2.tsr = c.ts
from table_2  as t2
    join cte_update as c
    on t2.id = c.id



-- 3. scenario
-- row is inserted in original table
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seattle',259,null)

;with cte_insert (id, class1, class2, num1, TS, check_sum)
as
(select
    id, class1, class2, num1, ts, check_sum
    from table_1 as t1
where
    not exists (select id from table_2 as t2
                    where t2.id = t1.id)
)

insert into table_2
select class1, class2, num1, ts as tsr, check_sum from cte_insert

 

-- check equality

select * from table_1
select * from table_2
select * from delete_logs
 

 i did comparison also with timestamp and checksum and timestamp is faster in terms of finding records for UPSERT and DELETE.

 

UPDATE: Please note that table TRUNCATE do not initiate DELETE TRIGGER, because it is not working row by row.

ORDER BY 1,2,3

Only for lazy people -> how to order our output by defining numbers instead of column names:

select
*
from
(
select 1 as id, 'test' as text_name, 32 as seq union all
select 3 as id, 'best' as text_name, 61 as seq union all
select 4 as id, 'best' as text_name, 12 as seq union all
select 4 as id, 'best' as text_name, 6 as seq union all
select 2 as id, 'hest' as text_name, 21 as seq
) as x

order by 2,3
--order by 1,2,3

you can specify the select list or you can leave it blank as I did, i will return the natural order of select list, when using numbers in order by.

Quick and neat. just don't get to lazy with MSSQL :)

Posted: Dec 07 2010, 06:38 PM by Tomaz.tsql | with 2 comment(s)
Filed under:
... i just avoid GUID

Our partner was explaining to me that they are using GUID as primary key on all the tables.

My immediate reaction was - why? and couple of basic doubts were:

- since I can read uniqueidentifier, it does not tell me absolutely anything

- if I will use my relational table, i sure will use other columns to get the information out

- SQL is terrible when setting up clustered index on GUID columns (and hence performance problems)

- why not use INT? it will save you space on disk, optimizer will be able to read data faster (instead of creating varchar field for uniqueidentifier) and

- since it's just a surrogate key, any of the numbers used as index do not tell me anything, so why not save on space and use something more "economic".

 

and short example:-- create GUID
select
newid() as id
,number
,type
,status
into Test1
from spt_values
--(2506 row(s) affected)


-- create normal identity
create table test2
(id int identity(1,1)
,number int
,type nchar(3)
,status int)

insert into test2 (number, type, status)
select number, type, status from test1
--(2506 row(s) affected)


CREATE CLUSTERED INDEX [CL_IX_test1_ID] ON [dbo].[Test1]
([id] ASC)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]


CREATE CLUSTERED INDEX [CL_IX_test2_ID] ON [dbo].[Test2]
([id] ASC)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF,
DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]


-- run the execution plan
-- and compere both queries
-- you can also run the i/o statistics and time
select * from test1
select * from test2

More Posts Next page »