Tomaz.tsql

TimeStamp and mini-ETL (extract, transform, load)

Short example  how to use Timestamp for a mini ETL process of your data.

example below is following:

Table_1 is production table on server1

Table_2 is datawarehouse table on server2 where datawarehouse is located

Every day data are extracted, transformed and loaded to dataware house for further off-line usage and data analysis and business decision support.

 

1. Creating the environment

if object_id ('table_1') is not null
drop table table_1;
go

if object_id ('table_2') is not null
drop table table_2;
go

if object_id ('delete_logs') is not null
drop table delete_logs;
go


create table table_1
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TS timestamp not null
,TSR binary(8) null
,check_sum as checksum(class1, class2, num1))

create table table_2
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TSR binary(8) null
,check_sum bigint not null)

create table delete_logs
(id int not null
,TSR binary(8)
,delete_date datetime
,flag_delete tinyint default(0))

if object_id ('table1.trigger_delete','TR') is not null
drop trigger table1.trigger_delete;
go

create trigger trigger_delete
on table_1
after delete
as

delete from table_1
where id in (select id from deleted)

insert into delete_logs
select id,TS, getdate(),0 from deleted

go

 

2. populating the production table:

insert into table_1 (class1, class2, num1, TSR) values ('seattle','portland',251,null)
insert into table_1 (class1, class2, num1, TSR) values ('portland','portland',257,null)
insert into table_1 (class1, class2, num1, TSR) values ('seattle','san diego',254,null)
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seatlle',252,null)

 

-- Dataware house scenario

-- 0. scenario
-- table full load
insert into table_2 (class1, class2, num1, TSR,check_sum)
select
    class1
    ,class2
    ,num1
    ,TS as TSR
    ,check_sum
from table_1

 


--1.scenario
-- row is delete from original table
-- and stored in table to delete the row from dataware house copy table (table_2)
delete from table_1 where id = 2


;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
delete from table_2 where id in (select id from cte_delete)

;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
update delete_logs
    set flag_delete = 1
where id in (select id from cte_delete)


--2. scenario
-- row is updated in original table
;update table_1
set num1 = 300
where id = 1

;with cte_update (id, class1, class2, num1, ts)
as
(
select t1.id, t1.class1, t1.class2, t1.num1, t1.ts
    from table_1 as t1
    join table_2 as t2
    on t1.id = t2.id
where
    t1.ts <> t2.tsr
)

update t2
set t2.class1 = c.class1
,t2.class2 = c.class2
,t2.num1 = c.num1
,t2.tsr = c.ts
from table_2  as t2
    join cte_update as c
    on t2.id = c.id



-- 3. scenario
-- row is inserted in original table
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seattle',259,null)

;with cte_insert (id, class1, class2, num1, TS, check_sum)
as
(select
    id, class1, class2, num1, ts, check_sum
    from table_1 as t1
where
    not exists (select id from table_2 as t2
                    where t2.id = t1.id)
)

insert into table_2
select class1, class2, num1, ts as tsr, check_sum from cte_insert

 

-- check equality

select * from table_1
select * from table_2
select * from delete_logs
 

 i did comparison also with timestamp and checksum and timestamp is faster in terms of finding records for UPSERT and DELETE.

 

UPDATE: Please note that table TRUNCATE do not initiate DELETE TRIGGER, because it is not working row by row.

Comments

No Comments