TimeStamp and mini-ETL (extract, transform, load)
Short example how to use Timestamp for a mini ETL process of your data.
example below is following:
Table_1 is production table on server1
Table_2 is datawarehouse table on server2 where datawarehouse is located
Every day data are extracted, transformed and loaded to dataware house for further off-line usage and data analysis and business decision support.
1. Creating the environment
if object_id ('table_1') is not null
drop table table_1;
go
if object_id ('table_2') is not null
drop table table_2;
go
if object_id ('delete_logs') is not null
drop table delete_logs;
go
create table table_1
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TS timestamp not null
,TSR binary(8) null
,check_sum as checksum(class1, class2, num1))
create table table_2
(id int identity(1,1) not null
,class1 varchar(255) null
,class2 varchar(255) null
,num1 int null
,TSR binary(8) null
,check_sum bigint not null)
create table delete_logs
(id int not null
,TSR binary(8)
,delete_date datetime
,flag_delete tinyint default(0))
if object_id ('table1.trigger_delete','TR') is not null
drop trigger table1.trigger_delete;
go
create trigger trigger_delete
on table_1
after delete
as
delete from table_1
where id in (select id from deleted)
insert into delete_logs
select id,TS, getdate(),0 from deleted
go
2. populating the production table:
insert into table_1 (class1, class2, num1, TSR) values ('seattle','portland',251,null)
insert into table_1 (class1, class2, num1, TSR) values ('portland','portland',257,null)
insert into table_1 (class1, class2, num1, TSR) values ('seattle','san diego',254,null)
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seatlle',252,null)
-- Dataware house scenario
-- 0. scenario
-- table full load
insert into table_2 (class1, class2, num1, TSR,check_sum)
select
class1
,class2
,num1
,TS as TSR
,check_sum
from table_1
--1.scenario
-- row is delete from original table
-- and stored in table to delete the row from dataware house copy table (table_2)
delete from table_1 where id = 2
;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
delete from table_2 where id in (select id from cte_delete)
;with cte_delete (id, tsr)
as
(
select id,tsr from delete_logs
where flag_delete <> 1
)
update delete_logs
set flag_delete = 1
where id in (select id from cte_delete)
--2. scenario
-- row is updated in original table
;update table_1
set num1 = 300
where id = 1
;with cte_update (id, class1, class2, num1, ts)
as
(
select t1.id, t1.class1, t1.class2, t1.num1, t1.ts
from table_1 as t1
join table_2 as t2
on t1.id = t2.id
where
t1.ts <> t2.tsr
)
update t2
set t2.class1 = c.class1
,t2.class2 = c.class2
,t2.num1 = c.num1
,t2.tsr = c.ts
from table_2 as t2
join cte_update as c
on t2.id = c.id
-- 3. scenario
-- row is inserted in original table
insert into table_1 (class1, class2, num1, TSR) values ('san francisco','seattle',259,null)
;with cte_insert (id, class1, class2, num1, TS, check_sum)
as
(select
id, class1, class2, num1, ts, check_sum
from table_1 as t1
where
not exists (select id from table_2 as t2
where t2.id = t1.id)
)
insert into table_2
select class1, class2, num1, ts as tsr, check_sum from cte_insert
-- check equality
select * from table_1
select * from table_2
select * from delete_logs
i did comparison also with timestamp and checksum and timestamp is faster in terms of finding records for UPSERT and DELETE.
UPDATE: Please note that table TRUNCATE do not initiate DELETE TRIGGER, because it is not working row by row.