Using the ROW_NUMBER() function in a non recursive CTE gives a very big performance degradation because the Sequence Generation is executed not once as you'd expect but for once for every row in the anchor starting results, example - if the anchor query returns 588 rows to the CTE then the Sequence Generator will be executed for every reference of the CTE multiplied by 588!

Check this query:

with Trans ( row_no, id, salesperson_id, tran_date, clear_date, amount, transaction_types_id, account_id )

as (

    select row_no = row_number() over( order by account_id, tran_date, id ), id, salesperson_id, tran_date, clear_date, amount, transaction_types_id, account_id

    from Transactions

    where account_id between 1000 and 1020

    )

select t1.*,

       running_amount = coalesce(

                         (  select sum( amount )

                            from Trans t2

                            where t2.row_no < t1.row_no

                              and t2.account_id = t1.account_id ), 0 )

                        + amount

from Trans t1

order by account_id, row_no

go

 

 

The results of SET STATISTICS PROFILE ON is as follows:

Rows

Execs

588

1

with Trans ( row_no, id, salesperson_id, tran_date, clear_date, amount, transaction_types_id, account_id )  as (      select row_no = row_number() over( order by account_id, tran_date, id ), id, salesperson_id, tran_date, clear_date, amount, transaction_types_id, account_id      from Transactions      where account_id between 1000 and 1020      )  select t1.*,          running_amount = coalesce(                            (  select sum( amount )                              from Trans t2                              where t2.row_no < t1.row_no                                and t2.account_id = t1.account_id ), 0 )                          + amount  from Trans t1  order by account_id, row_no

0

0

  |--Compute Scalar(DEFINE:([Expr1016]=CASE WHEN [Expr1008] IS NOT NULL THEN [Expr1014] ELSE (0.00) END+[SQLBits20080301].[dbo].[transactions].[amount]))

588

1

       |--Nested Loops(Inner Join, PASSTHRU:(IsFalseOrNull [Expr1008] IS NOT NULL), OUTER REFERENCES:([SQLBits20080301].[dbo].[transactions].[account_id], [Expr1003]))

588

1

            |--Nested Loops(Inner Join, OUTER REFERENCES:([SQLBits20080301].[dbo].[transactions].[account_id], [Expr1003]))

588

1

            |    |--Sequence Project(DEFINE:([Expr1003]=row_number))

0

0

            |    |    |--Compute Scalar(DEFINE:([Expr1033]=(1)))

588

1

            |    |         |--Segment

588

1

            |    |              |--Sort(ORDER BY:([SQLBits20080301].[dbo].[transactions].[account_id] ASC, [SQLBits20080301].[dbo].[transactions].[tran_date] ASC, [SQLBits20080301].[dbo].[transactions].[id] ASC))

588

1

            |    |                   |--Nested Loops(Inner Join, OUTER REFERENCES:([SQLBits20080301].[dbo].[transactions].[id], [Expr1031]) WITH UNORDERED PREFETCH)

588

1

            |    |                        |--Index Seek(OBJECT:([SQLBits20080301].[dbo].[transactions].[ncidx_account_id]), SEEK:([SQLBits20080301].[dbo].[transactions].[account_id] >= (1000) AND [SQLBits20080301].[dbo].[transactions].[account_id] <= (1020)) ORDERED FORWARD)

588

588

            |    |                        |--Clustered Index Seek(OBJECT:([SQLBits20080301].[dbo].[transactions].[pk_transactions]), SEEK:([SQLBits20080301].[dbo].[transactions].[id]=[SQLBits20080301].[dbo].[transactions].[id]) LOOKUP ORDERED FORWARD)

588

588

            |    |--Index Spool(SEEK:([SQLBits20080301].[dbo].[transactions].[account_id]=[SQLBits20080301].[dbo].[transactions].[account_id] AND [Expr1003]=[Expr1003]))

0

0

            |         |--Compute Scalar(DEFINE:([Expr1008]=CASE WHEN [Expr1036]=(0) THEN NULL ELSE [Expr1037] END))

588

588

            |              |--Stream Aggregate(DEFINE:([Expr1036]=Count(*), [Expr1037]=SUM([SQLBits20080301].[dbo].[transactions].[amount])))

8132

588

            |                   |--Filter(WHERE:([Expr1007]<[Expr1003] AND [SQLBits20080301].[dbo].[transactions].[account_id]=[SQLBits20080301].[dbo].[transactions].[account_id]))

173166

588

            |                        |--Top(TOP EXPRESSION:(CASE WHEN [Expr1003] IS NULL OR [Expr1003]<(0) THEN (0) ELSE [Expr1003] END))

173166

588

            |                             |--Sequence Project(DEFINE:([Expr1007]=row_number))

0

0

            |                                  |--Compute Scalar(DEFINE:([Expr1035]=(1)))

173166

588

            |                                       |--Segment

173166

588

            |                                            |--Sort(ORDER BY:([SQLBits20080301].[dbo].[transactions].[account_id] ASC, [SQLBits20080301].[dbo].[transactions].[tran_date] ASC, [SQLBits20080301].[dbo].[transactions].[id] ASC))

345744

588

            |                                                 |--Index Seek(OBJECT:([SQLBits20080301].[dbo].[transactions].[ncidx]), SEEK:([SQLBits20080301].[dbo].[transactions].[account_id] >= (1000) AND [SQLBits20080301].[dbo].[transactions].[account_id] <= (1020)) ORDERED FORWARD)

567

567

            |--Index Spool(SEEK:([SQLBits20080301].[dbo].[transactions].[account_id]=[SQLBits20080301].[dbo].[transactions].[account_id] AND [Expr1003]=[Expr1003]))

0

0

                 |--Compute Scalar(DEFINE:([Expr1014]=CASE WHEN [Expr1040]=(0) THEN NULL ELSE [Expr1041] END))

567

567

                      |--Stream Aggregate(DEFINE:([Expr1040]=Count(*), [Expr1041]=SUM([SQLBits20080301].[dbo].[transactions].[amount])))

8132

567

                           |--Filter(WHERE:([Expr1013]<[Expr1003] AND [SQLBits20080301].[dbo].[transactions].[account_id]=[SQLBits20080301].[dbo].[transactions].[account_id]))

167250

567

                                |--Top(TOP EXPRESSION:(CASE WHEN [Expr1003] IS NULL OR [Expr1003]<(0) THEN (0) ELSE [Expr1003] END))

167250

567

                                     |--Sequence Project(DEFINE:([Expr1013]=row_number))

0

0

                                          |--Compute Scalar(DEFINE:([Expr1039]=(1)))

167250

567

                                               |--Segment

167250

567

                                                    |--Sort(ORDER BY:([SQLBits20080301].[dbo].[transactions].[account_id] ASC, [SQLBits20080301].[dbo].[transactions].[tran_date] ASC, [SQLBits20080301].[dbo].[transactions].[id] ASC))

333396

567

                                                         |--Index Seek(OBJECT:([SQLBits20080301].[dbo].[transactions].[ncidx]), SEEK:([SQLBits20080301].[dbo].[transactions].[account_id] >= (1000) AND [SQLBits20080301].[dbo].[transactions].[account_id] <= (1020)) ORDERED FORWARD)

 

 

The CTE should return just 588 rows because that’s what where account_id between 1000 and 1020 yields.

However, look at the number of rows processed in the above STATISTICS PROFILE output; 333,396 rows – ouch! If you divide 333,396 by 567 and 345744 by 588 you will get the result 588 which is our target result; so – on every execution the ROW_NUMBER is being recalculated on the 588 rows instead of the Sequence Generator just being produced once and reused, this is also confirmed if you look at the “Sequence Project(DEFINE:” step above which is executed 588 times.

When it boils down to it the actual problem is the fact that the CTE anchor is just expanded inline into the main query which is bad as I’ve demonstrated in my previous entry on performance of CTE’s.

The problem does not occur when using a Recursive Member.

Recommendation: Using the # table route as I suggested before.

 

Unless you are using recursion then the Common Table Expression sucks and you shouldn’t use it; the big problem is that the SQL in the anchor is repeated rather than spooled into a work table.

In the last article I went through the basics of the Common Table Expression and CTE Recursion and hinted that there may be performance problems, in this article I delve a bit deeper – specifically we see how self-join’s on a non-recursive CTE are very bad for performance.

Looking at our example query below we are using the Common Table Expression with a self-join and additionally a sub-query on the select; don’t look too much at what I’m doing in the query because it pretty much doesn’t make any sense – the important aspects to concentrate on are the self-join and sub-query.

Example CTE:

with ctePerf ( objname, colid, colname, max_length )

as (

 

      select objname = o.name,

               colid   = c.column_id,

               colname = c.name,

               c.max_length

      from #sysobjs o

            inner join #syscols c on c.object_id=o.object_id

      where o.type='U'

        and o.name= 'accounts'

  )

select p.objname, p.colname, p.colid,

         size_sofar = SUM( p2.max_length ),

         size_left  = ( SELECT SUM( p3.max_length )

                        FROM ctePerf p3

                        WHERE p3.objname=p.objname

                          and p3.colid > p.colid

                                    )

from ctePerf p

      left outer join ctePerf p2 on p2.objname=p.objname

                                            and p2.colid <=p.colid

group by p.objname, p.colname, p.colid

order by objname, colid

Let’s look at the query plan for the query we use as the CTE anchor:

CTE “anchor” query and plan:

select objname = o.name,

       colid   = c.column_id,

       colname = c.name,

       c.max_length

from #sysobjs o

      inner join #syscols c on c.object_id=o.object_id

where o.type='U'

  and o.name= 'accounts'

  |--Hash Match(Inner Join, HASH:([o ].[object_id])=([c ].[object_id]))

       |--Table Scan(OBJECT:([tempdb].[dbo].[#sysobjs] AS [o ]), WHERE:([tempdb].[dbo].[#sysobjs].[type] as [o ].[type]='U' AND [tempdb].[dbo].[#sysobjs].[name] as [o ].[name]=N'accounts'))

       |--Table Scan(OBJECT:([tempdb].[dbo].[#syscols] AS [c ]))

Check the query plan after incorporating the query into the CTE (below), before looking at the query plan note we are referencing the CTE anchor 3 times (shown in bold – aliases p, p2 and p3):

Query and Execution Plan:

with ctePerf ( objname, colid, colname, max_length )

as (

 

      select objname = o.name,

               colid   = c.column_id,

               colname = c.name,

               c.max_length

      from #sysobjs o

            inner join #syscols c on c.object_id=o.object_id

      where o.type='U'

        and o.name= 'accounts'

  )

select p.objname, p.colname, p.colid,

         size_sofar = SUM( p2.max_length ),

         size_left  = ( SELECT SUM( p3.max_length )

                        FROM ctePerf p3

                        WHERE p3.objname=p.objname

                          and p3.colid > p.colid

                                    )

from