Tuesday, March 27, 2012

Cursor performance slows exponentially

I have a strange problem. I am running a process that unfortunately
requires that I analyze each record of a table which has [currently]
led me to using a cursor.
I noticed some performance problems and so I ran some tests and it
seems that the cursor performance is getting exponentially slower, for
example, the first 500 records of the cursor process in about 1733ms
but the records 10000-10500 takes about 13516ms and records 20000-20500
takes 22500ms.
Is this normal behaviour with cursors? I know that they are not
optimal for performance, however, I didn't expect the performance to
degrade as it was running.
Below is a snip from the code. At first I thought that perhaps the
processing of the records was growing because it was counting values in
a table that is growing but when I ran a timer around the processing
logic it showed little if any substantial difference.
Aside: I'm using '##temp' tables because I want to be able to inspect
the tables after the query runs, other wise I just use '#temp' tables.
Thanks.
WHILE (@.@.FETCH_STATUS=0)
BEGIN
-- Reset environment
SET @.processFlag = 0
SET @.resetBalanceFlag = 0
SET @.setEndDateFlag = 0
IF(SELECT COUNT(*)
FROM ##tempHistory
WHERE AgencyID=@.AgyID
AND CollectionInventoryType=@.HwyID) >= 1
BEGIN
SET @.resetBalanceFlag = 1
SET @.setEndDateFlag = 1
END
IF(@.HwyID=@.prevHwyID)
SET @.processFlag = 0
ELSE
BEGIN
SET @.processFlag = 1
SET @.setEndDateFlag = 1
END
IF((@.AgyID=@.prevAgyID) AND @.RecordType='R')
SET @.processFlag = 0
ELSE
BEGIN
SET @.processFlag = 1
SET @.setEndDateFlag = 1
END
IF(@.processFlag=1)
BEGIN
IF(@.resetBalanceFlag = 1)
BEGIN
-- Reset assign balance to $0 for all records on temp table that
match
-- the agyid, hwyid and RAN
UPDATE ##tempHistory
SET AssignBalance = 0
WHERE CollectInvCode=@.CollectInvCode
AND AgencyID=@.AgyID
AND CollectionInventoryType=@.HwyID
END
IF(@.setEndDateFlag = 1)
BEGIN
UPDATE ##tempHistory
SET EndDate=@.ProcessedDate
WHERE CollectInvCode=@.CollectInvCode
AND EndDate IS NULL
END
INSERT INTO
##tempHistory(CollectInvCode,AccountCode
,AgencyID,StartDate,EndDate,AssignBa
lance,ProductCatID,CollectionInventoryTy
pe)
VALUES(@.CollectInvCode,@.AccountCode,@.Agy
ID,@.ProcessedDate,NULL,@.Balance,@.Pro
dID,@.HwyID)
END
SET @.prevHwyID = @.HwyID
SET @.prevAgyID = @.AgyID
FETCH NEXT FROM cur_ProcessRAN INTO
@.RAN,@.CollectInvCode,@.AccountCode,@.ProdI
D,@.RecordType,@.Balance,@.ProcessedDat
e,@.HwyID,@.AgyID
-- remove after testing
set @.cnt=@.cnt+1
if(@.cnt%500)=0
begin
print cast(@.cnt as varchar) + ':' + cast(datediff(ms,@.ttime,getdate())
as varchar)
set @.ttime=getdate()
end
-- end: remove
END -- END: Loop through all non-"U" records for the given RANHi
It's pretty hard to suggest something without seeing a whole ddl + sample
data.
Cursors are almost bad in terms of performance ,hence try to re-write as set
based solution.
<mrpubnight@.hotmail.com> wrote in message
news:1124678427.316826.197680@.g14g2000cwa.googlegroups.com...
>I have a strange problem. I am running a process that unfortunately
> requires that I analyze each record of a table which has [currently]
> led me to using a cursor.
> I noticed some performance problems and so I ran some tests and it
> seems that the cursor performance is getting exponentially slower, for
> example, the first 500 records of the cursor process in about 1733ms
> but the records 10000-10500 takes about 13516ms and records 20000-20500
> takes 22500ms.
> Is this normal behaviour with cursors? I know that they are not
> optimal for performance, however, I didn't expect the performance to
> degrade as it was running.
> Below is a snip from the code. At first I thought that perhaps the
> processing of the records was growing because it was counting values in
> a table that is growing but when I ran a timer around the processing
> logic it showed little if any substantial difference.
> Aside: I'm using '##temp' tables because I want to be able to inspect
> the tables after the query runs, other wise I just use '#temp' tables.
> Thanks.
> WHILE (@.@.FETCH_STATUS=0)
> BEGIN
> -- Reset environment
> SET @.processFlag = 0
> SET @.resetBalanceFlag = 0
> SET @.setEndDateFlag = 0
> IF(SELECT COUNT(*)
> FROM ##tempHistory
> WHERE AgencyID=@.AgyID
> AND CollectionInventoryType=@.HwyID) >= 1
> BEGIN
> SET @.resetBalanceFlag = 1
> SET @.setEndDateFlag = 1
> END
> IF(@.HwyID=@.prevHwyID)
> SET @.processFlag = 0
> ELSE
> BEGIN
> SET @.processFlag = 1
> SET @.setEndDateFlag = 1
> END
> IF((@.AgyID=@.prevAgyID) AND @.RecordType='R')
> SET @.processFlag = 0
> ELSE
> BEGIN
> SET @.processFlag = 1
> SET @.setEndDateFlag = 1
> END
> IF(@.processFlag=1)
> BEGIN
> IF(@.resetBalanceFlag = 1)
> BEGIN
> -- Reset assign balance to $0 for all records on temp table that
> match
> -- the agyid, hwyid and RAN
> UPDATE ##tempHistory
> SET AssignBalance = 0
> WHERE CollectInvCode=@.CollectInvCode
> AND AgencyID=@.AgyID
> AND CollectionInventoryType=@.HwyID
> END
> IF(@.setEndDateFlag = 1)
> BEGIN
> UPDATE ##tempHistory
> SET EndDate=@.ProcessedDate
> WHERE CollectInvCode=@.CollectInvCode
> AND EndDate IS NULL
> END
> INSERT INTO
> ##tempHistory(CollectInvCode,AccountCode
,AgencyID,StartDate,EndDate,Assign
Balance,ProductCatID,CollectionInventory
Type)
> VALUES(@.CollectInvCode,@.AccountCode,@.Agy
ID,@.ProcessedDate,NULL,@.Balance,@.P
rodID,@.HwyID)
> END
> SET @.prevHwyID = @.HwyID
> SET @.prevAgyID = @.AgyID
> FETCH NEXT FROM cur_ProcessRAN INTO
> @.RAN,@.CollectInvCode,@.AccountCode,@.ProdI
D,@.RecordType,@.Balance,@.ProcessedD
ate,@.HwyID,@.AgyID
> -- remove after testing
> set @.cnt=@.cnt+1
> if(@.cnt%500)=0
> begin
> print cast(@.cnt as varchar) + ':' + cast(datediff(ms,@.ttime,getdate())
> as varchar)
> set @.ttime=getdate()
> end
> -- end: remove
> END -- END: Loop through all non-"U" records for the given RAN
>|||Do you have indexes on ##temp? If not, then that's your problem. Without
indexes, SELECT COUNT(*) and the two UPDATE statements will execute a table
scan, which will obviously take a lot longer as the table fills up. In an
aside, you should use EXISTS instead of COUNT(*) to determine whether a row
exists, because EXISTS shortcircuits as soon as it gets a hit, whereas
COUNT(*) must complete a scan.
To maximize the performance of a cursor you should minimize the reads and
writes within the fetch loop. To minimize reads, you can usually alter the
select statement of the cursor to include all information required to
perform the calculation, or you can create a separate cursor with a similar
ORDER BY clause and scan both tables synchronously (similar to a merge
join). In this way all of the information needed for calculation is
obtained using set-based operations before the fetch loop begins. To
minimize writes, I prefer to only execute INSERTs to table variables or temp
tables within the fetch loop. UPDATEs and DELETEs should be deferred
whenever possible. INSERTs into a table without any indexes is extremely
fast, and INSERTs into a table with a clustered index on an IDENTITY column
is almost as fast. The whole idea is to scan through the cursor spooling
changes so that they can be written using set-based operations outside of
the fetch loop.
Set-based change operations are always faster than iterated change
operations, because triggers only fire once, indexes can be updated en-mass,
and the amount of information written to the transaction log is minimized.
If foreign key constraints exist, then performance of set based operations
can sometimes be further optimized by escalating to a clustered index scan.
(A clustered index scan is not always a bad thing. It starts out with a
clustered index s to find the first row and then uses the linked-list
between leaf pages within the clustered index to find the subsequent rows.)
<mrpubnight@.hotmail.com> wrote in message
news:1124678427.316826.197680@.g14g2000cwa.googlegroups.com...
> I have a strange problem. I am running a process that unfortunately
> requires that I analyze each record of a table which has [currently]
> led me to using a cursor.
> I noticed some performance problems and so I ran some tests and it
> seems that the cursor performance is getting exponentially slower, for
> example, the first 500 records of the cursor process in about 1733ms
> but the records 10000-10500 takes about 13516ms and records 20000-20500
> takes 22500ms.
> Is this normal behaviour with cursors? I know that they are not
> optimal for performance, however, I didn't expect the performance to
> degrade as it was running.
> Below is a snip from the code. At first I thought that perhaps the
> processing of the records was growing because it was counting values in
> a table that is growing but when I ran a timer around the processing
> logic it showed little if any substantial difference.
> Aside: I'm using '##temp' tables because I want to be able to inspect
> the tables after the query runs, other wise I just use '#temp' tables.
> Thanks.
> WHILE (@.@.FETCH_STATUS=0)
> BEGIN
> -- Reset environment
> SET @.processFlag = 0
> SET @.resetBalanceFlag = 0
> SET @.setEndDateFlag = 0
> IF(SELECT COUNT(*)
> FROM ##tempHistory
> WHERE AgencyID=@.AgyID
> AND CollectionInventoryType=@.HwyID) >= 1
> BEGIN
> SET @.resetBalanceFlag = 1
> SET @.setEndDateFlag = 1
> END
> IF(@.HwyID=@.prevHwyID)
> SET @.processFlag = 0
> ELSE
> BEGIN
> SET @.processFlag = 1
> SET @.setEndDateFlag = 1
> END
> IF((@.AgyID=@.prevAgyID) AND @.RecordType='R')
> SET @.processFlag = 0
> ELSE
> BEGIN
> SET @.processFlag = 1
> SET @.setEndDateFlag = 1
> END
> IF(@.processFlag=1)
> BEGIN
> IF(@.resetBalanceFlag = 1)
> BEGIN
> -- Reset assign balance to $0 for all records on temp table that
> match
> -- the agyid, hwyid and RAN
> UPDATE ##tempHistory
> SET AssignBalance = 0
> WHERE CollectInvCode=@.CollectInvCode
> AND AgencyID=@.AgyID
> AND CollectionInventoryType=@.HwyID
> END
> IF(@.setEndDateFlag = 1)
> BEGIN
> UPDATE ##tempHistory
> SET EndDate=@.ProcessedDate
> WHERE CollectInvCode=@.CollectInvCode
> AND EndDate IS NULL
> END
> INSERT INTO
>
##tempHistory(CollectInvCode,AccountCode
,AgencyID,StartDate,EndDate,AssignBa
lance,ProductCatID,CollectionInventoryTy
pe)
>
VALUES(@.CollectInvCode,@.AccountCode,@.Agy
ID,@.ProcessedDate,NULL,@.Balance,@.Pro
dID,@.HwyID)
> END
> SET @.prevHwyID = @.HwyID
> SET @.prevAgyID = @.AgyID
> FETCH NEXT FROM cur_ProcessRAN INTO
>
@.RAN,@.CollectInvCode,@.AccountCode,@.ProdI
D,@.RecordType,@.Balance,@.ProcessedDat
e,@.HwyID,@.AgyID
> -- remove after testing
> set @.cnt=@.cnt+1
> if(@.cnt%500)=0
> begin
> print cast(@.cnt as varchar) + ':' + cast(datediff(ms,@.ttime,getdate())
> as varchar)
> set @.ttime=getdate()
> end
> -- end: remove
> END -- END: Loop through all non-"U" records for the given RAN
>

No comments:

Post a Comment