TSQL -> Remove duplicate values from table using PARTITION BY and CTE Query

New TSQL features (begining from 2008 version) makes possible to remove duplicate values using PARTITION BY numerator and CTE query. In the following script we will create test table with duplicates values and then we'll remove them.


-- prepare test table and find duplicate values

DROP TABLE dbo.TestTable
GO

CREATE TABLE dbo.TestTable(
Code Varchar (1) NULL,
n smallint NULL
) ON [PRIMARY]
GO

INSERT INTO dbo.TestTable
VALUES
  ('A',1),
  ('A',2),
  ('A',2),
  ('B',1),
  ('B',2),
  ('B',3),
  ('C',1),
  ('C',1),
  ('C',2)
GO

select Code, n from dbo.TestTable order by Code, n
go

/*
-- find duplicates

select Code, n, count(*) AS cn
from dbo.TestTable
group by Code, n
having count(*) > 1
go
*/



Duplicate rows are highlighted:
Coden
A1
A2
A2
B1
B2
B3
C1
C1
C2


-- Numerate records using Partition By

WITH CTE (Code, n, DuplicateCount)
AS
(
  SELECT Code, n,
  ROW_NUMBER() OVER(PARTITION BY Code, n
   ORDER BY Code) AS DuplicateCount
  FROM dbo.TestTable
)
SELECT *
FROM CTE
ORDER BY Code, n, DuplicateCount
GO



Part Numeration Result:
CodenDuplicateCount
A11
A21
A22
B11
B21
B31
C11
C12
C21


-- Delete Duplicate records

WITH CTE (Code, n, DuplicateCount)
AS
(
  SELECT Code, n,
  ROW_NUMBER() OVER(PARTITION BY Code, n
   ORDER BY Code) AS DuplicateCount
  FROM dbo.TestTable
)
DELETE
FROM CTE
WHERE DuplicateCount > 1
GO

-- see results
select Code, n from dbo.TestTable order by Code, n
go



Result without duplicates:
Coden
A1
A2
B1
B2
B3
C1
C 2



sqlexamples.info