sql >> Databasteknik >> RDS >> Sqlserver

En mycket komplicerad SQL-fråga

Jag har ändrat din datamodell något för att göra det tydligare vad som händer.

-- One row per customer. [CustomerLink] is nullable and carries no default;
-- it is sized to VARBINARY(20), which holds the 20-byte SHA-1 digest written
-- by the linking UPDATE further down in this script.
CREATE TABLE [dbo].[Customer] (
    [CustomerName] VARCHAR(20)   NOT NULL,
    [CustomerLink] VARBINARY(20) NULL
);

-- One row per identification document held by a customer: the document
-- type ([IDType]) and its value ([ID]). All three columns are mandatory.
CREATE TABLE [dbo].[CustomerIdentification] (
    [CustomerName] VARCHAR(20) NOT NULL,
    [ID]           VARCHAR(50) NOT NULL,
    [IDType]       VARCHAR(16) NOT NULL
);

Jag har också lagt till lite mer testdata:

-- Seed the seven sample customers. [CustomerLink] is not listed, so it
-- starts out NULL for every row.
INSERT INTO [dbo].[Customer] ([CustomerName])
VALUES
    ('Fred'),
    ('Bob'),
    ('Vince'),
    ('Tom'),
    ('Alice'),
    ('Matt'),
    ('Dan');

-- Sample identification documents for the seven sample customers.
-- The target columns are named explicitly so the statement does not depend
-- on the physical column order of the table.
INSERT INTO [dbo].[CustomerIdentification]
        ([CustomerName], [ID], [IDType])
VALUES
        ('Fred',    'A',    'Passport'),
        ('Fred',    'A',    'SIN'),
        ('Fred',    'A',    'Drivers Licence'),
        ('Bob',     'A',    'Passport'),
        ('Bob',     'B',    'Drivers Licence'),
        ('Bob',     'C',    'Credit Card'),
        ('Vince',   'A',    'Passport'),
        ('Vince',   'B',    'SIN'),
        ('Vince',   'C',    'Credit Card'),
        ('Tom',     'A',    'Passport'),
        ('Tom',     'B',    'SIN'),
        ('Tom',     'B',    'Drivers Licence'),
        ('Alice',   'B',    'Drivers Licence'),
        ('Matt',    'X',    'Drivers Licence'),
        ('Dan',     'X',    'Drivers Licence');

Är det här vad du letar efter?

-- Writes a shared [CustomerLink] value to customers that can be linked
-- through their identification documents.
--
-- Steps:
--   1. cteNonMatchingIDs  - customer pairs that contradict each other.
--   2. cteMatchedPairs    - customer pairs that agree on at least one document
--                           and do not contradict each other on any.
--   3. cteMatchedList     - chains of matched pairs as '$'-delimited name lists.
--   4. cteSubstringLists  - for every list of a customer, all lists containing it.
--   5. cteCustomerLink    - SHA-1 hash of the longest such list per customer.
--
-- Customers that take part in no matched pair produce no row in
-- cteCustomerLink and are therefore left untouched by the UPDATE.
--
-- The leading ';' terminates any preceding statement that was left
-- unterminated, which a statement starting with WITH requires.
;WITH [cteNonMatchingIDs] AS (
    -- Ordered pairs of different customers that hold the same IDType
    -- but with different IDs.
    SELECT
        ci3.[CustomerName] AS [CustomerName1],
        ci4.[CustomerName] AS [CustomerName2]
    FROM [dbo].[CustomerIdentification] AS ci3
    INNER JOIN [dbo].[CustomerIdentification] AS ci4
        ON ci3.[IDType] = ci4.[IDType]
    WHERE ci3.[CustomerName] <> ci4.[CustomerName]
        AND ci3.[ID] <> ci4.[ID]
),
[cteMatchedPairs] AS (
    -- Pairs that share at least one identical (IDType, ID) and have no
    -- entry in cteNonMatchingIDs. The strict '<' on the names keeps each
    -- pair once, in name order, and rules out pairing a customer with itself.
    -- INNER JOIN: a row without a partner cannot form a pair, so there is
    -- nothing for an outer join to preserve.
    SELECT DISTINCT
        ci1.[CustomerName] AS [CustomerName1],
        ci2.[CustomerName] AS [CustomerName2]
    FROM [dbo].[CustomerIdentification] AS ci1
    INNER JOIN [dbo].[CustomerIdentification] AS ci2
        ON ci1.[IDType] = ci2.[IDType]
        AND ci1.[ID] = ci2.[ID]
        AND ci1.[CustomerName] < ci2.[CustomerName]
    WHERE NOT EXISTS (
        SELECT 1
        FROM [cteNonMatchingIDs] AS nm
        WHERE nm.[CustomerName1] = ci1.[CustomerName] -- correlated subquery
            AND nm.[CustomerName2] = ci2.[CustomerName]
    )
),
[cteMatchedList] ([CustomerName], [CustomerNameList]) AS (
    -- Anchor: every matched pair seeds two lists, '$Name1$Name2' owned by
    -- Name1 and '$Name2' owned by Name2. Every name in a list is preceded
    -- by the '$' delimiter.
    SELECT
        seed.[CustomerName1],
        seed.[CustomerNameList]
    FROM (
        SELECT
            [CustomerName1],
            CONVERT(VARCHAR(1000), '$'
             + [CustomerName1] + '$'
             + [CustomerName2]) AS [CustomerNameList]
        FROM [cteMatchedPairs]
        UNION ALL
        SELECT
            [CustomerName2],
            CONVERT(VARCHAR(1000), '$'
             + [CustomerName2]) AS [CustomerNameList]
        FROM [cteMatchedPairs]
    ) AS seed
    UNION ALL
    -- Recursive step: append Name2 of a pair to every list whose last entry
    -- is that pair's Name1. The '$' delimiter is part of the comparison so a
    -- name only matches a whole list entry, never the tail of a longer name
    -- (e.g. 'Ann' must not extend a list ending in '$JoAnn').
    -- Recursion ends because pairs are strictly name-ordered, so each step
    -- appends a name greater than the current last entry.
    SELECT
        ml.[CustomerName],
        CONVERT(VARCHAR(1000), ml.[CustomerNameList] + '$'
         + mp.[CustomerName2])
    FROM [cteMatchedList] AS ml -- recursive CTE
    INNER JOIN [cteMatchedPairs] AS mp
        ON RIGHT(ml.[CustomerNameList], LEN(mp.[CustomerName1]) + 1)
            = '$' + mp.[CustomerName1]
),
[cteSubstringLists] AS (
    -- For each list owned by a customer, every list (of any owner) that
    -- contains it as a substring.
    -- NOTE(review): the list text is used as a LIKE pattern, so names
    -- containing '%', '_' or '[' would act as wildcards - confirm that
    -- customer names cannot contain those characters.
    SELECT
        r1.[CustomerName],
        r2.[CustomerNameList]
    FROM [cteMatchedList] AS r1
    INNER JOIN [cteMatchedList] AS r2
        ON r2.[CustomerNameList] LIKE '%' + r1.[CustomerNameList] + '%'
),
[cteCustomerLink] AS (
    -- Per customer, hash the longest list found in cteSubstringLists.
    -- DISTINCT collapses the duplicates of that list.
    -- NOTE(review): if two different lists tie for the maximum length, a
    -- customer gets more than one row here and the value the UPDATE keeps
    -- is not deterministic - confirm whether ties can occur in real data.
    SELECT DISTINCT
        longest.[CustomerName],
        HASHBYTES('SHA1', candidate.[CustomerNameList]) AS [CustomerLink]
    FROM (
        SELECT
            [CustomerName],
            MAX(LEN([CustomerNameList])) AS [MaxListLength]
        FROM [cteSubstringLists]
        GROUP BY [CustomerName]
    ) AS longest
    INNER JOIN [cteSubstringLists] AS candidate
        ON candidate.[CustomerName] = longest.[CustomerName]
        AND LEN(candidate.[CustomerNameList]) = longest.[MaxListLength]
)
UPDATE  c
SET     [CustomerLink] = cl.[CustomerLink]
FROM [dbo].[Customer] AS c
INNER JOIN [cteCustomerLink] AS cl
    ON cl.[CustomerName] = c.[CustomerName];


-- Show the resulting link value for every customer. Columns are listed
-- explicitly so the output shape does not change if the table gains columns.
SELECT
    [CustomerName],
    [CustomerLink]
FROM [dbo].[Customer];



  1. Hur kan jag använda MySQL i C++?

  2. Få den senaste informationen från två tabeller för att jämföra datum och tid

  3. Ordna foruminlägg efter tidpunkt för senaste svar

  4. Sprida MySQL-data över flera diskar