Files
sql-scripts/todo clean masking.sql
Schork Thierry (Galenica) 5a4f2784bb sync
2026-01-13 08:14:13 +01:00

402 lines
11 KiB
PL/PgSQL

USE Arizona
/*
1. identify if the duplicate addresses are linked to a persisted or masked address
2. TODO: udpate document_counter with max value of adk_key for type 7 and 8
SELECT dc.*
FROM dbo.Document_counter dc
JOIN ActivePos_read.dbo.CommonVar cv ON cv.[Key] = 'cvCounterADKType7' AND cv.Value = dc.Document_counter_ID
3. delete address_key rows for masked addresses
*/
BEGIN TRANSACTION
SET XACT_ABORT ON;
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#mask')IS NOT NULL BEGIN;
DROP TABLE #mask;
END;
CREATE TABLE #mask(address_id INT NOT NULL, TT_masking VARCHAR(20) NOT NULL DEFAULT 'MASK')
--fetch duplicate in temp table
INSERT INTO #mask
(
address_id
)
SELECT k.ADK_address
FROM dbo.Address_key k
WHERE k.ADK_type = 7
AND EXISTS(
--get all duplicated adk_key for type 7
SELECT s.ADK_key
FROM dbo.Address_key s
WHERE s.ADK_type=7
AND k.ADK_key = s.ADK_key
GROUP BY s.ADK_subsidiary, s.ADK_key
HAVING COUNT(1)>1
)
--#region flag masked or not address
DECLARE @keywords TABLE(pattern VARCHAR(255) NOT NULL);
INSERT INTO @keywords ([pattern])
SELECT 'TEST %'
UNION SELECT 'Assura%'
UNION SELECT 'PRESCRIPTION%'
UNION SELECT 'RETETE%'
UNION SELECT 'USER%'
UNION SELECT 'ONBOARD%'
UNION SELECT 'SUSPICIOUS%'
UNION SELECT 'DOE'
UNION SELECT 'GIALLO%'
UNION SELECT 'TOLKIEN%'
UNION SELECT 'DEMO %'
UNION SELECT 'demo'
UNION SELECT 'DOCUMEDIS%'
UNION SELECT 'OMNICHANNEL%'
UNION SELECT 'LASTNAME%'
UNION SELECT 'MeiXin%'
UNION SELECT 'DIPLOMATE%'
UNION SELECT 'NO %'
UNION SELECT 'ONLY%'
UNION SELECT 'RDL%'
UNION SELECT 'USER%'
UNION SELECT 'GALEXIS%'
UNION SELECT 'APOS%'
UNION SELECT 'APOSANDMAGENTO%'
UNION SELECT 'Activate'
UNION SELECT 'Sunstore'
UNION SELECT 'Coopvitality'
UNION SELECT 'Ama Plus Sun'
UNION SELECT 'Ama Plus Cvi'
UNION SELECT 'Sun Plus Cvi'
UNION SELECT 'Ama Sun Cvi'
UNION SELECT 'test%[0-9]%'
UNION SELECT 'DEMO'
UNION SELECT '%SUNCLUB%'
UNION SELECT '%QUALITY%'
UNION SELECT '%STARCARD%'
UNION SELECT '%CLIENT%'
UNION SELECT '%MAGENTO%'
UNION SELECT '%RENOUVELLEMENT%'
UNION SELECT '%Mitarbeiter%'
UNION SELECT '% DEV %'
UNION SELECT 'CERN Service%'
UNION SELECT 'BALEXERT CENTRE%'
UNION SELECT 'G.H.O.L.%'
UNION SELECT 'ONLYAPOS%'
UNION SELECT 'In APOS%'
UNION SELECT '%client de passage%'
;
DECLARE @tpl VARCHAR(MAX)='
UPDATE m SET [TT_masking] = ''PERSIST''
FROM dbo.Address ad
JOIN #mask m on m.address_id = ad.address_id
JOIN dbo.Customer cst
ON [cst].[CUST_address] = [ad].[Address_ID]
LEFT JOIN dbo.[Subsidiary] sub
ON [cst].[CUST_subsidiary] = [sub].[Subsidiary_ID]
WHERE [m].[TT_masking] <> ''PERSIST''
AND (
(
[ad].[Address_ID] < 1500000000
AND NOT EXISTS (SELECT 3 FROM [dbo].[Employee] WHERE EM_address = ad.Address_ID)
AND NOT EXISTS (SELECT 3 FROM [dbo].[Customer] WHERE CUST_address = ad.Address_ID)
)
OR(
@all_keywords@
)
)
';
DECLARE @tplFragment VARCHAR(MAX)='
OR (
ad.[AD_last_name] @op@ ''@keyword@''
OR ad.[AD_name] @op@ ''@keyword@''
)
';
DECLARE @all_keywords VARCHAR(MAX)='';
SELECT @all_keywords = @all_keywords + REPLACE(REPLACE(@tplFragment, '@keyword@', k.[pattern]), '@op@', CASE WHEN CHARINDEX('%',k.[pattern])>0 THEN 'LIKE' ELSE '=' END)
FROM @keywords k;
SELECT @all_keywords = STUFF(@all_keywords, 1,CHARINDEX('OR',@all_keywords)+2,'');
DECLARE @query VARCHAR(MAX)='';
SELECT @query = REPLACE(@tpl, '@all_keywords@', @all_keywords);
--PRINT @query;
EXEC(@query);
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM dbo.PH_prescriber ph
INNER JOIN dbo.Address ad
ON [ad].[Address_ID] = [ph].[PHPR_address]
JOIN #mask m ON m.address_id = ad.Address_ID
WHERE [ph].[PHPR_deactivation_date] IS NULL
AND m.[TT_masking] <> 'PERSIST';
UPDATE m
SET m.[TT_masking] = 'MASK'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
WHERE TT_masking <> 'MASK'
AND ( ad.AD_last_name LIKE 'TESTORI%'
OR ad.AD_last_name LIKE 'APOSTOLOV%'
OR ad.AD_last_name LIKE 'TESTOLIN%'
OR ad.AD_last_name LIKE 'DEMONT%'
OR ad.AD_first_name like 'Testalem%');
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
INNER JOIN dbo.Bmc_user_profile bup
ON bup.BMCUP_address = ad.Address_ID
WHERE bup.BMCUP_Initials NOT IN ( 'AMAVITA', 'MEDI', 'PHAR', 'Tran', 'TRIA', 'TRIC', 'TRIM', 'TRIS', 'TRPH' )
AND bup.BMCUP_Initials NOT LIKE 'GAL-%' /* Galenica users */
AND bup.BMCUP_Initials NOT LIKE 'HCI-%' /* HCI Solutions users */
AND bup.BMCUP_Initials NOT LIKE 'EXT-%' /* External Users - 20626*/
AND bup.BMCUP_Initials NOT LIKE 'GC-%' /* GaleniCare Users - 20619*/
AND bup.Bmc_user_profile_Id NOT IN ( 1, 234 );
--persist addresses in satellite tables that we need to keep
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN ph_insurance_card c
ON c. PHIC_patient_address = ad.Address_ID
WHERE c.phic_code IN (
'80756012350000207901'
,'80756012350000206169'
,'80756012350000206899'
,'80756012350000205871'
,'80756012350000210793'
);
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN Organizational_unit ou
ON ou.OU_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN Organizational_unit ou
ON ou.OU_manager_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.PH_insurance phi
ON phi.PHIN_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN PH_Organizational_unit phou
ON phou.PHOU_manager_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN PH_Organizational_unit phou
ON phou.PHOU_distribution_center = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN PH_prescriber php
ON php.PHPR_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN Incident_header ih
ON ih.INH_contact_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN Incident_header ih
ON ih.INH_originator_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN Serial_number_history snh
ON snh.SNH_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Item_relation_info iri
ON iri.ITRI_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Fixed_price fp
ON fp.FP_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Fixed_price fp
ON fp.FP_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Tariff t
ON t.TF_address = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Asset a
ON a.AS_owner = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Asset a
ON a.AS_supplier = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Asset a
ON a.AS_manufacturer = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Supplier s
ON s.SUPP_pay_to = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Supplier s
ON s.SUPP_send_order_to = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
JOIN dbo.Supplier s
ON s.SUPP_send_order_to = ad.Address_ID
WHERE m.[TT_masking] = 'MASK';
UPDATE m
SET m.[TT_masking] = 'PERSIST'
FROM Address ad
JOIN #mask m ON m.address_id = ad.Address_ID
WHERE m.[TT_masking] = 'MASK'
AND ad.AD_origin = 1;
--#endregion flag masked or not address
--count how much rows should be masked or persisted. distribution should be 50% for each exactly
SELECT m.TT_masking, COUNT(1) AS cnt
FROM #mask m
GROUP BY m.TT_masking;
--check if duplicates are still present in the rows marked as PERSIST. there should be none
IF EXISTS(
SELECT 1
FROM #mask m
JOIN dbo.Address_key ak ON ak.ADK_address = m.address_id
WHERE ak.ADK_type = 7
AND m.TT_masking = 'PERSIST'
GROUP BY ak.ADK_subsidiary, ak.ADK_key
HAVING COUNT(1) > 1
)
BEGIN
RAISERROR('There are still %i rows that are duplicated on addresses marked as PERSIST, check further', 18,6, @@ROWCOUNT);
END
--update address_key
DELETE ak
FROM dbo.Address_key ak
JOIN #mask m ON m.address_id = ak.ADK_address
WHERE ak.ADK_type=7
AND m.TT_masking = 'MASK';
PRINT CONVERT(VARCHAR(20), CURRENT_TIMESTAMP, 114)+' - Deleted adk_type 7 records causing duplication. '+REPLACE(REPLACE(CONVERT(VARCHAR(100), CONVERT(MONEY, @@rowcount), 1),',',''''),'.00','')+' row(s) affected.';
--check again for duplicate
SELECT k.ADK_address
FROM dbo.Address_key k
WHERE k.ADK_type = 7
AND EXISTS(
SELECT s.ADK_key
FROM dbo.Address_key s
WHERE s.ADK_type=7
AND k.ADK_key = s.ADK_key
GROUP BY s.ADK_subsidiary, s.ADK_key
HAVING COUNT(1)>1
);
--fetch max value of the counter
DECLARE @maxVal INT;
SELECT @maxVal = MAX(CONVERT(INT, ADK_key))
FROM dbo.Address_key
WHERE ADK_type = 7
--update counter value
DECLARE @outCounter TABLE(id INT, oldVal INT, newVal INT)
UPDATE dc
SET dc.DOCC_value = @maxVal
OUTPUT Inserted.Document_counter_ID, inserted.DOCC_value, Deleted.DOCC_value INTO @outCounter(id, newVal, oldVal)
FROM dbo.Document_counter dc
JOIN ActivePos_read.dbo.CommonVar cv ON cv.[Key] = 'cvCounterADKType7' AND cv.Value = dc.Document_counter_ID
;
--show previous and new value of the counter
SELECT *
FROM @outCounter;
ROLLBACK TRANSACTION