Last active
May 16, 2019 10:57
-
-
Save LuckyDima/04ca220f0cf6cb773641a001fb6bf6f4 to your computer and use it in GitHub Desktop.
Finding the weight of words for different rows
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| DECLARE @tbl TABLE (FullName NVARCHAR(MAX)) | |
| INSERT @tbl(FullName) | |
| VALUES | |
| (N'All-America Research Team: Basic Materials - Best UAC (Chemicals)'), | |
| ('All-America Research Team: Basic Materials - Chemicals'), | |
| ('All-America Fixed-Income Research Team: Client Services - Best Credit-Rating Agencies'), | |
| ('All-America Fixed-Income Research Team'), | |
| ('All-America Research Team'), | |
| ('All-America Trading Team'), | |
| ('All-Asia Research Team'), | |
| ('All-Asia Reserach Team'), | |
| ('All-Brazil Research Team'), | |
| ('All-China Research Team'), | |
| ('All-Europe Equity Research Team'), | |
| ('All-Europe Equity Trading Tearm Buy-Side'), | |
| ('All-Europe Fixed-Income Research Team'), | |
| ('All-Europe Fixed-Income Reserach Team'), | |
| ('All-Europe Research Team'), | |
| ('All-Europe Trading Team'), | |
| ('All-India Research Team'), | |
| ('All-India Reserach Team'), | |
| ('All-India Web Research Team'), | |
| ('All-Japan Research Team'), | |
| ('All-Japan Reserach Team'), | |
| ('All-Russia Research Team'), | |
| ('Emerging EMEA Research Team'), | |
| ('Emerging-Markets Equity Research Team'), | |
| ('Latin America Research Team'), | |
| ('Latin America Reserach Team') | |
| ;WITH cte AS | |
| ( | |
| SELECT | |
| FullName, | |
| value | |
| FROM @tbl | |
| CROSS APPLY STRING_SPLIT((TRANSLATE(FullName, '+-*/%():',' ')),' ') | |
| ), | |
| cte2 AS | |
| ( | |
| SELECT | |
| cte.FullName, | |
| cte.value, | |
| SOUNDEX(cte.value) AS 'Sound', | |
| DENSE_RANK() OVER (ORDER BY cte.FullName) AS 'GroupRank', | |
| ROW_NUMBER() OVER (PARTITION BY cte.FullName ORDER BY (SELECT NULL)) AS 'Weight' | |
| FROM cte WHERE cte.value != '' | |
| ), | |
| cte3 AS | |
| ( | |
| SELECT c.*, | |
| cc.FullName AS _FullName, | |
| cc.value AS _value, | |
| cc.Sound AS _Sound, | |
| cc.GroupRank AS _GroupRank, | |
| cc.Weight AS _Weight, | |
| LAG(cc.Sound, 1,0) OVER (ORDER BY cc.GroupRank,cc.Weight,c.GroupRank,c.Weight) lag, | |
| LEAD(cc.Sound, 1,0) OVER (ORDER BY cc.GroupRank,cc.Weight,c.GroupRank,c.Weight) lead | |
| FROM cte2 c | |
| LEFT JOIN cte2 cc | |
| ON c.GroupRank != cc.GroupRank OR (cc.GroupRank != c.GroupRank AND cc.Weight != c.Weight) | |
| ),cte4 AS | |
| ( | |
| SELECT *, | |
| CASE | |
| WHEN Sound = _Sound THEN 0 | |
| WHEN Sound = lag OR Sound = lead THEN ABS(_Weight - Weight) | |
| END AS nm | |
| FROM cte3 | |
| WHERE cte3.Weight =cte3._Weight | |
| OR CASE | |
| WHEN Sound = _Sound THEN 0 | |
| WHEN Sound = lag OR Sound = lead THEN _Weight | |
| END IS NOT NULL | |
| ), | |
| cte5 AS | |
| ( | |
| SELECT *, ROW_NUMBER() OVER (PARTITION BY GroupRank,Weight ORDER BY GroupRank,Weight) rnk2 FROM | |
| cte4 | |
| ) | |
| SELECT DISTINCT value, | |
| COUNT(value) OVER (PARTITION BY SOUNDEX(value)) AS w | |
| FROM cte5 | |
| WHERE rnk2 = 1 AND nm IS NOT NULL | |
| ORDER BY w DESC, value ASC | |
| --SELECT SOUNDEX('Research'), SOUNDEX('Reserach') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment