Last active
May 5, 2019 02:34
-
-
Save jruizvar/701507179da719fcdc553e35269808ad to your computer and use it in GitHub Desktop.
Remove duplicated rows and add a partition in a hive table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE temp1 (anomes INT, cnpj STRING, feat0 STRING, feat1 STRING); | |
INSERT INTO temp1 VALUES | |
(201901, '0', 'x', 'a'), | |
(201901, '0', 'y', 'b'), | |
(201901, '1', 'v', 'c'), | |
(201901, '1', 'w', 'd'), | |
(201902, '0', 'x', 'a'), | |
(201902, '0', 'x', 'a'), | |
(201902, '1', 'y', 'b'), | |
(201902, '1', 'y', 'b'); | |
CREATE TABLE temp2 (cnpj STRING, feat0 STRING, feat1 STRING) | |
PARTITIONED BY (anomes INT); | |
INSERT INTO temp2 PARTITION(anomes=201901) | |
SELECT cnpj, feat0, feat1 | |
FROM ( | |
SELECT *, row_number() OVER (PARTITION BY cnpj) AS rnk | |
FROM temp1 | |
WHERE anomes=201901 | |
) t0 | |
WHERE rnk=1; | |
INSERT INTO temp2 PARTITION(anomes=201902) | |
SELECT cnpj, feat0, feat1 | |
FROM ( | |
SELECT *, row_number() OVER (PARTITION BY cnpj) AS rnk | |
FROM temp1 | |
WHERE anomes=201902 | |
) t0 | |
WHERE rnk=1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment