Created
March 12, 2025 15:19
-
-
Save linminhtoo/94658bb25fb766a751164d1422004d13 to your computer and use it in GitHub Desktop.
https://leetcode.com/problems/accounts-merge/ — this solution is slow: 1779 ms (beats only 5% of submissions), so it needs to be optimized.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List | |
class UnionFind:
    """Disjoint-set forest over the integers ``0 .. total - 1``.

    Uses path compression in ``find`` and union by rank in ``union``.
    """

    def __init__(self, total: int):
        """Create ``total`` singleton sets, one per integer id."""
        # each node starts out as its own parent, i.e. its own root
        self.parents = list(range(total))
        # for the union-by-rank optimisation; every root starts at rank 1
        self.ranks = [1] * total

    def find(self, x: int) -> int:
        """Return the root of the set containing ``x``.

        Every node visited on the way up is re-pointed directly at the
        root, so later lookups for those nodes are a single hop.
        """
        # first pass: walk up until we hit the node that is its own parent.
        # done iteratively so a long parent chain cannot exhaust the
        # interpreter's recursion limit.
        root = x
        while self.parents[root] != root:
            root = self.parents[root]

        # second pass: path compression, re-point the walked nodes at root
        while self.parents[x] != root:
            next_node = self.parents[x]
            self.parents[x] = root
            x = next_node
        return root

    def union(self, x: int, y: int) -> bool:
        """Merge the set containing ``x`` with the set containing ``y``.

        Returns:
            True if two distinct sets were merged, False if ``x`` and ``y``
            were already in the same set.
        """
        x_root = self.find(x)
        y_root = self.find(y)
        # nothing to merge if both already share a root
        if x_root == y_root:
            return False

        if self.ranks[x_root] > self.ranks[y_root]:
            # hang the lower-ranked tree under the higher-ranked one
            self.parents[y_root] = x_root
        elif self.ranks[x_root] < self.ranks[y_root]:
            self.parents[x_root] = y_root
        else:
            # equal ranks: arbitrarily choose y_root as the new root,
            # and remember to bump its rank
            self.parents[x_root] = y_root
            self.ranks[y_root] += 1
        return True
class Solution:
    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        """Merge accounts that belong to the same user.

        Each account is ``[name, email_1, email_2, ...]``. Two accounts are
        linked when they have the same name and share at least one email;
        linked accounts are merged transitively.

        Args:
            accounts: list of accounts in the format described above.

        Returns:
            One ``[name, *sorted_unique_emails]`` list per merged group.
            Groups appear in the order in which their first account appears
            in ``accounts``, and the name is taken from that first account.
        """
        user_tree = UnionFind(len(accounts))

        # (name, email) -> index of the first account seen with that pair.
        # keying on the name as well keeps the rule that only accounts with
        # the same name are ever linked. one dictionary lookup per email
        # replaces comparing every pair of accounts.
        first_owner = {}
        for idx, (name, *emails) in enumerate(accounts):
            for email in emails:
                owner = first_owner.setdefault((name, email), idx)
                if owner != idx:
                    user_tree.union(idx, owner)

        # build the final answer
        # key is the root id of a group, value is (name, set of emails).
        # dicts keep insertion order, so groups come out in the order their
        # first account was encountered.
        merged = {}
        for idx, (name, *emails) in enumerate(accounts):
            # must use find() rather than reading parents[idx] directly,
            # because parents[idx] is not guaranteed to be the root
            root = user_tree.find(idx)
            if root not in merged:
                merged[root] = (name, set())
            merged[root][1].update(emails)

        # emails were collected in sets; sort them only once, at the end
        return [[name, *sorted(emails)] for name, emails in merged.values()]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The key is to maintain a hashmap from each unique email to its "owner" (the index of an account in the accounts list that contains it).
This avoids the awkward and slow O(N^2) loop over all pairs of accounts.
below gets 23 ms, beats 80% runtime. 20.67 mb, beats 82% memory
other version, similar runtime & memory as above