Skip to content

Instantly share code, notes, and snippets.

@linminhtoo
Created March 12, 2025 15:19
Show Gist options
  • Save linminhtoo/94658bb25fb766a751164d1422004d13 to your computer and use it in GitHub Desktop.
Save linminhtoo/94658bb25fb766a751164d1422004d13 to your computer and use it in GitHub Desktop.
https://leetcode.com/problems/accounts-merge/ this is slow, 1779 ms (5%), need to optimize
from typing import List
class UnionFind:
def __init__(self, total: int):
# each node's parent is itself
self.parents = list(range(total))
# for union by rank optimisation
self.ranks = [1 for _ in range(total)]
def find(self, x: int):
'''find the root of x'''
# base case to prevent infinite recursion
if self.parents[x] != x:
# set x's parent to root of the tree, instead of whatever it used to be
# so that subsequent find(x) will only take O(1) instead
self.parents[x] = self.find(self.parents[x])
return self.parents[x]
def union(self, x: int, y: int):
'''merge the root of x with the root of y'''
x_parent = self.find(x)
y_parent = self.find(y)
# no need to merge if parents are already identical
if x_parent != y_parent:
if self.ranks[x_parent] > self.ranks[y_parent]:
self.parents[y_parent] = x_parent
elif self.ranks[x_parent] < self.ranks[y_parent]:
self.parents[x_parent] = y_parent
else:
# just arbitrarily choose y_parent to be parent
self.parents[x_parent] = y_parent
# rmbr to increment rank of the new parent, y_parent
self.ranks[y_parent] += 1
class Solution:
def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
len_accounts = len(accounts)
user_tree = UnionFind(len_accounts)
# preprocess
email_sets = [set(emails) for (_, *emails) in accounts]
# merge duplicate users
for i, (user_i, *_) in enumerate(accounts):
# starting with i + 1 breaks things --> cannot do this
for j, (user_j, *_) in enumerate(accounts): # must do this
if i == j: # pointless to merge self with self
continue
# check matching username first
# then check existence of at least 1 shared email
if user_i == user_j and len(email_sets[i] & email_sets[j]) > 0:
user_tree.union(i, j)
# break # cannot do this --> one union is not enough
# build the final answer
res = []
# need to gather name of each unique user
# need to know how many unique parents (users) exist. this is the no. of sublists in our answer
uniq_parent_cnt = 0
# key is the integer value in user_tree.parents array
# value is the position of sublist in res
parent_id_to_uniq_parent_cnt = {}
for i, (user, *_) in enumerate(accounts):
parent_id = user_tree.find(i) # must do this instead of user_tree.parents[i]
if parent_id not in parent_id_to_uniq_parent_cnt:
parent_id_to_uniq_parent_cnt[parent_id] = uniq_parent_cnt
res.append([user, email_sets[i]]) # we will sort later
uniq_parent_cnt += 1
else:
# be careful not to override the value of uniq_parent_cnt here
idx = parent_id_to_uniq_parent_cnt[parent_id]
# do union of existing set with new set
res[idx][1].update(email_sets[i])
return [[user, *sorted(emails_set)] for (user, emails_set) in res]
@linminhtoo
Copy link
Author

linminhtoo commented Mar 12, 2025

the key is to maintain a hashmap of unique emails which map to their "owner" (index in accounts list).
this avoids the weird and slow N^2 loop over accounts.

below gets 23 ms, beats 80% runtime. 20.67 mb, beats 82% memory

from typing import List
from collections import defaultdict


class UnionFind:
    def __init__(self, N: int):
        self.parent = list(range(N))
        self.rank = [1] * N
        # self.parent = {}  # Key: email, Value: root email
        # self.rank = {}  # Rank for union by rank

    def find(self, x):
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])  # Path compression
        return self.parent[x]

    def union(self, x, y):
        root_x = self.find(x)
        root_y = self.find(y)
        if root_x != root_y:
            if self.rank[root_x] > self.rank[root_y]:
                self.parent[root_y] = root_x
            elif self.rank[root_x] < self.rank[root_y]:
                self.parent[root_x] = root_y
            else:
                self.parent[root_x] = root_y
                self.rank[root_y] += 1

class Solution:
    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        uf = UnionFind(len(accounts))

        # merge
        email_to_owner: dict[str, int] = {}
        for i, (user, *emails) in enumerate(accounts):
            for email in emails:
                if email not in email_to_owner:
                    email_to_owner[email] = i
                else:
                    uf.union(i, email_to_owner[email])

        # get results, grouping by owner
        res: dict[int, list[str]] = defaultdict(list)
        for email, owner in email_to_owner.items():
            parent_owner = uf.find(owner)
            res[parent_owner].append(email)

        # output results
        return [[accounts[i][0], *sorted(emails)] for i, emails in res.items()] 

other version, similar runtime & memory as above

class UF:
    def __init__(self, N):
        self.parents = list(range(N))
    def union(self, child, parent):
        self.parents[self.find(child)] = self.find(parent)
    def find(self, x):
        if x != self.parents[x]:
            self.parents[x] = self.find(self.parents[x])
        return self.parents[x]
    
class Solution:
    # 12 ms, 90%
    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
        uf = UF(len(accounts))
        
        # Creat unions between indexes
        ownership = {}
        for i, (_, *emails) in enumerate(accounts):
            for email in emails:
                if email in ownership:
                    uf.union(i, ownership[email])
                ownership[email] = i
        
        # Append emails to correct index
        ans = collections.defaultdict(list)
        for email, owner in ownership.items():
            ans[uf.find(owner)].append(email)
        
        return [[accounts[i][0]] + sorted(emails) for i, emails in ans.items()]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment