linminhtoo · March 12, 2025 15:19 · linminhtoo · Mar 12, 2025
diff --git a/accounts_merge.py b/accounts_merge.py
 from typing import List


 class UnionFind:
     '''Disjoint-set forest over the integers produced by range(total).

     Roots are merged by rank in union() and paths are compressed in find().
     '''

     def __init__(self, total: int) -> None:
         '''create `total` singleton sets, one per index'''
         # every node starts out as the root of its own tree
         self.parents = list(range(total))
         # per-node rank, consulted by union() to decide which root survives
         self.ranks = [1] * total

     def find(self, x: int) -> int:
         '''return the root of the tree containing x'''
         parent = self.parents[x]
         if parent == x:
             # x is its own parent, i.e. a root: recursion stops here
             return parent
         root = self.find(parent)
         # path compression: hang x directly under the root so the next
         # find(x) reaches it in a single step
         self.parents[x] = root
         return root

     def union(self, x: int, y: int) -> None:
         '''join the tree containing x with the tree containing y'''
         root_x = self.find(x)
         root_y = self.find(y)
         if root_x == root_y:
             # already in the same tree, nothing to merge
             return

         rank_x = self.ranks[root_x]
         rank_y = self.ranks[root_y]
         if rank_x > rank_y:
             # higher-ranked root absorbs the other one
             self.parents[root_y] = root_x
             return

         # rank_x <= rank_y: root_y becomes the parent (ties go to root_y)
         self.parents[root_x] = root_y
         if rank_x == rank_y:
             # only a tie changes the rank of the surviving root
             self.ranks[root_y] += 1

                
 class Solution:
     def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
         '''merge accounts that belong to the same user

         Each account is a list whose first item is the user name and whose
         remaining items are that account's emails. Two accounts are joined
         when they have the same name and share at least one email, either
         directly or through a chain of such accounts.

         Returns one list per merged group: the name followed by the group's
         distinct emails in sorted order. Groups appear in the order in which
         their first account appears in `accounts`.
         '''
         user_tree = UnionFind(len(accounts))

         # (name, email) -> index of the first account seen with that pair.
         # Any later account carrying the same pair is unioned with that owner,
         # which links exactly the accounts a pairwise "same name and shared
         # email" comparison would link, without comparing every pair.
         first_owner = {}
         for idx, (user, *emails) in enumerate(accounts):
             for email in emails:
                 owner = first_owner.setdefault((user, email), idx)
                 if owner != idx:
                     user_tree.union(idx, owner)

         # root id -> (name, emails gathered so far); dict insertion order
         # keeps groups in order of their first account
         merged = {}
         for idx, (user, *emails) in enumerate(accounts):
             # must call find() rather than read parents[idx], which may not
             # be fully compressed yet
             root = user_tree.find(idx)
             if root not in merged:
                 merged[root] = (user, set())
             merged[root][1].update(emails)

         return [[user, *sorted(emails)] for user, emails in merged.values()]
	from typing import List


	class UnionFind:
	    '''Disjoint-set forest over the integers produced by range(total).

	    Roots are merged by rank in union() and paths are compressed in find().
	    '''

	    def __init__(self, total: int) -> None:
	        '''create `total` singleton sets, one per index'''
	        # every node starts out as the root of its own tree
	        self.parents = list(range(total))
	        # per-node rank, consulted by union() to decide which root survives
	        self.ranks = [1] * total

	    def find(self, x: int) -> int:
	        '''return the root of the tree containing x'''
	        parent = self.parents[x]
	        if parent == x:
	            # x is its own parent, i.e. a root: recursion stops here
	            return parent
	        root = self.find(parent)
	        # path compression: hang x directly under the root so the next
	        # find(x) reaches it in a single step
	        self.parents[x] = root
	        return root

	    def union(self, x: int, y: int) -> None:
	        '''join the tree containing x with the tree containing y'''
	        root_x = self.find(x)
	        root_y = self.find(y)
	        if root_x == root_y:
	            # already in the same tree, nothing to merge
	            return

	        rank_x = self.ranks[root_x]
	        rank_y = self.ranks[root_y]
	        if rank_x > rank_y:
	            # higher-ranked root absorbs the other one
	            self.parents[root_y] = root_x
	            return

	        # rank_x <= rank_y: root_y becomes the parent (ties go to root_y)
	        self.parents[root_x] = root_y
	        if rank_x == rank_y:
	            # only a tie changes the rank of the surviving root
	            self.ranks[root_y] += 1


	class Solution:
	    def accountsMerge(self, accounts: List[List[str]]) -> List[List[str]]:
	        '''merge accounts that belong to the same user

	        Each account is a list whose first item is the user name and whose
	        remaining items are that account's emails. Two accounts are joined
	        when they have the same name and share at least one email, either
	        directly or through a chain of such accounts.

	        Returns one list per merged group: the name followed by the group's
	        distinct emails in sorted order. Groups appear in the order in which
	        their first account appears in `accounts`.
	        '''
	        user_tree = UnionFind(len(accounts))

	        # (name, email) -> index of the first account seen with that pair.
	        # Any later account carrying the same pair is unioned with that owner,
	        # which links exactly the accounts a pairwise "same name and shared
	        # email" comparison would link, without comparing every pair.
	        first_owner = {}
	        for idx, (user, *emails) in enumerate(accounts):
	            for email in emails:
	                owner = first_owner.setdefault((user, email), idx)
	                if owner != idx:
	                    user_tree.union(idx, owner)

	        # root id -> (name, emails gathered so far); dict insertion order
	        # keeps groups in order of their first account
	        merged = {}
	        for idx, (user, *emails) in enumerate(accounts):
	            # must call find() rather than read parents[idx], which may not
	            # be fully compressed yet
	            root = user_tree.find(idx)
	            if root not in merged:
	                merged[root] = (user, set())
	            merged[root][1].update(emails)

	        return [[user, *sorted(emails)] for user, emails in merged.values()]