Skip to content

Instantly share code, notes, and snippets.

@linminhtoo
Last active March 10, 2025 15:21
Show Gist options
  • Save linminhtoo/f5df6033c57e1cb27f1efd2a4fd765c3 to your computer and use it in GitHub Desktop.
Save linminhtoo/f5df6033c57e1cb27f1efd2a4fd765c3 to your computer and use it in GitHub Desktop.
from collections import Counter
from typing import List
class Solution:
def findAnagrams(self, s: str, p: str) -> List[int]:
'''
anagram is just an unordered bag of letters
2 pointer sliding window approach on s?
i on the left, j on the right
at each "state", we maintain a Counter of char frequencies. we just make O(1) changes to this Counter.
to have a valid anagram from i to j -1 means our Counter's frequencies match p's frequencies.
if we already have an anagram from i to j - 1 (inclusive):
if s[j] == s[i], then we can drop i by replacing it with j, giving us a new start index at i + 1
if s[j] != s[i], then we broke the anagram. we have to find a new start index > i + 1
if we don't have an anagram from i to j - 1 (inclusive):
we have to find a new start index > i
'''
# unnecessary.
# def are_counters_same(counter_a, counter_b):
# for k, v in counter_a.items():
# if k not in counter_b:
# return False
# if counter_b[k] != counter_a[k]:
# return False
# return True
len_s = len(s)
i = 0
j = len(p)
ref_counter = Counter(p)
skip_check = False # whether counter needs to be checked (this is somewhat expensive so we want to avoid if possible)
is_valid = False # whether current sliding window is a valid anagram
valid_start_idxs = []
while i < j and j <= len_s:
# if are_counters_same(curr_counter, ref_counter):
if not skip_check:
# FIXME: dont rebuild counter from scratch. update it on the fly with O(1) ops.
curr_counter = Counter(s[i:j])
if curr_counter == ref_counter:
is_valid = True
valid_start_idxs.append(i)
else:
is_valid = False
if skip_check: # we already know this is a valid anagram, so dont need to check the counter
valid_start_idxs.append(i)
skip_check = False
if is_valid and (j < len_s and s[j] == s[i]):
skip_check = True
i += 1
j += 1
return valid_start_idxs
@linminhtoo
Copy link
Author

linminhtoo commented Mar 10, 2025

interestingly, this is 10x faster than the above. it means that rebuilding the entire Counter is too expensive. we should update it on every character.

from collections import Counter
from typing import List


class Solution:
    def findAnagrams(self, s: str, p: str) -> List[int]:
        len_p = len(p)
        ref_counter = Counter(p)
        
        # start 1 index short so that first iteration (idx := 0) will naturally fill the first counter
        curr_counter = Counter(s[:len_p - 1])
        valid_start_idxs = []

        for idx in range(len(s) - len_p + 1):
            curr_counter[s[idx + len_p - 1]] += 1
            if curr_counter == ref_counter:
                valid_start_idxs.append(idx)
            curr_counter[s[idx]] -= 1

        return valid_start_idxs

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment