Last active
March 10, 2025 15:21
-
-
Save linminhtoo/f5df6033c57e1cb27f1efd2a4fd765c3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
from typing import List | |
class Solution: | |
def findAnagrams(self, s: str, p: str) -> List[int]: | |
''' | |
anagram is just an unordered bag of letters | |
2 pointer sliding window approach on s? | |
i on the left, j on the right | |
at each "state", we maintain a Counter of char frequencies. we just make O(1) changes to this Counter. | |
to have a valid anagram from i to j -1 means our Counter's frequencies match p's frequencies. | |
if we already have an anagram from i to j - 1 (inclusive): | |
if s[j] == s[i], then we can drop i by replacing it with j, giving us a new start index at i + 1 | |
if s[j] != s[i], then we broke the anagram. we have to find a new start index > i + 1 | |
if we don't have an anagram from i to j - 1 (inclusive): | |
we have to find a new start index > i | |
''' | |
# unnecessary. | |
# def are_counters_same(counter_a, counter_b): | |
# for k, v in counter_a.items(): | |
# if k not in counter_b: | |
# return False | |
# if counter_b[k] != counter_a[k]: | |
# return False | |
# return True | |
len_s = len(s) | |
i = 0 | |
j = len(p) | |
ref_counter = Counter(p) | |
skip_check = False # whether counter needs to be checked (this is somewhat expensive so we want to avoid if possible) | |
is_valid = False # whether current sliding window is a valid anagram | |
valid_start_idxs = [] | |
while i < j and j <= len_s: | |
# if are_counters_same(curr_counter, ref_counter): | |
if not skip_check: | |
# FIXME: dont rebuild counter from scratch. update it on the fly with O(1) ops. | |
curr_counter = Counter(s[i:j]) | |
if curr_counter == ref_counter: | |
is_valid = True | |
valid_start_idxs.append(i) | |
else: | |
is_valid = False | |
if skip_check: # we already know this is a valid anagram, so dont need to check the counter | |
valid_start_idxs.append(i) | |
skip_check = False | |
if is_valid and (j < len_s and s[j] == s[i]): | |
skip_check = True | |
i += 1 | |
j += 1 | |
return valid_start_idxs |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
interestingly, this is 10x faster than the above. it means that rebuilding the entire Counter is too expensive. we should update it on every character.