Created
April 20, 2019 20:25
-
-
Save dpapathanasiou/4161bc161157006ec9ce5d1ff7e931ec to your computer and use it in GitHub Desktop.
Sliding Window
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
An implementation of the "sliding window" technique to find shortest matching subarrays, inspired by: | |
https://leetcode.com/problems/find-all-anagrams-in-a-string/discuss/92007/sliding-window-algorithm-template-to-solve-all-the-leetcode-substring-search-problem | |
""" | |
from sys import maxint | |
def smallest_subarray(lst, st): | |
"""Return the smallest subarray from the list (lst) which contains all the members of the set (st) using a sliding window technique""" | |
if len(lst) < len(st): | |
# the set is larger than the list, so there is no common subarray | |
return [] | |
m = {} # key = element from set, value = frequency in the set | |
for c in st: | |
freq = m.get(c, 0) | |
freq += 1 | |
m[c] = freq | |
# track the number of unique elements in the set | |
counter = len(m.keys()) | |
# left and right substring indices for iterating through the list | |
left = right = 0 | |
# head and tail (start, stop indices) of the substring window in the list | |
head = 0 | |
tail = maxint | |
while right < len(lst): | |
# start from left to right | |
c = lst[right] | |
if c in m: | |
# this character is in the set | |
freq = m[c] | |
freq -= 1 | |
m[c] = freq | |
if freq == 0: | |
# decrement counter to note that this character is now included in the slice lst[left:right] | |
counter -= 1 | |
right += 1 | |
while counter == 0: | |
# counter at zero means all the elements of the set have been found, | |
# now tighten the range from the left | |
c = lst[left] | |
if c in m: | |
# this character (in a narrower range than found previously) is in the set | |
freq = m[c] | |
freq += 1 | |
m[c] = freq | |
if freq > 0: | |
# break the loop once the desired frequency has been found | |
counter += 1 | |
if right-left < tail: | |
# use the tightened ranges as the new optimal window | |
tail = right-left | |
head = left | |
left += 1 | |
if tail == maxint: | |
# we never did find all the characters in the set in the desired frequencies | |
return [] | |
return lst[head:head+tail] | |
""" | |
Tests: | |
Input: "aabbcb" | |
Alphabet: {'a', 'b', 'c'} | |
Output: "abbc" | |
Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc" | |
Alphabet: {'a', 'b', 'c'} | |
Output: "csba" | |
Input: "aaaaaaaaaabbbbbbbbccccccccsbabbbccc" | |
Alphabet: {'a', 'b', 'c', 'f'} | |
Output: "" # not possible | |
""" | |
assert smallest_subarray(list("aabbcb"), ['a', 'b', 'c']) == ['a', 'b', 'b', 'c'] | |
assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c']) == ['c', 's', 'b', 'a'] | |
assert smallest_subarray(list("aaaaaaaaaabbbbbbbbccccccccsbabbbccc"), ['a', 'b', 'c', 'f']) == [] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment