Created
July 9, 2018 05:22
-
-
Save gurimusan/31aeda010808a842ea1d9953f89c928e to your computer and use it in GitHub Desktop.
pythonによるダブル配列の実装
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
NOT_FOUND = -1 | |
class Trie(object): | |
def __init__(self): | |
self.base = [] | |
self.check = [] | |
self.char_table = {} | |
def traverse(self, s, c): | |
if c not in self.char_table: | |
return NOT_FOUND | |
t = self.base[s] + self._char_num(c) | |
if self.check[t] != s: | |
return NOT_FOUND | |
return t | |
def exact_match_search(self, word): | |
s = 1 | |
for c in word: | |
t = self.traverse(s, c) | |
if t == NOT_FOUND: | |
return t | |
s = t | |
return NOT_FOUND | |
def build(self, words): | |
self.base = [] | |
self.check = [] | |
for word in words: | |
self.add_word(word) | |
def add_word(self, word): | |
s = 1 | |
self._extend_array(s+1) | |
word_index = 0 | |
for c in word: | |
t = self.traverse(s, c) | |
if t == NOT_FOUND: | |
break | |
s = t | |
word_index += 1 | |
for c in word[word_index:]: | |
self._extend_array(s+1) | |
# Add char if not exists | |
if c not in self.char_table: | |
self.char_table[c] = len(self.char_table) + 1 | |
base = self.base[s] | |
# Unused | |
if base == 0: | |
new_base = 1 | |
char_num = self.char_table.get(c) | |
while True: | |
t = new_base + char_num | |
self._extend_array(t+1) | |
if self.check[t] <= 0: | |
break | |
new_base += 1 | |
self.base[s] = new_base | |
t = new_base + char_num | |
self.check[t] = s | |
s = t | |
continue | |
new_base = base | |
# Used, and no conflict | |
if not self._has_conflict(s, c): | |
char_num = self.char_table.get(c) | |
t = new_base + char_num | |
self._extend_array(t+1) | |
self.check[t] = s | |
s = t | |
continue | |
# Used, and has conflict | |
char_nums = [self.char_table.get(c)] + [ | |
ci - base for ci, ch in enumerate(self.check) | |
if ch > 0 and ch == s] | |
while True: | |
all_can_move = True | |
for char_num in char_nums: | |
t = new_base + char_num | |
self._extend_array(t+1) | |
if self.check[t] > 0: | |
new_base += 1 | |
all_can_move = False | |
break | |
if all_can_move: | |
break | |
self.base[s] = new_base | |
for char_num in char_nums: | |
t = new_base + char_num | |
old_t = base + char_num | |
self._extend_array(t+1 if t > old_t else old_t+1) | |
if char_num != char_nums[0] and self.check[old_t] > 0: | |
for i, check in enumerate(self.check): | |
if check > 0 and check == old_t: | |
self.check[i] = t | |
self.base[old_t] = 0 | |
self.check[old_t] = 0 | |
self.check[t] = s | |
s = new_base + char_nums[0] | |
def _char_num(self, c): | |
return self.char_table.get(c) | |
def _has_conflict(self, s, c): | |
base = self.base[s] if self.base[s] != 0 else 1 | |
t = base + self.char_table.get(c) | |
return len(self.check) > t and self.check[t] > 0 | |
def _extend_array(self, size): | |
for i in xrange(size - len(self.base)): | |
self.base.append(0) | |
self.check.append(0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment