Created
March 22, 2016 22:04
-
-
Save PeterZhizhin/b55011a99097c400b5eb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from queue import PriorityQueue | |
import re | |
class HuffmanTree: | |
class HuffmanNode: | |
def __init__(self, node0=None, node1=None, symbol=None): | |
self.node0 = node0 | |
self.node1 = node1 | |
self.symbol = symbol | |
def __repr__(self): | |
if self.symbol is not None: | |
return str(self.symbol) | |
return "({left}, {right})".format(left=self.node0.__repr__(), | |
right=self.node1.__repr__()) | |
def __str__(self): | |
if self.symbol is not None: | |
return str(self.symbol) | |
return "({left}, {right})".format(left=self.node0.__str__(), | |
right=self.node1.__str__()) | |
# Regexp to build Huffman tree from code | |
tree_regexp = re.compile("\s*\((.*)\)\s*") | |
@staticmethod | |
def gen_node(node_str): | |
match = HuffmanTree.tree_regexp.match(node_str) | |
if match is None: | |
node_str = node_str.strip() | |
if node_str == '': | |
node_str = ' ' | |
assert len(node_str) == 1 | |
return HuffmanTree.HuffmanNode(symbol=node_str), set(node_str) | |
# Find zero-balance point in the center | |
node_str = match.groups()[0] | |
balance = 0 | |
left = "" | |
right = "" | |
for i in range(len(node_str)): | |
if node_str[i] == '(': | |
balance += 1 | |
elif node_str[i] == ')': | |
balance -= 1 | |
elif node_str[i] == ',' and balance == 0: | |
left = node_str[:i] | |
right = node_str[i + 1:] | |
break | |
node0, node0_symb = HuffmanTree.gen_node(left) | |
node1, node1_symb = HuffmanTree.gen_node(right) | |
node0_symb.update(node1_symb) | |
return HuffmanTree.HuffmanNode(node0=node0, node1=node1), \ | |
node0_symb | |
def get_table(self, str, node): | |
if node.symbol is not None: | |
self.table[node.symbol] = str | |
return | |
self.get_table(str + "0", node.node0) | |
self.get_table(str + "1", node.node1) | |
def __init__(self, tree): | |
assert isinstance(tree, str) | |
self.tree, self.symbols = HuffmanTree.gen_node(tree) | |
self.table = {} | |
if self.tree.node0 is None and self.tree.node1 is None: | |
self.table[self.tree.symbol] = '0' | |
else: | |
self.get_table("", self.tree) | |
def decompress(self, str): | |
if len(str) == 0: | |
return '' | |
assert all(c == '0' or c == '1' for c in str) | |
i = 0 | |
result = "" | |
current_node = self.tree | |
if current_node.node0 is None and current_node.node1 is None: | |
return current_node.symbol | |
while i < len(str): | |
if current_node.symbol is not None: | |
result += current_node.symbol | |
current_node = self.tree | |
else: | |
c = str[i] | |
i += 1 | |
if c == '1': | |
current_node = current_node.node1 | |
else: | |
current_node = current_node.node0 | |
assert current_node.symbol is not None | |
return result + current_node.symbol | |
if __name__ == "__main__": | |
tree = input() | |
data = input() | |
tree_class = HuffmanTree(tree) | |
print(tree_class.decompress(data)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from queue import PriorityQueue | |
from sys import stdin | |
class HuffmanTree: | |
class HuffmanNode: | |
def __init__(self, node0=None, node1=None, symbol=None): | |
self.node0 = node0 | |
self.node1 = node1 | |
self.symbol = symbol | |
def __repr__(self): | |
if self.symbol is not None: | |
return str(self.symbol) | |
return "({left},{right})".format(left=self.node0.__repr__(), | |
right=self.node1.__repr__()) | |
def __str__(self): | |
if self.symbol is not None: | |
return str(self.symbol) | |
return "({left},{right})".format(left=self.node0.__str__(), | |
right=self.node1.__str__()) | |
def __lt__(self, other): | |
return False | |
# Regexp to build Huffman tree from code | |
tree_regexp = re.compile("\((.*)\)") | |
@staticmethod | |
def gen_node(node_str): | |
match = HuffmanTree.tree_regexp.match(node_str) | |
if match is None: | |
assert len(node_str) == 1 | |
if node_str == 'з': | |
node_str = ',' | |
elif node_str == 'й': | |
node_str = ')' | |
elif node_str == 'ё': | |
node_str = '(' | |
elif node_str == 'ъ': | |
node_str = '\n' | |
return HuffmanTree.HuffmanNode(symbol=node_str), set(node_str) | |
# Find zero-balance point in the center | |
node_str = match.groups()[0] | |
balance = 0 | |
left = "" | |
right = "" | |
for i in range(len(node_str)): | |
if node_str[i] == '(': | |
balance += 1 | |
elif node_str[i] == ')': | |
balance -= 1 | |
elif node_str[i] == ',' and balance == 0: | |
left = node_str[:i] | |
right = node_str[i + 1:] | |
break | |
node0, node0_symb = HuffmanTree.gen_node(left) | |
node1, node1_symb = HuffmanTree.gen_node(right) | |
node0_symb.update(node1_symb) | |
return HuffmanTree.HuffmanNode(node0=node0, node1=node1), \ | |
node0_symb | |
@staticmethod | |
def create_tree(frequencies): | |
if len(frequencies) == 0: | |
return HuffmanTree.HuffmanNode(symbol='') | |
frequencies = [(f, a) for a, f in frequencies.items()] | |
p = PriorityQueue() | |
for val in frequencies: | |
p.put((val[0], HuffmanTree.HuffmanNode(symbol=val[1]))) | |
while p.qsize() > 1: | |
l, r = p.get(), p.get() | |
l_c = l[1] | |
r_c = r[1] | |
node = HuffmanTree.HuffmanNode(node0=l_c, node1=r_c) | |
p.put((l[0] + r[0], node)) | |
return p.get()[1] | |
def get_table(self, str, node): | |
if node.symbol is not None: | |
self.table[node.symbol] = str | |
return | |
self.get_table(str + "0", node.node0) | |
self.get_table(str + "1", node.node1) | |
@staticmethod | |
def build_prepared_tree(tree_str): | |
tree_str = tree_str.replace("\n", "ъ") | |
if ',,' in tree_str: | |
# Try to build tree changing the first comma | |
try: | |
return HuffmanTree.build_prepared_tree(tree_str.replace(",,", "з,")) | |
except AssertionError: | |
return HuffmanTree.build_prepared_tree(tree_str.replace(",,", ",з")) | |
if ",()," in tree_str: | |
if 'й' in tree_str: | |
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",ё),", 1)) | |
elif 'ё' in tree_str: | |
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",(й,", 1)) | |
else: | |
try: | |
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",(й,", 1)) | |
except AssertionError: | |
return HuffmanTree.build_prepared_tree(tree_str.replace(",(),", ",ё),", 1)) | |
else: | |
if 'й' not in tree_str: | |
tree_str = tree_str.replace("(),", "(й,") | |
if 'ё' not in tree_str: | |
tree_str = tree_str.replace(",()", ",ё)") | |
if 'й' not in tree_str: | |
tree_str = tree_str.replace(",)", ",й") | |
if 'ё' not in tree_str: | |
tree_str = tree_str.replace("(,", "ё,") | |
return HuffmanTree.gen_node(tree_str) | |
def __init__(self, tree): | |
assert isinstance(tree, (str, dict)) | |
if isinstance(tree, str): | |
if len(tree) == 0: | |
self.tree = HuffmanTree.HuffmanNode(symbol='') | |
self.symbols = set() | |
else: | |
self.tree, self.symbols = HuffmanTree.build_prepared_tree(tree) | |
else: | |
self.symbols = set(tree.keys()) | |
assert all(len(s) == 1 for s in self.symbols) | |
self.tree = HuffmanTree.create_tree(tree) | |
self.table = {} | |
if self.tree.node0 is None and self.tree.node1 is None: | |
self.table[self.tree.symbol] = '0' | |
else: | |
self.get_table("", self.tree) | |
def __str__(self): | |
s = "" | |
s += str(self.tree) | |
s += "\n" | |
s += str(self.symbols) | |
s += "\n" | |
s += str(self.table) | |
s += "\n" | |
return s | |
def decompress(self, str): | |
if len(str) == 0: | |
return '' | |
assert all(c == '0' or c == '1' for c in str) | |
i = 0 | |
result = "" | |
current_node = self.tree | |
if current_node.node0 is None and current_node.node1 is None: | |
return current_node.symbol | |
while i < len(str): | |
if current_node.symbol is not None: | |
result += current_node.symbol | |
current_node = self.tree | |
else: | |
c = str[i] | |
i += 1 | |
if c == '1': | |
current_node = current_node.node1 | |
else: | |
current_node = current_node.node0 | |
assert current_node.symbol is not None | |
return result + current_node.symbol | |
def compress(self, str): | |
assert all(c in self.table for c in str) | |
return "".join(self.table[c] for c in str) | |
if __name__ == "__main__": | |
for line in stdin: | |
tree_str = line.strip() | |
code = stdin.readline().strip() | |
tree = HuffmanTree(tree_str) | |
print(tree.decompress(code)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment