Last active
September 19, 2020 13:48
-
-
Save Masa331/ed5f89bb75d6ee4c0325ce7877c1c64a to your computer and use it in GitHub Desktop.
Grouping text entries by their associated tags into a hierarchical structure, based on how the tags occur across entries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# The following code groups entries hierarchically by their tag composition. The hierarchy is determined by how each tag occurs in the other entries.
require 'pry' | |
# Sample input. Each entry is [id, some text description, array of tags].
entries = [
  [1, 'lorem ipsum', ['#customer_x', '#administration', '#paperwork']],
  [2, 'lorem ipsum', ['#customer_x', '#development']],
  [3, 'lorem ipsum', ['#customer_x', '#administration']],
  [4, 'lorem ipsum', ['#customer_y', '#administration']],
  [5, 'lorem ipsum', ['#customer_y', '#administration', '#invoice']],
  [6, 'lorem ipsum', ['#bike']],
  [7, 'lorem ipsum', ['#project_foo', '#mvp']],
  [8, 'lorem ipsum', ['#project_foo', '#docs']],
  [9, 'lorem ipsum', ['#project_bar', '#docs']],
  [10, 'lorem ipsum', ['#blog', '#article']],
  [11, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
  [12, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
  [13, 'lorem ipsum', []]
]
# Expected output of tag_hierarchy for the sample entries: a pair of
# [hash of tag (or array of merged tags) => nested result, entries without any tag].
expected_result = [
  {"#customer_x"=>[{"#administration"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]], "#development"=>[{}, [[2, "lorem ipsum", []]]], ["#infrastructure", "#maintenance"]=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []],
   "#administration"=>[{"#customer_x"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]], "#customer_y"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]]}, []],
   "#customer_y"=>[{"#administration"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]]}, []],
   "#bike"=>[{}, [[6, "lorem ipsum", []]]],
   "#project_foo"=>[{"#mvp"=>[{}, [[7, "lorem ipsum", []]]], "#docs"=>[{}, [[8, "lorem ipsum", []]]]}, []],
   "#docs"=>[{"#project_foo"=>[{}, [[8, "lorem ipsum", []]]], "#project_bar"=>[{}, [[9, "lorem ipsum", []]]]}, []],
   ["#article", "#blog"]=>[{}, [[10, "lorem ipsum", []]]]},
  [[13, "lorem ipsum", []]]
]
# Groups entries by tag into a hierarchy derived from how the tags co-occur.
#
# Every tag first collects the entries carrying it. At each level:
# * a tag whose entries all share one identical combination of other tags is
#   dropped from that level;
# * tags that appear on exactly the same entries as each other are collapsed
#   into a single group keyed by the array of those tags.
# Every surviving group is then processed recursively, with the grouping tag
# removed from its entries.
#
# A tag repeated on one entry is counted once.
#
# @param entries [Array<Array>] entries shaped as [id, description, tags]
# @return [Array] a pair [groups, without_tag]: groups maps a tag (or an array
#   of collapsed tags) to the nested result for its entries; without_tag lists
#   the given entries whose tag array is empty
def tag_hierarchy(entries)
  without_tag = entries.select { |entry| entry[2].empty? }
  groups = collapse_tag_groups(entries_by_tag(entries))

  [groups.transform_values { |subentries| tag_hierarchy(subentries) }, without_tag]
end

# Maps every tag to the entries carrying it; each listed entry keeps only its
# remaining tags: tag => [[id, description, other_tags], ...].
def entries_by_tag(entries)
  entries.each_with_object({}) do |(id, desc, tags), grouped|
    # Deduplicate so one entry is registered once per tag; duplicates would
    # otherwise break the id comparison used when collapsing groups.
    unique_tags = tags.uniq
    unique_tags.each { |tag| (grouped[tag] ||= []) << [id, desc, unique_tags - [tag]] }
  end
end

# Drops tags that only detail one other tag combination and collapses tags that
# always occur together. Returns a new hash; the argument is left untouched.
def collapse_tag_groups(grouped)
  remaining = grouped.dup
  merged = {}

  # Walk a snapshot of the keys: groups are removed from `remaining` as we go,
  # and a tag consumed by an earlier step must not be visited again.
  grouped.keys.each do |key|
    next unless remaining.key?(key)

    members = remaining[key]
    other_tag_combinations = members.map { |member| member[2] }.uniq
    other_tags = other_tag_combinations.flatten.uniq

    # Entries don't have other tags
    next if other_tags.empty?

    # Entries which are details of only one other tag combination
    remaining.delete(key) if other_tag_combinations.size == 1

    # Entries which all have only the same combination of tags
    entry_ids = members.map(&:first)
    next unless other_tags.all? { |tag| remaining.fetch(tag, []).map(&:first) == entry_ids }

    new_key = other_tags + [key]
    new_key.each { |tag| remaining.delete(tag) }
    merged[new_key] = members.map { |id, desc, _tags| [id, desc, []] }
  end

  remaining.merge(merged)
end
# Self-check: run the grouping on the sample entries and report, in colour,
# whether the outcome equals the expected structure.
result = tag_hierarchy(entries)
if result == expected_result
  puts "\e[32mCool, result matches expected value\e[0m"
else
  puts "\e[31mResult doesn't match the expected value\e[0m"
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# The following code groups entries by their tags and then recursively does the same with the subgroups.
require 'pry' | |
# Sample input. Each entry is [id, some text description, array of tags].
entries = [
  [1, 'lorem ipsum', ['#customer_x', '#administration', '#paperwork']],
  [2, 'lorem ipsum', ['#customer_x', '#development']],
  [3, 'lorem ipsum', ['#customer_x', '#administration']],
  [4, 'lorem ipsum', ['#customer_y', '#administration']],
  [5, 'lorem ipsum', ['#customer_y', '#administration', '#invoice']],
  [6, 'lorem ipsum', ['#bike']],
  [7, 'lorem ipsum', ['#project_foo', '#mvp']],
  [8, 'lorem ipsum', ['#project_foo', '#docs']],
  [9, 'lorem ipsum', ['#project_bar', '#docs']],
  [10, 'lorem ipsum', ['#blog', '#article']],
  [11, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
  [12, 'lorem ipsum', ['#customer_x', '#maintenance', '#infrastructure']],
  [13, 'lorem ipsum', []]
]
# Expected output of tag_pyramid for the sample entries: a pair of
# [hash of tag => nested result for the entries carrying it, entries without any tag].
expected_result = [
  {"#customer_x"=>
    [{"#administration"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]],
      "#paperwork"=>[{"#administration"=>[{}, [[1, "lorem ipsum", []]]]}, []],
      "#development"=>[{}, [[2, "lorem ipsum", []]]],
      "#maintenance"=>[{"#infrastructure"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []],
      "#infrastructure"=>[{"#maintenance"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []]},
     []],
   "#administration"=>
    [{"#customer_x"=>[{"#paperwork"=>[{}, [[1, "lorem ipsum", []]]]}, [[3, "lorem ipsum", []]]],
      "#paperwork"=>[{"#customer_x"=>[{}, [[1, "lorem ipsum", []]]]}, []],
      "#customer_y"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]],
      "#invoice"=>[{"#customer_y"=>[{}, [[5, "lorem ipsum", []]]]}, []]},
     []],
   "#paperwork"=>[{"#customer_x"=>[{"#administration"=>[{}, [[1, "lorem ipsum", []]]]}, []], "#administration"=>[{"#customer_x"=>[{}, [[1, "lorem ipsum", []]]]}, []]}, []],
   "#development"=>[{"#customer_x"=>[{}, [[2, "lorem ipsum", []]]]}, []],
   "#customer_y"=>[{"#administration"=>[{"#invoice"=>[{}, [[5, "lorem ipsum", []]]]}, [[4, "lorem ipsum", []]]], "#invoice"=>[{"#administration"=>[{}, [[5, "lorem ipsum", []]]]}, []]}, []],
   "#invoice"=>[{"#customer_y"=>[{"#administration"=>[{}, [[5, "lorem ipsum", []]]]}, []], "#administration"=>[{"#customer_y"=>[{}, [[5, "lorem ipsum", []]]]}, []]}, []],
   "#bike"=>[{}, [[6, "lorem ipsum", []]]],
   "#project_foo"=>[{"#mvp"=>[{}, [[7, "lorem ipsum", []]]], "#docs"=>[{}, [[8, "lorem ipsum", []]]]}, []],
   "#mvp"=>[{"#project_foo"=>[{}, [[7, "lorem ipsum", []]]]}, []],
   "#docs"=>[{"#project_foo"=>[{}, [[8, "lorem ipsum", []]]], "#project_bar"=>[{}, [[9, "lorem ipsum", []]]]}, []],
   "#project_bar"=>[{"#docs"=>[{}, [[9, "lorem ipsum", []]]]}, []],
   "#blog"=>[{"#article"=>[{}, [[10, "lorem ipsum", []]]]}, []],
   "#article"=>[{"#blog"=>[{}, [[10, "lorem ipsum", []]]]}, []],
   "#maintenance"=>[{"#customer_x"=>[{"#infrastructure"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []], "#infrastructure"=>[{"#customer_x"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []]}, []],
   "#infrastructure"=>[{"#customer_x"=>[{"#maintenance"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []], "#maintenance"=>[{"#customer_x"=>[{}, [[11, "lorem ipsum", []], [12, "lorem ipsum", []]]]}, []]}, []]},
  [[13, "lorem ipsum", []]]
]
# Groups entries under every tag they carry, then recursively groups each
# tag's entries by their remaining tags.
#
# A tag repeated on one entry is counted once, so the entry is listed a single
# time under that tag.
#
# @param entries [Array<Array>] entries shaped as [id, description, tags]
# @return [Array] a pair [groups, without_tag]: groups maps each tag to the
#   nested result for the entries carrying it (with that tag removed);
#   without_tag lists the given entries whose tag array is empty
def tag_pyramid(entries)
  without_tag = entries.select { |entry| entry[2].empty? }

  grouped = entries.each_with_object({}) do |(id, desc, tags), memo|
    unique_tags = tags.uniq
    unique_tags.each { |tag| (memo[tag] ||= []) << [id, desc, unique_tags - [tag]] }
  end

  # Each level strips one tag from its entries, so the recursion ends once the
  # entries of a group have no tags left.
  [grouped.transform_values { |subentries| tag_pyramid(subentries) }, without_tag]
end
# Self-check: run the grouping on the sample entries and report, in colour,
# whether the outcome equals the expected structure.
result = tag_pyramid(entries)
if result == expected_result
  puts "\e[32mCool, result matches expected value\e[0m"
else
  puts "\e[31mResult doesn't match the expected value\e[0m"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment