An Elixir helper for classifying text
@doc """ | |
Example Usage | |
``` | |
items = [ | |
%{ | |
name: "thing", | |
description: "foo bar baz" | |
}, | |
%{ | |
name: "thing two", | |
description: "biz bump boozle biz" | |
}, | |
] | |
word_counts = maps_to_word_counts(items) | |
word_list = generate_word_list(word_counts, 0.1, 0.5) | |
table(:counts, word_counts, word_list) | |
``` | |
""" | |
defmodule ClassificationHelper do | |
require Stemmer | |
  @stop_words ~w(
    a about above after again against all am an and any are arent as at be
    because been before being below between both but by cant cannot could
    couldnt did didnt do does doesnt doing dont down during each few for from
    further had hadnt has hasnt have havent having he hed hell hes her here
    heres hers herself him himself his how hows i id ill im ive if in into
    is isnt it its its itself lets me more most mustnt my myself no nor not of
    off on once only or other ought our ours ourselves out over own same shant
    she shed shell shes should shouldnt so some such than that thats the
    their theirs them themselves then there theres these they theyd theyll
    theyre theyve this those through to too under until up very was wasnt we
    wed well were weve were werent what whats when whens where wheres
    which while who whos whom why whys with wont would wouldnt you youd
    youll youre youve your yours yourself yourselves
  )

  # Counts the words in `text`, adding `weight` to the running total for each
  # occurrence. Counts accumulate into `state`, so repeated calls can build up
  # counts across several fields.
  def count_words(text, state \\ %{}, weight \\ 1) do
    text
    |> tokenize()
    |> Enum.reduce(state, fn word, acc ->
      Map.update(acc, String.to_atom(word), weight, &(&1 + weight))
    end)
  end
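
  # Illustrative example (assumes the Stemmer package stems "running" to "run"
  # and that tokenize/1 below drops the stop word "and"):
  #
  #     count_words("Running and running", %{}, 2)
  #     #=> %{run: 4}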

  # Builds the list of words whose document frequency lies strictly between
  # `lower_bound` and `upper_bound`, expressed as fractions of the number of
  # documents in `word_counts_map`.
  def generate_word_list(word_counts_map, lower_bound, upper_bound) do
    n = Enum.count(word_counts_map)

    word_counts_map
    |> get_appearances()
    |> Enum.filter(fn {_word, appearances} ->
      appearances / n > lower_bound && appearances / n < upper_bound
    end)
    |> Enum.map(fn {word, _appearances} -> word end)
  end
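
  # Illustrative example: with 10 documents, generate_word_list(word_counts, 0.1, 0.5)
  # keeps only the words that appear in 2 to 4 distinct documents.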

  # Counts, for each word, how many documents (rows) it appears in at least once.
  def get_appearances(word_counts_map) do
    Enum.reduce(word_counts_map, %{}, fn {_item, row}, acc ->
      row_appearances =
        row
        |> Map.keys()
        |> Enum.map(&{&1, Map.get(acc, &1, 0) + 1})
        |> Enum.into(%{})

      Map.merge(acc, row_appearances)
    end)
  end
@doc """ | |
""" | |
def maps_to_word_counts(items, name_field, fields) do | |
items | |
|> Enum.map(fn({_, item}) -> | |
{ | |
Map.get(item, name_field), | |
Enum.reduce(fields, %{}, &(count_field_weighted(&1, item, &2))) | |
} | |
end) | |
|> Enum.into(%{}) | |
end | |
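
  # Illustrative example (atom keys come from String.to_atom/1 in count_words/3;
  # "foo", "bar" and "baz" pass through the stemmer unchanged):
  #
  #     maps_to_word_counts(%{1 => %{name: "thing", description: "foo bar baz"}},
  #                         :name, [:description])
  #     #=> %{"thing" => %{foo: 1, bar: 1, baz: 1}}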

  # Bag-of-words and tf-idf representations are placeholders for now.
  def table(:bow, _word_count_maps, _word_list) do
  end

  def table(:tfidf, _word_count_maps, _word_list) do
  end

  # Builds a table of raw counts: a header row of the word list followed by
  # one `[name | counts]` row per document.
  def table(:counts, word_count_maps, word_list) do
    rows =
      Enum.map(word_count_maps, fn {name, row} ->
        [name | Enum.map(word_list, &get_column_weighted(&1, row))]
      end)

    [["" | word_list] | rows]
  end

  # Normalises `text` into a list of stemmed, lowercased words, with
  # punctuation and stop words removed.
  def tokenize(text) do
    text
    |> String.replace(~r/[^A-Za-z\s]/u, "")
    |> String.downcase()
    |> String.split()
    |> Enum.reject(&(&1 in @stop_words))
    |> Stemmer.stem()
  end
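
  # Illustrative example (assumes the Stemmer package reduces "cats" to "cat"
  # and "running" to "run"):
  #
  #     tokenize("The cats are running!")
  #     #=> ["cat", "run"]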

  # Private helpers

  defp count_field_weighted({field, weight}, item, acc) do
    count_words(Map.get(item, field), acc, weight)
  end

  defp count_field_weighted(field, item, acc) do
    count_field_weighted({field, 1}, item, acc)
  end

  defp get_column_weighted({col, weight}, row), do: Map.get(row, col, 0) * weight
  defp get_column_weighted(col, row), do: get_column_weighted({col, 1}, row)
end
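
A minimal end-to-end sketch of how the module might be used, assuming the `stemmer` Hex package is available as a dependency. The item data and field names are made up, and the exact stemmed keys (e.g. `:appl`, `:orang`) depend on the stemmer's output:

```
items = %{
  1 => %{name: "doc one", description: "apples and oranges"},
  2 => %{name: "doc two", description: "oranges and pears"},
  3 => %{name: "doc three", description: "pears and plums"}
}

# Word counts per item, keyed by the :name field.
word_counts = ClassificationHelper.maps_to_word_counts(items, :name, [:description])

# With 3 documents, bounds of 0.1 and 0.9 keep words that appear in 1 or 2 of them.
word_list = ClassificationHelper.generate_word_list(word_counts, 0.1, 0.9)

# A header row of words followed by one row of raw counts per document.
ClassificationHelper.table(:counts, word_counts, word_list)
```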