Created
October 28, 2016 15:00
-
-
Save joekur/7ac5089bcd8167571857e837b9fbffff to your computer and use it in GitHub Desktop.
Plot records over time in your rails console
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example usage:
#   timeplot User.all
#   timeplot User.all, bucket_size: :day, field: :updated_at
#
# Arguments:
#   bucket_size: One of [:day, :month, :year]. Defaults to :month.
#   field: Timestamp field to plot against. Defaults to :created_at.
require "date"

# Plots how many records fall into each day/month/year bucket of a timestamp
# field, as a text histogram printed to stdout.
#
# Records are tallied in whatever order they are yielded, so the result does
# not depend on the iteration order of the underlying collection. Every bucket
# between the earliest and the latest timestamp is reported, including buckets
# with a count of zero.
class TimePlot
  # Supported values for +bucket_size+.
  BUCKET_SIZES = [:day, :month, :year].freeze

  # Width, in characters, of the bar drawn for the fullest bucket.
  X_AXIS_MAX = 100

  # strftime pattern used to label a bucket, per bucket size.
  KEY_FORMATS = { day: "%d-%m-%Y", month: "%m-%Y", year: "%Y" }.freeze

  attr_reader :records, :bucket_size, :field

  # @param records [#find_each, #each] collection to plot; iterated with
  #   +find_each+ when available, otherwise with +each+
  # @param bucket_size [Symbol] one of BUCKET_SIZES
  # @param field [Symbol] reader called on each record; expected to return an
  #   object responding to +to_date+, or nil
  def initialize(records, bucket_size: :month, field: :created_at)
    @records = records
    @bucket_size = bucket_size
    @field = field
  end

  # Prints one line per bucket with a bar scaled against the fullest bucket.
  # Prints nothing when there are no timestamps to plot.
  #
  # @return [nil]
  # @raise [RuntimeError] if bucket_size is not one of BUCKET_SIZES
  def graph
    max = buckets.values.max
    buckets.each do |timestamp, count|
      if count.positive?
        puts "#{timestamp} | #{draw_line(count, max)} (#{count})"
      else
        puts "#{timestamp} |"
      end
    end
    nil
  end

  # Prints one "<bucket> - <count>" line per bucket.
  #
  # @return [nil]
  # @raise [RuntimeError] if bucket_size is not one of BUCKET_SIZES
  def counts
    buckets.each { |timestamp, count| puts "#{timestamp} - #{count}" }
    nil
  end

  private

  def timestamp_for(record)
    record.public_send(field)
  end

  # Memoized bucket label => count hash, in chronological order.
  def buckets
    @_buckets ||= build_buckets
  end

  # Tallies records per bucket while tracking the earliest and latest date
  # seen, then emits every bucket in that range so gaps show up as zero.
  # Records whose timestamp is nil are skipped.
  def build_buckets
    tallies = Hash.new(0)
    earliest = latest = nil

    each_record do |record|
      timestamp = timestamp_for(record)
      next if timestamp.nil?

      date = timestamp.to_date
      tallies[key(date)] += 1
      earliest = date if earliest.nil? || date < earliest
      latest = date if latest.nil? || date > latest
    end

    return {} if earliest.nil?

    bucket_starts(earliest, latest).each_with_object({}) do |date, result|
      label = key(date)
      result[label] = tallies[label]
    end
  end

  # Yields each record. Uses find_each when the collection offers it and
  # falls back to each otherwise.
  def each_record(&block)
    if records.respond_to?(:find_each)
      records.find_each(&block)
    else
      records.each(&block)
    end
  end

  # One Date per bucket from the bucket containing +earliest+ through the
  # bucket containing +latest+, in ascending order.
  def bucket_starts(earliest, latest)
    case bucket_size
    when :day
      (earliest..latest).to_a
    when :month
      span = (latest.year - earliest.year) * 12 + (latest.month - earliest.month)
      first_month = Date.new(earliest.year, earliest.month, 1)
      (0..span).map { |offset| first_month >> offset }
    when :year
      (earliest.year..latest.year).map { |year| Date.new(year, 1, 1) }
    else
      raise "Invalid bucket_size - #{bucket_size}"
    end
  end

  # Label of the bucket that +timestamp+ falls into.
  def key(timestamp)
    format = KEY_FORMATS.fetch(bucket_size) { raise "Invalid bucket_size - #{bucket_size}" }
    timestamp.strftime(format)
  end

  # Bar for +count+, scaled so that +max+ maps to X_AXIS_MAX characters.
  def draw_line(count, max)
    units = ((count.to_f / max) * X_AXIS_MAX).round
    "=" * units
  end
end
# Console shortcut: builds a TimePlot from the given arguments and prints its
# graph.
#
# Keyword arguments are forwarded explicitly; on Ruby 3+ a bare *args splat
# would hand them to TimePlot.new as a positional hash and raise
# ArgumentError.
#
# @param args positional arguments for TimePlot.new (the records to plot)
# @param options keyword arguments for TimePlot.new (bucket_size:, field:)
# @return whatever TimePlot#graph returns
def timeplot(*args, **options)
  TimePlot.new(*args, **options).graph
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
TODO - I'm pretty sure this doesn't work at plotting all 0-count buckets when using a field other than created_at. Since this is doing batching, which works on primary_key (id), I don't think the ordering does anything, so the records we get aren't guaranteed to be sorted by the specified timestamp field.