Skip to content

Instantly share code, notes, and snippets.

@vr
Created March 20, 2017 09:10
Show Gist options
  • Save vr/1dfffe73a437209fc6dfb36c4ae75cb3 to your computer and use it in GitHub Desktop.
Save vr/1dfffe73a437209fc6dfb36c4ae75cb3 to your computer and use it in GitHub Desktop.
haproxy collectd ruby plugin
#!/usr/bin/env ruby
require "rubygems"
require "open-uri"
require "optparse"
require "csv"
# Defaults for the HAProxy stats endpoint; -H / -p replace them.
host = "localhost"
port = "8081"
# Host name used in the emitted PUTVAL identifiers; when -o is not given it
# falls back to `host` further down.
override_host = nil

# Command-line handling. The parser consumes the recognised switches from ARGV.
parser = OptionParser.new
parser.on("-H", "--host hostname") { |value| host = value }
parser.on("-o", "--override-host hostname") { |value| override_host = value }
parser.on("-p", "--port port") { |value| port = value }
parser.on("-h") do
  # Print the generated usage text and stop.
  puts parser
  exit
end
parser.parse!

# Flush every write immediately instead of buffering it.
STDERR.sync = true
STDOUT.sync = true

# Seconds reported as interval= in each PUTVAL line; the polling loop also
# multiplies it by sleep_backoff to get the delay between polls.
interval = 10
# Multiplier for the delay; 0 means the first poll happens without waiting.
sleep_backoff = 0

override_host ||= host
# HAProxy stats page in CSV form.
url = "http://#{host}:#{port}/;csv"
# Reference: all possible values available in a CSV row. Warning: they may be
# null depending on the service.
# In brackets after each field name are the types which may have a value for
# that field. The types are L (Listeners), F (Frontends), B (Backends), and
# S (Servers).
#
#  0. pxname [LFBS]: proxy name
#  1. svname [LFBS]: service name (FRONTEND for frontend, BACKEND for backend,
#     any name for server/listener)
#  2. qcur [..BS]: current queued requests. For the backend this reports the
#     number queued without a server assigned.
#  3. qmax [..BS]: max value of qcur
#  4. scur [LFBS]: current sessions
#  5. smax [LFBS]: max sessions
#  6. slim [LFBS]: configured session limit
#  7. stot [LFBS]: cumulative number of connections
#  8. bin [LFBS]: bytes in
#  9. bout [LFBS]: bytes out
# 10. dreq [LFB.]: requests denied because of security concerns.
#     - For tcp this is because of a matched tcp-request content rule.
#     - For http this is because of a matched http-request or tarpit rule.
# 11. dresp [LFBS]: responses denied because of security concerns.
#     - For http this is because of a matched http-request rule, or
#       "option checkcache".
# 12. ereq [LF..]: request errors. Some of the possible causes are:
#     - early termination from the client, before the request has been sent.
#     - read error from the client
#     - client timeout
#     - client closed connection
#     - various bad requests from the client.
#     - request was tarpitted.
# 13. econ [..BS]: number of requests that encountered an error trying to
#     connect to a backend server. The backend stat is the sum of the stat
#     for all servers of that backend, plus any connection errors not
#     associated with a particular server (such as the backend having no
#     active servers).
# 14. eresp [..BS]: response errors. srv_abrt will be counted here also.
#     Some other errors are:
#     - write error on the client socket (won't be counted for the server stat)
#     - failure applying filters to the response.
# 15. wretr [..BS]: number of times a connection to a server was retried.
# 16. wredis [..BS]: number of times a request was redispatched to another
#     server. The server value counts the number of times that server was
#     switched away from.
# 17. status [LFBS]: status (UP/DOWN/NOLB/MAINT/MAINT(via)...)
# 18. weight [..BS]: server weight (server), total weight (backend)
# 19. act [..BS]: server is active (server), number of active servers (backend)
# 20. bck [..BS]: server is backup (server), number of backup servers (backend)
# 21. chkfail [...S]: number of failed checks. (Only counts checks failed when
#     the server is up.)
# 22. chkdown [..BS]: number of UP->DOWN transitions. The backend counter counts
#     transitions to the whole backend being down, rather than the sum of the
#     counters for each server.
# 23. lastchg [..BS]: number of seconds since the last UP<->DOWN transition
# 24. downtime [..BS]: total downtime (in seconds). The value for the backend
#     is the downtime for the whole backend, not the sum of the server downtime.
# 25. qlimit [...S]: configured maxqueue for the server, or nothing if the
#     value is 0 (default, meaning no limit)
# 26. pid [LFBS]: process id (0 for first instance, 1 for second, ...)
# 27. iid [LFBS]: unique proxy id
# 28. sid [L..S]: server id (unique inside a proxy)
# 29. throttle [...S]: current throttle percentage for the server, when
#     slowstart is active, or no value if not in slowstart.
# 30. lbtot [..BS]: total number of times a server was selected, either for new
#     sessions, or when re-dispatching. The server counter is the number
#     of times that server was selected.
# 31. tracked [...S]: id of proxy/server if tracking is enabled.
# 32. type [LFBS]: (0=frontend, 1=backend, 2=server, 3=socket/listener)
# 33. rate [.FBS]: number of sessions per second over last elapsed second
# 34. rate_lim [.F..]: configured limit on new sessions per second
# 35. rate_max [.FBS]: max number of new sessions per second
# 36. check_status [...S]: status of last health check, one of:
#     UNK     -> unknown
#     INI     -> initializing
#     SOCKERR -> socket error
#     L4OK    -> check passed on layer 4, no upper layers testing enabled
#     L4TMOUT -> layer 1-4 timeout
#     L4CON   -> layer 1-4 connection problem, for example
#                "Connection refused" (tcp rst) or "No route to host" (icmp)
#     L6OK    -> check passed on layer 6
#     L6TOUT  -> layer 6 (SSL) timeout
#     L6RSP   -> layer 6 invalid response - protocol error
#     L7OK    -> check passed on layer 7
#     L7OKC   -> check conditionally passed on layer 7, for example 404 with
#                disable-on-404
#     L7TOUT  -> layer 7 (HTTP/SMTP) timeout
#     L7RSP   -> layer 7 invalid response - protocol error
#     L7STS   -> layer 7 response error, for example HTTP 5xx
# 37. check_code [...S]: layer5-7 code, if available
# 38. check_duration [...S]: time in ms took to finish last health check
# 39. hrsp_1xx [.FBS]: http responses with 1xx code
# 40. hrsp_2xx [.FBS]: http responses with 2xx code
# 41. hrsp_3xx [.FBS]: http responses with 3xx code
# 42. hrsp_4xx [.FBS]: http responses with 4xx code
# 43. hrsp_5xx [.FBS]: http responses with 5xx code
# 44. hrsp_other [.FBS]: http responses with other codes (protocol error)
# 45. hanafail [...S]: failed health checks details
# 46. req_rate [.F..]: HTTP requests per second over last elapsed second
# 47. req_rate_max [.F..]: max number of HTTP requests per second observed
# 48. req_tot [.F..]: total number of HTTP requests received
# 49. cli_abrt [..BS]: number of data transfers aborted by the client
# 50. srv_abrt [..BS]: number of data transfers aborted by the server
#     (inc. in eresp)
# 51. comp_in [.FB.]: number of HTTP response bytes fed to the compressor
# 52. comp_out [.FB.]: number of HTTP response bytes emitted by the compressor
# 53. comp_byp [.FB.]: number of bytes that bypassed the HTTP compressor
#     (CPU/BW limit)
# 54. comp_rsp [.FB.]: number of HTTP responses that were compressed
# 55. lastsess [..BS]: number of seconds since last session assigned to
#     server/backend
# 56. last_chk [...S]: last health check contents or textual error
# 57. last_agt [...S]: last agent check contents or textual error
# 58. qtime [..BS]: the average queue time in ms over the 1024 last requests
# 59. ctime [..BS]: the average connect time in ms over the 1024 last requests
# 60. rtime [..BS]: the average response time in ms over the 1024 last requests
#     (0 for TCP)
# 61. ttime [..BS]: the average total session time in ms over the 1024 last
#     requests

# CSV column => collectd type used in the PUTVAL identifier. Built once because
# it does not change between polls. "ttime" is listed unconditionally: a column
# that is missing or empty in a row is skipped when the values are printed.
# NOTE(review): per the field list above "qcur" and "ttime" are point-in-time
# values, so "gauge" looks more appropriate than "derive". Left as-is because
# changing the type renames the emitted identifiers - confirm before switching.
metrics = {
  "stot" => "derive",
  "qcur" => "derive",
  "bin" => "derive",
  "bout" => "derive",
  "eresp" => "derive",
  "chkfail" => "derive",
  "ttime" => "derive",
}

# Poll forever. Kernel#sleep returns the number of seconds slept, which is
# always truthy in Ruby (0 included), so the condition only supplies the delay:
# none before the first poll, `interval` seconds after a successful poll, and
# up to 10 * interval seconds after consecutive failures.
while sleep(sleep_backoff * interval)
  begin
    # Fetch the CSV. URI#read comes from open-uri and works on both old and
    # current Ruby; the open-uri patch that let Kernel#open take a URL was
    # removed in Ruby 3.0. Reading the body into a string also leaves no
    # handle to close.
    csv_text = URI.parse(url).read

    CSV.parse(csv_text, :headers => true) do |row|
      pxname = row["# pxname"]
      svname = row["svname"]

      # Don't collect the stats frontend or the "-check" proxies.
      next if pxname == "stats"
      next if pxname =~ /-check/

      metrics.each do |field, field_type|
        value = row[field]
        # Not every column has a value for every row type.
        next unless value

        puts "PUTVAL #{override_host}/haproxy/#{field_type}-#{pxname}_#{svname}_#{field} interval=#{interval} N:#{value}"
      end
    end

    # Successful poll: go back to the regular interval.
    sleep_backoff = 1
  rescue StandardError => e
    # Only ordinary errors (network, HTTP, CSV parsing, ...) trigger a retry.
    # Interrupt, SignalException and SystemExit are not StandardErrors, so they
    # still terminate the script.
    sleep_backoff += 1 if sleep_backoff < 10
    STDERR.puts "Error retrieving #{url}: #{e.message}. Suspending for #{sleep_backoff * interval} seconds"
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment