Skip to content

Instantly share code, notes, and snippets.

@SaveTheRbtz
Created April 16, 2012 10:46

Revisions

  1. SaveTheRbtz created this gist Apr 16, 2012.
    135 changes: 135 additions & 0 deletions aggregate_filestat.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,135 @@
    #!/usr/bin/env python

    import logging as log
    from fileinput import input
    from collections import namedtuple, defaultdict
    from pprint import pprint

    # Be chatty unless the interpreter runs with -O (which clears __debug__).
    log_level = log.DEBUG if __debug__ else log.WARNING
    log.basicConfig(level=log_level)

    # XXX: Backport @lru_cache
    # pid -> {(map_start, map_end): pathname} for mappings already resolved.
    CACHED_ATOF = defaultdict(dict)
    # '/proc/<pid>/maps' path -> open file object, kept open for re-reading.
    CACHED_FILES = dict()

    class Stats(object):
        """Simple IO statistics accumulator.

        The attributes are plain counters that callers update directly:

        * ``perfile`` -- ``defaultdict(int)`` of per-key totals,
        * ``iops`` -- number of accounted operations,
        * ``fault_iops`` -- number of accounted page faults.
        """

        def __init__(self):
            self.perfile = defaultdict(int)
            self.iops = 0
            self.fault_iops = 0

        def clear(self):
            """Reset every counter to its initial (empty / zero) state."""
            self.perfile.clear()
            self.iops = 0
            self.fault_iops = 0

        def __str__(self):
            """Return the current counters rendered as text."""
            # __str__ must return a string; handing back the raw __dict__
            # made str(stats) raise TypeError.
            return str(self.__dict__)

    def address_in_range(address, map_start, map_end):
        """Tell whether a hexadecimal address falls inside a mapping.

        address   -- address as a hexadecimal string (a ``0x`` prefix is accepted)
        map_start -- first address of the mapping, as an integer
        map_end   -- end of the mapping, as an integer

        Returns True when ``map_start <= address < map_end``.  The end is
        exclusive, matching the ``start-end`` ranges of /proc/<pid>/maps, so an
        address sitting exactly on a boundary belongs to the next mapping only.
        Raises ValueError if ``address`` is not valid hexadecimal.
        """
        return map_start <= int(address, base=16) < map_end

    def update_file_cache(pid, address):
        """Record in CACHED_ATOF which mapped file of ``pid`` holds ``address``.

        pid     -- process id as a string (used to build '/proc/<pid>/maps')
        address -- faulting address as a hexadecimal string

        Every mapping whose range contains ``address`` is stored as
        ``CACHED_ATOF[pid][(map_start, map_end)] = pathname``.  The maps file is
        opened once per pid and the handle is kept in CACHED_FILES.

        Nothing is returned and nothing is raised: failures are only logged,
        because the lookup is best effort.
        """
        Map = namedtuple('Map', 'address perms offset dev inode pathname')
        try:
            filename = '/proc/' + pid + '/maps'
            if filename not in CACHED_FILES:
                CACHED_FILES[filename] = open(filename)
            maps_file = CACHED_FILES[filename]
            # Rewind before reading rather than after, so that a failed read
            # cannot leave the cached handle positioned mid-file.
            maps_file.seek(0)
            for line in maps_file.readlines():
                # Split off at most five columns: the pathname is the remainder
                # and may itself contain whitespace (e.g. "foo (deleted)").
                fields = line.rstrip().split(None, 5)
                if len(fields) < 6:
                    # No pathname column, so there is no file to attribute to.
                    continue
                try:
                    map_ = Map(*fields)
                    map_start, map_end = [
                        int(bound, 16) for bound in map_.address.split('-')
                    ]
                    if address_in_range(address, map_start, map_end):
                        CACHED_ATOF[pid][map_start, map_end] = map_.pathname
                except Exception:
                    log.debug("Can't parse line: [ {0} ]".format(line))
        except Exception:
            log.info("Can't get file for pid: [ {0} ] from address [ {1} ]".format(pid, address), exc_info=True)

    def address_to_file(pid, address):
        """Resolve a faulting address of ``pid`` to a cached mapping pathname.

        pid     -- process id as a string
        address -- address as a hexadecimal string

        Returns the pathname of the first cached mapping containing the
        address.  When none matches, returns '<pid>/<address>' in debug mode
        and 'UNKNOWN' otherwise.
        """
        # XXX: Dirty hack for very loaded servers and static apps:
        # the maps file is only consulted the first time a pid is seen.
        if pid not in CACHED_ATOF:
            banner = ' CACHE MISS [ {0} ]'.format(pid)
            print('{0:=^80}'.format(banner))
            update_file_cache(pid, address)

        # Indexing the defaultdict registers the pid even when nothing was found.
        for (low, high), pathname in CACHED_ATOF[pid].items():
            if address_in_range(address, low, high):
                return pathname

        return (pid + '/' + address) if __debug__ else 'UNKNOWN'

    def acc_io(IO, line, read, write):
        """Account one VFS read/write record.

        IO    -- record factory taking (type, pid, file, size)
        line  -- raw input line; fields are tab-separated, with whitespace
                 separation accepted as a fallback for lines without tabs
        read  -- accumulator updated when 'read' occurs in the record type
        write -- accumulator updated otherwise

        Adds the record size to ``perfile[file]`` and bumps ``iops`` on the
        chosen accumulator.  Raises TypeError when the line does not have the
        expected number of fields and ValueError when size is not an integer.
        """
        if '\t' in line:
            # Split on tabs only: the file path may legitimately contain spaces.
            fields = line.rstrip('\r\n').split('\t')
        else:
            fields = line.split()
        io = IO(*fields)
        acc = read if 'read' in io.type else write
        acc.perfile[io.file] += int(io.size)
        acc.iops += 1

    def acc_pfault(Pfault, line, read, write, page_size=4096):
        """Account one page-fault record.

        Pfault    -- record factory taking (type, pid, address)
        line      -- raw input line with whitespace-separated fields
        read      -- accumulator updated when '_r' occurs in the record type
        write     -- accumulator updated otherwise
        page_size -- bytes charged per fault; defaults to 4096, the value that
                     used to be hard-coded

        The fault is charged to the file returned by address_to_file() and
        counted in both ``fault_iops`` and ``iops``.  Raises TypeError when the
        line does not have exactly three fields.
        """
        pfault = Pfault(*line.split())
        acc = read if '_r' in pfault.type else write
        acc.perfile[address_to_file(pfault.pid, pfault.address)] += page_size
        acc.fault_iops += 1
        acc.iops += 1

    def sort_by_value(dict_, reverse=True):
        """Return the (key, value) pairs of ``dict_`` as a list ordered by value.

        The largest values come first unless ``reverse`` is false.
        """
        def _value(pair):
            return pair[1]

        pairs = list(dict_.items())
        pairs.sort(key=_value, reverse=reverse)
        return pairs

    def print_stats(read, write):
        """Print a report for both accumulators, then reset them.

        read  -- accumulator holding read-side counters
        write -- accumulator holding write-side counters

        Prints the totals, the 20 largest ``perfile`` entries per direction and,
        in debug mode, the complete sorted tables.  Both accumulators are
        cleared before returning, so each report covers only the activity since
        the previous one.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("=== SUMMARY ===")
        print("Total IO: {0}".format(read.iops + write.iops))
        print("Pagefaults IO: {0}".format(read.fault_iops + write.fault_iops))
        print("Reads/Writes: {0}/{1}".format(read.iops, write.iops))

        # perfile values are byte counts; convert so that the "Kbs" label is
        # true instead of showing raw bytes under a kilobyte heading.
        read_kbs = sum(read.perfile.values()) / 1024.0
        write_kbs = sum(write.perfile.values()) / 1024.0
        print("Read/Write Kbs: {0:.1f}/{1:.1f}".format(read_kbs, write_kbs))

        for name, stat in (('read', read), ('write', write)):
            print("=== STAT {0} ===".format(name))
            for k, v in sort_by_value(stat.perfile)[:20]:
                print("{0}:\t{1}".format(k, v))

        if __debug__:
            print("=== RAW ===")
            pprint(sort_by_value(read.perfile))
            pprint(sort_by_value(write.perfile))

        # Blank separator line between consecutive reports.
        print('')
        read.clear()
        write.clear()

    def main():
        """Aggregate trace records from the input stream and print reports.

        Lines are dispatched on their prefix: 'vfs_' records go to acc_io(),
        'pfault_' records to acc_pfault(), and '__PRINT__' triggers a report.
        Other lines are ignored.  A final report is printed once the input is
        exhausted.
        """
        read, write = Stats(), Stats()
        Pfault = namedtuple('Pfault', 'type pid address')
        IO = namedtuple('IO', 'type pid file size')

        # The prefixes are mutually exclusive, so at most one handler runs.
        handlers = (
            ('vfs_', lambda record: acc_io(IO, record, read, write)),
            ('pfault_', lambda record: acc_pfault(Pfault, record, read, write)),
            ('__PRINT__', lambda record: print_stats(read, write)),
        )

        for line in input():
            for prefix, handle in handlers:
                if line.startswith(prefix):
                    handle(line)
                    break

        print_stats(read, write)

    if __name__ == '__main__':
    main()
    46 changes: 46 additions & 0 deletions filestat.stp
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,46 @@
    #!/usr/bin/env stap

    #
    # Usage: `stap filestat.stp app_name | python -u aggregate_filestat.py`
    #

    # Name of the executable to trace, taken from the first script argument.
    global PROCNAME = @1

    # State of the page fault currently in flight, filled in by the
    # vm.pagefault probe and consumed by vm.pagefault.return.
    global fault_address
    global fault_access

    # Start timestamp in microseconds.
    # NOTE(review): written here but not read anywhere in the visible script.
    global time_offset

    probe begin {
        time_offset = gettimeofday_us()
    }

    # Remember the address and access type of a page fault on entry.
    probe vm.pagefault {
        # Only track the traced program.  Recording every process while the
        # return probe deleted entries only for PROCNAME let both arrays grow
        # without bound.
        if (execname() != PROCNAME) next

        # Key by thread id: threads of one process can fault concurrently and
        # would overwrite each other's entry if keyed by pid().
        t = tid()
        fault_address[t] = address
        fault_access[t] = write_access ? "w" : "r"
    }

    # On return, report the fault if it was a major one, then always drop the
    # per-thread state.
    probe vm.pagefault.return {
        t = tid()
        if (!(t in fault_address)) next

        if (execname() == PROCNAME && vm_fault_contains(fault_type, VM_FAULT_MAJOR)) {
            # Record format: pfault_<r|w> <TAB> pid <TAB> address
            printf("pfault_%s\t%d\t%p\n", fault_access[t], pid(), fault_address[t])
        }

        delete fault_address[t]
        delete fault_access[t]
    }

    # Report every successful vfs_read/vfs_write of the traced program.
    # Record format: function <TAB> pid <TAB> file path <TAB> return value
    probe kernel.function("vfs_write").return,
          kernel.function("vfs_read").return {
        # Ignore other programs.
        if (execname() != PROCNAME) next
        # Ignore calls whose return value is not positive.
        if ($return <= 0) next

        printf("%s\t%d\t%s\t%d\n", probefunc(), pid(), d_path(&$file->f_path), $return)
    }

    # Once per second, emit the marker line that makes the aggregator print
    # its statistics.
    probe timer.s(1) {
        print("__PRINT__\n")
    }