diff options
author | kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> | 2005-11-15 15:09:58 +0100 |
---|---|---|
committer | kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> | 2005-11-15 15:09:58 +0100 |
commit | 0d5ddb880686349ab6c35a283033f2ecc987dd0f (patch) | |
tree | e2cd550b6670d35f247d2cf14a4b1019a6dbafe7 /tools/xenmon/xenmon.py | |
parent | 0e3a022f8cfe382c960da254f632eaec4ec6dfcf (diff) | |
download | xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.tar.gz xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.tar.bz2 xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.zip |
The new userland monitoring tool, XenMon.
Signed-off-by: Rob Gardner <rob.gardner@hp.com>
Diffstat (limited to 'tools/xenmon/xenmon.py')
-rw-r--r-- | tools/xenmon/xenmon.py | 578 |
1 files changed, 578 insertions, 0 deletions
diff --git a/tools/xenmon/xenmon.py b/tools/xenmon/xenmon.py new file mode 100644 index 0000000000..0c1f33c61f --- /dev/null +++ b/tools/xenmon/xenmon.py @@ -0,0 +1,578 @@ +#!/usr/bin/env python + +##################################################################### +# xenmon is a front-end for xenbaked. +# There is a curses interface for live monitoring. XenMon also allows +# logging to a file. For options, run python xenmon.py -h +# +# Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins +# Authors: Lucy Cherkasova, lucy.cherkasova@hp.com +# Rob Gardner, rob.gardner@hp.com +# Diwaker Gupta, diwaker.gupta@hp.com +##################################################################### +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; under version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#####################################################################

import mmap
import struct
import os
import time
import optparse as _o
import curses as _c
import math
import sys

# constants
NSAMPLES = 100
NDOMAINS = 32

# the struct strings for qos_info
ST_DOM_INFO = "6Q4i32s"
ST_QDATA = "%dQ" % (6*NDOMAINS + 4)

# size of mmaped file
QOS_DATA_SIZE = struct.calcsize(ST_QDATA)*NSAMPLES + struct.calcsize(ST_DOM_INFO)*NDOMAINS + struct.calcsize("4i")

# location of mmaped file, hard coded right now
SHM_FILE = "/tmp/xenq-shm"

# format strings
TOTALS = 15*' ' + "%6.2f%%" + 35*' ' + "%6.2f%%"

ALLOCATED = "Allocated"
GOTTEN = "Gotten"
BLOCKED = "Blocked"
WAITED = "Waited"
IOCOUNT = "I/O Count"
EXCOUNT = "Exec Count"

# header line and record layout of the per-domain log files
LOG_HEADER = "# passed cpu dom cpu(tot) cpu(%) cpu/ex allocated/ex blocked(tot) blocked(%) blocked/io waited(tot) waited(%) waited/ex ex/s io(tot) io/ex\n"
LOG_FORMAT = "%.3f %d %d %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n"

# number of slots after "next" that are never summarized
GUARD_SAMPLES = 10

# globals
# our curses screen
stdscr = None

# parsed options
options, args = None, None


# the optparse module is quite smart
# to see help, just run xenmon -h
def setup_cmdline_parser():
    """Build and return the optparse parser for the xenmon command line.

    -l/--live and -n/--notlive both store into ``options.live``.  Both
    defaults are the boolean True; the original gave --notlive the string
    "True", which only worked because a non-empty string is truthy.
    """
    parser = _o.OptionParser()
    parser.add_option("-l", "--live", dest="live", action="store_true",
                      default=True, help = "show the ncurses live monitoring frontend (default)")
    parser.add_option("-n", "--notlive", dest="live", action="store_false",
                      default=True, help = "write to file instead of live monitoring")
    parser.add_option("-p", "--prefix", dest="prefix",
                      default = "log", help="prefix to use for output files")
    parser.add_option("-t", "--time", dest="duration",
                      action="store", type="int", default=10,
                      help="stop logging to file after this much time has elapsed (in seconds). set to 0 to keep logging indefinitely")
    parser.add_option("-i", "--interval", dest="interval",
                      action="store", type="int", default=1000,
                      help="interval for logging (in ms)")
    parser.add_option("--ms_per_sample", dest="mspersample",
                      action="store", type="int", default=100,
                      help = "determines how many ms worth of data goes in a sample")
    return parser


# encapsulate information about a domain
class DomainInfo:
    """Per-domain sample lists plus the statistics derived from them.

    Every ``*_stats`` method takes ``passed``, the amount of time covered
    by the collected samples.  It must be non-zero; summarize() starts it
    at 1 for that reason.  Rates are computed as total/(passed/10**9).
    """

    def __init__(self):
        self.allocated_samples = []
        self.gotten_samples = []
        self.blocked_samples = []
        self.waited_samples = []
        self.execcount_samples = []
        self.iocount_samples = []
        self.ffp_samples = []

    def _per_event(self, total, events):
        # average of total over events; 0 when there were no events
        if events > 0:
            return float(total)/events
        return 0

    def _time_stats(self, samples, events, passed):
        # [total scaled by passed/10**9, percentage of passed, average per event]
        total = float(sum(samples))
        per = 100*total/passed
        avg = self._per_event(total, sum(events))
        return [total/(float(passed)/10**9), per, avg]

    def gotten_stats(self, passed):
        """Return [rate, percent of passed, average per execution] for gotten samples."""
        return self._time_stats(self.gotten_samples, self.execcount_samples, passed)

    def waited_stats(self, passed):
        """Return [rate, percent of passed, average per execution] for waited samples."""
        return self._time_stats(self.waited_samples, self.execcount_samples, passed)

    def blocked_stats(self, passed):
        """Return [rate, percent of passed, average per I/O] for blocked samples."""
        return self._time_stats(self.blocked_samples, self.iocount_samples, passed)

    def allocated_stats(self, passed):
        """Return the average allocated amount per execution (0 if none)."""
        return self._per_event(sum(self.allocated_samples), sum(self.execcount_samples))

    def ec_stats(self, passed):
        """Return the execution count scaled by passed/10**9."""
        return float(sum(self.execcount_samples))/(float(passed)/10**9)

    def io_stats(self, passed):
        """Return [I/O count scaled by passed/10**9, average I/O count per execution]."""
        total = float(sum(self.iocount_samples))
        avg = self._per_event(total, sum(self.execcount_samples))
        return [total/(float(passed)/10**9), avg]

    def stats(self, passed):
        """Return [gotten, allocated, blocked, waited, exec count, io] statistics."""
        return [self.gotten_stats(passed), self.allocated_stats(passed), self.blocked_stats(passed),
                self.waited_stats(passed), self.ec_stats(passed), self.io_stats(passed)]


# report values over desired interval
def summarize(startat, endat, duration, samples):
    """Summarize samples walking backwards from ``startat``.

    The walk steps down through the sample ring (wrapping from 0 to
    NSAMPLES - 1) and stops once the accumulated time reaches ``duration``
    or the next index to visit equals ``endat``.

    Returns [ldoms, lostinfo, ffpinfo]: ``ldoms`` is a list with one
    DomainInfo.stats() result per domain, ``lostinfo`` and ``ffpinfo`` are
    [min, sum, max] over the visited samples ([0, 0, 0] if none were
    visited; the original raised ValueError from min() in that case).
    """
    dominfos = {}
    for i in range(0, NDOMAINS):
        dominfos[i] = DomainInfo()

    passed = 1              # to prevent zero division
    curid = startat
    lost_samples = []
    ffp_samples = []

    while passed < duration:
        sample = samples[curid]
        for i in range(0, NDOMAINS):
            info = dominfos[i]
            info.gotten_samples.append(sample[0*NDOMAINS + i])
            info.allocated_samples.append(sample[1*NDOMAINS + i])
            info.waited_samples.append(sample[2*NDOMAINS + i])
            info.blocked_samples.append(sample[3*NDOMAINS + i])
            info.execcount_samples.append(sample[4*NDOMAINS + i])
            info.iocount_samples.append(sample[5*NDOMAINS + i])

        passed += sample[6*NDOMAINS]
        lost_samples.append(sample[6*NDOMAINS + 2])
        ffp_samples.append(sample[6*NDOMAINS + 3])

        if curid > 0:
            curid -= 1
        else:
            curid = NSAMPLES - 1
        if curid == endat:
            break

    if lost_samples:
        lostinfo = [min(lost_samples), sum(lost_samples), max(lost_samples)]
        ffpinfo = [min(ffp_samples), sum(ffp_samples), max(ffp_samples)]
    else:
        lostinfo = [0, 0, 0]
        ffpinfo = [0, 0, 0]

    # a real list (not map()) so callers can index it on any Python version
    ldoms = [dominfos[i].stats(passed) for i in range(0, NDOMAINS)]

    return [ldoms, lostinfo, ffpinfo]


# scale nanoseconds to microseconds, milliseconds or seconds as necessary
def time_scale(ns):
    """Format a nanosecond count with the largest unit that keeps it >= 1."""
    if ns < 1000:
        return "%4.2f ns" % float(ns)
    elif ns < 1000*1000:
        return "%4.2f us" % (float(ns)/10**3)
    elif ns < 10**9:
        return "%4.2f ms" % (float(ns)/10**6)
    else:
        return "%4.2f s" % (float(ns)/10**9)


# paint message on curses screen, but detect screen size errors
def display(scr, row, col, str, attr=0):
    """Draw ``str`` at (row, col) on ``scr``.

    If curses rejects the write, the terminal is restored, a hint to
    resize the terminal is printed and the program exits with status 1.
    Only curses.error is treated this way; the original bare ``except``
    reported every failure as a screen-size problem.
    """
    try:
        scr.addstr(row, col, str, attr)
    except _c.error:
        scr.erase()
        _c.nocbreak()
        scr.keypad(0)
        _c.echo()
        _c.endwin()
        # single-argument print(...) behaves the same as the print statement
        print("Your terminal screen is not big enough; Please resize it.")
        print("row=%d, col=%d, str='%s'" % (row, col, str))
        sys.exit(1)


# undo the curses setup done by show_livestats
def _restore_terminal(scr):
    # display() may already have ended curses mode before exiting
    if _c.isendwin():
        return
    _c.nocbreak()
    scr.keypad(0)
    _c.echo()
    _c.endwin()


# unpack one cpu's worth of data from the mmaped file
def _read_cpu_record(shm, offset):
    """Return (samples, doms, header) unpacked from ``shm`` at ``offset``.

    ``samples`` holds NSAMPLES ST_QDATA tuples, ``doms`` holds NDOMAINS
    ST_DOM_INFO tuples and ``header`` is the trailing "4i" tuple, which
    the callers unpack as (next, ncpu, slen, freq).
    """
    qdata_size = struct.calcsize(ST_QDATA)
    dominfo_size = struct.calcsize(ST_DOM_INFO)
    header_size = struct.calcsize("4i")

    samples = []
    for i in range(0, NSAMPLES):
        samples.append(struct.unpack(ST_QDATA, shm[offset:offset + qdata_size]))
        offset += qdata_size

    doms = []
    for i in range(0, NDOMAINS):
        doms.append(struct.unpack(ST_DOM_INFO, shm[offset:offset + dominfo_size]))
        offset += dominfo_size

    header = struct.unpack("4i", shm[offset:offset + header_size])
    return samples, doms, header


# replace the current mapping by one of a different size
def _remap(shmf, shm, size):
    # close the old mapping first; the original simply dropped it
    shm.close()
    return mmap.mmap(shmf.fileno(), size)


# calculate starting and ending datapoints; never look at "next" since
# it represents live data that may be in transition.
def _sample_window(next_idx):
    """Return (startat, endat) for summarize() given the live slot index.

    Both ends wrap around the ring of NSAMPLES slots.  The original used
    ``next - 1`` unwrapped, so with next == 0 the last slot was read twice
    (once as index -1, once as NSAMPLES - 1), and it used a fixed endat of
    10 whenever ``next + 10`` ran past the end of the ring.
    """
    startat = (next_idx - 1) % NSAMPLES
    endat = (next_idx + GUARD_SAMPLES) % NSAMPLES
    return startat, endat


# draw one row: domain id, the given (column, text) cells, then a label
def _draw_row(scr, row, dom, cells, label):
    display(scr, row, 2, "%d" % dom)
    for col, text in cells:
        display(scr, row, col, text)
    display(scr, row, 90, label)


# cells for a [total, percent, average] triple over both time windows
def _timing_cells(ten_sec, one_sec, unit):
    return [(6, time_scale(ten_sec[0])),
            (18, "%3.2f%%" % ten_sec[1]),
            (30, "%s/%s" % (time_scale(ten_sec[2]), unit)),
            (48, time_scale(one_sec[0])),
            (60, "%3.2f%%" % one_sec[1]),
            (72, "%s/%s" % (time_scale(one_sec[2]), unit))]


# the actual display code
def _draw_screen(scr, cpu, maxx, h1, h2, l1, l2):
    """Paint one frame: h1/l1 cover the last second, h2/l2 the last ten."""
    row = 0
    display(scr, row, 1, "CPU = %d" % cpu, _c.A_STANDOUT)
    display(scr, row, 10, "%sLast 10 seconds%sLast 1 second" % (6*' ', 30*' '), _c.A_BOLD)
    row += 1
    display(scr, row, 1, "%s" % ((maxx-2)*'='))

    total_h1_cpu = 0
    total_h2_cpu = 0

    for dom in range(0, NDOMAINS):
        # only domains that ran during the last second, plus the last domain
        if not (h1[dom][0][1] > 0 or dom == NDOMAINS - 1):
            continue
        one = h1[dom]
        ten = h2[dom]

        row += 1
        _draw_row(scr, row, dom, _timing_cells(ten[0], one[0], "ex"), GOTTEN)

        row += 1
        _draw_row(scr, row, dom,
                  [(30, "%s/ex" % time_scale(ten[1])),
                   (72, "%s/ex" % time_scale(one[1]))],
                  ALLOCATED)

        row += 1
        _draw_row(scr, row, dom, _timing_cells(ten[2], one[2], "io"), BLOCKED)

        row += 1
        _draw_row(scr, row, dom, _timing_cells(ten[3], one[3], "ex"), WAITED)

        row += 1
        _draw_row(scr, row, dom,
                  [(30, "%d/s" % ten[4]),
                   (72, "%d" % one[4])],
                  "Execution count")

        # NOTE(review): the 10 second io/ex cell is printed with %d while
        # the 1 second one uses %3.2f; kept exactly as in the original
        row += 1
        _draw_row(scr, row, dom,
                  [(6, "%d/s" % ten[5][0]),
                   (30, "%d/ex" % ten[5][1]),
                   (48, "%d" % one[5][0]),
                   (72, "%3.2f/ex" % one[5][1])],
                  IOCOUNT)

        # NOTE(review): the source indentation was lost; the totals are
        # assumed to be accumulated only for the domains displayed
        total_h1_cpu += one[0][1]
        total_h2_cpu += ten[0][1]

    row += 1
    display(scr, row, 2, TOTALS % (total_h2_cpu, total_h1_cpu))
    row += 1
    # the FFP summary is computed by summarize() but is not displayed

    if l1[1] > 1:
        row += 1
        display(scr, row, 2,
                "\tRecords lost: %d (Min: %d, Max: %d)\t\t\tRecords lost: %d (Min: %d, Max %d)" %
                (math.ceil(l2[1]), l2[0], l2[2], math.ceil(l1[1]), l1[0], l1[2]), _c.A_BOLD)


# the live monitoring code
def show_livestats():
    """Run the curses front-end until 'q' is pressed or an interrupt hits.

    'c' cycles the cpu whose data is shown.  The terminal state, the
    mapping and the shared file are released on every exit path.
    """
    cpu = 0         # cpu of interest to display data for
    ncpu = 1        # number of cpu's on this platform
    slen = 0        # size of shared data structure, including padding

    scr = None
    shm = None
    shmf = open(SHM_FILE, "r+")
    try:
        # mmap the (the first chunk of the) file
        shm = mmap.mmap(shmf.fileno(), QOS_DATA_SIZE)

        # initialize curses
        scr = _c.initscr()
        _c.noecho()
        _c.cbreak()

        scr.keypad(1)
        scr.timeout(1000)
        [maxy, maxx] = scr.getmaxyx()

        # display in a loop
        while True:
            for cpuidx in range(0, ncpu):
                # offset in the mmap file is cpuidx * slen
                samples, doms, header = _read_cpu_record(shm, cpuidx * slen)
                oldncpu = ncpu
                (next_idx, ncpu, slen, freq) = header

                # xenbaked tells us how many cpu's it's got, so re-do
                # the mmap if necessary to get multiple cpu data
                if oldncpu != ncpu:
                    shm = _remap(shmf, shm, ncpu*slen)

                # if we've just calculated data for the cpu of interest, then
                # stop examining mmap data and start displaying stuff
                if cpuidx == cpu:
                    break

            startat, endat = _sample_window(next_idx)

            # get summary over desired interval
            [h1, l1, f1] = summarize(startat, endat, 10**9, samples)
            [h2, l2, f2] = summarize(startat, endat, 10 * 10**9, samples)

            _draw_screen(scr, cpu, maxx, h1, h2, l1, l2)

            # grab a char from tty input; exit if interrupt hit
            try:
                c = scr.getch()
            except (KeyboardInterrupt, _c.error):
                break

            # q = quit
            if c == ord('q'):
                break

            # c = cycle to a new cpu of interest
            if c == ord('c'):
                cpu = (cpu + 1) % ncpu

            scr.erase()
    finally:
        if scr is not None:
            _restore_terminal(scr)
        if shm is not None:
            shm.close()
        shmf.close()


# simple functions to allow initialization of log files without actually
# physically creating files that are never used; only on the first real
# write does the file get created
class Delayed(object):
    """Log file that is created on the first write(), not on construction.

    The original derived from the Python 2 ``file`` builtin without ever
    initializing it, so nothing usable was inherited; every operation goes
    through the file opened lazily in write().
    """

    def __init__(self, filename, mode):
        self.filename = filename
        self.saved_mode = mode
        self.delay_data = ""
        self.opened = 0

    def delayed_write(self, str):
        """Remember ``str``; it is written just before the first real write."""
        self.delay_data = str

    def write(self, str):
        """Write ``str``, opening the file and emitting the delayed data first if needed."""
        if not self.opened:
            self.file = open(self.filename, self.saved_mode)
            self.opened = 1
            self.file.write(self.delay_data)
        self.file.write(str)

    def flush(self):
        """Flush the file if it has been opened."""
        if self.opened:
            self.file.flush()

    def close(self):
        """Close the file if it has been opened."""
        if self.opened:
            self.file.close()


def writelog():
    """Write per-domain statistics to "<prefix>-dom<N>.log" files.

    Runs until options.duration seconds worth of intervals have been
    logged (forever if it is 0).  One pass is made per options.interval
    milliseconds; the original always slept one second, so the recorded
    "passed" column only matched real time for the default interval.
    Log files, the mapping and the shared file are closed on every exit
    path.
    """
    global options

    ncpu = 1        # number of cpu's
    slen = 0        # size of shared structure inc. padding

    shm = None
    outfiles = {}
    shmf = open(SHM_FILE, "r+")
    try:
        shm = mmap.mmap(shmf.fileno(), QOS_DATA_SIZE)

        interval = 0
        for dom in range(0, NDOMAINS):
            outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w')
            outfiles[dom].delayed_write(LOG_HEADER)

        while options.duration == 0 or interval < (options.duration * 1000):
            for cpuidx in range(0, ncpu):
                # offset needed in mmap file is cpuidx * slen
                samples, doms, header = _read_cpu_record(shm, cpuidx * slen)
                oldncpu = ncpu
                (next_idx, ncpu, slen, freq) = header

                if oldncpu != ncpu:
                    shm = _remap(shmf, shm, ncpu*slen)

                startat, endat = _sample_window(next_idx)

                [h1, l1, f1] = summarize(startat, endat, options.interval * 10**6, samples)
                for dom in range(0, NDOMAINS):
                    if h1[dom][0][1] > 0 or dom == NDOMAINS - 1:
                        outfiles[dom].write(LOG_FORMAT %
                                            (interval, cpuidx, dom,
                                             h1[dom][0][0], h1[dom][0][1], h1[dom][0][2],
                                             h1[dom][1],
                                             h1[dom][2][0], h1[dom][2][1], h1[dom][2][2],
                                             h1[dom][3][0], h1[dom][3][1], h1[dom][3][2],
                                             h1[dom][4],
                                             h1[dom][5][0], h1[dom][5][1]))
                        outfiles[dom].flush()

            interval += options.interval
            time.sleep(options.interval / 1000.0)
    finally:
        for dom in outfiles:
            outfiles[dom].close()
        if shm is not None:
            shm.close()
        shmf.close()


# start xenbaked
def start_xenbaked():
    """Kill any running xenbaked, start a new one in the background and wait a second."""
    global options

    os.system("killall -9 xenbaked")
    # assumes that xenbaked is in your path
    # mspersample is parsed by optparse as an int, so %d is shell-safe
    os.system("xenbaked --ms_per_sample=%d &" %
              options.mspersample)
    time.sleep(1)


# stop xenbaked
def stop_xenbaked():
    """Send SIGINT to xenbaked."""
    os.system("killall -s INT xenbaked")


def main():
    """Parse the command line, start xenbaked and run the chosen front-end.

    xenbaked is stopped again on every exit path, including errors in the
    live display.
    """
    global options
    global args

    parser = setup_cmdline_parser()
    (options, args) = parser.parse_args()

    start_xenbaked()
    try:
        if options.live:
            show_livestats()
        else:
            try:
                writelog()
            except KeyboardInterrupt:
                print('Quitting.')
            except Exception:
                # still quit quietly as before, but say what went wrong
                sys.stderr.write("xenmon: logging stopped: %s\n" % sys.exc_info()[1])
                print('Quitting.')
    finally:
        stop_xenbaked()


if __name__ == "__main__":
    main()