The new userland monitoring tool, XenMon.

Signed-off-by: Rob Gardner <rob.gardner@hp.com>
author: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> 2005-11-15 15:09:58 +0100
committer: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> 2005-11-15 15:09:58 +0100
commit: 0d5ddb880686349ab6c35a283033f2ecc987dd0f (patch)
tree: e2cd550b6670d35f247d2cf14a4b1019a6dbafe7 /tools/xenmon/xenbaked.c
parent: 0e3a022f8cfe382c960da254f632eaec4ec6dfcf (diff)
download: xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.tar.gz
xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.tar.bz2
xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.zip
1 files changed, 1029 insertions, 0 deletions
diff --git a/tools/xenmon/xenbaked.c b/tools/xenmon/xenbaked.c
new file mode 100644
index 0000000000..41b50a5daa
--- /dev/null
+++ b/tools/xenmon/xenbaked.c
@@ -0,0 +1,1029 @@
+/******************************************************************************
+ * tools/xenmon/xenbaked.c
+ *
+ * Tool for collecting raw trace buffer data from Xen and 
+ *  performing some accumulation operations and other processing
+ *  on it.
+ *
+ * Copyright (C) 2004 by Intel Research Cambridge
+ * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
+ *
+ * Authors: Diwaker Gupta, diwaker.gupta@hp.com
+ *          Rob Gardner, rob.gardner@hp.com
+ *          Lucy Cherkasova, lucy.cherkasova@hp.com
+ * Much code based on xentrace, authored by Mark Williamson, mark.a.williamson@intel.com
+ * Date:   November, 2005
+ * 
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <time.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <argp.h>
+#include <signal.h>
+#include <xenctrl.h>
+#include <xen/xen.h>
+#include <string.h>
+
+#include "xc_private.h"
+typedef struct { int counter; } atomic_t;
+#define _atomic_read(v)		((v).counter)
+
+#include <xen/trace.h>
+#include "xenbaked.h"
+
+extern FILE *stderr;
+
+/***** Compile time configuration of defaults ********************************/
+
+/* when we've got more records than this waiting, we log it to the output */
+#define NEW_DATA_THRESH 1
+
+/* sleep for this long (milliseconds) between checking the trace buffers */
+#define POLL_SLEEP_MILLIS 100
+
+/* Size of time period represented by each sample */
+#define MS_PER_SAMPLE 100
+
+/* CPU Frequency */
+#define MHZ
+#define CPU_FREQ 2660 MHZ
+
+/***** The code **************************************************************/
+
+typedef struct settings_st {
+    char *outfile;
+    struct timespec poll_sleep;
+    unsigned long new_data_thresh;
+    unsigned long ms_per_sample;
+    double cpu_freq;
+} settings_t;
+
+settings_t opts;
+
+int interrupted = 0; /* set by close_handler on SIGHUP, SIGTERM or SIGINT */
+int rec_count = 0;
+time_t start_time;
+int dom0_flips = 0;
+
+_new_qos_data *new_qos;
+_new_qos_data **cpu_qos_data;
+
+
+#define ID(X) (((X) > NDOMAINS-1) ? (NDOMAINS-1) : (X))  /* cap index at NDOMAINS-1 */
+
+// array of currently running domains, indexed by cpu
+int *running = NULL;
+
+// number of cpu's on this platform
+int NCPU = 0;
+
+
+void init_current(int ncpu)
+{
+  running = calloc(ncpu, sizeof(int));
+  NCPU = ncpu;
+  printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
+}
+
+// is_current - report whether the given domain is the one recorded
+// as running on the given cpu
+//   returns 1 if running[cpu] holds this domain, 0 otherwise
+int is_current(int domain, int cpu)
+{
+  int on_cpu = running[cpu];
+
+  return (on_cpu == domain) ? 1 : 0;
+}
+
+
+// return the domain that's currently running on the given cpu
+int current(int cpu)
+{
+  return running[cpu];
+}
+
+void set_current(int cpu, int domain)
+{
+  running[cpu] = domain;
+}
+
+
+
+void close_handler(int signal)
+{
+    interrupted = 1;
+}
+
+#if 0
+void dump_record(int cpu, struct t_rec *x)
+{
+    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n", 
+            cpu, x->cycles, x->event, x->data[0]);
+}
+#endif
+
+/**
+ * millis_to_timespec - convert a time in milliseconds to a struct timespec
+ * @millis:             time interval in milliseconds
+ *
+ * Returns the interval split into whole seconds and leftover nanoseconds.
+ */
+struct timespec millis_to_timespec(unsigned long millis)
+{
+    struct timespec spec;
+    spec.tv_sec = millis / 1000;
+    spec.tv_nsec = (millis % 1000) * 1000000; /* 1 ms == 1,000,000 ns */
+    return spec;
+}
+
+
+typedef struct 
+{
+    int event_count;
+    int event_id;
+    char *text;
+} stat_map_t;
+
+stat_map_t stat_map[] = {
+    { 0,       0, 	    "Other" },
+    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
+    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
+    { 0, TRC_SCHED_SLEEP, "Sleep" },
+    { 0, TRC_SCHED_WAKE,  "Wake" },
+    { 0, TRC_SCHED_BLOCK,  "Block" },
+    { 0, TRC_SCHED_SWITCH,  "Switch" },
+    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
+    { 0, TRC_SCHED_SWITCH_INFPREV,  "Switch Prev" },
+    { 0, TRC_SCHED_SWITCH_INFNEXT,  "Switch Next" },
+    { 0, TRC_MEM_PAGE_GRANT_MAP,  "Page Map" },
+    { 0, TRC_MEM_PAGE_GRANT_UNMAP,  "Page Unmap" },
+    { 0, TRC_MEM_PAGE_GRANT_TRANSFER,  "Page Transfer" },
+    { 0,      0, 		 0  }
+};
+
+
+void check_gotten_sum(void)
+{
+#if 0
+    uint64_t sum, ns;
+    extern uint64_t total_ns_gotten(uint64_t*);
+    double percent;
+    int i;
+
+    for (i=0; i<NCPU; i++) {
+      new_qos = cpu_qos_data[i];
+      ns = billion;
+      sum = total_ns_gotten(&ns);
+
+      printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
+	      i, sum, ns);
+      percent = (double) sum;
+      percent = (100.0*percent) / (double)ns;
+      printf(" ==> ns_gotten = %7.3f%%\n", percent);
+    }
+#endif
+}
+
+
+
+void dump_stats(void)
+{
+    /* Print per-event counts and overall record throughput to stdout. */
+    stat_map_t *smt = stat_map;
+    time_t end_time, run_time;
+    long per_sec;
+    time(&end_time);
+    run_time = end_time - start_time;
+    /* a run shorter than one second must not divide by zero */
+    per_sec = (run_time > 0) ? (long)(rec_count / run_time) : (long)rec_count;
+
+    printf("Event counts:\n");
+    while (smt->text != NULL) {
+        printf("%08d\t%s\n", smt->event_count, smt->text);
+        smt++;
+    }
+    printf("processed %d total records in %d seconds (%ld per second)\n",
+            rec_count, (int)run_time, per_sec);
+    check_gotten_sum();
+}
+
+/*
+ * log_event - bump the counter of the first stat_map entry whose id equals
+ * event_id; ids without an entry are counted under stat_map[0] ("Other").
+ */
+void log_event(int event_id)
+{
+    stat_map_t *hit = &stat_map[0];   /* fallback bucket */
+    stat_map_t *entry;
+    for (entry = stat_map; entry->text != NULL; entry++) {
+        if (entry->event_id == event_id) {
+            hit = entry;
+            break;
+        }
+    }
+    hit->event_count++;
+}
+
+
+
+/**
+ * get_tbufs - get pointer to and size of the trace buffers
+ * @mfn:  location to store mfn of the trace buffers to
+ * @size: location to store the size of a trace buffer to
+ *
+ * Gets the machine address of the trace pointer area and the size of the
+ * per CPU buffers.
+ */
+void get_tbufs(unsigned long *mfn, unsigned long *size)
+{
+    int ret;
+    dom0_op_t op;                        /* dom0 op we'll build             */
+    int xc_handle = xc_interface_open(); /* for accessing control interface */
+
+    op.cmd = DOM0_TBUFCONTROL;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+    op.u.tbufcontrol.op  = DOM0_TBUF_GET_INFO;
+
+    ret = do_dom0_op(xc_handle, &op);
+
+    xc_interface_close(xc_handle);
+
+    if ( ret != 0 )
+    {
+        PERROR("Failure to get trace buffer pointer from Xen");
+        exit(EXIT_FAILURE);
+    }
+
+    *mfn  = op.u.tbufcontrol.buffer_mfn;
+    *size = op.u.tbufcontrol.size;
+}
+
+/**
+ * map_tbufs - memory map Xen trace buffers into user space
+ * @tbufs_mfn: mfn of the trace buffers
+ * @num:       number of trace buffers to map
+ * @size:      size of each trace buffer
+ *
+ * Maps the Xen trace buffers into process address space.
+ */
+struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
+                        unsigned long size)
+{
+    int xc_handle;                  /* file descriptor for /proc/xen/privcmd */
+    struct t_buf *tbufs_mapped;
+
+    xc_handle = xc_interface_open();
+
+    if ( xc_handle < 0 ) 
+    {
+        PERROR("Open /proc/xen/privcmd when mapping trace buffers\n");
+        exit(EXIT_FAILURE);
+    }
+
+    tbufs_mapped = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */,
+                                        size * num, PROT_READ | PROT_WRITE,
+                                        tbufs_mfn);
+
+    xc_interface_close(xc_handle);
+
+    if ( tbufs_mapped == 0 ) 
+    {
+        PERROR("Failed to mmap trace buffers");
+        exit(EXIT_FAILURE);
+    }
+
+    return tbufs_mapped;
+}
+
+/**
+ * init_bufs_ptrs - initialises an array of pointers to the trace buffers
+ * @bufs_mapped:    the userspace address where the trace buffers are mapped
+ * @num:            number of trace buffers
+ * @size:           trace buffer size
+ *
+ * Initialises an array of pointers to individual trace buffers within the
+ * mapped region containing all trace buffers.
+ */
+struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
+        unsigned long size)
+{
+    int i;
+    struct t_buf **user_ptrs;
+
+    user_ptrs = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
+    if ( user_ptrs == NULL )
+    {
+        PERROR( "Failed to allocate memory for buffer pointers\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* initialise pointers to the trace buffers - given the size of a trace
+     * buffer and the value of bufs_mapped, we can easily calculate these */
+    for ( i = 0; i<num; i++ )
+        user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);
+
+    return user_ptrs;
+}
+
+
+/**
+ * init_rec_ptrs - initialises data area pointers to locations in user space
+ * @meta:          array of user-space pointers to struct t_buf's of metadata
+ * @num:           number of trace buffers
+ *
+ * Returns one pointer per buffer, addressing the data after its t_buf header.
+ *
+ * Initialises data area pointers to the locations that data areas have been
+ * mapped in user space.  Note that the trace buffer metadata contains machine
+ * pointers - the array returned allows more convenient access to them.
+ */
+struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
+{
+    int i;
+    struct t_rec **data;
+    
+    data = calloc(num, sizeof(struct t_rec *));
+    if ( data == NULL )
+    {
+        PERROR("Failed to allocate memory for data pointers\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for ( i = 0; i < num; i++ )
+        data[i] = (struct t_rec *)(meta[i] + 1);
+
+    return data;
+}
+
+
+
+/**
+ * get_num_cpus - get the number of logical CPUs
+ */
+unsigned int get_num_cpus()
+{
+    dom0_op_t op;
+    int xc_handle = xc_interface_open();
+    int ret;
+
+    op.cmd = DOM0_PHYSINFO;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+
+    ret = xc_dom0_op(xc_handle, &op);
+
+    if ( ret != 0 )
+    {
+        PERROR("Failure to get logical CPU count from Xen");
+        exit(EXIT_FAILURE);
+    }
+
+    xc_interface_close(xc_handle);
+    opts.cpu_freq = (double)op.u.physinfo.cpu_khz/1000.0;
+
+    return (op.u.physinfo.threads_per_core *
+            op.u.physinfo.cores_per_socket *
+            op.u.physinfo.sockets_per_node *
+            op.u.physinfo.nr_nodes);
+}
+
+
+/**
+ * monitor_tbufs - monitor the contents of tbufs
+ */
+int monitor_tbufs()
+{
+    int i;
+    extern void process_record(int, struct t_rec *);
+    extern void alloc_qos_data(int ncpu);
+
+    void *tbufs_mapped;          /* pointer to where the tbufs are mapped    */
+    struct t_buf **meta;         /* pointers to the trace buffer metadata    */
+    struct t_rec **data;         /* pointers to the trace buffer data areas
+                                  * where they are mapped into user space.   */
+    unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
+    unsigned int  num;           /* number of trace buffers / logical CPUS   */
+    unsigned long size;          /* size of a single trace buffer            */
+
+    int size_in_recs;
+
+    /* get number of logical CPUs (and therefore number of trace buffers) */
+    num = get_num_cpus();
+
+    init_current(num);
+    alloc_qos_data(num);
+
+    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);
+    
+    /* setup access to trace buffers */
+    get_tbufs(&tbufs_mfn, &size);
+
+    //    printf("from dom0op: %ld, t_buf: %d, t_rec: %d\n",
+    //            size, sizeof(struct t_buf), sizeof(struct t_rec));
+
+    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
+
+    size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);
+    //    fprintf(stderr, "size_in_recs = %d\n", size_in_recs);
+
+    /* build arrays of convenience ptrs */
+    meta  = init_bufs_ptrs (tbufs_mapped, num, size);
+    data  = init_rec_ptrs(meta, num);
+
+    /* now, scan buffers for events */
+    while ( !interrupted )
+    {
+        for ( i = 0; ( i < num ) && !interrupted; i++ )
+            while ( meta[i]->cons != meta[i]->prod )
+            {
+                rmb(); /* read prod, then read item. */
+                process_record(i, data[i] + meta[i]->cons % size_in_recs);
+                mb(); /* read item, then update cons. */
+                meta[i]->cons++;
+            }
+
+        nanosleep(&opts.poll_sleep, NULL);
+    }
+
+    /* cleanup */
+    free(meta);
+    free(data);
+    /* don't need to munmap - cleanup is automatic */
+
+    return 0;
+}
+
+
+/******************************************************************************
+ * Various declarations / definitions GNU argp needs to do its work
+ *****************************************************************************/
+
+
+/* command parser for GNU argp - see GNU docs for more info */
+error_t cmd_parser(int key, char *arg, struct argp_state *state)
+{
+    settings_t *setup = (settings_t *)state->input;
+
+    switch ( key )
+    {
+        case 't': /* set new records threshold for logging */
+            {
+                char *inval;
+                setup->new_data_thresh = strtol(arg, &inval, 0);
+                if ( inval == arg )
+                    argp_usage(state);
+            }
+            break;
+
+        case 's': /* set sleep time (given in milliseconds) */
+            {
+                char *inval;
+                setup->poll_sleep = millis_to_timespec(strtol(arg, &inval, 0));
+                if ( inval == arg )
+                    argp_usage(state);
+            }
+            break;
+
+        case 'm': /* set ms_per_sample */
+            {
+                char *inval;
+                setup->ms_per_sample = strtol(arg, &inval, 0);
+                if ( inval == arg )
+                    argp_usage(state);
+            }
+            break;
+
+        case ARGP_KEY_ARG:
+            {
+                if ( state->arg_num == 0 )
+                    setup->outfile = arg;
+                else
+                    argp_usage(state);
+            }
+            break;
+
+        default:
+            return ARGP_ERR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+#define SHARED_MEM_FILE "/tmp/xenq-shm"
+void alloc_qos_data(int ncpu)
+{
+    /* Extend SHARED_MEM_FILE by one page-rounded _new_qos_data per cpu, map
+     * each region MAP_SHARED and publish the mappings in cpu_qos_data[]. */
+    int i, n, pgsize, off=0, qos_fd;
+    char *dummy;
+    void advance_next_datapoint(uint64_t);
+    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
+    if (qos_fd < 0) {
+        PERROR(SHARED_MEM_FILE);
+        exit(2);
+    }
+    pgsize = getpagesize();
+    dummy = calloc(1, pgsize);   /* zero-filled page used to grow the file */
+    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));
+    if (dummy == NULL || (cpu_qos_data == NULL && ncpu > 0)) {
+        PERROR("calloc");
+        exit(2);
+    }
+    for (n=0; n<ncpu; n++) {
+      for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
+        if (write(qos_fd, dummy, pgsize) != pgsize) {
+          PERROR("write");
+          exit(2);
+        }
+      new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data),
+                                       PROT_READ|PROT_WRITE, MAP_SHARED, qos_fd, off);
+      off += i;
+      if (new_qos == MAP_FAILED) {   /* mmap signals failure with MAP_FAILED */
+        PERROR("mmap");
+        exit(3);
+      }
+      memset(new_qos, 0, sizeof(_new_qos_data));
+      new_qos->next_datapoint = 0;
+      advance_next_datapoint(0);
+      new_qos->structlen = i;
+      new_qos->ncpu = ncpu;
+      cpu_qos_data[n] = new_qos;
+    }
+    free(dummy);
+    new_qos = NULL;
+}
+
+
+#define xstr(x) str(x)
+#define str(x) #x
+
+const struct argp_option cmd_opts[] =
+{
+    { .name = "log-thresh", .key='t', .arg="l",
+        .doc =
+            "Set number, l, of new records required to trigger a write to output "
+            "(default " xstr(NEW_DATA_THRESH) ")." },
+
+    { .name = "poll-sleep", .key='s', .arg="p",
+        .doc = 
+            "Set sleep time, p, in milliseconds between polling the trace buffer "
+            "for new data (default " xstr(POLL_SLEEP_MILLIS) ")." },
+
+    { .name = "ms_per_sample", .key='m', .arg="MS",
+        .doc = 
+            "Specify the number of milliseconds per sample "
+            " (default " xstr(MS_PER_SAMPLE) ")." },
+
+    {0}
+};
+
+const struct argp parser_def =
+{
+    .options = cmd_opts,
+    .parser = cmd_parser,
+    //    .args_doc = "[output file]",
+    .doc =
+        "Tool to capture and partially process Xen trace buffer data"
+        "\v"
+        "This tool is used to capture trace buffer data from Xen.  The data is "
+        "saved in a shared memory structure to be further processed by xenmon."
+};
+
+
+const char *argp_program_version     = "xenbaked v1.3";
+const char *argp_program_bug_address = "<rob.gardner@hp.com>";
+
+
+int main(int argc, char **argv)
+{
+    int ret, cpu;
+    struct sigaction act;
+
+    /* defaults first; argp_parse() below may override them */
+    time(&start_time);
+    opts.outfile = 0;
+    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
+    opts.new_data_thresh = NEW_DATA_THRESH;
+    opts.ms_per_sample = MS_PER_SAMPLE;
+    opts.cpu_freq = CPU_FREQ;
+
+    argp_parse(&parser_def, argc, argv, 0, 0, &opts);
+    fprintf(stderr, "ms_per_sample = %lu\n", opts.ms_per_sample);
+
+    /* ensure that if we get a signal, we'll do cleanup, then exit */
+    act.sa_handler = close_handler;
+    act.sa_flags = 0;
+    sigemptyset(&act.sa_mask);
+    sigaction(SIGHUP,  &act, NULL);
+    sigaction(SIGTERM, &act, NULL);
+    sigaction(SIGINT,  &act, NULL);
+
+    ret = monitor_tbufs();
+    dump_stats();
+    /* new_qos is NULL once record processing stops: sync every cpu's map */
+    for (cpu = 0; cpu < NCPU; cpu++)
+        msync(cpu_qos_data[cpu], sizeof(_new_qos_data), MS_SYNC);
+    return ret;
+}
+
+int domain_runnable(int domid)
+{
+    return new_qos->domain_info[ID(domid)].runnable;
+}
+
+
+void update_blocked_time(int domid, uint64_t now)
+{
+    uint64_t t_blocked;
+    int id = ID(domid);
+
+    if (new_qos->domain_info[id].blocked_start_time != 0) {
+        if (now >= new_qos->domain_info[id].blocked_start_time)
+            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
+        else
+            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
+        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
+    }
+
+    if (domain_runnable(id))
+        new_qos->domain_info[id].blocked_start_time = 0;
+    else
+        new_qos->domain_info[id].blocked_start_time = now;
+}
+
+
+// advance to next datapoint for all domains
+void advance_next_datapoint(uint64_t now)
+{
+    int new, old, didx;
+
+    old = new_qos->next_datapoint;
+    new = QOS_INCR(old);
+    new_qos->next_datapoint = new;
+    //	memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
+    for (didx = 0; didx < NDOMAINS; didx++) {
+        new_qos->qdata[new].ns_gotten[didx] = 0;
+        new_qos->qdata[new].ns_allocated[didx] = 0;
+        new_qos->qdata[new].ns_waiting[didx] = 0;
+        new_qos->qdata[new].ns_blocked[didx] = 0;
+        new_qos->qdata[new].switchin_count[didx] = 0;
+        new_qos->qdata[new].io_count[didx] = 0;
+    }
+    new_qos->qdata[new].ns_passed = 0;
+    new_qos->qdata[new].lost_records = 0;
+    new_qos->qdata[new].flip_free_periods = 0;
+
+    new_qos->qdata[new].timestamp = now;
+}
+
+
+
+void qos_update_thread(int cpu, int domid, uint64_t now)
+{
+    int n, id;
+    uint64_t last_update_time, start;
+    int64_t time_since_update, run_time = 0;
+
+    // fold the time elapsed up to 'now' into domid's counters for this cpu
+    id = ID(domid);
+    n = new_qos->next_datapoint;
+    last_update_time = new_qos->domain_info[id].last_update_time;
+
+    time_since_update = now - last_update_time;
+
+    if (time_since_update < 0) {
+      // what happened here? either a timestamp wraparound, or more likely,
+      // a slight inconsistency among timestamps from various cpu's
+      if (-time_since_update < billion) {
+        // fairly small difference, let's just adjust 'now' to be a little
+        // beyond last_update_time
+        time_since_update = -time_since_update;
+      }
+      else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
+        // difference is huge, must be a wraparound
+        // last_update time should be "near" ~0ULL,
+        // and now should be "near" 0
+        time_since_update = now + (~0ULL - last_update_time);
+        printf("time wraparound\n");
+      }
+      else {
+        // none of the above, may be an out of order record
+        // no good solution, just ignore and update again later
+        return;
+      }
+    }
+
+    new_qos->domain_info[id].last_update_time = now;
+
+    if (new_qos->domain_info[id].runnable_at_last_update && is_current(id, cpu)) {
+        start = new_qos->domain_info[id].start_time;
+        if (start > now) {		// wrapped around
+            run_time = now + (~0ULL - start);
+            printf("warning: start > now\n");
+        }
+        else
+            run_time = now - start;
+        // running[] stores ID()-mapped slots, which is why the test above
+        // compares against id rather than the raw domid
+        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
+        new_qos->domain_info[id].start_time = now;
+        new_qos->domain_info[id].ns_since_boot += time_since_update;
+#if 1
+        new_qos->qdata[n].ns_gotten[id] += run_time;
+        if (domid == 0 && cpu == 1)
+          printf("adding run time for dom0 on cpu1\r\n");
+#endif
+    }
+
+    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);
+
+    update_blocked_time(domid, now);
+
+    // how much time passed since this datapoint was updated?
+    if (now >= new_qos->qdata[n].timestamp) {
+        // all is right with the world, time is increasing
+        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
+    }
+    else {
+        // now is behind the datapoint's timestamp (time wrapped around);
+        // nothing is added to ns_passed in that case, only the timestamp
+        // below is refreshed
+    }
+    new_qos->qdata[n].timestamp = now;
+}
+
+
+// called by dump routines to update all structures
+void qos_update_all(uint64_t now, int cpu)
+{
+    int i;
+
+    for (i=0; i<NDOMAINS; i++)
+        if (new_qos->domain_info[i].in_use)
+            qos_update_thread(cpu, i, now);
+}
+
+
+void qos_update_thread_stats(int cpu, int domid, uint64_t now)
+{
+    if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
+        qos_update_all(now, cpu);
+        advance_next_datapoint(now);
+        return;
+    }
+    qos_update_thread(cpu, domid, now);
+}
+
+
+void qos_init_domain(int cpu, int domid, uint64_t now)
+{
+    /* Claim and reset the bookkeeping slot for domid; a slot already in use
+     * is left untouched.  The name is formatted with a length bound. */
+    int i, id;
+    id = ID(domid);
+    if (new_qos->domain_info[id].in_use)
+        return;
+
+    memset(&new_qos->domain_info[id], 0, sizeof(_domain_info));
+    new_qos->domain_info[id].last_update_time = now;
+    new_qos->domain_info[id].runnable_start_time = 0; // invalidate
+    new_qos->domain_info[id].in_use = 1;
+    new_qos->domain_info[id].blocked_start_time = 0;
+    new_qos->domain_info[id].id = id;
+    if (domid == IDLE_DOMAIN_ID)
+        snprintf(new_qos->domain_info[id].name,
+                 sizeof(new_qos->domain_info[id].name), "Idle Task%d", cpu);
+    else
+        snprintf(new_qos->domain_info[id].name,
+                 sizeof(new_qos->domain_info[id].name), "Domain#%d", domid);
+
+    for (i=0; i<NSAMPLES; i++) {
+        new_qos->qdata[i].ns_gotten[id] = 0;
+        new_qos->qdata[i].ns_allocated[id] = 0;
+        new_qos->qdata[i].ns_waiting[id] = 0;
+        new_qos->qdata[i].ns_blocked[id] = 0;
+        new_qos->qdata[i].switchin_count[id] = 0;
+        new_qos->qdata[i].io_count[id] = 0;
+    }
+}
+
+
+// called when a new thread gets the cpu
+void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
+{
+    int id = ID(domid);
+
+    new_qos->domain_info[id].runnable = 1;
+    update_blocked_time(domid, now);
+    new_qos->domain_info[id].blocked_start_time = 0; // invalidate
+    new_qos->domain_info[id].runnable_start_time = 0; // invalidate
+    //runnable_start_time[id] = 0;
+
+    new_qos->domain_info[id].start_time = now;
+    new_qos->qdata[new_qos->next_datapoint].switchin_count[id]++;
+    new_qos->qdata[new_qos->next_datapoint].ns_allocated[id] += ns_alloc;
+    new_qos->qdata[new_qos->next_datapoint].ns_waiting[id] += ns_waited;
+    qos_update_thread_stats(cpu, domid, now);
+    set_current(cpu, id);
+
+    // count up page flips for dom0 execution
+    if (id == 0)
+      dom0_flips = 0;
+}
+
+// called when the current thread is taken off the cpu
+void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
+{
+    int id = ID(domid);
+    int n;
+
+    if (!is_current(id, cpu)) {
+        //    printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
+    }
+
+    if (gotten == 0) {
+        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
+    }
+
+    if (gotten < 100) {
+        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
+    }
+
+
+    n = new_qos->next_datapoint;
+#if 0
+    new_qos->qdata[n].ns_gotten[id] += gotten;
+    if (gotten > new_qos->qdata[n].ns_passed)
+      printf("inconsistency #257, diff = %lld\n",
+	    gotten - new_qos->qdata[n].ns_passed );
+#endif
+    new_qos->domain_info[id].ns_oncpu_since_boot += gotten;
+    new_qos->domain_info[id].runnable_start_time = now;
+    //  runnable_start_time[id] = now;
+    qos_update_thread_stats(cpu, id, now);
+
+    // process dom0 page flips
+    if (id == 0)
+      if (dom0_flips == 0)
+	new_qos->qdata[n].flip_free_periods++;
+}
+
+// called when domain is put to sleep, may also be called
+// when thread is already asleep
+void qos_state_sleeping(int cpu, int domid, uint64_t now) 
+{
+    int id = ID(domid);
+
+    if (!domain_runnable(id))	// double call?
+        return;
+
+    new_qos->domain_info[id].runnable = 0;
+    new_qos->domain_info[id].blocked_start_time = now;
+    new_qos->domain_info[id].runnable_start_time = 0; // invalidate
+    //  runnable_start_time[id] = 0; // invalidate
+    qos_update_thread_stats(cpu, domid, now);
+}
+
+
+
+void qos_kill_thread(int domid)
+{
+    new_qos->domain_info[ID(domid)].in_use = 0;
+}
+
+
+// called when thread becomes runnable, may also be called
+// when thread is already runnable
+void qos_state_runnable(int cpu, int domid, uint64_t now)
+{
+    int id = ID(domid);
+
+    if (domain_runnable(id))	// double call?
+        return;
+    new_qos->domain_info[id].runnable = 1;
+    update_blocked_time(domid, now);
+
+    qos_update_thread_stats(cpu, domid, now);
+
+    new_qos->domain_info[id].blocked_start_time = 0; /* invalidate */
+    new_qos->domain_info[id].runnable_start_time = now;
+    //  runnable_start_time[id] = now;
+}
+
+
+void qos_count_packets(domid_t domid, uint64_t now)
+{
+  int i, id = ID(domid);
+  _new_qos_data *cpu_data;
+
+  for (i=0; i<NCPU; i++) {
+    cpu_data = cpu_qos_data[i];
+    if (cpu_data->domain_info[id].in_use) {
+      cpu_data->qdata[cpu_data->next_datapoint].io_count[id]++;
+    }
+  }
+
+  new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
+  dom0_flips++;
+}
+
+
+// domain_ok - vet a domain id: 1 if usable (slot initialised on demand), else 0
+int domain_ok(int cpu, int domid, uint64_t now)
+{
+    int slot = (domid == IDLE_DOMAIN_ID) ? (NDOMAINS-1) : domid;
+    if (slot >= 0 && slot < NDOMAINS) {
+        if (!new_qos->domain_info[slot].in_use)
+            qos_init_domain(cpu, slot, now);
+        return 1;
+    }
+    printf("bad domain id: %d\n", slot);
+    return 0;
+}
+
+
+void process_record(int cpu, struct t_rec *r)
+{
+  uint64_t now;
+
+
+  new_qos = cpu_qos_data[cpu];
+
+  rec_count++;
+
+  now = ((double)r->cycles) / (opts.cpu_freq / 1000.0);
+
+  log_event(r->event);
+
+  switch (r->event) {
+
+  case TRC_SCHED_SWITCH_INFPREV:
+    // domain data[0] just switched out and received data[1] ns of cpu time
+    if (domain_ok(cpu, r->data[0], now))
+      qos_switch_out(cpu, r->data[0], now, r->data[1]);
+    //    printf("ns_gotten %ld\n", r->data[1]);
+    break;
+    
+  case TRC_SCHED_SWITCH_INFNEXT:
+    // domain data[0] just switched in and
+    // waited data[1] ns, and was allocated data[2] ns of cpu time
+    if (domain_ok(cpu, r->data[0], now))
+      qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]);
+    break;
+    
+  case TRC_SCHED_DOM_ADD:
+    if (domain_ok(cpu, r->data[0], now))
+      qos_init_domain(cpu, r->data[0],  now);
+    break;
+    
+  case TRC_SCHED_DOM_REM:
+    if (domain_ok(cpu, r->data[0], now))
+      qos_kill_thread(r->data[0]);
+    break;
+    
+  case TRC_SCHED_SLEEP:
+    if (domain_ok(cpu, r->data[0], now))
+      qos_state_sleeping(cpu, r->data[0], now);
+    break;
+    
+  case TRC_SCHED_WAKE:
+    if (domain_ok(cpu, r->data[0], now))
+      qos_state_runnable(cpu, r->data[0], now);
+    break;
+    
+  case TRC_SCHED_BLOCK:
+    if (domain_ok(cpu, r->data[0], now))
+      qos_state_sleeping(cpu, r->data[0], now);
+    break;
+    
+  case TRC_MEM_PAGE_GRANT_TRANSFER:
+    if (domain_ok(cpu, r->data[0], now))
+      qos_count_packets(r->data[0], now);
+    break;
+    
+  default:
+    break;
+  }
+  new_qos = NULL;
+}
+
+
+
author	kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>	2005-11-15 15:09:58 +0100
committer	kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>	2005-11-15 15:09:58 +0100
commit	0d5ddb880686349ab6c35a283033f2ecc987dd0f (patch)
tree	e2cd550b6670d35f247d2cf14a4b1019a6dbafe7 /tools/xenmon/xenbaked.c
parent	0e3a022f8cfe382c960da254f632eaec4ec6dfcf (diff)
download	xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.tar.gz xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.tar.bz2 xen-0d5ddb880686349ab6c35a283033f2ecc987dd0f.zip