diff options
Diffstat (limited to 'tools/xenmon')
-rw-r--r-- | tools/xenmon/xenbaked.c | 267 | ||||
-rw-r--r-- | tools/xenmon/xenmon.py | 52 |
2 files changed, 197 insertions, 122 deletions
diff --git a/tools/xenmon/xenbaked.c b/tools/xenmon/xenbaked.c index 945a5c47f9..3789f20c84 100644 --- a/tools/xenmon/xenbaked.c +++ b/tools/xenmon/xenbaked.c @@ -95,6 +95,8 @@ int dom0_flips = 0; _new_qos_data *new_qos; _new_qos_data **cpu_qos_data; +int global_cpu; +uint64_t global_now; // array of currently running domains, indexed by cpu int *running = NULL; @@ -678,7 +680,7 @@ const struct argp parser_def = }; -const char *argp_program_version = "xenbaked v1.3"; +const char *argp_program_version = "xenbaked v1.4"; const char *argp_program_bug_address = "<rob.gardner@hp.com>"; @@ -715,16 +717,117 @@ int main(int argc, char **argv) return ret; } +void qos_init_domain(int domid, int idx) +{ + int i; + + memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info)); + new_qos->domain_info[idx].last_update_time = global_now; + // runnable_start_time[idx] = 0; + new_qos->domain_info[idx].runnable_start_time = 0; // invalidate + new_qos->domain_info[idx].in_use = 1; + new_qos->domain_info[idx].blocked_start_time = 0; + new_qos->domain_info[idx].id = domid; + if (domid == IDLE_DOMAIN_ID) + sprintf(new_qos->domain_info[idx].name, "Idle Task%d", global_cpu); + else + sprintf(new_qos->domain_info[idx].name, "Domain#%d", domid); + + for (i=0; i<NSAMPLES; i++) { + new_qos->qdata[i].ns_gotten[idx] = 0; + new_qos->qdata[i].ns_allocated[idx] = 0; + new_qos->qdata[i].ns_waiting[idx] = 0; + new_qos->qdata[i].ns_blocked[idx] = 0; + new_qos->qdata[i].switchin_count[idx] = 0; + new_qos->qdata[i].io_count[idx] = 0; + } +} + +void global_init_domain(int domid, int idx) +{ + int cpu; + _new_qos_data *saved_qos; + + saved_qos = new_qos; + + for (cpu=0; cpu<NCPU; cpu++) { + new_qos = cpu_qos_data[cpu]; + qos_init_domain(domid, idx); + } + new_qos = saved_qos; +} + + +// give index of this domain in the qos data array +int indexof(int domid) +{ + int idx; + xc_dominfo_t dominfo[NDOMAINS]; + int xc_handle, ndomains; + extern void qos_kill_thread(int domid); + + if (domid < 0 || domid >= NDOMAINS) { // shouldn't happen + if (domid != IDLE_DOMAIN_ID) { + printf("bad domain id: %d\r\n", domid); + return 0; + } + } + + for (idx=0; idx<NDOMAINS; idx++) + if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use) + return idx; + + // not found, make a new entry + for (idx=0; idx<NDOMAINS; idx++) + if (new_qos->domain_info[idx].in_use == 0) { + global_init_domain(domid, idx); + return idx; + } + + // call domaininfo hypercall to try and garbage collect unused entries + xc_handle = xc_interface_open(); + ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo); + xc_interface_close(xc_handle); + + // for each domain in our data, look for it in the system dominfo structure + // and purge the domain's data from our state if it does not exist in the + // dominfo structure + for (idx=0; idx<NDOMAINS; idx++) { + int domid = new_qos->domain_info[idx].id; + int jdx; + + for (jdx=0; jdx<ndomains; jdx++) { + if (dominfo[jdx].domid == domid) + break; + } + if (jdx == ndomains) // we didn't find domid in the dominfo struct + if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which is not + // contained in dominfo + qos_kill_thread(domid); // purge our stale data + } + + // look again for a free slot + for (idx=0; idx<NDOMAINS; idx++) + if (new_qos->domain_info[idx].in_use == 0) { + global_init_domain(domid, idx); + return idx; + } + + // still no space found, so bail + fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n"); + exit(2); +} + int domain_runnable(int domid) { - return new_qos->domain_info[ID(domid)].runnable; + return new_qos->domain_info[indexof(domid)].runnable; } void update_blocked_time(int domid, uint64_t now) { uint64_t t_blocked; - int id = ID(domid); + int id = indexof(domid); if (new_qos->domain_info[id].blocked_start_time != 0) { if (now >= new_qos->domain_info[id].blocked_start_time) @@ -734,7 +837,7 @@ void update_blocked_time(int domid, uint64_t now) new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked; } - if (domain_runnable(id)) + if (domain_runnable(domid)) new_qos->domain_info[id].blocked_start_time = 0; else new_qos->domain_info[id].blocked_start_time = now; @@ -773,7 +876,7 @@ void qos_update_thread(int cpu, int domid, uint64_t now) uint64_t last_update_time, start; int64_t time_since_update, run_time = 0; - id = ID(domid); + id = indexof(domid); n = new_qos->next_datapoint; last_update_time = new_qos->domain_info[id].last_update_time; @@ -851,7 +954,7 @@ void qos_update_all(uint64_t now, int cpu) for (i=0; i<NDOMAINS; i++) if (new_qos->domain_info[i].in_use) - qos_update_thread(cpu, i, now); + qos_update_thread(cpu, new_qos->domain_info[i].id, now); } @@ -866,69 +969,37 @@ void qos_update_thread_stats(int cpu, int domid, uint64_t now) } -void qos_init_domain(int cpu, int domid, uint64_t now) -{ - int i, id; - - id = ID(domid); - - if (new_qos->domain_info[id].in_use) - return; - - - memset(&new_qos->domain_info[id], 0, sizeof(_domain_info)); - new_qos->domain_info[id].last_update_time = now; - // runnable_start_time[id] = 0; - new_qos->domain_info[id].runnable_start_time = 0; // invalidate - new_qos->domain_info[id].in_use = 1; - new_qos->domain_info[id].blocked_start_time = 0; - new_qos->domain_info[id].id = id; - if (domid == IDLE_DOMAIN_ID) - sprintf(new_qos->domain_info[id].name, "Idle Task%d", cpu); - else - sprintf(new_qos->domain_info[id].name, "Domain#%d", domid); - - for (i=0; i<NSAMPLES; i++) { - new_qos->qdata[i].ns_gotten[id] = 0; - new_qos->qdata[i].ns_allocated[id] = 0; - new_qos->qdata[i].ns_waiting[id] = 0; - new_qos->qdata[i].ns_blocked[id] = 0; - new_qos->qdata[i].switchin_count[id] = 0; - new_qos->qdata[i].io_count[id] = 0; - } -} - // called when a new thread gets the cpu void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited) { - int id = ID(domid); + int idx = indexof(domid); - new_qos->domain_info[id].runnable = 1; + new_qos->domain_info[idx].runnable = 1; update_blocked_time(domid, now); - new_qos->domain_info[id].blocked_start_time = 0; // invalidate - new_qos->domain_info[id].runnable_start_time = 0; // invalidate - //runnable_start_time[id] = 0; - - new_qos->domain_info[id].start_time = now; - new_qos->qdata[new_qos->next_datapoint].switchin_count[id]++; - new_qos->qdata[new_qos->next_datapoint].ns_allocated[id] += ns_alloc; - new_qos->qdata[new_qos->next_datapoint].ns_waiting[id] += ns_waited; + new_qos->domain_info[idx].blocked_start_time = 0; // invalidate + new_qos->domain_info[idx].runnable_start_time = 0; // invalidate + //runnable_start_time[idx] = 0; + + new_qos->domain_info[idx].start_time = now; + new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++; + new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc; + new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited; qos_update_thread_stats(cpu, domid, now); - set_current(cpu, id); + set_current(cpu, domid); // count up page flips for dom0 execution - if (id == 0) + if (domid == 0) dom0_flips = 0; } // called when the current thread is taken off the cpu void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten) { - int id = ID(domid); + int idx = indexof(domid); int n; - if (!is_current(id, cpu)) { + if (!is_current(domid, cpu)) { // printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten); } @@ -943,18 +1014,18 @@ void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten) n = new_qos->next_datapoint; #if 0 - new_qos->qdata[n].ns_gotten[id] += gotten; + new_qos->qdata[n].ns_gotten[idx] += gotten; if (gotten > new_qos->qdata[n].ns_passed) printf("inconsistency #257, diff = %lld\n", gotten - new_qos->qdata[n].ns_passed ); #endif - new_qos->domain_info[id].ns_oncpu_since_boot += gotten; - new_qos->domain_info[id].runnable_start_time = now; + new_qos->domain_info[idx].ns_oncpu_since_boot += gotten; + new_qos->domain_info[idx].runnable_start_time = now; // runnable_start_time[id] = now; - qos_update_thread_stats(cpu, id, now); + qos_update_thread_stats(cpu, domid, now); // process dom0 page flips - if (id == 0) + if (domid == 0) if (dom0_flips == 0) new_qos->qdata[n].flip_free_periods++; } @@ -963,23 +1034,30 @@ void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten) // when thread is already asleep void qos_state_sleeping(int cpu, int domid, uint64_t now) { - int id = ID(domid); + int idx; - if (!domain_runnable(id)) // double call? + if (!domain_runnable(domid)) // double call? return; - new_qos->domain_info[id].runnable = 0; - new_qos->domain_info[id].blocked_start_time = now; - new_qos->domain_info[id].runnable_start_time = 0; // invalidate - // runnable_start_time[id] = 0; // invalidate + idx = indexof(domid); + new_qos->domain_info[idx].runnable = 0; + new_qos->domain_info[idx].blocked_start_time = now; + new_qos->domain_info[idx].runnable_start_time = 0; // invalidate + // runnable_start_time[idx] = 0; // invalidate qos_update_thread_stats(cpu, domid, now); } +// domain died, presume it's dead on all cpu's, not just mostly dead void qos_kill_thread(int domid) { - new_qos->domain_info[ID(domid)].in_use = 0; + int cpu; + + for (cpu=0; cpu<NCPU; cpu++) { + cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0; + } + } @@ -987,30 +1065,33 @@ void qos_kill_thread(int domid) // when thread is already runnable void qos_state_runnable(int cpu, int domid, uint64_t now) { - int id = ID(domid); + int idx; + qos_update_thread_stats(cpu, domid, now); - if (domain_runnable(id)) // double call? + if (domain_runnable(domid)) // double call? return; - new_qos->domain_info[id].runnable = 1; + + idx = indexof(domid); + new_qos->domain_info[idx].runnable = 1; update_blocked_time(domid, now); - new_qos->domain_info[id].blocked_start_time = 0; /* invalidate */ - new_qos->domain_info[id].runnable_start_time = now; + new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */ + new_qos->domain_info[idx].runnable_start_time = now; // runnable_start_time[id] = now; } void qos_count_packets(domid_t domid, uint64_t now) { - int i, id = ID(domid); + int i, idx = indexof(domid); _new_qos_data *cpu_data; for (i=0; i<NCPU; i++) { cpu_data = cpu_qos_data[i]; - if (cpu_data->domain_info[id].in_use) { - cpu_data->qdata[cpu_data->next_datapoint].io_count[id]++; + if (cpu_data->domain_info[idx].in_use) { + cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++; } } @@ -1019,77 +1100,57 @@ void qos_count_packets(domid_t domid, uint64_t now) } -int domain_ok(int cpu, int domid, uint64_t now) -{ - if (domid == IDLE_DOMAIN_ID) - domid = NDOMAINS-1; - if (domid < 0 || domid >= NDOMAINS) { - printf("bad domain id: %d\r\n", domid); - return 0; - } - if (new_qos->domain_info[domid].in_use == 0) - qos_init_domain(cpu, domid, now); - return 1; -} - - void process_record(int cpu, struct t_rec *r) { uint64_t now; - new_qos = cpu_qos_data[cpu]; rec_count++; now = ((double)r->cycles) / (opts.cpu_freq / 1000.0); + global_now = now; + global_cpu = cpu; + log_event(r->event); switch (r->event) { case TRC_SCHED_SWITCH_INFPREV: // domain data[0] just switched out and received data[1] ns of cpu time - if (domain_ok(cpu, r->data[0], now)) - qos_switch_out(cpu, r->data[0], now, r->data[1]); + qos_switch_out(cpu, r->data[0], now, r->data[1]); // printf("ns_gotten %ld\n", r->data[1]); break; case TRC_SCHED_SWITCH_INFNEXT: // domain data[0] just switched in and // waited data[1] ns, and was allocated data[2] ns of cpu time - if (domain_ok(cpu, r->data[0], now)) - qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]); + qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]); break; case TRC_SCHED_DOM_ADD: - if (domain_ok(cpu, r->data[0], now)) - qos_init_domain(cpu, r->data[0], now); + (void) indexof(r->data[0]); break; case TRC_SCHED_DOM_REM: - if (domain_ok(cpu, r->data[0], now)) - qos_kill_thread(r->data[0]); + qos_kill_thread(r->data[0]); break; case TRC_SCHED_SLEEP: - if (domain_ok(cpu, r->data[0], now)) - qos_state_sleeping(cpu, r->data[0], now); + qos_state_sleeping(cpu, r->data[0], now); break; case TRC_SCHED_WAKE: - if (domain_ok(cpu, r->data[0], now)) - qos_state_runnable(cpu, r->data[0], now); + qos_state_runnable(cpu, r->data[0], now); break; case TRC_SCHED_BLOCK: - if (domain_ok(cpu, r->data[0], now)) - qos_state_sleeping(cpu, r->data[0], now); + qos_state_sleeping(cpu, r->data[0], now); break; case TRC_MEM_PAGE_GRANT_TRANSFER: - if (domain_ok(cpu, r->data[0], now)) - qos_count_packets(r->data[0], now); + qos_count_packets(r->data[0], now); break; default: diff --git a/tools/xenmon/xenmon.py b/tools/xenmon/xenmon.py index c8e2babe2f..1b74d19403 100644 --- a/tools/xenmon/xenmon.py +++ b/tools/xenmon/xenmon.py @@ -36,10 +36,10 @@ import sys # constants NSAMPLES = 100 NDOMAINS = 32 -IDLE_DOMAIN = 31 # idle domain's ID +IDLE_DOMAIN = -1 # idle domain's ID # the struct strings for qos_info -ST_DOM_INFO = "6Q4i32s" +ST_DOM_INFO = "6Q3i2H32s" ST_QDATA = "%dQ" % (6*NDOMAINS + 4) # size of mmaped file @@ -297,6 +297,7 @@ def show_livestats(cpu): samples = [] doms = [] dom_in_use = [] + domain_id = [] # read in data for i in range(0, NSAMPLES): @@ -311,9 +312,13 @@ def show_livestats(cpu): doms.append(dom) # (last_update_time, start_time, runnable_start_time, blocked_start_time, # ns_since_boot, ns_oncpu_since_boot, runnable_at_last_update, -# runnable, in_use, domid, name) = dom +# runnable, in_use, domid, junk, name) = dom # dom_in_use.append(in_use) dom_in_use.append(dom[8]) + domid = dom[9] + if domid == 32767 : + domid = IDLE_DOMAIN + domain_id.append(domid) idx += len # print "dom_in_use(cpu=%d): " % cpuidx, dom_in_use @@ -366,16 +371,16 @@ def show_livestats(cpu): if not dom_in_use[dom]: continue - if h1[dom][0][1] > 0 or dom == NDOMAINS - 1: + if h1[dom][0][1] > 0 or domain_id[dom] == IDLE_DOMAIN: # display gotten row += 1 col = 2 - display_domain_id(stdscr, row, col, dom) + display_domain_id(stdscr, row, col, domain_id[dom]) col += 4 display(stdscr, row, col, "%s" % time_scale(h2[dom][0][0])) col += 12 display(stdscr, row, col, "%3.2f%%" % h2[dom][0][1]) - if dom != NDOMAINS - 1: + if dom != IDLE_DOMAIN: cpu_10sec_usage += h2[dom][0][1] col += 12 display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][0][2])) @@ -388,14 +393,14 @@ def show_livestats(cpu): col += 18 display(stdscr, row, col, "Gotten") - if dom != NDOMAINS - 1: + if dom != IDLE_DOMAIN: cpu_1sec_usage = cpu_1sec_usage + h1[dom][0][1] # display allocated if options.allocated: row += 1 col = 2 - display_domain_id(stdscr, row, col, dom) + display_domain_id(stdscr, row, col, domain_id[dom]) col += 28 display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1])) col += 42 @@ -407,7 +412,7 @@ def show_livestats(cpu): if options.blocked: row += 1 col = 2 - display_domain_id(stdscr, row, col, dom) + display_domain_id(stdscr, row, col, domain_id[dom]) col += 4 display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0])) col += 12 @@ -427,7 +432,7 @@ def show_livestats(cpu): if options.waited: row += 1 col = 2 - display_domain_id(stdscr, row, col, dom) + display_domain_id(stdscr, row, col, domain_id[dom]) col += 4 display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0])) col += 12 @@ -447,7 +452,7 @@ def show_livestats(cpu): if options.excount: row += 1 col = 2 - display_domain_id(stdscr, row, col, dom) + display_domain_id(stdscr, row, col, domain_id[dom]) col += 28 display(stdscr, row, col, "%d/s" % h2[dom][4]) @@ -460,7 +465,7 @@ def show_livestats(cpu): if options.iocount: row += 1 col = 2 - display_domain_id(stdscr, row, col, dom) + display_domain_id(stdscr, row, col, domain_id[dom]) col += 4 display(stdscr, row, col, "%d/s" % h2[dom][5][0]) col += 24 @@ -544,6 +549,9 @@ class Delayed(file): self.file.write(self.delay_data) self.file.write(str) + def rename(self, name): + self.filename = name + def flush(self): if self.opened: self.file.flush() @@ -567,10 +575,7 @@ def writelog(): curr = last = time.time() outfiles = {} for dom in range(0, NDOMAINS): - if dom == IDLE_DOMAIN: - outfiles[dom] = Delayed("%s-idle.log" % options.prefix, 'w') - else: - outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w') + outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w') outfiles[dom].delayed_write("# passed cpu dom cpu(tot) cpu(%) cpu/ex allocated/ex blocked(tot) blocked(%) blocked/io waited(tot) waited(%) waited/ex ex/s io(tot) io/ex\n") while options.duration == 0 or interval < (options.duration * 1000): @@ -582,6 +587,7 @@ def writelog(): samples = [] doms = [] dom_in_use = [] + domain_id = [] for i in range(0, NSAMPLES): len = struct.calcsize(ST_QDATA) @@ -595,8 +601,16 @@ def writelog(): # doms.append(dom) # (last_update_time, start_time, runnable_start_time, blocked_start_time, # ns_since_boot, ns_oncpu_since_boot, runnable_at_last_update, -# runnable, in_use, domid, name) = dom +# runnable, in_use, domid, junk, name) = dom dom_in_use.append(dom[8]) + domid = dom[9] + if domid == 32767: + domid = IDLE_DOMAIN + domain_id.append(domid) + if domid == IDLE_DOMAIN: + outfiles[i].rename("%s-idle.log" % options.prefix) + else: + outfiles[i].rename("%s-dom%d.log" % (options.prefix, domid)) idx += len len = struct.calcsize("4i") @@ -617,9 +631,9 @@ def writelog(): for dom in range(0, NDOMAINS): if not dom_in_use[dom]: continue - if h1[dom][0][1] > 0 or dom == NDOMAINS - 1: + if h1[dom][0][1] > 0 or dom == IDLE_DOMAIN: outfiles[dom].write("%.3f %d %d %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n" % - (interval, cpuidx, dom, + (interval, cpuidx, domain_id[dom], h1[dom][0][0], h1[dom][0][1], h1[dom][0][2], h1[dom][1], h1[dom][2][0], h1[dom][2][1], h1[dom][2][2], |