author     kfraser@localhost.localdomain <kfraser@localhost.localdomain>  2006-07-10 16:09:20 +0100
committer  kfraser@localhost.localdomain <kfraser@localhost.localdomain>  2006-07-10 16:09:20 +0100
commit     16b671d72bd6836dd09c21feb238d9abe4034b2d (patch)
tree       bd05579838f0680347f28362c2b44c49bb892115 /tools/xenmon
parent     c11016e71cb6173a5666428fc4f7cc691db38571 (diff)
Domain ID/index mapping in xenmon.

This patch addresses the problem of xenbaked/xenmon not dealing with large domain IDs. Xen domain IDs increase monotonically as domains are created, and IDs are not (often) recycled. xenbaked was using domain IDs as indices into arrays of data, a scheme that blows up as soon as a domain ID exceeds the array size. Code has been changed in xenbaked and xenmon to isolate domain IDs from array indices, so all accesses are indirect. Users should not notice any difference in behavior.

From: Rob Gardner <rob.gardner@hp.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
Diffstat (limited to 'tools/xenmon')
-rw-r--r--  tools/xenmon/xenbaked.c  267
-rw-r--r--  tools/xenmon/xenmon.py    52
2 files changed, 197 insertions(+), 122 deletions(-)
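Before the diff itself, here is a minimal sketch of the indirection scheme the commit message describes: domain IDs are looked up in a small fixed table of slots rather than used as array indices directly, so an ID of any size maps to a valid index. The names (MAX_SLOTS, _slots, index_of) are illustrative and do not appear in the patch; the real indexof() below additionally garbage-collects dead domains via xc_domain_getinfo() before giving up.

    MAX_SLOTS = 32                      # analogous to NDOMAINS in xenbaked

    _slots = [None] * MAX_SLOTS         # slot -> domain ID, or None if free

    def index_of(domid):
        """Return the slot index for domid, claiming a free slot on first use."""
        # first pass: is this domain already mapped to a slot?
        for idx, slot in enumerate(_slots):
            if slot == domid:
                return idx
        # second pass: claim the first free slot
        for idx, slot in enumerate(_slots):
            if slot is None:
                _slots[idx] = domid
                return idx
        # xenbaked would try to reclaim slots of dead domains here;
        # the sketch just bails out, as indexof() does when that fails
        raise RuntimeError("out of slots, increase MAX_SLOTS")

    # Domain IDs far beyond MAX_SLOTS still map to valid indices:
    assert index_of(40000) == 0
    assert index_of(5) == 1
    assert index_of(40000) == 0         # repeat lookup yields the same slot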
diff --git a/tools/xenmon/xenbaked.c b/tools/xenmon/xenbaked.c
index 945a5c47f9..3789f20c84 100644
--- a/tools/xenmon/xenbaked.c
+++ b/tools/xenmon/xenbaked.c
@@ -95,6 +95,8 @@ int dom0_flips = 0;
_new_qos_data *new_qos;
_new_qos_data **cpu_qos_data;
+int global_cpu;
+uint64_t global_now;
// array of currently running domains, indexed by cpu
int *running = NULL;
@@ -678,7 +680,7 @@ const struct argp parser_def =
};
-const char *argp_program_version = "xenbaked v1.3";
+const char *argp_program_version = "xenbaked v1.4";
const char *argp_program_bug_address = "<rob.gardner@hp.com>";
@@ -715,16 +717,117 @@ int main(int argc, char **argv)
return ret;
}
+void qos_init_domain(int domid, int idx)
+{
+ int i;
+
+ memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
+ new_qos->domain_info[idx].last_update_time = global_now;
+ // runnable_start_time[idx] = 0;
+ new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
+ new_qos->domain_info[idx].in_use = 1;
+ new_qos->domain_info[idx].blocked_start_time = 0;
+ new_qos->domain_info[idx].id = domid;
+ if (domid == IDLE_DOMAIN_ID)
+ sprintf(new_qos->domain_info[idx].name, "Idle Task%d", global_cpu);
+ else
+ sprintf(new_qos->domain_info[idx].name, "Domain#%d", domid);
+
+ for (i=0; i<NSAMPLES; i++) {
+ new_qos->qdata[i].ns_gotten[idx] = 0;
+ new_qos->qdata[i].ns_allocated[idx] = 0;
+ new_qos->qdata[i].ns_waiting[idx] = 0;
+ new_qos->qdata[i].ns_blocked[idx] = 0;
+ new_qos->qdata[i].switchin_count[idx] = 0;
+ new_qos->qdata[i].io_count[idx] = 0;
+ }
+}
+
+void global_init_domain(int domid, int idx)
+{
+ int cpu;
+ _new_qos_data *saved_qos;
+
+ saved_qos = new_qos;
+
+ for (cpu=0; cpu<NCPU; cpu++) {
+ new_qos = cpu_qos_data[cpu];
+ qos_init_domain(domid, idx);
+ }
+ new_qos = saved_qos;
+}
+
+
+// give index of this domain in the qos data array
+int indexof(int domid)
+{
+ int idx;
+ xc_dominfo_t dominfo[NDOMAINS];
+ int xc_handle, ndomains;
+ extern void qos_kill_thread(int domid);
+
+ if (domid < 0 || domid >= NDOMAINS) { // shouldn't happen
+ if (domid != IDLE_DOMAIN_ID) {
+ printf("bad domain id: %d\r\n", domid);
+ return 0;
+ }
+ }
+
+ for (idx=0; idx<NDOMAINS; idx++)
+ if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
+ return idx;
+
+ // not found, make a new entry
+ for (idx=0; idx<NDOMAINS; idx++)
+ if (new_qos->domain_info[idx].in_use == 0) {
+ global_init_domain(domid, idx);
+ return idx;
+ }
+
+ // call domaininfo hypercall to try and garbage collect unused entries
+ xc_handle = xc_interface_open();
+ ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
+ xc_interface_close(xc_handle);
+
+ // for each domain in our data, look for it in the system dominfo structure
+ // and purge the domain's data from our state if it does not exist in the
+ // dominfo structure
+ for (idx=0; idx<NDOMAINS; idx++) {
+ int domid = new_qos->domain_info[idx].id;
+ int jdx;
+
+ for (jdx=0; jdx<ndomains; jdx++) {
+ if (dominfo[jdx].domid == domid)
+ break;
+ }
+ if (jdx == ndomains) // we didn't find domid in the dominfo struct
+ if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which is not
+ // contained in dominfo
+ qos_kill_thread(domid); // purge our stale data
+ }
+
+ // look again for a free slot
+ for (idx=0; idx<NDOMAINS; idx++)
+ if (new_qos->domain_info[idx].in_use == 0) {
+ global_init_domain(domid, idx);
+ return idx;
+ }
+
+ // still no space found, so bail
+ fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
+ exit(2);
+}
+
int domain_runnable(int domid)
{
- return new_qos->domain_info[ID(domid)].runnable;
+ return new_qos->domain_info[indexof(domid)].runnable;
}
void update_blocked_time(int domid, uint64_t now)
{
uint64_t t_blocked;
- int id = ID(domid);
+ int id = indexof(domid);
if (new_qos->domain_info[id].blocked_start_time != 0) {
if (now >= new_qos->domain_info[id].blocked_start_time)
@@ -734,7 +837,7 @@ void update_blocked_time(int domid, uint64_t now)
new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
}
- if (domain_runnable(id))
+ if (domain_runnable(domid))
new_qos->domain_info[id].blocked_start_time = 0;
else
new_qos->domain_info[id].blocked_start_time = now;
@@ -773,7 +876,7 @@ void qos_update_thread(int cpu, int domid, uint64_t now)
uint64_t last_update_time, start;
int64_t time_since_update, run_time = 0;
- id = ID(domid);
+ id = indexof(domid);
n = new_qos->next_datapoint;
last_update_time = new_qos->domain_info[id].last_update_time;
@@ -851,7 +954,7 @@ void qos_update_all(uint64_t now, int cpu)
for (i=0; i<NDOMAINS; i++)
if (new_qos->domain_info[i].in_use)
- qos_update_thread(cpu, i, now);
+ qos_update_thread(cpu, new_qos->domain_info[i].id, now);
}
@@ -866,69 +969,37 @@ void qos_update_thread_stats(int cpu, int domid, uint64_t now)
}
-void qos_init_domain(int cpu, int domid, uint64_t now)
-{
- int i, id;
-
- id = ID(domid);
-
- if (new_qos->domain_info[id].in_use)
- return;
-
-
- memset(&new_qos->domain_info[id], 0, sizeof(_domain_info));
- new_qos->domain_info[id].last_update_time = now;
- // runnable_start_time[id] = 0;
- new_qos->domain_info[id].runnable_start_time = 0; // invalidate
- new_qos->domain_info[id].in_use = 1;
- new_qos->domain_info[id].blocked_start_time = 0;
- new_qos->domain_info[id].id = id;
- if (domid == IDLE_DOMAIN_ID)
- sprintf(new_qos->domain_info[id].name, "Idle Task%d", cpu);
- else
- sprintf(new_qos->domain_info[id].name, "Domain#%d", domid);
-
- for (i=0; i<NSAMPLES; i++) {
- new_qos->qdata[i].ns_gotten[id] = 0;
- new_qos->qdata[i].ns_allocated[id] = 0;
- new_qos->qdata[i].ns_waiting[id] = 0;
- new_qos->qdata[i].ns_blocked[id] = 0;
- new_qos->qdata[i].switchin_count[id] = 0;
- new_qos->qdata[i].io_count[id] = 0;
- }
-}
-
// called when a new thread gets the cpu
void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
{
- int id = ID(domid);
+ int idx = indexof(domid);
- new_qos->domain_info[id].runnable = 1;
+ new_qos->domain_info[idx].runnable = 1;
update_blocked_time(domid, now);
- new_qos->domain_info[id].blocked_start_time = 0; // invalidate
- new_qos->domain_info[id].runnable_start_time = 0; // invalidate
- //runnable_start_time[id] = 0;
-
- new_qos->domain_info[id].start_time = now;
- new_qos->qdata[new_qos->next_datapoint].switchin_count[id]++;
- new_qos->qdata[new_qos->next_datapoint].ns_allocated[id] += ns_alloc;
- new_qos->qdata[new_qos->next_datapoint].ns_waiting[id] += ns_waited;
+ new_qos->domain_info[idx].blocked_start_time = 0; // invalidate
+ new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
+ //runnable_start_time[idx] = 0;
+
+ new_qos->domain_info[idx].start_time = now;
+ new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
+ new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
+ new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
qos_update_thread_stats(cpu, domid, now);
- set_current(cpu, id);
+ set_current(cpu, domid);
// count up page flips for dom0 execution
- if (id == 0)
+ if (domid == 0)
dom0_flips = 0;
}
// called when the current thread is taken off the cpu
void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
{
- int id = ID(domid);
+ int idx = indexof(domid);
int n;
- if (!is_current(id, cpu)) {
+ if (!is_current(domid, cpu)) {
// printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
}
@@ -943,18 +1014,18 @@ void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
n = new_qos->next_datapoint;
#if 0
- new_qos->qdata[n].ns_gotten[id] += gotten;
+ new_qos->qdata[n].ns_gotten[idx] += gotten;
if (gotten > new_qos->qdata[n].ns_passed)
printf("inconsistency #257, diff = %lld\n",
gotten - new_qos->qdata[n].ns_passed );
#endif
- new_qos->domain_info[id].ns_oncpu_since_boot += gotten;
- new_qos->domain_info[id].runnable_start_time = now;
+ new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
+ new_qos->domain_info[idx].runnable_start_time = now;
// runnable_start_time[id] = now;
- qos_update_thread_stats(cpu, id, now);
+ qos_update_thread_stats(cpu, domid, now);
// process dom0 page flips
- if (id == 0)
+ if (domid == 0)
if (dom0_flips == 0)
new_qos->qdata[n].flip_free_periods++;
}
@@ -963,23 +1034,30 @@ void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
// when thread is already asleep
void qos_state_sleeping(int cpu, int domid, uint64_t now)
{
- int id = ID(domid);
+ int idx;
- if (!domain_runnable(id)) // double call?
+ if (!domain_runnable(domid)) // double call?
return;
- new_qos->domain_info[id].runnable = 0;
- new_qos->domain_info[id].blocked_start_time = now;
- new_qos->domain_info[id].runnable_start_time = 0; // invalidate
- // runnable_start_time[id] = 0; // invalidate
+ idx = indexof(domid);
+ new_qos->domain_info[idx].runnable = 0;
+ new_qos->domain_info[idx].blocked_start_time = now;
+ new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
+ // runnable_start_time[idx] = 0; // invalidate
qos_update_thread_stats(cpu, domid, now);
}
+// domain died, presume it's dead on all cpu's, not just mostly dead
void qos_kill_thread(int domid)
{
- new_qos->domain_info[ID(domid)].in_use = 0;
+ int cpu;
+
+ for (cpu=0; cpu<NCPU; cpu++) {
+ cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
+ }
+
}
@@ -987,30 +1065,33 @@ void qos_kill_thread(int domid)
// when thread is already runnable
void qos_state_runnable(int cpu, int domid, uint64_t now)
{
- int id = ID(domid);
+ int idx;
+
qos_update_thread_stats(cpu, domid, now);
- if (domain_runnable(id)) // double call?
+ if (domain_runnable(domid)) // double call?
return;
- new_qos->domain_info[id].runnable = 1;
+
+ idx = indexof(domid);
+ new_qos->domain_info[idx].runnable = 1;
update_blocked_time(domid, now);
- new_qos->domain_info[id].blocked_start_time = 0; /* invalidate */
- new_qos->domain_info[id].runnable_start_time = now;
+ new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
+ new_qos->domain_info[idx].runnable_start_time = now;
// runnable_start_time[id] = now;
}
void qos_count_packets(domid_t domid, uint64_t now)
{
- int i, id = ID(domid);
+ int i, idx = indexof(domid);
_new_qos_data *cpu_data;
for (i=0; i<NCPU; i++) {
cpu_data = cpu_qos_data[i];
- if (cpu_data->domain_info[id].in_use) {
- cpu_data->qdata[cpu_data->next_datapoint].io_count[id]++;
+ if (cpu_data->domain_info[idx].in_use) {
+ cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++;
}
}
@@ -1019,77 +1100,57 @@ void qos_count_packets(domid_t domid, uint64_t now)
}
-int domain_ok(int cpu, int domid, uint64_t now)
-{
- if (domid == IDLE_DOMAIN_ID)
- domid = NDOMAINS-1;
- if (domid < 0 || domid >= NDOMAINS) {
- printf("bad domain id: %d\r\n", domid);
- return 0;
- }
- if (new_qos->domain_info[domid].in_use == 0)
- qos_init_domain(cpu, domid, now);
- return 1;
-}
-
-
void process_record(int cpu, struct t_rec *r)
{
uint64_t now;
-
new_qos = cpu_qos_data[cpu];
rec_count++;
now = ((double)r->cycles) / (opts.cpu_freq / 1000.0);
+ global_now = now;
+ global_cpu = cpu;
+
log_event(r->event);
switch (r->event) {
case TRC_SCHED_SWITCH_INFPREV:
// domain data[0] just switched out and received data[1] ns of cpu time
- if (domain_ok(cpu, r->data[0], now))
- qos_switch_out(cpu, r->data[0], now, r->data[1]);
+ qos_switch_out(cpu, r->data[0], now, r->data[1]);
// printf("ns_gotten %ld\n", r->data[1]);
break;
case TRC_SCHED_SWITCH_INFNEXT:
// domain data[0] just switched in and
// waited data[1] ns, and was allocated data[2] ns of cpu time
- if (domain_ok(cpu, r->data[0], now))
- qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]);
+ qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]);
break;
case TRC_SCHED_DOM_ADD:
- if (domain_ok(cpu, r->data[0], now))
- qos_init_domain(cpu, r->data[0], now);
+ (void) indexof(r->data[0]);
break;
case TRC_SCHED_DOM_REM:
- if (domain_ok(cpu, r->data[0], now))
- qos_kill_thread(r->data[0]);
+ qos_kill_thread(r->data[0]);
break;
case TRC_SCHED_SLEEP:
- if (domain_ok(cpu, r->data[0], now))
- qos_state_sleeping(cpu, r->data[0], now);
+ qos_state_sleeping(cpu, r->data[0], now);
break;
case TRC_SCHED_WAKE:
- if (domain_ok(cpu, r->data[0], now))
- qos_state_runnable(cpu, r->data[0], now);
+ qos_state_runnable(cpu, r->data[0], now);
break;
case TRC_SCHED_BLOCK:
- if (domain_ok(cpu, r->data[0], now))
- qos_state_sleeping(cpu, r->data[0], now);
+ qos_state_sleeping(cpu, r->data[0], now);
break;
case TRC_MEM_PAGE_GRANT_TRANSFER:
- if (domain_ok(cpu, r->data[0], now))
- qos_count_packets(r->data[0], now);
+ qos_count_packets(r->data[0], now);
break;
default:
diff --git a/tools/xenmon/xenmon.py b/tools/xenmon/xenmon.py
index c8e2babe2f..1b74d19403 100644
--- a/tools/xenmon/xenmon.py
+++ b/tools/xenmon/xenmon.py
@@ -36,10 +36,10 @@ import sys
# constants
NSAMPLES = 100
NDOMAINS = 32
-IDLE_DOMAIN = 31 # idle domain's ID
+IDLE_DOMAIN = -1 # idle domain's ID
# the struct strings for qos_info
-ST_DOM_INFO = "6Q4i32s"
+ST_DOM_INFO = "6Q3i2H32s"
ST_QDATA = "%dQ" % (6*NDOMAINS + 4)
# size of mmaped file
@@ -297,6 +297,7 @@ def show_livestats(cpu):
samples = []
doms = []
dom_in_use = []
+ domain_id = []
# read in data
for i in range(0, NSAMPLES):
@@ -311,9 +312,13 @@ def show_livestats(cpu):
doms.append(dom)
# (last_update_time, start_time, runnable_start_time, blocked_start_time,
# ns_since_boot, ns_oncpu_since_boot, runnable_at_last_update,
-# runnable, in_use, domid, name) = dom
+# runnable, in_use, domid, junk, name) = dom
# dom_in_use.append(in_use)
dom_in_use.append(dom[8])
+ domid = dom[9]
+ if domid == 32767 :
+ domid = IDLE_DOMAIN
+ domain_id.append(domid)
idx += len
# print "dom_in_use(cpu=%d): " % cpuidx, dom_in_use
@@ -366,16 +371,16 @@ def show_livestats(cpu):
if not dom_in_use[dom]:
continue
- if h1[dom][0][1] > 0 or dom == NDOMAINS - 1:
+ if h1[dom][0][1] > 0 or domain_id[dom] == IDLE_DOMAIN:
# display gotten
row += 1
col = 2
- display_domain_id(stdscr, row, col, dom)
+ display_domain_id(stdscr, row, col, domain_id[dom])
col += 4
display(stdscr, row, col, "%s" % time_scale(h2[dom][0][0]))
col += 12
display(stdscr, row, col, "%3.2f%%" % h2[dom][0][1])
- if dom != NDOMAINS - 1:
+ if dom != IDLE_DOMAIN:
cpu_10sec_usage += h2[dom][0][1]
col += 12
display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][0][2]))
@@ -388,14 +393,14 @@ def show_livestats(cpu):
col += 18
display(stdscr, row, col, "Gotten")
- if dom != NDOMAINS - 1:
+ if dom != IDLE_DOMAIN:
cpu_1sec_usage = cpu_1sec_usage + h1[dom][0][1]
# display allocated
if options.allocated:
row += 1
col = 2
- display_domain_id(stdscr, row, col, dom)
+ display_domain_id(stdscr, row, col, domain_id[dom])
col += 28
display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1]))
col += 42
@@ -407,7 +412,7 @@ def show_livestats(cpu):
if options.blocked:
row += 1
col = 2
- display_domain_id(stdscr, row, col, dom)
+ display_domain_id(stdscr, row, col, domain_id[dom])
col += 4
display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0]))
col += 12
@@ -427,7 +432,7 @@ def show_livestats(cpu):
if options.waited:
row += 1
col = 2
- display_domain_id(stdscr, row, col, dom)
+ display_domain_id(stdscr, row, col, domain_id[dom])
col += 4
display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0]))
col += 12
@@ -447,7 +452,7 @@ def show_livestats(cpu):
if options.excount:
row += 1
col = 2
- display_domain_id(stdscr, row, col, dom)
+ display_domain_id(stdscr, row, col, domain_id[dom])
col += 28
display(stdscr, row, col, "%d/s" % h2[dom][4])
@@ -460,7 +465,7 @@ def show_livestats(cpu):
if options.iocount:
row += 1
col = 2
- display_domain_id(stdscr, row, col, dom)
+ display_domain_id(stdscr, row, col, domain_id[dom])
col += 4
display(stdscr, row, col, "%d/s" % h2[dom][5][0])
col += 24
@@ -544,6 +549,9 @@ class Delayed(file):
self.file.write(self.delay_data)
self.file.write(str)
+ def rename(self, name):
+ self.filename = name
+
def flush(self):
if self.opened:
self.file.flush()
@@ -567,10 +575,7 @@ def writelog():
curr = last = time.time()
outfiles = {}
for dom in range(0, NDOMAINS):
- if dom == IDLE_DOMAIN:
- outfiles[dom] = Delayed("%s-idle.log" % options.prefix, 'w')
- else:
- outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w')
+ outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w')
outfiles[dom].delayed_write("# passed cpu dom cpu(tot) cpu(%) cpu/ex allocated/ex blocked(tot) blocked(%) blocked/io waited(tot) waited(%) waited/ex ex/s io(tot) io/ex\n")
while options.duration == 0 or interval < (options.duration * 1000):
@@ -582,6 +587,7 @@ def writelog():
samples = []
doms = []
dom_in_use = []
+ domain_id = []
for i in range(0, NSAMPLES):
len = struct.calcsize(ST_QDATA)
@@ -595,8 +601,16 @@ def writelog():
# doms.append(dom)
# (last_update_time, start_time, runnable_start_time, blocked_start_time,
# ns_since_boot, ns_oncpu_since_boot, runnable_at_last_update,
-# runnable, in_use, domid, name) = dom
+# runnable, in_use, domid, junk, name) = dom
dom_in_use.append(dom[8])
+ domid = dom[9]
+ if domid == 32767:
+ domid = IDLE_DOMAIN
+ domain_id.append(domid)
+ if domid == IDLE_DOMAIN:
+ outfiles[i].rename("%s-idle.log" % options.prefix)
+ else:
+ outfiles[i].rename("%s-dom%d.log" % (options.prefix, domid))
idx += len
len = struct.calcsize("4i")
@@ -617,9 +631,9 @@ def writelog():
for dom in range(0, NDOMAINS):
if not dom_in_use[dom]:
continue
- if h1[dom][0][1] > 0 or dom == NDOMAINS - 1:
+ if h1[dom][0][1] > 0 or dom == IDLE_DOMAIN:
outfiles[dom].write("%.3f %d %d %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n" %
- (interval, cpuidx, dom,
+ (interval, cpuidx, domain_id[dom],
h1[dom][0][0], h1[dom][0][1], h1[dom][0][2],
h1[dom][1],
h1[dom][2][0], h1[dom][2][1], h1[dom][2][2],
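A closing note on the ST_DOM_INFO change in the xenmon.py hunk above: the single 32-bit domid field becomes a 16-bit domid plus 16 bits of padding (the "junk" element in the unpacked tuple), so the per-domain record size is unchanged. A quick sanity check with Python's struct module, assuming the native alignment that xenmon's format strings rely on:

    import struct

    # Old and new per-domain record layouts from the hunk above; with
    # native alignment on a typical 64-bit build both come to 96 bytes,
    # so the layout of the mmap'd file is preserved.
    OLD_ST_DOM_INFO = "6Q4i32s"    # ..., runnable, in_use, domid (32-bit), name
    NEW_ST_DOM_INFO = "6Q3i2H32s"  # ..., runnable, in_use, domid (16-bit), junk, name

    assert struct.calcsize(OLD_ST_DOM_INFO) == struct.calcsize(NEW_ST_DOM_INFO)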