author     Keir Fraser <keir.fraser@citrix.com>    2010-01-20 20:33:35 +0000
committer  Keir Fraser <keir.fraser@citrix.com>    2010-01-20 20:33:35 +0000
commit     650afa3995c32c50f3272dbe4514781abed8bdc0 (patch)
tree       fe5662f5a55bdc654d48b93c20bbf1514174764b
parent     9e5e0eb8408c37b42411c6686c36eccd2426e7fd (diff)
xentrace: Per-cpu xentrace buffers
In the current xentrace configuration, xentrace buffers are all allocated in a single contiguous chunk, which is then divided among logical cpus, one buffer per cpu. The size of an allocatable chunk is fairly limited, in my experience about 128 pages (512KiB). As the number of logical cores increases, this means a much smaller maximum trace buffer per cpu; on my dual-socket quad-core Nehalem box with hyperthreading (16 logical cpus), that comes to 8 pages per logical cpu.

This patch addresses the issue by allocating the per-cpu buffers separately.

Signed-off-by: George Dunlap <dunlapg@umich.edu>
 tools/xentrace/xentrace.c   | 133
 xen/common/trace.c          | 142
 xen/include/public/sysctl.h |   2
 xen/include/public/trace.h  |  10
 4 files changed, 187 insertions(+), 100 deletions(-)
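
To make the sizing argument above concrete, here is a minimal, hypothetical user-space sketch (not part of the patch) comparing the two schemes. The 128-page maximum allocation and the 16 logical cpus are the figures quoted in the commit message, used here as assumptions:

/*
 * Hypothetical user-space sketch, not Xen code: compares the maximum
 * per-cpu trace buffer size under the old single-chunk scheme with the
 * new per-cpu scheme.  MAX_CHUNK_PAGES (128) and NCPUS (16) are the
 * figures quoted in the commit message, used here as assumptions.
 */
#include <stdio.h>

#define PAGE_SIZE        4096u
#define MAX_CHUNK_PAGES  128u   /* largest contiguous allocation observed */
#define NCPUS            16u    /* dual-socket quad-core Nehalem with HT */

int main(void)
{
    /* Old scheme: one contiguous chunk divided among all cpus. */
    unsigned int old_pages = MAX_CHUNK_PAGES / NCPUS;

    /* New scheme: each cpu gets its own allocation, up to the same limit. */
    unsigned int new_pages = MAX_CHUNK_PAGES;

    printf("old scheme: %u pages (%u KiB) per cpu\n",
           old_pages, old_pages * PAGE_SIZE / 1024);
    printf("new scheme: up to %u pages (%u KiB) per cpu\n",
           new_pages, new_pages * PAGE_SIZE / 1024);
    return 0;
}

Under the old scheme the order of the single allocation grows with the cpu count, so each cpu's share shrinks; allocating per cpu keeps every request at a fixed, small order.
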
diff --git a/tools/xentrace/xentrace.c b/tools/xentrace/xentrace.c
index 2b5647e6fb..b6da08e38c 100644
--- a/tools/xentrace/xentrace.c
+++ b/tools/xentrace/xentrace.c
@@ -61,6 +61,12 @@ typedef struct settings_st {
disable_tracing:1;
} settings_t;
+struct t_struct {
+ struct t_info *t_info; /* Structure with information about individual buffers */
+ struct t_buf **meta; /* Pointers to trace buffer metadata */
+ unsigned char **data; /* Pointers to trace buffer data areas */
+};
+
settings_t opts;
int interrupted = 0; /* gets set if we get a SIGHUP */
@@ -446,22 +452,61 @@ static void get_tbufs(unsigned long *mfn, unsigned long *size)
*
* Maps the Xen trace buffers into process address space.
*/
-static struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
- unsigned long size)
+static struct t_struct *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
+ unsigned long tinfo_size)
{
- struct t_buf *tbufs_mapped;
+ static struct t_struct tbufs = { 0 };
+ int i;
- tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN,
- size * num, PROT_READ | PROT_WRITE,
+ /* Map t_info metadata structure */
+ tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
+ tinfo_size, PROT_READ | PROT_WRITE,
tbufs_mfn);
- if ( tbufs_mapped == 0 )
+ if ( tbufs.t_info == 0 )
{
PERROR("Failed to mmap trace buffers");
exit(EXIT_FAILURE);
}
- return tbufs_mapped;
+ if ( tbufs.t_info->tbuf_size == 0 )
+ {
+ fprintf(stderr, "%s: tbuf_size 0!\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Map per-cpu buffers */
+ tbufs.meta = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
+ tbufs.data = (unsigned char **)calloc(num, sizeof(unsigned char *));
+ if ( tbufs.meta == NULL || tbufs.data == NULL )
+ {
+ PERROR( "Failed to allocate memory for buffer pointers\n");
+ exit(EXIT_FAILURE);
+ }
+
+ for(i=0; i<num; i++)
+ {
+
+ uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i];
+ int j;
+ xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
+
+ for ( j=0; j<tbufs.t_info->tbuf_size; j++)
+ pfn_list[j] = (xen_pfn_t)mfn_list[j];
+
+ tbufs.meta[i] = xc_map_foreign_batch(xc_handle, DOMID_XEN,
+ PROT_READ | PROT_WRITE,
+ pfn_list,
+ tbufs.t_info->tbuf_size);
+ if ( tbufs.meta[i] == NULL )
+ {
+ PERROR("Failed to map cpu buffer!");
+ exit(EXIT_FAILURE);
+ }
+ tbufs.data[i] = (unsigned char *)(tbufs.meta[i]+1);
+ }
+
+ return &tbufs;
}
/**
@@ -490,66 +535,6 @@ static void set_mask(uint32_t mask, int type)
}
/**
- * init_bufs_ptrs - initialises an array of pointers to the trace buffers
- * @bufs_mapped: the userspace address where the trace buffers are mapped
- * @num: number of trace buffers
- * @size: trace buffer size
- *
- * Initialises an array of pointers to individual trace buffers within the
- * mapped region containing all trace buffers.
- */
-static struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
- unsigned long size)
-{
- int i;
- struct t_buf **user_ptrs;
-
- user_ptrs = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
- if ( user_ptrs == NULL )
- {
- PERROR( "Failed to allocate memory for buffer pointers\n");
- exit(EXIT_FAILURE);
- }
-
- /* initialise pointers to the trace buffers - given the size of a trace
- * buffer and the value of bufs_maped, we can easily calculate these */
- for ( i = 0; i<num; i++ )
- user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);
-
- return user_ptrs;
-}
-
-
-/**
- * init_rec_ptrs - initialises data area pointers to locations in user space
- * @tbufs_mfn: base mfn of the trace buffer area
- * @tbufs_mapped: user virtual address of base of trace buffer area
- * @meta: array of user-space pointers to struct t_buf's of metadata
- * @num: number of trace buffers
- *
- * Initialises data area pointers to the locations that data areas have been
- * mapped in user space. Note that the trace buffer metadata contains machine
- * pointers - the array returned allows more convenient access to them.
- */
-static unsigned char **init_rec_ptrs(struct t_buf **meta, unsigned int num)
-{
- int i;
- unsigned char **data;
-
- data = calloc(num, sizeof(unsigned char *));
- if ( data == NULL )
- {
- PERROR("Failed to allocate memory for data pointers\n");
- exit(EXIT_FAILURE);
- }
-
- for ( i = 0; i < num; i++ )
- data[i] = (unsigned char *)(meta[i] + 1);
-
- return data;
-}
-
-/**
* get_num_cpus - get the number of logical CPUs
*/
static unsigned int get_num_cpus(void)
@@ -638,12 +623,13 @@ static int monitor_tbufs(void)
{
int i;
- void *tbufs_mapped; /* pointer to where the tbufs are mapped */
+ struct t_struct *tbufs; /* Pointer to hypervisor maps */
struct t_buf **meta; /* pointers to the trace buffer metadata */
unsigned char **data; /* pointers to the trace buffer data areas
* where they are mapped into user space. */
unsigned long tbufs_mfn; /* mfn of the tbufs */
unsigned int num; /* number of trace buffers / logical CPUS */
+ unsigned long tinfo_size; /* size of t_info metadata map */
unsigned long size; /* size of a single trace buffer */
unsigned long data_size;
@@ -655,14 +641,15 @@ static int monitor_tbufs(void)
num = get_num_cpus();
/* setup access to trace buffers */
- get_tbufs(&tbufs_mfn, &size);
- tbufs_mapped = map_tbufs(tbufs_mfn, num, size);
+ get_tbufs(&tbufs_mfn, &tinfo_size);
+ tbufs = map_tbufs(tbufs_mfn, num, tinfo_size);
+
+ size = tbufs->t_info->tbuf_size * PAGE_SIZE;
data_size = size - sizeof(struct t_buf);
- /* build arrays of convenience ptrs */
- meta = init_bufs_ptrs(tbufs_mapped, num, size);
- data = init_rec_ptrs(meta, num);
+ meta = tbufs->meta;
+ data = tbufs->data;
if ( opts.discard )
for ( i = 0; i < num; i++ )
diff --git a/xen/common/trace.c b/xen/common/trace.c
index 594a3d68a0..d8eaf669ff 100644
--- a/xen/common/trace.c
+++ b/xen/common/trace.c
@@ -46,8 +46,11 @@ static unsigned int opt_tbuf_size = 0;
integer_param("tbuf_size", opt_tbuf_size);
/* Pointers to the meta-data objects for all system trace buffers */
+static struct t_info *t_info;
+#define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */
static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
+static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
static int data_size;
/* High water mark for trace buffers; */
@@ -80,41 +83,104 @@ static u32 tb_event_mask = TRC_ALL;
*/
static int alloc_trace_bufs(void)
{
- int i, order;
+ int i, cpu, order;
unsigned long nr_pages;
- char *rawbuf;
- struct t_buf *buf;
+ /* MFN lists start after the fixed-size t_info header: tbuf_size plus one uint16_t offset per NR_CPUS, counted in uint32_t units */
+ uint32_t *t_info_mfn_list = (uint32_t *)t_info;
+ int offset = (NR_CPUS * 2 + 1 + 1) / 4;
if ( opt_tbuf_size == 0 )
return -EINVAL;
- nr_pages = num_online_cpus() * opt_tbuf_size;
- order = get_order_from_pages(nr_pages);
- data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
-
- if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
+ if ( !t_info )
{
- printk("Xen trace buffers: memory allocation failed\n");
- opt_tbuf_size = 0;
+ printk("%s: t_info not allocated, cannot allocate trace buffers!\n",
+ __func__);
return -EINVAL;
}
- /* Share pages so that xentrace can map them. */
- for ( i = 0; i < nr_pages; i++ )
- share_xen_page_with_privileged_guests(
- virt_to_page(rawbuf) + i, XENSHARE_writable);
+ t_info->tbuf_size = opt_tbuf_size;
+ printk("tbuf_size %d\n", t_info->tbuf_size);
+
+ nr_pages = opt_tbuf_size;
+ order = get_order_from_pages(nr_pages);
- for_each_online_cpu ( i )
+ /*
+ * First, allocate buffers for all of the cpus. If any
+ * fails, deallocate what you have so far and exit.
+ */
+ for_each_online_cpu(cpu)
{
- buf = per_cpu(t_bufs, i) = (struct t_buf *)
- &rawbuf[i*opt_tbuf_size*PAGE_SIZE];
+ int flags;
+ char *rawbuf;
+ struct t_buf *buf;
+
+ if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
+ {
+ printk("Xen trace buffers: memory allocation failed\n");
+ opt_tbuf_size = 0;
+ goto out_dealloc;
+ }
+
+ spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
+
+ buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
buf->cons = buf->prod = 0;
- per_cpu(t_data, i) = (unsigned char *)(buf + 1);
+ per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
+
+ spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
+
}
+ /*
+ * Now share the pages so that xentrace can map them, and record them in
+ * the global t_info structure.
+ */
+ for_each_online_cpu(cpu)
+ {
+ /* Share pages so that xentrace can map them. */
+ char *rawbuf;
+
+ if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+ {
+ struct page_info *p = virt_to_page(rawbuf);
+ uint32_t mfn = virt_to_mfn(rawbuf);
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ share_xen_page_with_privileged_guests(
+ p + i, XENSHARE_writable);
+
+ t_info_mfn_list[offset + i]=mfn + i;
+ }
+ /* Write list first, then write per-cpu offset. */
+ wmb();
+ t_info->mfn_offset[cpu]=offset;
+ printk("p%d mfn %"PRIx32" offset %d\n",
+ cpu, mfn, offset);
+ offset+=i;
+ }
+ }
+
+ data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
t_buf_highwater = data_size >> 1; /* 50% high water */
return 0;
+out_dealloc:
+ for_each_online_cpu(cpu)
+ {
+ int flags;
+ char * rawbuf;
+
+ spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
+ if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
+ {
+ ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
+ free_xenheap_pages(rawbuf, order);
+ }
+ spin_unlock_irqrestore(&per_cpu(t_lock, cpu), flags);
+ }
+ return -EINVAL;
}
@@ -181,6 +247,26 @@ int trace_will_trace_event(u32 event)
*/
void __init init_trace_bufs(void)
{
+ int i;
+ /* t_info size fixed at 2 pages for now. That should be big enough / small enough
+ * until it's worth making it dynamic. */
+ t_info = alloc_xenheap_pages(1, 0);
+
+ if ( t_info == NULL )
+ {
+ printk("Xen trace buffers: t_info allocation failed! Tracing disabled.\n");
+ return;
+ }
+
+ for(i = 0; i < NR_CPUS; i++)
+ spin_lock_init(&per_cpu(t_lock, i));
+
+ for(i=0; i<T_INFO_PAGES; i++)
+ share_xen_page_with_privileged_guests(
+ virt_to_page(t_info) + i, XENSHARE_writable);
+
+
+
if ( opt_tbuf_size == 0 )
{
printk("Xen trace buffers: disabled\n");
@@ -210,8 +296,8 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc)
{
case XEN_SYSCTL_TBUFOP_get_info:
tbc->evt_mask = tb_event_mask;
- tbc->buffer_mfn = opt_tbuf_size ? virt_to_mfn(per_cpu(t_bufs, 0)) : 0;
- tbc->size = opt_tbuf_size * PAGE_SIZE;
+ tbc->buffer_mfn = t_info ? virt_to_mfn(t_info) : 0;
+ tbc->size = T_INFO_PAGES;
break;
case XEN_SYSCTL_TBUFOP_set_cpu_mask:
xenctl_cpumap_to_cpumask(&tb_cpu_mask, &tbc->cpu_mask);
@@ -220,7 +306,7 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc)
tb_event_mask = tbc->evt_mask;
break;
case XEN_SYSCTL_TBUFOP_set_size:
- rc = !tb_init_done ? tb_set_size(tbc->size) : -EINVAL;
+ rc = tb_set_size(tbc->size);
break;
case XEN_SYSCTL_TBUFOP_enable:
/* Enable trace buffers. Check buffers are already allocated. */
@@ -428,7 +514,7 @@ void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
unsigned long flags, bytes_to_tail, bytes_to_wrap;
int rec_size, total_size;
int extra_word;
- int started_below_highwater;
+ int started_below_highwater = 0;
if( !tb_init_done )
return;
@@ -462,9 +548,12 @@ void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
/* Read tb_init_done /before/ t_bufs. */
rmb();
+ spin_lock_irqsave(&this_cpu(t_lock), flags);
+
buf = this_cpu(t_bufs);
- local_irq_save(flags);
+ if ( unlikely(!buf) )
+ goto unlock;
started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
@@ -511,8 +600,8 @@ void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
{
if ( ++this_cpu(lost_records) == 1 )
this_cpu(lost_records_first_tsc)=(u64)get_cycles();
- local_irq_restore(flags);
- return;
+ started_below_highwater = 0;
+ goto unlock;
}
/*
@@ -541,7 +630,8 @@ void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
/* Write the original record */
__insert_record(buf, event, extra, cycles, rec_size, extra_data);
- local_irq_restore(flags);
+unlock:
+ spin_unlock_irqrestore(&this_cpu(t_lock), flags);
/* Notify trace buffer consumer that we've crossed the high water mark. */
if ( started_below_highwater &&
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index f869f9c5af..bc02643ad7 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -75,7 +75,7 @@ struct xen_sysctl_tbuf_op {
uint32_t evt_mask;
/* OUT variables */
uint64_aligned_t buffer_mfn;
- uint32_t size;
+ uint32_t size; /* Also an IN variable! */
};
typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 6eb125a510..b6f112e201 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -195,6 +195,16 @@ struct t_buf {
/* Records follow immediately after the meta-data header. */
};
+/* Structure used to pass MFNs of the trace buffers back to trace consumers.
+ * Offset is an offset into the mapped structure where the mfn list will be held.
+ * MFNs will be at ((uint32_t *)(t_info)) + (t_info->mfn_offset[cpu]).
+ */
+struct t_info {
+ uint16_t tbuf_size; /* Size in pages of each trace buffer */
+ uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */
+ /* MFN lists immediately after the header */
+};
+
#endif /* __XEN_PUBLIC_TRACE_H__ */
/*
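
To make the offset arithmetic in the new t_info comment concrete, here is a minimal, self-contained consumer-side sketch. It builds a fake t_info area locally instead of mapping the real hypervisor pages (which xentrace does via xc_map_foreign_range()/xc_map_foreign_batch(), as in the hunk above); the page count, the MFN values, and the rounded-up header size are assumptions for illustration only.

/*
 * Hypothetical consumer-side sketch: recovering each cpu's MFN list from
 * a t_info area, using the same pointer arithmetic as xentrace.c.
 * The area, the MFN values and NCPUS are fabricated for illustration.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NCPUS 2

struct t_info {
    uint16_t tbuf_size;      /* size in pages of each per-cpu buffer */
    uint16_t mfn_offset[];   /* per-cpu offset into t_info, in uint32_t units */
};

int main(void)
{
    /* Stand-in for the mapped two-page t_info area. */
    uint32_t *area = calloc(2 * 4096 / sizeof(uint32_t), sizeof(uint32_t));
    struct t_info *ti = (struct t_info *)area;
    uint16_t offset;
    int cpu, j;

    ti->tbuf_size = 4;                               /* 4 pages per buffer */
    /* Header is tbuf_size plus NCPUS offsets, rounded up to uint32_t units. */
    offset = (2 + 2 * NCPUS + 3) / 4;

    /* Producer side (normally the hypervisor): fill in the MFN lists. */
    for ( cpu = 0; cpu < NCPUS; cpu++ )
    {
        uint32_t *mfn_list = area + offset;
        for ( j = 0; j < ti->tbuf_size; j++ )
            mfn_list[j] = 0x1000 * (cpu + 1) + j;    /* made-up MFNs */
        ti->mfn_offset[cpu] = offset;
        offset += ti->tbuf_size;
    }

    /* Consumer side: recover each cpu's MFN list exactly as xentrace does. */
    for ( cpu = 0; cpu < NCPUS; cpu++ )
    {
        uint32_t *mfn_list = (uint32_t *)ti + ti->mfn_offset[cpu];
        printf("cpu%d:", cpu);
        for ( j = 0; j < ti->tbuf_size; j++ )
            printf(" 0x%"PRIx32, mfn_list[j]);
        printf("\n");
    }

    free(area);
    return 0;
}
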