aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>2003-10-10 19:27:04 +0000
committeriap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>2003-10-10 19:27:04 +0000
commit353e94eeb8b8f9b8a661614c2fa926932c72083c (patch)
tree4fe39b6a1c4864dff1b5031b34b8ed6fc9e132fb
parentce8c8fcd8bac4e0866fea83ecfe7cb007ed77ca5 (diff)
downloadxen-353e94eeb8b8f9b8a661614c2fa926932c72083c.tar.gz
xen-353e94eeb8b8f9b8a661614c2fa926932c72083c.tar.bz2
xen-353e94eeb8b8f9b8a661614c2fa926932c72083c.zip
bitkeeper revision 1.498.1.2 (3f870808Dvsdzic2p7uQgZ0pTQzx7w)
add a handy tool for reading the P4 performance counters: xen_perfctr
-rw-r--r--.rootkeys2
-rw-r--r--tools/misc/Makefile4
-rw-r--r--tools/misc/p4perf.h559
-rw-r--r--tools/misc/xen_cpuperf.c265
4 files changed, 828 insertions, 2 deletions
diff --git a/.rootkeys b/.rootkeys
index f6203c2848..48b0bfefb6 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -177,9 +177,11 @@
3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile
3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
3f6dc142IHaf6XIcAYGmhV9nNSIHFQ tools/misc/miniterm/miniterm.c
+3f870808_8aFBAcZbWiWGdgrGQyIEw tools/misc/p4perf.h
3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone
3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
3f1668d4-FUY6Enc7MB3GcwUtfJ5HA tools/misc/xen-mkdevnodes
+3f870808zS6T6iFhqYPGelroZlVfGQ tools/misc/xen_cpuperf.c
3f13d81eQ9Vz-h-6RDGFkNR9CRP95g tools/misc/xen_nat_enable
3f13d81e6Z6806ihYYUw8GVKNkYnuw tools/misc/xen_nat_enable.README
3f1668d4F29Jsw0aC0bJEIkOBiagiQ tools/misc/xen_read_console.c
diff --git a/tools/misc/Makefile b/tools/misc/Makefile
index 62e1ab7fee..59899f5f53 100644
--- a/tools/misc/Makefile
+++ b/tools/misc/Makefile
@@ -1,13 +1,13 @@
CC = gcc
CFLAGS = -Wall -O3
-CFLAGS += -I../../xen/include -I../../xenolinux-sparse/include
+CFLAGS += -I../../xen/include -I../../xenolinux-sparse/include -I../internal
HDRS = $(wildcard *.h)
SRCS = $(wildcard *.c)
OBJS = $(patsubst %.c,%.o,$(SRCS))
-TARGETS = xen_read_console
+TARGETS = xen_read_console xen_cpuperf
INSTALL = $(TARGETS) xen-mkdevnodes xen-clone
diff --git a/tools/misc/p4perf.h b/tools/misc/p4perf.h
new file mode 100644
index 0000000000..4f681b636d
--- /dev/null
+++ b/tools/misc/p4perf.h
@@ -0,0 +1,559 @@
+/*
+ * For P6 use PERFCTR1 (0 used for APIC NMI watchdog). Must setup after
+ * APIC NMI watchdog setup. Note that if this previous setup doesn't happen
+ * we still must enable both counters.
+ *
+ * P4 Xeon with Hyperthreading has counters per physical package which can
+ * count events from either logical CPU. However, in many cases more than one
+ * ESCR and CCCR/counter can be used to count the same event. For instr or
+ * uops retired, use either CRU_ESCR0/IQ_CCCR0 or CRU_ESCR1/IQ_CCCR2.
+ *
+ * USE CONFIG_MPENTIUM4_HT for a P4 Xeon with hyperthreading.
+ *
+ * Note that the counters may be initialised on each logical processor
+ * which will cause each physical processor to be initialised twice. This
+ * should not cause a problem.
+ */
+
+#ifndef P4PERF_H
+#define P4PERF_H
+
+#ifdef __KERNEL__
+#include <asm/msr.h>
+#endif
+
+/*****************************************************************************
+ * Performance counter configuration. *
+ *****************************************************************************/
+
+#ifndef P6_EVNTSEL_OS
+# define P6_EVNTSEL_OS (1 << 17)
+# define P6_EVNTSEL_USR (1 << 16)
+# define P6_EVNTSEL_E (1 << 18)
+# define P6_EVNTSEL_EN (1 << 22)
+#endif
+#define P6_PERF_INST_RETIRED 0xc0
+#define P6_PERF_UOPS_RETIRED 0xc2
+
+/*
+ * ESCR bit fields. The function-like macros parenthesize their argument so
+ * that compound expressions such as P4_ESCR_EVNTSEL(a | b) shift the whole
+ * value rather than only its last operand.
+ */
+#define P4_ESCR_USR (1 << 2)
+#define P4_ESCR_OS (1 << 3)
+#define P4_ESCR_T0_USR (1 << 2) /* First logical CPU */
+#define P4_ESCR_T0_OS (1 << 3)
+#define P4_ESCR_T1_USR (1 << 0) /* Second logical CPU */
+#define P4_ESCR_T1_OS (1 << 1)
+#define P4_ESCR_TE (1 << 4)
+#define P4_ESCR_THREADS(t) (t)
+#define P4_ESCR_TV(tag) ((tag) << 5)
+#define P4_ESCR_EVNTSEL(e) ((e) << 25)
+#define P4_ESCR_EVNTMASK(e) ((e) << 9)
+
+#define P4_ESCR_EVNTSEL_FRONT_END 0x08
+#define P4_ESCR_EVNTSEL_EXECUTION 0x0c
+#define P4_ESCR_EVNTSEL_REPLAY 0x09
+#define P4_ESCR_EVNTSEL_INSTR_RETIRED 0x02
+#define P4_ESCR_EVNTSEL_UOPS_RETIRED 0x01
+#define P4_ESCR_EVNTSEL_UOP_TYPE 0x02
+#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x05
+//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x04
+
+#define P4_ESCR_EVNTMASK_FE_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_FE_BOGUS 0x02
+
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0 0x01
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1 0x02
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2 0x04
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3 0x08
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS0 0x10
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS1 0x20
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS2 0x40
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS3 0x80
+
+#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_REPLAY_BOGUS 0x02
+
+#define P4_ESCR_EVNTMASK_IRET_NB_NTAG 0x01
+#define P4_ESCR_EVNTMASK_IRET_NB_TAG 0x02
+#define P4_ESCR_EVNTMASK_IRET_B_NTAG 0x04
+#define P4_ESCR_EVNTMASK_IRET_B_TAG 0x08
+
+#define P4_ESCR_EVNTMASK_URET_NBOGUS 0x01
+#define P4_ESCR_EVNTMASK_URET_BOGUS 0x02
+
+#define P4_ESCR_EVNTMASK_UOP_LOADS 0x02
+#define P4_ESCR_EVNTMASK_UOP_STORES 0x04
+
+#define P4_ESCR_EVNTMASK_RMBRT_COND 0x02
+#define P4_ESCR_EVNTMASK_RMBRT_CALL 0x04
+#define P4_ESCR_EVNTMASK_RMBRT_RETURN 0x08
+#define P4_ESCR_EVNTMASK_RMBRT_INDIR 0x10
+
+#define P4_ESCR_EVNTMASK_RBRT_COND 0x02
+#define P4_ESCR_EVNTMASK_RBRT_CALL 0x04
+#define P4_ESCR_EVNTMASK_RBRT_RETURN 0x08
+#define P4_ESCR_EVNTMASK_RBRT_INDIR 0x10
+
+//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01 /* Non bogus, not tagged */
+//#define P4_ESCR_EVNTMASK_UOPS_RETIRED 0x01 /* Non bogus */
+
+/*
+ * CCCR bit fields. P4_CCCR_OVF uses an unsigned constant because shifting
+ * a signed 1 into bit 31 overflows int; the function-like macros
+ * parenthesize their argument so compound expressions shift as a whole.
+ */
+#define P4_CCCR_OVF (1U << 31)
+#define P4_CCCR_CASCADE (1 << 30)
+#define P4_CCCR_FORCE_OVF (1 << 25)
+#define P4_CCCR_EDGE (1 << 24)
+#define P4_CCCR_COMPLEMENT (1 << 19)
+#define P4_CCCR_COMPARE (1 << 18)
+#define P4_CCCR_THRESHOLD(t) ((t) << 20)
+#define P4_CCCR_ENABLE (1 << 12)
+#define P4_CCCR_ESCR(escr) ((escr) << 13)
+#define P4_CCCR_ACTIVE_THREAD(t) ((t) << 16) /* Set to 11 */
+#define P4_CCCR_OVF_PMI_T0 (1 << 26)
+#define P4_CCCR_OVF_PMI_T1 (1 << 27)
+#define P4_CCCR_RESERVED (3 << 16)
+#define P4_CCCR_OVF_PMI (1 << 26)
+
+// BPU
+#define MSR_P4_BPU_COUNTER0 0x300
+#define MSR_P4_BPU_COUNTER1 0x301
+#define MSR_P4_BPU_CCCR0 0x360
+#define MSR_P4_BPU_CCCR1 0x361
+
+#define MSR_P4_BPU_COUNTER2 0x302
+#define MSR_P4_BPU_COUNTER3 0x303
+#define MSR_P4_BPU_CCCR2 0x362
+#define MSR_P4_BPU_CCCR3 0x363
+
+#define MSR_P4_BSU_ESCR0 0x3a0
+#define MSR_P4_FSB_ESCR0 0x3a2
+#define MSR_P4_MOB_ESCR0 0x3aa
+#define MSR_P4_PMH_ESCR0 0x3ac
+#define MSR_P4_BPU_ESCR0 0x3b2
+#define MSR_P4_IS_ESCR0 0x3b4
+#define MSR_P4_ITLB_ESCR0 0x3b6
+#define MSR_P4_IX_ESCR0 0x3c8
+
+#define P4_BSU_ESCR0_NUMBER 7
+#define P4_FSB_ESCR0_NUMBER 6
+#define P4_MOB_ESCR0_NUMBER 2
+#define P4_PMH_ESCR0_NUMBER 4
+#define P4_BPU_ESCR0_NUMBER 0
+#define P4_IS_ESCR0_NUMBER 1
+#define P4_ITLB_ESCR0_NUMBER 3
+#define P4_IX_ESCR0_NUMBER 5
+
+#define MSR_P4_BSU_ESCR1 0x3a1
+#define MSR_P4_FSB_ESCR1 0x3a3
+#define MSR_P4_MOB_ESCR1 0x3ab
+#define MSR_P4_PMH_ESCR1 0x3ad
+#define MSR_P4_BPU_ESCR1 0x3b3
+#define MSR_P4_IS_ESCR1 0x3b5
+#define MSR_P4_ITLB_ESCR1 0x3b7
+#define MSR_P4_IX_ESCR1 0x3c9
+
+#define P4_BSU_ESCR1_NUMBER 7
+#define P4_FSB_ESCR1_NUMBER 6
+#define P4_MOB_ESCR1_NUMBER 2
+#define P4_PMH_ESCR1_NUMBER 4
+#define P4_BPU_ESCR1_NUMBER 0
+#define P4_IS_ESCR1_NUMBER 1
+#define P4_ITLB_ESCR1_NUMBER 3
+#define P4_IX_ESCR1_NUMBER 5
+
+// MS
+#define MSR_P4_MS_COUNTER0 0x304
+#define MSR_P4_MS_COUNTER1 0x305
+#define MSR_P4_MS_CCCR0 0x364
+#define MSR_P4_MS_CCCR1 0x365
+
+#define MSR_P4_MS_COUNTER2 0x306
+#define MSR_P4_MS_COUNTER3 0x307
+#define MSR_P4_MS_CCCR2 0x366
+#define MSR_P4_MS_CCCR3 0x367
+
+#define MSR_P4_MS_ESCR0 0x3c0
+#define MSR_P4_TBPU_ESCR0 0x3c2
+#define MSR_P4_TC_ESCR0 0x3c4
+
+#define P4_MS_ESCR0_NUMBER 0
+#define P4_TBPU_ESCR0_NUMBER 2
+#define P4_TC_ESCR0_NUMBER 1
+
+#define MSR_P4_MS_ESCR1 0x3c1
+#define MSR_P4_TBPU_ESCR1 0x3c3
+#define MSR_P4_TC_ESCR1 0x3c5
+
+#define P4_MS_ESCR1_NUMBER 0
+#define P4_TBPU_ESCR1_NUMBER 2
+#define P4_TC_ESCR1_NUMBER 1
+
+// FLAME
+#define MSR_P4_FLAME_COUNTER0 0x308
+#define MSR_P4_FLAME_COUNTER1 0x309
+#define MSR_P4_FLAME_CCCR0 0x368
+#define MSR_P4_FLAME_CCCR1 0x369
+
+#define MSR_P4_FLAME_COUNTER2 0x30a
+#define MSR_P4_FLAME_COUNTER3 0x30b
+#define MSR_P4_FLAME_CCCR2 0x36a
+#define MSR_P4_FLAME_CCCR3 0x36b
+
+#define MSR_P4_FIRM_ESCR0 0x3a4
+#define MSR_P4_FLAME_ESCR0 0x3a6
+#define MSR_P4_DAC_ESCR0 0x3a8
+#define MSR_P4_SAAT_ESCR0 0x3ae
+#define MSR_P4_U2L_ESCR0 0x3b0
+
+#define P4_FIRM_ESCR0_NUMBER 1
+#define P4_FLAME_ESCR0_NUMBER 0
+#define P4_DAC_ESCR0_NUMBER 5
+#define P4_SAAT_ESCR0_NUMBER 2
+#define P4_U2L_ESCR0_NUMBER 3
+
+#define MSR_P4_FIRM_ESCR1 0x3a5
+#define MSR_P4_FLAME_ESCR1 0x3a7
+#define MSR_P4_DAC_ESCR1 0x3a9
+#define MSR_P4_SAAT_ESCR1 0x3af
+#define MSR_P4_U2L_ESCR1 0x3b1
+
+#define P4_FIRM_ESCR1_NUMBER 1
+#define P4_FLAME_ESCR1_NUMBER 0
+#define P4_DAC_ESCR1_NUMBER 5
+#define P4_SAAT_ESCR1_NUMBER 2
+#define P4_U2L_ESCR1_NUMBER 3
+
+// IQ
+#define MSR_P4_IQ_COUNTER0 0x30c
+#define MSR_P4_IQ_COUNTER1 0x30d
+#define MSR_P4_IQ_CCCR0 0x36c
+#define MSR_P4_IQ_CCCR1 0x36d
+
+#define MSR_P4_IQ_COUNTER2 0x30e
+#define MSR_P4_IQ_COUNTER3 0x30f
+#define MSR_P4_IQ_CCCR2 0x36e
+#define MSR_P4_IQ_CCCR3 0x36f
+
+#define MSR_P4_IQ_COUNTER4 0x310
+#define MSR_P4_IQ_COUNTER5 0x311
+#define MSR_P4_IQ_CCCR4 0x370
+#define MSR_P4_IQ_CCCR5 0x371
+
+#define MSR_P4_CRU_ESCR0 0x3b8
+#define MSR_P4_CRU_ESCR2 0x3cc
+#define MSR_P4_CRU_ESCR4 0x3e0
+#define MSR_P4_IQ_ESCR0 0x3ba
+#define MSR_P4_RAT_ESCR0 0x3bc
+#define MSR_P4_SSU_ESCR0 0x3be
+#define MSR_P4_ALF_ESCR0 0x3ca
+
+#define P4_CRU_ESCR0_NUMBER 4
+#define P4_CRU_ESCR2_NUMBER 5
+#define P4_CRU_ESCR4_NUMBER 6
+#define P4_IQ_ESCR0_NUMBER 0
+#define P4_RAT_ESCR0_NUMBER 2
+#define P4_SSU_ESCR0_NUMBER 3
+#define P4_ALF_ESCR0_NUMBER 1
+
+#define MSR_P4_CRU_ESCR1 0x3b9
+#define MSR_P4_CRU_ESCR3 0x3cd
+#define MSR_P4_CRU_ESCR5 0x3e1
+#define MSR_P4_IQ_ESCR1 0x3bb
+#define MSR_P4_RAT_ESCR1 0x3bd
+#define MSR_P4_ALF_ESCR1 0x3cb
+
+#define P4_CRU_ESCR1_NUMBER 4
+#define P4_CRU_ESCR3_NUMBER 5
+#define P4_CRU_ESCR5_NUMBER 6
+#define P4_IQ_ESCR1_NUMBER 0
+#define P4_RAT_ESCR1_NUMBER 2
+#define P4_ALF_ESCR1_NUMBER 1
+
+#define P4_BPU_COUNTER0_NUMBER 0
+#define P4_BPU_COUNTER1_NUMBER 1
+#define P4_BPU_COUNTER2_NUMBER 2
+#define P4_BPU_COUNTER3_NUMBER 3
+
+#define P4_MS_COUNTER0_NUMBER 4
+#define P4_MS_COUNTER1_NUMBER 5
+#define P4_MS_COUNTER2_NUMBER 6
+#define P4_MS_COUNTER3_NUMBER 7
+
+#define P4_FLAME_COUNTER0_NUMBER 8
+#define P4_FLAME_COUNTER1_NUMBER 9
+#define P4_FLAME_COUNTER2_NUMBER 10
+#define P4_FLAME_COUNTER3_NUMBER 11
+
+#define P4_IQ_COUNTER0_NUMBER 12
+#define P4_IQ_COUNTER1_NUMBER 13
+#define P4_IQ_COUNTER2_NUMBER 14
+#define P4_IQ_COUNTER3_NUMBER 15
+#define P4_IQ_COUNTER4_NUMBER 16
+#define P4_IQ_COUNTER5_NUMBER 17
+
+/* PEBS
+ */
+#define MSR_P4_PEBS_ENABLE 0x3F1
+#define MSR_P4_PEBS_MATRIX_VERT 0x3F2
+
+#define P4_PEBS_ENABLE_MY_THR (1 << 25)
+#define P4_PEBS_ENABLE_OTH_THR (1 << 26)
+#define P4_PEBS_ENABLE (1 << 24)
+#define P4_PEBS_BIT0 (1 << 0)
+#define P4_PEBS_BIT1 (1 << 1)
+#define P4_PEBS_BIT2 (1 << 2)
+
+#define P4_PEBS_MATRIX_VERT_BIT0 (1 << 0)
+#define P4_PEBS_MATRIX_VERT_BIT1 (1 << 1)
+#define P4_PEBS_MATRIX_VERT_BIT2 (1 << 2)
+
+/* Replay tagging.
+ *
+ * Each tagging mode has a ..._PEBS_... and a matching ..._VERT_... bit
+ * pattern built from the bit definitions above.
+ */
+#define P4_REPLAY_TAGGING_PEBS_L1LMR P4_PEBS_BIT0
+#define P4_REPLAY_TAGGING_PEBS_L2LMR P4_PEBS_BIT1
+#define P4_REPLAY_TAGGING_PEBS_DTLMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTSMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTAMR P4_PEBS_BIT2
+
+#define P4_REPLAY_TAGGING_VERT_L1LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_L2LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTLMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTSMR P4_PEBS_MATRIX_VERT_BIT1
+/* Parenthesized: a bare "A | B" expansion binds wrongly next to &, == or *. */
+#define P4_REPLAY_TAGGING_VERT_DTAMR \
+    (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1)
+
+
+
+
+/*****************************************************************************
+ * *
+ *****************************************************************************/
+
+// x87_FP_uop
+#define EVENT_SEL_x87_FP_uop 0x04
+#define EVENT_MASK_x87_FP_uop_ALL (1 << 15)
+
+// execution event (at retirement)
+#define EVENT_SEL_execution_event 0x0C
+
+// scalar_SP_uop
+#define EVENT_SEL_scalar_SP_uop 0x0a
+#define EVENT_MASK_scalar_SP_uop_ALL (1 << 15)
+
+// scalar_DP_uop
+#define EVENT_SEL_scalar_DP_uop 0x0e
+#define EVENT_MASK_scalar_DP_uop_ALL (1 << 15)
+
+// Instruction retired
+#define EVENT_SEL_instr_retired 0x02
+#define EVENT_MASK_instr_retired_ALL 0x0f
+
+// uOps retired
+#define EVENT_SEL_uops_retired 0x01
+#define EVENT_MASK_uops_retired_ALL 0x03
+
+// L1 misses retired
+#define EVENT_SEL_replay_event 0x09
+#define EVENT_MASK_replay_event_ALL 0x03
+
+// Trace cache
+#define EVENT_SEL_BPU_fetch_request 0x03
+#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
+
+// Bus activity
+#define EVENT_SEL_FSB_data_activity 0x17
+#define EVENT_MASK_FSB_data_activity_DRDY_DRV 0x01
+#define EVENT_MASK_FSB_data_activity_DRDY_OWN 0x02
+#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER 0x04
+#define EVENT_MASK_FSB_data_activity_DBSY_DRV 0x08
+#define EVENT_MASK_FSB_data_activity_DBSY_OWN 0x10
+#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER 0x20
+
+// Cache L2
+#define EVENT_SEL_BSQ_cache_reference 0x0c
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
+#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
+
+/*****************************************************************************
+ * *
+ *****************************************************************************/
+
+
+/* The following turn configuration macros into 1/0 to allow code to be
+ * selected using if(MPENTIUM4_HT) rather than #ifdef (to avoid stale code).
+ * We rely on the compiler to optimise out unreachable code.
+ */
+#ifdef CONFIG_MPENTIUM4_HT
+# define MPENTIUM4_HT 1
+#else
+# define MPENTIUM4_HT 0
+#endif
+
+#ifdef CONFIG_MPENTIUMIII
+# define MPENTIUMIII 1
+#else
+# define MPENTIUMIII 0
+#endif
+
+#ifdef CONFIG_MPENTIUM4
+# define MPENTIUM4 1
+#else
+# define MPENTIUM4 0
+#endif
+
+/*****************************************************************************
+ * MSR access macros *
+ *****************************************************************************/
+
+/*
+ * rpcc: read the processor's time-stamp counter.
+ *
+ * Executes RDTSC, which delivers the low word in EAX and the high word in
+ * EDX, and returns the two halves combined into one 64-bit value.
+ */
+static __inline__ unsigned long long int rpcc(void)
+{
+    unsigned int tsc_lo, tsc_hi;
+
+    __asm__ __volatile__ ("rdtsc" : "=a" (tsc_lo), "=d" (tsc_hi));
+
+    /* The shifted high word has no low bits set, so OR equals addition. */
+    return ((unsigned long long)tsc_hi << 32) | tsc_lo;
+}
+
+/*****************************************************************************
+ * Functions. *
+ *****************************************************************************/
+
+#ifdef __KERNEL__
+/*
+ * Program the retired-instruction counters used for IPC sampling.
+ *
+ * MPENTIUMIII: set P6_EVNTSEL_EN in EVNTSEL0, then load EVNTSEL1 with
+ *              P6_PERF_INST_RETIRED plus the OS, USR and E flags.
+ * MPENTIUM4:   CRU_ESCR0 feeds IQ_CCCR0; with MPENTIUM4_HT, CRU_ESCR1
+ *              (using the T1 thread bits) additionally feeds IQ_CCCR2.
+ *
+ * Finishes with a printk saying whether any counters were set up.
+ */
+static inline void smt_sched_setup(void)
+{
+    if (MPENTIUMIII) {
+        unsigned int sel, hi;
+
+        /* Make sure counters enabled. */
+        rdmsr(MSR_P6_EVNTSEL0, sel, hi);
+        sel |= P6_EVNTSEL_EN;
+        wrmsr(MSR_P6_EVNTSEL0, sel, 0);
+
+        sel = P6_PERF_INST_RETIRED | P6_EVNTSEL_OS |
+              P6_EVNTSEL_USR | P6_EVNTSEL_E;
+        wrmsr(MSR_P6_EVNTSEL1, sel, 0);
+    }
+
+    if (MPENTIUM4) {
+        /* Event bits shared by both ESCRs; only the thread bits differ. */
+        const unsigned int event =
+            P4_ESCR_EVNTSEL(P4_ESCR_EVNTSEL_INSTR_RETIRED) |
+            P4_ESCR_EVNTMASK(P4_ESCR_EVNTMASK_IRET_NB_NTAG);
+        unsigned int val;
+
+        /* Program the ESCR */
+        val = P4_ESCR_USR | P4_ESCR_OS | event;
+        wrmsr(MSR_P4_CRU_ESCR0, val, 0);
+
+        /* Program the CCCR (bits 16-17 are set in either configuration) */
+        val = P4_CCCR_ENABLE | P4_CCCR_ESCR(P4_CRU_ESCR0_NUMBER);
+        if (MPENTIUM4_HT)
+            val |= P4_CCCR_ACTIVE_THREAD(3);
+        else
+            val |= P4_CCCR_RESERVED;
+        wrmsr(MSR_P4_IQ_CCCR0, val, 0);
+
+        if (MPENTIUM4_HT) {
+            /* Program the second ESCR */
+            val = P4_ESCR_T1_USR | P4_ESCR_T1_OS | event;
+            wrmsr(MSR_P4_CRU_ESCR1, val, 0);
+
+            /* Program the second CCCR */
+            val = P4_CCCR_ENABLE | P4_CCCR_ESCR(P4_CRU_ESCR1_NUMBER) |
+                  P4_CCCR_ACTIVE_THREAD(3);
+            wrmsr(MSR_P4_IQ_CCCR2, val, 0);
+        }
+    }
+
+    if (MPENTIUMIII || MPENTIUM4)
+        printk("Setting up IPC performance counters.\n");
+    else
+        printk("WARNING: Not setting up IPC performance counters.\n");
+}
+
+#ifdef CONFIG_MPENTIUMIII
+# define MY_MSR_COUNTER MSR_P6_PERFCTR1
+#endif
+#ifdef CONFIG_MPENTIUM4
+# define MY_MSR_COUNTER MSR_P4_IQ_COUNTER0
+#endif
+#ifndef MY_MSR_COUNTER
+# define MY_MSR_COUNTER 0 /* Never used but ensures compilation */
+#endif
+#define MY_MSR_COUNTER0 MSR_P4_IQ_COUNTER0
+#define MY_MSR_COUNTER1 MSR_P4_IQ_COUNTER2
+
+/*
+ * Begin an IPC sample for 'task': store the current performance-counter
+ * value in ipc_sample_start_count_{lo,hi} and the current TSC in
+ * ipc_sample_start_cycle_{lo,hi}. With MPENTIUM4_HT the counter is picked
+ * by the low bit of task->processor (set -> MY_MSR_COUNTER1, clear ->
+ * MY_MSR_COUNTER0); otherwise MY_MSR_COUNTER is read.
+ *
+ * 'task' is parenthesized so any pointer expression may be passed. The
+ * macro still expands to a bare { } block (kept for existing callers), so
+ * it must not be used as the unbraced body of an if that has an else.
+ */
+# define smt_sched_start_sample(task) \
+{ \
+    unsigned int l, h; \
+ \
+    if (MPENTIUM4_HT) { \
+        unsigned int msr = \
+            ((task)->processor & 1)?MY_MSR_COUNTER1:MY_MSR_COUNTER0; \
+        rdmsr(msr, l, h); \
+    } \
+    else { \
+        rdmsr(MY_MSR_COUNTER, l, h); \
+    } \
+    (task)->ipc_sample_start_count_lo = l; \
+    (task)->ipc_sample_start_count_hi = h; \
+    rdtsc(l, h); \
+    (task)->ipc_sample_start_cycle_lo = l; \
+    (task)->ipc_sample_start_cycle_hi = h; \
+}
+
+/*
+ * End an IPC sample for 'task'. If a sample is in progress (signalled by a
+ * non-zero ipc_sample_start_cycle_hi), re-read the counter and the TSC,
+ * take the differences of the low 32-bit words only, fold count/cycles
+ * into task->ipc_average through IPC_AVERAGE(), and clear
+ * ipc_sample_start_cycle_hi. Otherwise ipc_average is reset to 0.0.
+ *
+ * 'task' is parenthesized so any pointer expression may be passed. The
+ * macro still expands to a bare { } block (kept for existing callers), so
+ * it must not be used as the unbraced body of an if that has an else.
+ */
+# define smt_sched_stop_sample(task) \
+{ \
+    if ((task)->ipc_sample_start_cycle_hi != 0) \
+    { \
+        unsigned int cl, ch, tl, th; \
+        unsigned int c, t; \
+ \
+        if (MPENTIUM4_HT) { \
+            unsigned int msr = \
+                ((task)->processor & 1)?MY_MSR_COUNTER1:MY_MSR_COUNTER0; \
+            rdmsr(msr, cl, ch); \
+        } \
+        else { \
+            rdmsr(MY_MSR_COUNTER, cl, ch); \
+        } \
+ \
+        rdtsc(tl, th); \
+ \
+        c = cl - (task)->ipc_sample_start_count_lo; \
+        t = tl - (task)->ipc_sample_start_cycle_lo; \
+        (task)->ipc_average = IPC_AVERAGE((task)->ipc_average, \
+                                          ((double)c)/((double)t)); \
+        (task)->ipc_sample_start_cycle_hi = 0; \
+ \
+    } \
+    else \
+        (task)->ipc_average = 0.0; \
+ \
+}
+
+// task->ipc_sample_latest =
+// (unsigned int)(1000.0*((double)c)/((double)t));
+#endif /* __KERNEL__ */
+
+
+#endif /* P4PERF_H */
+
+/* End of $RCSfile$ */
diff --git a/tools/misc/xen_cpuperf.c b/tools/misc/xen_cpuperf.c
new file mode 100644
index 0000000000..293997b755
--- /dev/null
+++ b/tools/misc/xen_cpuperf.c
@@ -0,0 +1,265 @@
+/*
+ * User mode program to prod MSR values through the DOM0_MSR dom0 operation
+ *
+ *
+ * $Id$
+ *
+ * $Log$
+ */
+
+#include <sys/types.h>
+#include <sched.h>
+#include <error.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "p4perf.h"
+#include "hypervisor-ifs/dom0_ops.h"
+#include "dom0_defs.h"
+
+/*
+ * Issue a DOM0_MSR write request for MSR 'msr'.
+ *
+ * cpu_mask is passed through unchanged as op.u.msr.cpu_mask; 'low' and
+ * 'high' become the request's in1 and in2 fields.
+ *
+ * The request is zeroed before use so that no uninitialised stack bytes
+ * are handed to the dom0 operation in fields this function does not set.
+ * NOTE(review): the result of do_dom0_op() is not checked; its error
+ * convention is declared outside this file - confirm and report failures.
+ */
+void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
+{
+    dom0_op_t op;
+
+    memset(&op, 0, sizeof(op));
+    op.cmd = DOM0_MSR;
+    op.u.msr.write = 1;
+    op.u.msr.msr = msr;
+    op.u.msr.cpu_mask = cpu_mask;
+    op.u.msr.in1 = low;
+    op.u.msr.in2 = high;
+    do_dom0_op(&op);
+}
+
+/*
+ * Issue a DOM0_MSR read request for MSR 'msr' and return the 64-bit value
+ * assembled from the reply's out2 (upper 32 bits) and out1 (lower 32 bits).
+ *
+ * cpu_mask is passed through unchanged as op.u.msr.cpu_mask.
+ *
+ * The request is zeroed before use: if the operation does not fill in the
+ * outputs, the caller gets 0 instead of uninitialised stack contents.
+ * NOTE(review): the result of do_dom0_op() is not checked; its error
+ * convention is declared outside this file - confirm and report failures.
+ */
+unsigned long long dom0_rdmsr( int cpu_mask, int msr )
+{
+    dom0_op_t op;
+
+    memset(&op, 0, sizeof(op));
+    op.cmd = DOM0_MSR;
+    op.u.msr.write = 0;
+    op.u.msr.msr = msr;
+    op.u.msr.cpu_mask = cpu_mask;
+    do_dom0_op(&op);
+    return (((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1 ;
+}
+
+/*
+ * One named MSR: the name accepted on the command line, the MSR address,
+ * and a small index (for ESCR entries, the value main() places in the
+ * CCCR via P4_CCCR_ESCR()).
+ */
+struct macros {
+    const char *name;        /* points at a string literal; never written */
+    unsigned long msr_addr;
+    int number;
+};
+
+/*
+ * Table of the MSRs that can be named with -E / -C, searched linearly by
+ * lookup_macro(). Columns: name, MSR address, number. The table ends with
+ * a NULL-name sentinel entry.
+ *
+ * The addresses and numbers repeat the MSR_P4_* and P4_*_NUMBER constants
+ * from p4perf.h as literals.
+ */
+struct macros msr[] = {
+ {"BPU_COUNTER0", 0x300, 0},
+ {"BPU_COUNTER1", 0x301, 1},
+ {"BPU_COUNTER2", 0x302, 2},
+ {"BPU_COUNTER3", 0x303, 3},
+ {"MS_COUNTER0", 0x304, 4},
+ {"MS_COUNTER1", 0x305, 5},
+ {"MS_COUNTER2", 0x306, 6},
+ {"MS_COUNTER3", 0x307, 7},
+ {"FLAME_COUNTER0", 0x308, 8},
+ {"FLAME_COUNTER1", 0x309, 9},
+ {"FLAME_COUNTER2", 0x30a, 10},
+ {"FLAME_COUNTER3", 0x30b, 11},
+ {"IQ_COUNTER0", 0x30c, 12},
+ {"IQ_COUNTER1", 0x30d, 13},
+ {"IQ_COUNTER2", 0x30e, 14},
+ {"IQ_COUNTER3", 0x30f, 15},
+ {"IQ_COUNTER4", 0x310, 16},
+ {"IQ_COUNTER5", 0x311, 17},
+ {"BPU_CCCR0", 0x360, 0},
+ {"BPU_CCCR1", 0x361, 1},
+ {"BPU_CCCR2", 0x362, 2},
+ {"BPU_CCCR3", 0x363, 3},
+ {"MS_CCCR0", 0x364, 4},
+ {"MS_CCCR1", 0x365, 5},
+ {"MS_CCCR2", 0x366, 6},
+ {"MS_CCCR3", 0x367, 7},
+ {"FLAME_CCCR0", 0x368, 8},
+ {"FLAME_CCCR1", 0x369, 9},
+ {"FLAME_CCCR2", 0x36a, 10},
+ {"FLAME_CCCR3", 0x36b, 11},
+ {"IQ_CCCR0", 0x36c, 12},
+ {"IQ_CCCR1", 0x36d, 13},
+ {"IQ_CCCR2", 0x36e, 14},
+ {"IQ_CCCR3", 0x36f, 15},
+ {"IQ_CCCR4", 0x370, 16},
+ {"IQ_CCCR5", 0x371, 17},
+ {"BSU_ESCR0", 0x3a0, 7},
+ {"BSU_ESCR1", 0x3a1, 7},
+ {"FSB_ESCR0", 0x3a2, 6},
+ {"FSB_ESCR1", 0x3a3, 6},
+ {"MOB_ESCR0", 0x3aa, 2},
+ {"MOB_ESCR1", 0x3ab, 2},
+ {"PMH_ESCR0", 0x3ac, 4},
+ {"PMH_ESCR1", 0x3ad, 4},
+ {"BPU_ESCR0", 0x3b2, 0},
+ {"BPU_ESCR1", 0x3b3, 0},
+ {"IS_ESCR0", 0x3b4, 1},
+ {"IS_ESCR1", 0x3b5, 1},
+ {"ITLB_ESCR0", 0x3b6, 3},
+ {"ITLB_ESCR1", 0x3b7, 3},
+ {"IX_ESCR0", 0x3c8, 5},
+ {"IX_ESCR1", 0x3c9, 5},
+ {"MS_ESCR0", 0x3c0, 0},
+ {"MS_ESCR1", 0x3c1, 0},
+ {"TBPU_ESCR0", 0x3c2, 2},
+ {"TBPU_ESCR1", 0x3c3, 2},
+ {"TC_ESCR0", 0x3c4, 1},
+ {"TC_ESCR1", 0x3c5, 1},
+ {"FIRM_ESCR0", 0x3a4, 1},
+ {"FIRM_ESCR1", 0x3a5, 1},
+ {"FLAME_ESCR0", 0x3a6, 0},
+ {"FLAME_ESCR1", 0x3a7, 0},
+ {"DAC_ESCR0", 0x3a8, 5},
+ {"DAC_ESCR1", 0x3a9, 5},
+ {"SAAT_ESCR0", 0x3ae, 2},
+ {"SAAT_ESCR1", 0x3af, 2},
+ {"U2L_ESCR0", 0x3b0, 3},
+ {"U2L_ESCR1", 0x3b1, 3},
+ {"CRU_ESCR0", 0x3b8, 4},
+ {"CRU_ESCR1", 0x3b9, 4},
+ {"CRU_ESCR2", 0x3cc, 5},
+ {"CRU_ESCR3", 0x3cd, 5},
+ {"CRU_ESCR4", 0x3e0, 6},
+ {"CRU_ESCR5", 0x3e1, 6},
+ {"IQ_ESCR0", 0x3ba, 0},
+ {"IQ_ESCR1", 0x3bb, 0},
+ {"RAT_ESCR0", 0x3bc, 2},
+ {"RAT_ESCR1", 0x3bd, 2},
+ {"SSU_ESCR0", 0x3be, 3},
+ /* NOTE(review): p4perf.h defines no MSR_P4_SSU_ESCR1 - confirm 0x3bf exists. */
+ {"SSU_ESCR1", 0x3bf, 3},
+ {"ALF_ESCR0", 0x3ca, 1},
+ {"ALF_ESCR1", 0x3cb, 1},
+ {"PEBS_ENABLE", 0x3f1, 0},
+ {"PEBS_MATRIX_VERT", 0x3f2, 0},
+ {NULL, 0, 0} /* sentinel: terminates lookup_macro()'s scan */
+};
+
+/*
+ * Find the msr[] entry whose name matches 'str' exactly (case-sensitive).
+ *
+ * Returns a pointer into msr[], or NULL when no entry matches or 'str' is
+ * NULL. 'str' is only read, hence the const qualifier.
+ */
+struct macros *lookup_macro(const char *str)
+{
+    struct macros *m;
+
+    if (str == NULL)
+        return NULL;
+
+    /* msr[] is terminated by an entry whose name is NULL. */
+    for (m = msr; m->name != NULL; m++) {
+        if (strcmp(m->name, str) == 0)
+            return m;
+    }
+    return NULL;
+}
+
+/*
+ * Parse the decimal argument of option -'opt' and check that it lies in
+ * [lo, hi]. On malformed or out-of-range input, print a diagnostic to
+ * stderr and exit with status 1.
+ */
+static long parse_opt_num(char opt, const char *arg, long lo, long hi)
+{
+    char *end;
+    long val = strtol(arg, &end, 10);
+
+    /* An overflowing strtol saturates, which falls outside [lo, hi] too. */
+    if (end == arg || *end != '\0' || val < lo || val > hi) {
+        fprintf(stderr, "Option -%c: '%s' is not a number in [%ld, %ld].\n",
+                opt, arg, lo, hi);
+        exit(1);
+    }
+    return val;
+}
+
+/*
+ * Program one ESCR/CCCR pair (and optionally the PEBS MSRs) through
+ * dom0_wrmsr(), or with -r print the counter MSRs 0x300..0x311.
+ *
+ * Options:
+ *   -E name, -C name   ESCR and CCCR to program, looked up in msr[]
+ *   -e n, -m n         event select and event mask placed in the ESCR
+ *   -t n               ESCR thread bits (default 0xc)
+ *   -T n               tag value; also sets the tag-enable bit
+ *   -c cpu             CPU whose bit is set in the mask, -1 for all bits
+ *                      (default: bit 0 only)
+ *   -P bit, -V bit     set one bit in PEBS_ENABLE / PEBS_MATRIX_VERT
+ *   -d                 print the values about to be written to stderr
+ *   -r                 read and print the counters instead of writing
+ *
+ * Numeric arguments are range-checked so that they fit the bit field they
+ * are shifted into (see the P4_ESCR_* shifts: thread bits 0-3, tag 5-8,
+ * mask 9-24, select 25-30) and so that no shift count reaches the width
+ * of the shifted type.
+ *
+ * Returns 0 on success; exits with status 1 on a usage error.
+ */
+int main(int argc, char **argv)
+{
+    int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0;
+    unsigned int cpu_mask = 1;
+    struct macros *escr = NULL, *cccr = NULL;
+    unsigned long escr_val, cccr_val;
+    int debug = 0;
+    unsigned long pebs = 0, pebs_vert = 0;
+    int pebs_x = 0, pebs_vert_x = 0;
+    int do_read = 0;    /* not called 'read': that would shadow read(2) */
+
+    while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:r")) != -1) {
+        switch (c) {
+        case 'P':
+            pebs |= 1UL << parse_opt_num('P', optarg, 0, 31);
+            pebs_x = 1;
+            break;
+        case 'V':
+            pebs_vert |= 1UL << parse_opt_num('V', optarg, 0, 31);
+            pebs_vert_x = 1;
+            break;
+        case 'd':
+            debug = 1;
+            break;
+        case 'c':
+        {
+            long cpu = parse_opt_num('c', optarg, -1, 31);
+            cpu_mask = (cpu == -1) ? ~0U : (1U << cpu);
+            break;
+        }
+        case 't': // ESCR thread bits
+            t = (int)parse_opt_num('t', optarg, 0, 0xf);
+            break;
+        case 'e': // eventsel
+            es = (int)parse_opt_num('e', optarg, 0, 0x3f);
+            break;
+        case 'm': // eventmask
+            em = (int)parse_opt_num('m', optarg, 0, 0xffff);
+            break;
+        case 'T': // tag value
+            tv = (int)parse_opt_num('T', optarg, 0, 0xf);
+            te = 1;
+            break;
+        case 'E':
+            escr = lookup_macro(optarg);
+            if (!escr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'C':
+            cccr = lookup_macro(optarg);
+            if (!cccr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'r':
+            do_read = 1;
+            break;
+        default:
+            /* getopt() has already reported the offending option. */
+            fprintf(stderr,
+                    "Usage: %s [-d] [-c cpu] [-t threadbits] [-e eventsel] "
+                    "[-m eventmask] [-T tag] [-P bit] [-V bit] "
+                    "-E escr -C cccr | -r\n", argv[0]);
+            exit(1);
+        }
+    }
+
+    if (do_read) {
+        /*
+         * NOTE(review): this loop only runs while bit 0 of the mask is
+         * set and hands the progressively shifted mask to dom0_rdmsr(),
+         * so e.g. "-c 1" prints nothing. Kept as in the original; confirm
+         * the intended per-CPU semantics before changing it.
+         */
+        while (cpu_mask & 1) {
+            int i;
+            for (i = MSR_P4_BPU_COUNTER0; i <= MSR_P4_IQ_COUNTER5; i++)
+                printf("%010llx ", dom0_rdmsr( cpu_mask, i ));
+            printf("\n");
+            cpu_mask >>= 1;
+        }
+        exit(0);    /* a completed read is a success, not an error */
+    }
+
+    if (!escr) {
+        fprintf(stderr, "Need an ESCR.\n");
+        exit(1);
+    }
+    if (!cccr) {
+        fprintf(stderr, "Need a counter number.\n");
+        exit(1);
+    }
+
+    escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
+        P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
+    cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
+        P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
+
+    if (debug) {
+        fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
+        /* number is an int, so it is printed with %d rather than %u. */
+        fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%d)\n",
+                cccr->msr_addr, cccr_val, cccr->number);
+        if (pebs_x)
+            fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_ENABLE, pebs);
+        if (pebs_vert_x)
+            fprintf(stderr, "PMV 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
+    }
+
+    /* The ESCR is written before the CCCR that enables counting from it. */
+    dom0_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
+    dom0_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
+
+    if (pebs_x)
+        dom0_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
+
+    if (pebs_vert_x)
+        dom0_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
+
+    return 0;
+}
+