diff options
author | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2003-10-10 19:27:04 +0000 |
---|---|---|
committer | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2003-10-10 19:27:04 +0000 |
commit | 353e94eeb8b8f9b8a661614c2fa926932c72083c (patch) | |
tree | 4fe39b6a1c4864dff1b5031b34b8ed6fc9e132fb | |
parent | ce8c8fcd8bac4e0866fea83ecfe7cb007ed77ca5 (diff) | |
download | xen-353e94eeb8b8f9b8a661614c2fa926932c72083c.tar.gz xen-353e94eeb8b8f9b8a661614c2fa926932c72083c.tar.bz2 xen-353e94eeb8b8f9b8a661614c2fa926932c72083c.zip |
bitkeeper revision 1.498.1.2 (3f870808Dvsdzic2p7uQgZ0pTQzx7w)
add a handy tool for reading the P4 performance counters: xen_cpuperf
-rw-r--r-- | .rootkeys | 2 | ||||
-rw-r--r-- | tools/misc/Makefile | 4 | ||||
-rw-r--r-- | tools/misc/p4perf.h | 559 | ||||
-rw-r--r-- | tools/misc/xen_cpuperf.c | 265 |
4 files changed, 828 insertions, 2 deletions
@@ -177,9 +177,11 @@ 3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile 3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README 3f6dc142IHaf6XIcAYGmhV9nNSIHFQ tools/misc/miniterm/miniterm.c +3f870808_8aFBAcZbWiWGdgrGQyIEw tools/misc/p4perf.h 3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/misc/xen-clone 3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README 3f1668d4-FUY6Enc7MB3GcwUtfJ5HA tools/misc/xen-mkdevnodes +3f870808zS6T6iFhqYPGelroZlVfGQ tools/misc/xen_cpuperf.c 3f13d81eQ9Vz-h-6RDGFkNR9CRP95g tools/misc/xen_nat_enable 3f13d81e6Z6806ihYYUw8GVKNkYnuw tools/misc/xen_nat_enable.README 3f1668d4F29Jsw0aC0bJEIkOBiagiQ tools/misc/xen_read_console.c diff --git a/tools/misc/Makefile b/tools/misc/Makefile index 62e1ab7fee..59899f5f53 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -1,13 +1,13 @@ CC = gcc CFLAGS = -Wall -O3 -CFLAGS += -I../../xen/include -I../../xenolinux-sparse/include +CFLAGS += -I../../xen/include -I../../xenolinux-sparse/include -I../internal HDRS = $(wildcard *.h) SRCS = $(wildcard *.c) OBJS = $(patsubst %.c,%.o,$(SRCS)) -TARGETS = xen_read_console +TARGETS = xen_read_console xen_cpuperf INSTALL = $(TARGETS) xen-mkdevnodes xen-clone
diff --git a/tools/misc/p4perf.h b/tools/misc/p4perf.h
new file mode 100644
index 0000000000..4f681b636d
--- /dev/null
+++ b/tools/misc/p4perf.h
@@ -0,0 +1,559 @@
+/*
+ * For P6 use PERFCTR1 (0 used for APIC NMI watchdog). Must setup after
+ * APIC NMI watchdog setup. Note that if this previous setup doesn't happen
+ * we still must enable both counters.
+ *
+ * P4 Xeon with Hyperthreading has counters per physical package which can
+ * count events from either logical CPU. However, in many cases more than
+ * one ESCR and CCCR/counter can be used to count the same event. For instr
+ * or uops retired, use either ESCR0/IQ_CCCR0 or ESCR1/IQ_CCCR2.
+ *
+ * Use CONFIG_MPENTIUM4_HT for a P4 Xeon with hyperthreading.
+ *
+ * Note that the counters may be initialised on each logical processor
+ * which will cause each physical processor to be initialised twice. This
+ * should not cause a problem.
+ */
+
+#ifndef P4PERF_H
+#define P4PERF_H
+
+#ifdef __KERNEL__
+#include <asm/msr.h>
+#endif
+
+/*****************************************************************************
+ * Performance counter configuration.                                        *
+ *****************************************************************************/
+
+/* P6 event-select register bits; only defined here if nobody else did. */
+#ifndef P6_EVNTSEL_OS
+# define P6_EVNTSEL_OS            (1 << 17)
+# define P6_EVNTSEL_USR           (1 << 16)
+# define P6_EVNTSEL_E             (1 << 18)
+# define P6_EVNTSEL_EN            (1 << 22)
+#endif
+#define P6_PERF_INST_RETIRED      0xc0
+#define P6_PERF_UOPS_RETIRED      0xc2
+
+/*
+ * P4 ESCR bit fields. The function-like macros parenthesise their argument
+ * so that a compound expression such as (a | b) is shifted as a unit.
+ */
+#define P4_ESCR_USR               (1 << 2)
+#define P4_ESCR_OS                (1 << 3)
+#define P4_ESCR_T0_USR            (1 << 2) /* First logical CPU */
+#define P4_ESCR_T0_OS             (1 << 3)
+#define P4_ESCR_T1_USR            (1 << 0) /* Second logical CPU */
+#define P4_ESCR_T1_OS             (1 << 1)
+#define P4_ESCR_TE                (1 << 4)
+#define P4_ESCR_THREADS(t)        (t)
+#define P4_ESCR_TV(tag)           ((tag) << 5)
+#define P4_ESCR_EVNTSEL(e)        ((e) << 25)
+#define P4_ESCR_EVNTMASK(e)       ((e) << 9)
+
+/* Values for P4_ESCR_EVNTSEL(). */
+#define P4_ESCR_EVNTSEL_FRONT_END       0x08
+#define P4_ESCR_EVNTSEL_EXECUTION       0x0c
+#define P4_ESCR_EVNTSEL_REPLAY          0x09
+#define P4_ESCR_EVNTSEL_INSTR_RETIRED   0x02
+#define P4_ESCR_EVNTSEL_UOPS_RETIRED    0x01
+#define P4_ESCR_EVNTSEL_UOP_TYPE        0x02
+#define P4_ESCR_EVNTSEL_RET_MBR_TYPE    0x05
+//#define P4_ESCR_EVNTSEL_RET_MBR_TYPE 0x04
+
+/* Values for P4_ESCR_EVNTMASK(), grouped by the event they qualify. */
+#define P4_ESCR_EVNTMASK_FE_NBOGUS      0x01
+#define P4_ESCR_EVNTMASK_FE_BOGUS       0x02
+
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS0   0x01
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS1   0x02
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS2   0x04
+#define P4_ESCR_EVNTMASK_EXEC_NBOGUS3   0x08
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS0    0x10
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS1    0x20
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS2    0x40
+#define P4_ESCR_EVNTMASK_EXEC_BOGUS3    0x80
+
+#define P4_ESCR_EVNTMASK_REPLAY_NBOGUS  0x01
+#define P4_ESCR_EVNTMASK_REPLAY_BOGUS   0x02
+
+#define P4_ESCR_EVNTMASK_IRET_NB_NTAG   0x01
+#define P4_ESCR_EVNTMASK_IRET_NB_TAG    0x02
+#define P4_ESCR_EVNTMASK_IRET_B_NTAG    0x04
+#define P4_ESCR_EVNTMASK_IRET_B_TAG     0x08
+
+#define P4_ESCR_EVNTMASK_URET_NBOGUS    0x01
+#define P4_ESCR_EVNTMASK_URET_BOGUS     0x02
+
+#define P4_ESCR_EVNTMASK_UOP_LOADS      0x02
+#define P4_ESCR_EVNTMASK_UOP_STORES     0x04
+
+#define P4_ESCR_EVNTMASK_RMBRT_COND     0x02
+#define P4_ESCR_EVNTMASK_RMBRT_CALL     0x04
+#define P4_ESCR_EVNTMASK_RMBRT_RETURN   0x08
+#define P4_ESCR_EVNTMASK_RMBRT_INDIR    0x10
+
+#define P4_ESCR_EVNTMASK_RBRT_COND      0x02
+#define P4_ESCR_EVNTMASK_RBRT_CALL      0x04
+#define P4_ESCR_EVNTMASK_RBRT_RETURN    0x08
+#define P4_ESCR_EVNTMASK_RBRT_INDIR     0x10
+
+//#define P4_ESCR_EVNTMASK_INSTR_RETIRED 0x01 /* Non bogus, not tagged */
+//#define P4_ESCR_EVNTMASK_UOPS_RETIRED 0x01 /* Non bogus */
+
+/*
+ * P4 CCCR bit fields. P4_CCCR_OVF is built from an unsigned constant because
+ * shifting a signed 1 into bit 31 is undefined behaviour; the function-like
+ * macros parenthesise their argument so compound expressions shift as a unit.
+ */
+#define P4_CCCR_OVF               (1U << 31)
+#define P4_CCCR_CASCADE           (1 << 30)
+#define P4_CCCR_FORCE_OVF         (1 << 25)
+#define P4_CCCR_EDGE              (1 << 24)
+#define P4_CCCR_COMPLEMENT        (1 << 19)
+#define P4_CCCR_COMPARE           (1 << 18)
+#define P4_CCCR_THRESHOLD(t)      ((t) << 20)
+#define P4_CCCR_ENABLE            (1 << 12)
+#define P4_CCCR_ESCR(escr)        ((escr) << 13)
+#define P4_CCCR_ACTIVE_THREAD(t)  ((t) << 16) /* Set to 11 */
+#define P4_CCCR_OVF_PMI_T0        (1 << 26)
+#define P4_CCCR_OVF_PMI_T1        (1 << 27)
+#define P4_CCCR_RESERVED          (3 << 16)
+#define P4_CCCR_OVF_PMI           (1 << 26)
+
+/*
+ * For each counter group below: MSR_P4_* are MSR addresses, and the matching
+ * P4_*_NUMBER is the value passed to P4_CCCR_ESCR() to select that ESCR.
+ */
+
+// BPU
+#define MSR_P4_BPU_COUNTER0       0x300
+#define MSR_P4_BPU_COUNTER1       0x301
+#define MSR_P4_BPU_CCCR0          0x360
+#define MSR_P4_BPU_CCCR1          0x361
+
+#define MSR_P4_BPU_COUNTER2       0x302
+#define MSR_P4_BPU_COUNTER3       0x303
+#define MSR_P4_BPU_CCCR2          0x362
+#define MSR_P4_BPU_CCCR3          0x363
+
+#define MSR_P4_BSU_ESCR0          0x3a0
+#define MSR_P4_FSB_ESCR0          0x3a2
+#define MSR_P4_MOB_ESCR0          0x3aa
+#define MSR_P4_PMH_ESCR0          0x3ac
+#define MSR_P4_BPU_ESCR0          0x3b2
+#define MSR_P4_IS_ESCR0           0x3b4
+#define MSR_P4_ITLB_ESCR0         0x3b6
+#define MSR_P4_IX_ESCR0           0x3c8
+
+#define P4_BSU_ESCR0_NUMBER       7
+#define P4_FSB_ESCR0_NUMBER       6
+#define P4_MOB_ESCR0_NUMBER       2
+#define P4_PMH_ESCR0_NUMBER       4
+#define P4_BPU_ESCR0_NUMBER       0
+#define P4_IS_ESCR0_NUMBER        1
+#define P4_ITLB_ESCR0_NUMBER      3
+#define P4_IX_ESCR0_NUMBER        5
+
+#define MSR_P4_BSU_ESCR1          0x3a1
+#define MSR_P4_FSB_ESCR1          0x3a3
+#define MSR_P4_MOB_ESCR1          0x3ab
+#define MSR_P4_PMH_ESCR1          0x3ad
+#define MSR_P4_BPU_ESCR1          0x3b3
+#define MSR_P4_IS_ESCR1           0x3b5
+#define MSR_P4_ITLB_ESCR1         0x3b7
+#define MSR_P4_IX_ESCR1           0x3c9
+
+#define P4_BSU_ESCR1_NUMBER       7
+#define P4_FSB_ESCR1_NUMBER       6
+#define P4_MOB_ESCR1_NUMBER       2
+#define P4_PMH_ESCR1_NUMBER       4
+#define P4_BPU_ESCR1_NUMBER       0
+#define P4_IS_ESCR1_NUMBER        1
+#define P4_ITLB_ESCR1_NUMBER      3
+#define P4_IX_ESCR1_NUMBER        5
+
+// MS
+#define MSR_P4_MS_COUNTER0        0x304
+#define MSR_P4_MS_COUNTER1        0x305
+#define MSR_P4_MS_CCCR0           0x364
+#define MSR_P4_MS_CCCR1           0x365
+
+#define MSR_P4_MS_COUNTER2        0x306
+#define MSR_P4_MS_COUNTER3        0x307
+#define MSR_P4_MS_CCCR2           0x366
+#define MSR_P4_MS_CCCR3           0x367
+
+#define MSR_P4_MS_ESCR0           0x3c0
+#define MSR_P4_TBPU_ESCR0         0x3c2
+#define MSR_P4_TC_ESCR0           0x3c4
+
+#define P4_MS_ESCR0_NUMBER        0
+#define P4_TBPU_ESCR0_NUMBER      2
+#define P4_TC_ESCR0_NUMBER        1
+
+#define MSR_P4_MS_ESCR1           0x3c1
+#define MSR_P4_TBPU_ESCR1         0x3c3
+#define MSR_P4_TC_ESCR1           0x3c5
+
+#define P4_MS_ESCR1_NUMBER        0
+#define P4_TBPU_ESCR1_NUMBER      2
+#define P4_TC_ESCR1_NUMBER        1
+
+// FLAME
+#define MSR_P4_FLAME_COUNTER0     0x308
+#define MSR_P4_FLAME_COUNTER1     0x309
+#define MSR_P4_FLAME_CCCR0        0x368
+#define MSR_P4_FLAME_CCCR1        0x369
+
+#define MSR_P4_FLAME_COUNTER2     0x30a
+#define MSR_P4_FLAME_COUNTER3     0x30b
+#define MSR_P4_FLAME_CCCR2        0x36a
+#define MSR_P4_FLAME_CCCR3        0x36b
+
+#define MSR_P4_FIRM_ESCR0         0x3a4
+#define MSR_P4_FLAME_ESCR0        0x3a6
+#define MSR_P4_DAC_ESCR0          0x3a8
+#define MSR_P4_SAAT_ESCR0         0x3ae
+#define MSR_P4_U2L_ESCR0          0x3b0
+
+#define P4_FIRM_ESCR0_NUMBER      1
+#define P4_FLAME_ESCR0_NUMBER     0
+#define P4_DAC_ESCR0_NUMBER       5
+#define P4_SAAT_ESCR0_NUMBER      2
+#define P4_U2L_ESCR0_NUMBER       3
+
+#define MSR_P4_FIRM_ESCR1         0x3a5
+#define MSR_P4_FLAME_ESCR1        0x3a7
+#define MSR_P4_DAC_ESCR1          0x3a9
+#define MSR_P4_SAAT_ESCR1         0x3af
+#define MSR_P4_U2L_ESCR1          0x3b1
+
+#define P4_FIRM_ESCR1_NUMBER      1
+#define P4_FLAME_ESCR1_NUMBER     0
+#define P4_DAC_ESCR1_NUMBER       5
+#define P4_SAAT_ESCR1_NUMBER      2
+#define P4_U2L_ESCR1_NUMBER       3
+
+// IQ
+#define MSR_P4_IQ_COUNTER0        0x30c
+#define MSR_P4_IQ_COUNTER1        0x30d
+#define MSR_P4_IQ_CCCR0           0x36c
+#define MSR_P4_IQ_CCCR1           0x36d
+
+#define MSR_P4_IQ_COUNTER2        0x30e
+#define MSR_P4_IQ_COUNTER3        0x30f
+#define MSR_P4_IQ_CCCR2           0x36e
+#define MSR_P4_IQ_CCCR3           0x36f
+
+#define MSR_P4_IQ_COUNTER4        0x310
+#define MSR_P4_IQ_COUNTER5        0x311
+#define MSR_P4_IQ_CCCR4           0x370
+#define MSR_P4_IQ_CCCR5           0x371
+
+#define MSR_P4_CRU_ESCR0          0x3b8
+#define MSR_P4_CRU_ESCR2          0x3cc
+#define MSR_P4_CRU_ESCR4          0x3e0
+#define MSR_P4_IQ_ESCR0           0x3ba
+#define MSR_P4_RAT_ESCR0          0x3bc
+#define MSR_P4_SSU_ESCR0          0x3be
+#define MSR_P4_ALF_ESCR0          0x3ca
+
+#define P4_CRU_ESCR0_NUMBER       4
+#define P4_CRU_ESCR2_NUMBER       5
+#define P4_CRU_ESCR4_NUMBER       6
+#define P4_IQ_ESCR0_NUMBER        0
+#define P4_RAT_ESCR0_NUMBER       2
+#define P4_SSU_ESCR0_NUMBER       3
+#define P4_ALF_ESCR0_NUMBER       1
+
+#define MSR_P4_CRU_ESCR1          0x3b9
+#define MSR_P4_CRU_ESCR3          0x3cd
+#define MSR_P4_CRU_ESCR5          0x3e1
+#define MSR_P4_IQ_ESCR1           0x3bb
+#define MSR_P4_RAT_ESCR1          0x3bd
+#define MSR_P4_ALF_ESCR1          0x3cb
+
+#define P4_CRU_ESCR1_NUMBER       4
+#define P4_CRU_ESCR3_NUMBER       5
+#define P4_CRU_ESCR5_NUMBER       6
+#define P4_IQ_ESCR1_NUMBER        0
+#define P4_RAT_ESCR1_NUMBER       2
+#define P4_ALF_ESCR1_NUMBER       1
+
+#define P4_BPU_COUNTER0_NUMBER    0
+#define P4_BPU_COUNTER1_NUMBER    1
+#define P4_BPU_COUNTER2_NUMBER    2
+#define P4_BPU_COUNTER3_NUMBER    3
+
+#define P4_MS_COUNTER0_NUMBER     4
+#define P4_MS_COUNTER1_NUMBER     5
+#define P4_MS_COUNTER2_NUMBER     6
+#define P4_MS_COUNTER3_NUMBER     7
+
+#define P4_FLAME_COUNTER0_NUMBER  8
+#define P4_FLAME_COUNTER1_NUMBER  9
+#define P4_FLAME_COUNTER2_NUMBER  10
+#define P4_FLAME_COUNTER3_NUMBER  11
+
+#define P4_IQ_COUNTER0_NUMBER     12
+#define P4_IQ_COUNTER1_NUMBER     13
+#define P4_IQ_COUNTER2_NUMBER     14
+#define P4_IQ_COUNTER3_NUMBER     15
+#define P4_IQ_COUNTER4_NUMBER     16
+#define P4_IQ_COUNTER5_NUMBER     17
+
+/* PEBS
+ */
+#define MSR_P4_PEBS_ENABLE        0x3F1
+#define MSR_P4_PEBS_MATRIX_VERT   0x3F2
+
+#define P4_PEBS_ENABLE_MY_THR     (1 << 25)
+#define P4_PEBS_ENABLE_OTH_THR    (1 << 26)
+#define P4_PEBS_ENABLE            (1 << 24)
+#define P4_PEBS_BIT0              (1 << 0)
+#define P4_PEBS_BIT1              (1 << 1)
+#define P4_PEBS_BIT2              (1 << 2)
+
+#define P4_PEBS_MATRIX_VERT_BIT0  (1 << 0)
+#define P4_PEBS_MATRIX_VERT_BIT1  (1 << 1)
+#define P4_PEBS_MATRIX_VERT_BIT2  (1 << 2)
+
+/* Replay tagging.
+ */
+#define P4_REPLAY_TAGGING_PEBS_L1LMR P4_PEBS_BIT0
+#define P4_REPLAY_TAGGING_PEBS_L2LMR P4_PEBS_BIT1
+#define P4_REPLAY_TAGGING_PEBS_DTLMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTSMR P4_PEBS_BIT2
+#define P4_REPLAY_TAGGING_PEBS_DTAMR P4_PEBS_BIT2
+
+#define P4_REPLAY_TAGGING_VERT_L1LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_L2LMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTLMR P4_PEBS_MATRIX_VERT_BIT0
+#define P4_REPLAY_TAGGING_VERT_DTSMR P4_PEBS_MATRIX_VERT_BIT1
+/* Parenthesised so the OR cannot re-associate with a neighbouring operator. */
+#define P4_REPLAY_TAGGING_VERT_DTAMR (P4_PEBS_MATRIX_VERT_BIT0 | P4_PEBS_MATRIX_VERT_BIT1)
+
+
+
+
+/*****************************************************************************
+ *                                                                           *
+ *****************************************************************************/
+
+// x87_FP_uop
+#define EVENT_SEL_x87_FP_uop                0x04
+#define EVENT_MASK_x87_FP_uop_ALL           (1 << 15)
+
+// execution event (at retirement)
+#define EVENT_SEL_execution_event           0x0C
+
+// scalar_SP_uop
+#define EVENT_SEL_scalar_SP_uop             0x0a
+#define EVENT_MASK_scalar_SP_uop_ALL        (1 << 15)
+
+// scalar_DP_uop
+#define EVENT_SEL_scalar_DP_uop             0x0e
+#define EVENT_MASK_scalar_DP_uop_ALL        (1 << 15)
+
+// Instruction retired
+#define EVENT_SEL_instr_retired             0x02
+#define EVENT_MASK_instr_retired_ALL        0x0f
+
+// uOps retired
+#define EVENT_SEL_uops_retired              0x01
+#define EVENT_MASK_uops_retired_ALL         0x03
+
+// L1 misses retired
+#define EVENT_SEL_replay_event              0x09
+#define EVENT_MASK_replay_event_ALL         0x03
+
+// Trace cache
+#define EVENT_SEL_BPU_fetch_request         0x03
+#define EVENT_MASK_BPU_fetch_request_TCMISS 0x01
+
+// Bus activity
+#define EVENT_SEL_FSB_data_activity              0x17
+#define EVENT_MASK_FSB_data_activity_DRDY_DRV    0x01
+#define EVENT_MASK_FSB_data_activity_DRDY_OWN    0x02
+#define EVENT_MASK_FSB_data_activity_DRDY_OOTHER 0x04
+#define EVENT_MASK_FSB_data_activity_DBSY_DRV    0x08
+#define EVENT_MASK_FSB_data_activity_DBSY_OWN    0x10
+#define EVENT_MASK_FSB_data_activity_DBSY_OOTHER 0x20
+
+// Cache L2
+#define EVENT_SEL_BSQ_cache_reference             0x0c
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITS 0x001
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITE 0x002
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_HITM 0x004
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITS 0x008
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITE 0x010
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_HITM 0x020
+
+#define EVENT_MASK_BSQ_cache_reference_RD_L2_MISS 0x100
+#define EVENT_MASK_BSQ_cache_reference_RD_L3_MISS 0x200
+#define EVENT_MASK_BSQ_cache_reference_WR_L2_MISS 0x400
+
+/*****************************************************************************
+ *                                                                           *
+ *****************************************************************************/
+
+
+/* The following turn configuration macros into 1/0 to allow code to be
+ * selected using if(MPENTIUM4_HT) rather than #ifdef (to avoid stale code).
+ * We rely on the compiler to optimise out unreachable code.
+ */
+#ifdef CONFIG_MPENTIUM4_HT
+# define MPENTIUM4_HT 1
+#else
+# define MPENTIUM4_HT 0
+#endif
+
+#ifdef CONFIG_MPENTIUMIII
+# define MPENTIUMIII 1
+#else
+# define MPENTIUMIII 0
+#endif
+
+#ifdef CONFIG_MPENTIUM4
+# define MPENTIUM4 1
+#else
+# define MPENTIUM4 0
+#endif
+
+/*****************************************************************************
+ * MSR access macros                                                         *
+ *****************************************************************************/
+
+/* rpcc: get full 64-bit Pentium TSC value
+ *
+ * Executes rdtsc and combines EDX (high word) and EAX (low word) into one
+ * unsigned 64-bit result.
+ */
+static __inline__ unsigned long long int rpcc(void)
+{
+    /* Plain names: identifiers starting with "__" are reserved. */
+    unsigned int hi, lo;
+    __asm__ __volatile__ ("rdtsc" :"=a" (lo), "=d" (hi));
+    return (((unsigned long long)hi) << 32) + lo;
+}
+
+/*****************************************************************************
+ * Functions.                                                                *
+ *****************************************************************************/
+
+#ifdef __KERNEL__
+/*
+ * smt_sched_setup: program the counters used for IPC sampling.
+ *
+ * On P6 (MPENTIUMIII) it sets the enable bit in EVNTSEL0 and programs
+ * EVNTSEL1 to count retired instructions in both OS and user mode.
+ * On P4 (MPENTIUM4) it programs CRU_ESCR0/IQ_CCCR0 for non-bogus, untagged
+ * retired instructions and, with MPENTIUM4_HT, CRU_ESCR1/IQ_CCCR2 using the
+ * T1 (second logical CPU) qualifier bits.
+ * Logs via printk whether any counters were set up.
+ */
+static inline void smt_sched_setup(void)
+{
+    if (MPENTIUMIII) {
+        unsigned int evntsel, x;
+
+        /* Make sure counters enabled.
+         */
+        rdmsr(MSR_P6_EVNTSEL0, evntsel, x);
+        evntsel |= P6_EVNTSEL_EN;
+        wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+
+        evntsel =
+            P6_PERF_INST_RETIRED |
+            P6_EVNTSEL_OS |
+            P6_EVNTSEL_USR |
+            P6_EVNTSEL_E;
+        wrmsr(MSR_P6_EVNTSEL1, evntsel, 0);
+    }
+
+    if(MPENTIUM4) {
+        unsigned int x;
+
+        /* Program the ESCR */
+        x = P4_ESCR_USR |
+            P4_ESCR_OS |
+            P4_ESCR_EVNTSEL(P4_ESCR_EVNTSEL_INSTR_RETIRED) |
+            P4_ESCR_EVNTMASK(P4_ESCR_EVNTMASK_IRET_NB_NTAG);
+        wrmsr(MSR_P4_CRU_ESCR0, x, 0);
+
+        /* Program the CCCR */
+        if (MPENTIUM4_HT) {
+            x = P4_CCCR_ENABLE |
+                P4_CCCR_ESCR(P4_CRU_ESCR0_NUMBER) |
+                P4_CCCR_ACTIVE_THREAD(3);
+        }
+        else {
+            x = P4_CCCR_ENABLE |
+                P4_CCCR_ESCR(P4_CRU_ESCR0_NUMBER) |
+                P4_CCCR_RESERVED;
+        }
+        wrmsr(MSR_P4_IQ_CCCR0, x, 0);
+
+        if (MPENTIUM4_HT) {
+
+            /* Program the second ESCR */
+            x = P4_ESCR_T1_USR |
+                P4_ESCR_T1_OS |
+                P4_ESCR_EVNTSEL(P4_ESCR_EVNTSEL_INSTR_RETIRED) |
+                P4_ESCR_EVNTMASK(P4_ESCR_EVNTMASK_IRET_NB_NTAG);
+            wrmsr(MSR_P4_CRU_ESCR1, x, 0);
+
+            /* Program the second CCCR */
+            x = P4_CCCR_ENABLE |
+                P4_CCCR_ESCR(P4_CRU_ESCR1_NUMBER) |
+                P4_CCCR_ACTIVE_THREAD(3);
+            wrmsr(MSR_P4_IQ_CCCR2, x, 0);
+        }
+    }
+
+    if (!MPENTIUMIII && !MPENTIUM4) {
+        printk("WARNING: Not setting up IPC performance counters.\n");
+    } else {
+        printk("Setting up IPC performance counters.\n");
+    }
+}
+
+#ifdef CONFIG_MPENTIUMIII
+# define MY_MSR_COUNTER MSR_P6_PERFCTR1
+#endif
+#ifdef CONFIG_MPENTIUM4
+# define MY_MSR_COUNTER MSR_P4_IQ_COUNTER0
+#endif
+#ifndef MY_MSR_COUNTER
+# define MY_MSR_COUNTER 0 /* Never used but ensures compilation */
+#endif
+#define MY_MSR_COUNTER0 MSR_P4_IQ_COUNTER0
+#define MY_MSR_COUNTER1 MSR_P4_IQ_COUNTER2
+
+/*
+ * smt_sched_start_sample(task): record the current instruction counter and
+ * TSC in task->ipc_sample_start_{count,cycle}_{lo,hi}. With MPENTIUM4_HT the
+ * counter is chosen by the low bit of task->processor.
+ * The brace-block form is kept for existing callers; `task` is parenthesised
+ * so any pointer expression may be passed.
+ */
+# define smt_sched_start_sample(task) \
+{ \
+    unsigned int l, h; \
+ \
+    if (MPENTIUM4_HT) { \
+        unsigned int msr = \
+            ((task)->processor & 1)?MY_MSR_COUNTER1:MY_MSR_COUNTER0; \
+        rdmsr(msr, l, h); \
+    } \
+    else { \
+        rdmsr(MY_MSR_COUNTER, l, h); \
+    } \
+    (task)->ipc_sample_start_count_lo = l; \
+    (task)->ipc_sample_start_count_hi = h; \
+    rdtsc(l, h); \
+    (task)->ipc_sample_start_cycle_lo = l; \
+    (task)->ipc_sample_start_cycle_hi = h; \
+}
+
+/*
+ * smt_sched_stop_sample(task): if a sample is in progress, fold the ratio of
+ * counted events to elapsed cycles into task->ipc_average via IPC_AVERAGE()
+ * and clear the in-progress marker; otherwise reset ipc_average to 0.0.
+ * Only the low 32 bits of counter and TSC are differenced.
+ * NOTE(review): ipc_sample_start_cycle_hi != 0 doubles as the "sample in
+ * progress" flag, so a sample started while the TSC high word is still zero
+ * is treated as not started; t may also be zero -- confirm both are intended.
+ */
+# define smt_sched_stop_sample(task) \
+{ \
+    if ((task)->ipc_sample_start_cycle_hi != 0) \
+    { \
+        unsigned int cl, ch, tl, th; \
+        unsigned int c, t; \
+ \
+        if (MPENTIUM4_HT) { \
+            unsigned int msr = \
+                ((task)->processor & 1)?MY_MSR_COUNTER1:MY_MSR_COUNTER0; \
+            rdmsr(msr, cl, ch); \
+        } \
+        else { \
+            rdmsr(MY_MSR_COUNTER, cl, ch); \
+        } \
+ \
+        rdtsc(tl, th); \
+ \
+        c = cl - (task)->ipc_sample_start_count_lo; \
+        t = tl - (task)->ipc_sample_start_cycle_lo; \
+        (task)->ipc_average = IPC_AVERAGE((task)->ipc_average, \
+                                          ((double)c)/((double)t)); \
+        (task)->ipc_sample_start_cycle_hi = 0; \
+ \
+    } \
+    else \
+        (task)->ipc_average = 0.0; \
+ \
+}
+
+// task->ipc_sample_latest =
+// (unsigned int)(1000.0*((double)c)/((double)t));
+#endif /* __KERNEL__ */
+
+
+#endif /* P4PERF_H */
+
+/* End of $RCSfile$ */
diff --git a/tools/misc/xen_cpuperf.c b/tools/misc/xen_cpuperf.c
new file mode 100644
index 0000000000..293997b755
--- /dev/null
+++ b/tools/misc/xen_cpuperf.c
@@ -0,0 +1,265 @@
+/*
+ * User mode program to read and program the Pentium 4 performance-counter
+ * MSRs by issuing DOM0_MSR dom0 operations.
+ *
+ * $Id$
+ *
+ * $Log$
+ */
+
+#include <sys/types.h>
+#include <sched.h>
+#include <error.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "p4perf.h"
+#include "hypervisor-ifs/dom0_ops.h"
+#include "dom0_defs.h"
+
+/*
+ * dom0_wrmsr: issue a DOM0_MSR write of (high:low) to MSR `msr` for the
+ * CPUs selected by `cpu_mask`. The result of do_dom0_op() is not examined.
+ */
+void dom0_wrmsr( int cpu_mask, int msr, unsigned int low, unsigned int high )
+{
+    dom0_op_t op;
+
+    /* Do not hand uninitialised stack bytes to the hypervisor. */
+    memset(&op, 0, sizeof op);
+    op.cmd = DOM0_MSR;
+    op.u.msr.write = 1;
+    op.u.msr.msr = msr;
+    op.u.msr.cpu_mask = cpu_mask;
+    op.u.msr.in1 = low;
+    op.u.msr.in2 = high;
+    do_dom0_op(&op);
+}
+
+/*
+ * dom0_rdmsr: issue a DOM0_MSR read of MSR `msr` for the CPUs selected by
+ * `cpu_mask` and return out2 (high word) : out1 (low word) as one 64-bit
+ * value. The result of do_dom0_op() is not examined.
+ */
+unsigned long long dom0_rdmsr( int cpu_mask, int msr )
+{
+    dom0_op_t op;
+
+    /* Zeroed so out1/out2 are well defined even if the op writes nothing. */
+    memset(&op, 0, sizeof op);
+    op.cmd = DOM0_MSR;
+    op.u.msr.write = 0;
+    op.u.msr.msr = msr;
+    op.u.msr.cpu_mask = cpu_mask;
+    do_dom0_op(&op);
+    return (((unsigned long long)op.u.msr.out2)<<32) | op.u.msr.out1 ;
+}
+
+/*
+ * One named MSR. `number` is the counter index for COUNTER/CCCR entries and
+ * the ESCR select value (as used with P4_CCCR_ESCR) for ESCR entries.
+ */
+struct macros {
+    const char *name;
+    unsigned long msr_addr;
+    int number;
+};
+
+/* Name table searched by lookup_macro(); terminated by a NULL name. */
+struct macros msr[] = {
+    {"BPU_COUNTER0", 0x300, 0},
+    {"BPU_COUNTER1", 0x301, 1},
+    {"BPU_COUNTER2", 0x302, 2},
+    {"BPU_COUNTER3", 0x303, 3},
+    {"MS_COUNTER0", 0x304, 4},
+    {"MS_COUNTER1", 0x305, 5},
+    {"MS_COUNTER2", 0x306, 6},
+    {"MS_COUNTER3", 0x307, 7},
+    {"FLAME_COUNTER0", 0x308, 8},
+    {"FLAME_COUNTER1", 0x309, 9},
+    {"FLAME_COUNTER2", 0x30a, 10},
+    {"FLAME_COUNTER3", 0x30b, 11},
+    {"IQ_COUNTER0", 0x30c, 12},
+    {"IQ_COUNTER1", 0x30d, 13},
+    {"IQ_COUNTER2", 0x30e, 14},
+    {"IQ_COUNTER3", 0x30f, 15},
+    {"IQ_COUNTER4", 0x310, 16},
+    {"IQ_COUNTER5", 0x311, 17},
+    {"BPU_CCCR0", 0x360, 0},
+    {"BPU_CCCR1", 0x361, 1},
+    {"BPU_CCCR2", 0x362, 2},
+    {"BPU_CCCR3", 0x363, 3},
+    {"MS_CCCR0", 0x364, 4},
+    {"MS_CCCR1", 0x365, 5},
+    {"MS_CCCR2", 0x366, 6},
+    {"MS_CCCR3", 0x367, 7},
+    {"FLAME_CCCR0", 0x368, 8},
+    {"FLAME_CCCR1", 0x369, 9},
+    {"FLAME_CCCR2", 0x36a, 10},
+    {"FLAME_CCCR3", 0x36b, 11},
+    {"IQ_CCCR0", 0x36c, 12},
+    {"IQ_CCCR1", 0x36d, 13},
+    {"IQ_CCCR2", 0x36e, 14},
+    {"IQ_CCCR3", 0x36f, 15},
+    {"IQ_CCCR4", 0x370, 16},
+    {"IQ_CCCR5", 0x371, 17},
+    {"BSU_ESCR0", 0x3a0, 7},
+    {"BSU_ESCR1", 0x3a1, 7},
+    {"FSB_ESCR0", 0x3a2, 6},
+    {"FSB_ESCR1", 0x3a3, 6},
+    {"MOB_ESCR0", 0x3aa, 2},
+    {"MOB_ESCR1", 0x3ab, 2},
+    {"PMH_ESCR0", 0x3ac, 4},
+    {"PMH_ESCR1", 0x3ad, 4},
+    {"BPU_ESCR0", 0x3b2, 0},
+    {"BPU_ESCR1", 0x3b3, 0},
+    {"IS_ESCR0", 0x3b4, 1},
+    {"IS_ESCR1", 0x3b5, 1},
+    {"ITLB_ESCR0", 0x3b6, 3},
+    {"ITLB_ESCR1", 0x3b7, 3},
+    {"IX_ESCR0", 0x3c8, 5},
+    {"IX_ESCR1", 0x3c9, 5},
+    {"MS_ESCR0", 0x3c0, 0},
+    {"MS_ESCR1", 0x3c1, 0},
+    {"TBPU_ESCR0", 0x3c2, 2},
+    {"TBPU_ESCR1", 0x3c3, 2},
+    {"TC_ESCR0", 0x3c4, 1},
+    {"TC_ESCR1", 0x3c5, 1},
+    {"FIRM_ESCR0", 0x3a4, 1},
+    {"FIRM_ESCR1", 0x3a5, 1},
+    {"FLAME_ESCR0", 0x3a6, 0},
+    {"FLAME_ESCR1", 0x3a7, 0},
+    {"DAC_ESCR0", 0x3a8, 5},
+    {"DAC_ESCR1", 0x3a9, 5},
+    {"SAAT_ESCR0", 0x3ae, 2},
+    {"SAAT_ESCR1", 0x3af, 2},
+    {"U2L_ESCR0", 0x3b0, 3},
+    {"U2L_ESCR1", 0x3b1, 3},
+    {"CRU_ESCR0", 0x3b8, 4},
+    {"CRU_ESCR1", 0x3b9, 4},
+    {"CRU_ESCR2", 0x3cc, 5},
+    {"CRU_ESCR3", 0x3cd, 5},
+    {"CRU_ESCR4", 0x3e0, 6},
+    {"CRU_ESCR5", 0x3e1, 6},
+    {"IQ_ESCR0", 0x3ba, 0},
+    {"IQ_ESCR1", 0x3bb, 0},
+    {"RAT_ESCR0", 0x3bc, 2},
+    {"RAT_ESCR1", 0x3bd, 2},
+    {"SSU_ESCR0", 0x3be, 3},
+    {"SSU_ESCR1", 0x3bf, 3},
+    {"ALF_ESCR0", 0x3ca, 1},
+    {"ALF_ESCR1", 0x3cb, 1},
+    {"PEBS_ENABLE", 0x3f1, 0},
+    {"PEBS_MATRIX_VERT", 0x3f2, 0},
+    {NULL, 0, 0}
+};
+
+/*
+ * lookup_macro: return the msr[] entry whose name equals `str` exactly
+ * (case-sensitive), or NULL if there is none.
+ */
+struct macros *lookup_macro(const char *str)
+{
+    struct macros *m;
+
+    for (m = msr; m->name != NULL; m++) {
+        if (strcmp(m->name, str) == 0)
+            return m;
+    }
+    return NULL;
+}
+
+/*
+ * parse_bit: convert a decimal bit index given to option -`opt` into a
+ * single-bit 32-bit mask. Exits with status 1 if the index is outside
+ * 0..31, since shifting by such a count is undefined behaviour.
+ */
+static unsigned int parse_bit(const char *arg, char opt)
+{
+    int bit = atoi(arg);
+
+    if (bit < 0 || bit > 31) {
+        fprintf(stderr, "-%c: value %d out of range 0..31.\n", opt, bit);
+        exit(1);
+    }
+    return 1U << bit;
+}
+
+/*
+ * Options:
+ *   -d        print the register values to stderr before writing them
+ *   -r        dump MSRs 0x300..0x311 (the P4 counters) and exit
+ *   -c cpu    CPU number (0..31), or -1 for all bits of the CPU mask
+ *   -t bits   ESCR thread bits (default 0xc)
+ *   -e sel    ESCR event select
+ *   -m mask   ESCR event mask
+ *   -T tag    ESCR tag value (also sets the tag-enable bit)
+ *   -E name   ESCR to program, by name from msr[]   (required unless -r)
+ *   -C name   CCCR to program, by name from msr[]   (required unless -r)
+ *   -P bit    set bit in PEBS_ENABLE (may be repeated)
+ *   -V bit    set bit in PEBS_MATRIX_VERT (may be repeated)
+ * Numeric arguments are parsed as decimal by atoi().
+ */
+int main(int argc, char **argv)
+{
+    int c, t = 0xc, es = 0, em = 0, tv = 0, te = 0;
+    unsigned int cpu_mask = 1;
+    struct macros *escr = NULL, *cccr = NULL;
+    unsigned long escr_val, cccr_val;
+    int debug = 0;
+    unsigned long pebs = 0, pebs_vert = 0;
+    int pebs_x = 0, pebs_vert_x = 0;
+    int do_read = 0; /* not "read": that would shadow read(2) from unistd.h */
+
+    while ((c = getopt(argc, argv, "dc:t:e:m:T:E:C:P:V:r")) != -1) {
+        switch (c) {
+        case 'P':
+            pebs |= parse_bit(optarg, 'P');
+            pebs_x = 1;
+            break;
+        case 'V':
+            pebs_vert |= parse_bit(optarg, 'V');
+            pebs_vert_x = 1;
+            break;
+        case 'd':
+            debug = 1;
+            break;
+        case 'c':
+            if (atoi(optarg) == -1)
+                cpu_mask = ~0U;
+            else
+                cpu_mask = parse_bit(optarg, 'c');
+            break;
+        case 't': // ESCR thread bits
+            t = atoi(optarg);
+            break;
+        case 'e': // eventsel
+            es = atoi(optarg);
+            break;
+        case 'm': // eventmask
+            em = atoi(optarg);
+            break;
+        case 'T': // tag value
+            tv = atoi(optarg);
+            te = 1;
+            break;
+        case 'E':
+            escr = lookup_macro(optarg);
+            if (!escr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'C':
+            cccr = lookup_macro(optarg);
+            if (!cccr) {
+                fprintf(stderr, "Macro '%s' not found.\n", optarg);
+                exit(1);
+            }
+            break;
+        case 'r':
+            do_read = 1;
+            break;
+        default:
+            /* getopt() has already reported the offending option. */
+            fprintf(stderr,
+                    "Usage: %s [-d] [-r] [-c cpu] [-t threads] [-e eventsel] "
+                    "[-m eventmask] [-T tag] [-P bit] [-V bit] "
+                    "-E escr -C cccr\n", argv[0]);
+            exit(1);
+        }
+    }
+
+    if (do_read) {
+        /* NOTE(review): the loop runs only while bit 0 of cpu_mask is set
+         * and passes the progressively shifted mask to dom0_rdmsr(), so
+         * "-c N" with N > 0 prints nothing. Left as-is because the
+         * cpu_mask semantics of DOM0_MSR are not visible here -- confirm.
+         */
+        while (cpu_mask & 1) {
+            int i;
+            for (i = MSR_P4_BPU_COUNTER0; i <= MSR_P4_IQ_COUNTER5; i++)
+            {
+                printf("%010llx ",dom0_rdmsr( cpu_mask, i ) );
+            }
+            printf("\n");
+            cpu_mask >>= 1;
+        }
+        exit(0); /* a completed dump is success, not failure */
+    }
+
+    if (!escr) {
+        fprintf(stderr, "Need an ESCR.\n");
+        exit(1);
+    }
+    if (!cccr) {
+        fprintf(stderr, "Need a counter number.\n");
+        exit(1);
+    }
+
+    escr_val = P4_ESCR_THREADS(t) | P4_ESCR_EVNTSEL(es) |
+        P4_ESCR_EVNTMASK(em) | P4_ESCR_TV(tv) | ((te)?P4_ESCR_TE:0);
+    cccr_val = P4_CCCR_ENABLE | P4_CCCR_ESCR(escr->number) |
+        P4_CCCR_ACTIVE_THREAD(3)/*reserved*/;
+
+    if (debug) {
+        fprintf(stderr, "ESCR 0x%lx <= 0x%08lx\n", escr->msr_addr, escr_val);
+        fprintf(stderr, "CCCR 0x%lx <= 0x%08lx (%d)\n",
+                cccr->msr_addr, cccr_val, cccr->number);
+        if (pebs_x)
+            fprintf(stderr, "PEBS 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_ENABLE, pebs);
+        if (pebs_vert_x)
+            fprintf(stderr, "PMV 0x%x <= 0x%08lx\n",
+                    MSR_P4_PEBS_MATRIX_VERT, pebs_vert);
+    }
+
+    /* Only the low 32 bits of each MSR are supplied; the high word is 0. */
+    dom0_wrmsr( cpu_mask, escr->msr_addr, escr_val, 0 );
+    dom0_wrmsr( cpu_mask, cccr->msr_addr, cccr_val, 0 );
+
+    if (pebs_x)
+        dom0_wrmsr( cpu_mask, MSR_P4_PEBS_ENABLE, pebs, 0 );
+
+    if (pebs_vert_x)
+        dom0_wrmsr( cpu_mask, MSR_P4_PEBS_MATRIX_VERT, pebs_vert, 0 );
+
+    return 0;
+}
+
|