diff options
author | Ian Jackson <Ian.Jackson@eu.citrix.com> | 2012-03-01 16:48:36 +0000 |
---|---|---|
committer | Ian Jackson <Ian.Jackson@eu.citrix.com> | 2012-03-01 16:48:36 +0000 |
commit | ff7c56c82c6f58d201c0bfcf57f58f574de7a57e (patch) | |
tree | 7e7859a9350125b56b6058821aaf9dbc6cf9b6b4 | |
parent | e44490b9a512b955fcaf4dea3c35382990549e72 (diff) | |
parent | c933085d093fe64a08a6b86829015e9788192d64 (diff) | |
download | xen-ff7c56c82c6f58d201c0bfcf57f58f574de7a57e.tar.gz xen-ff7c56c82c6f58d201c0bfcf57f58f574de7a57e.tar.bz2 xen-ff7c56c82c6f58d201c0bfcf57f58f574de7a57e.zip |
Merge
-rw-r--r-- | .hgignore | 1 | ||||
-rw-r--r-- | tools/libxc/xc_cpufeature.h | 2 | ||||
-rw-r--r-- | tools/libxc/xc_cpuid_x86.c | 4 | ||||
-rw-r--r-- | tools/misc/Makefile | 8 | ||||
-rw-r--r-- | tools/misc/xen-lowmemd.c | 148 | ||||
-rw-r--r-- | xen/arch/x86/hvm/rtc.c | 8 | ||||
-rw-r--r-- | xen/common/grant_table.c | 2 | ||||
-rw-r--r-- | xen/common/page_alloc.c | 108 | ||||
-rw-r--r-- | xen/drivers/passthrough/amd/iommu_init.c | 18 | ||||
-rw-r--r-- | xen/include/asm-x86/amd-iommu.h | 1 | ||||
-rw-r--r-- | xen/include/public/xen.h | 1 |
11 files changed, 288 insertions, 13 deletions
@@ -202,6 +202,7 @@ ^tools/misc/xenperf$ ^tools/misc/xenpm$ ^tools/misc/xen-hvmctx$ +^tools/misc/xen-lowmemd$ ^tools/misc/gtraceview$ ^tools/misc/gtracestat$ ^tools/misc/xenlockprof$ diff --git a/tools/libxc/xc_cpufeature.h b/tools/libxc/xc_cpufeature.h index ff7c3d0042..e1772337ad 100644 --- a/tools/libxc/xc_cpufeature.h +++ b/tools/libxc/xc_cpufeature.h @@ -129,10 +129,12 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */ #define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */ #define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE 4 /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 5 /* AVX2 instructions */ #define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */ #define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */ #define X86_FEATURE_INVPCID 10 /* Invalidate Process Context ID */ +#define X86_FEATURE_RTM 11 /* Restricted Transactional Memory */ #endif /* __LIBXC_CPUFEATURE_H */ diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c index d8a910a6e4..0882ce6554 100644 --- a/tools/libxc/xc_cpuid_x86.c +++ b/tools/libxc/xc_cpuid_x86.c @@ -363,11 +363,13 @@ static void xc_cpuid_hvm_policy( case 0x00000007: /* Intel-defined CPU features */ if ( input[1] == 0 ) { regs[1] &= (bitmaskof(X86_FEATURE_BMI1) | + bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_AVX2) | bitmaskof(X86_FEATURE_SMEP) | bitmaskof(X86_FEATURE_BMI2) | bitmaskof(X86_FEATURE_ERMS) | bitmaskof(X86_FEATURE_INVPCID) | + bitmaskof(X86_FEATURE_RTM) | bitmaskof(X86_FEATURE_FSGSBASE)); } else regs[1] = 0; @@ -496,9 +498,11 @@ static void xc_cpuid_pv_policy( case 0x00000007: if ( input[1] == 0 ) regs[1] &= (bitmaskof(X86_FEATURE_BMI1) | + bitmaskof(X86_FEATURE_HLE) | bitmaskof(X86_FEATURE_AVX2) | bitmaskof(X86_FEATURE_BMI2) | bitmaskof(X86_FEATURE_ERMS) | + bitmaskof(X86_FEATURE_RTM) | bitmaskof(X86_FEATURE_FSGSBASE)); else regs[1] = 0; diff --git a/tools/misc/Makefile b/tools/misc/Makefile index 817398bc9b..2c691a4329 100644 --- a/tools/misc/Makefile +++ b/tools/misc/Makefile @@ -5,11 +5,12 @@ CFLAGS += -Werror CFLAGS += $(CFLAGS_libxenctrl) CFLAGS += $(CFLAGS_xeninclude) +CFLAGS += $(CFLAGS_libxenstore) HDRS = $(wildcard *.h) TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd -TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx xen-hvmcrash +TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx xen-hvmcrash xen-lowmemd TARGETS-$(CONFIG_MIGRATE) += xen-hptool TARGETS := $(TARGETS-y) @@ -21,7 +22,7 @@ INSTALL_BIN-y := xencons INSTALL_BIN-$(CONFIG_X86) += xen-detect INSTALL_BIN := $(INSTALL_BIN-y) -INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd xen-ringwatch +INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd xen-ringwatch xen-lowmemd INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx xen-hvmcrash INSTALL_SBIN-$(CONFIG_MIGRATE) += xen-hptool INSTALL_SBIN := $(INSTALL_SBIN-y) @@ -70,6 +71,9 @@ xen-hptool: xen-hptool.o xenwatchdogd: xenwatchdogd.o $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS) +xen-lowmemd: xen-lowmemd.o + $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS) + gtraceview: gtraceview.o $(CC) $(LDFLAGS) -o $@ $< $(CURSES_LIBS) $(APPEND_LDFLAGS) diff --git a/tools/misc/xen-lowmemd.c b/tools/misc/xen-lowmemd.c new file mode 100644 index 0000000000..42098e4d6f --- /dev/null +++ b/tools/misc/xen-lowmemd.c @@ -0,0 +1,148 @@ +/* + * xen-lowmemd: demo VIRQ_ENOMEM + * Andres Lagar-Cavilla (GridCentric Inc.) + */ + +#include <stdio.h> +#include <xenctrl.h> +#include <xs.h> +#include <stdlib.h> +#include <string.h> + +static evtchn_port_t virq_port = -1; +static xc_evtchn *xce_handle = NULL; +static xc_interface *xch = NULL; +static struct xs_handle *xs_handle = NULL; + +void cleanup(void) +{ + if (virq_port > -1) + xc_evtchn_unbind(xce_handle, virq_port); + if (xce_handle) + xc_evtchn_close(xce_handle); + if (xch) + xc_interface_close(xch); + if (xs_handle) + xs_daemon_close(xs_handle); +} + +/* Never shrink dom0 below 1 GiB */ +#define DOM0_FLOOR (1 << 30) +#define DOM0_FLOOR_PG ((DOM0_FLOOR) >> 12) + +/* Act if free memory is less than 92 MiB */ +#define THRESHOLD (92 << 20) +#define THRESHOLD_PG ((THRESHOLD) >> 12) + +#define BUFSZ 512 +void handle_low_mem(void) +{ + xc_dominfo_t dom0_info; + xc_physinfo_t info; + unsigned long long free_pages, dom0_pages, diff, dom0_target; + char data[BUFSZ], error[BUFSZ]; + + if (xc_physinfo(xch, &info) < 0) + { + perror("Getting physinfo failed"); + return; + } + + free_pages = (unsigned long long) info.free_pages; + printf("Available free pages: 0x%llx:%llux\n", + free_pages, free_pages); + + /* Don't do anything if we have more than the threshold free */ + if ( free_pages >= THRESHOLD_PG ) + return; + diff = THRESHOLD_PG - free_pages; + + if (xc_domain_getinfo(xch, 0, 1, &dom0_info) < 1) + { + perror("Failed to get dom0 info"); + return; + } + + dom0_pages = (unsigned long long) dom0_info.nr_pages; + printf("Dom0 pages: 0x%llx:%llu\n", dom0_pages, dom0_pages); + dom0_target = dom0_pages - diff; + if (dom0_target <= DOM0_FLOOR_PG) + return; + + printf("Shooting for dom0 target 0x%llx:%llu\n", + dom0_target, dom0_target); + + snprintf(data, BUFSZ, "%llu", dom0_target); + if (!xs_write(xs_handle, XBT_NULL, + "/local/domain/0/memory/target", data, strlen(data))) + { + snprintf(error, BUFSZ,"Failed to write target %s to xenstore", data); + perror(error); + } +} + +int main(int argc, char *argv[]) +{ + int rc; + + atexit(cleanup); + + xch = xc_interface_open(NULL, NULL, 0); + if (xch == NULL) + { + perror("Failed to open xc interface"); + return 1; + } + + xce_handle = xc_evtchn_open(NULL, 0); + if (xce_handle == NULL) + { + perror("Failed to open evtchn device"); + return 2; + } + + xs_handle = xs_daemon_open(); + if (xs_handle == NULL) + { + perror("Failed to open xenstore connection"); + return 3; + } + + if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_ENOMEM)) == -1) + { + perror("Failed to bind to domain exception virq port"); + return 4; + } + + virq_port = rc; + + while(1) + { + evtchn_port_t port; + + if ((port = xc_evtchn_pending(xce_handle)) == -1) + { + perror("Failed to listen for pending event channel"); + return 5; + } + + if (port != virq_port) + { + char data[BUFSZ]; + snprintf(data, BUFSZ, "Wrong port, got %d expected %d", port, virq_port); + perror(data); + return 6; + } + + if (xc_evtchn_unmask(xce_handle, port) == -1) + { + perror("Failed to unmask port"); + return 7; + } + + printf("Got a virq kick, time to get work\n"); + handle_low_mem(); + } + + return 0; +} diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c index 81a74a33c6..e1139da89f 100644 --- a/xen/arch/x86/hvm/rtc.c +++ b/xen/arch/x86/hvm/rtc.c @@ -33,6 +33,8 @@ #define vrtc_domain(x) (container_of((x), struct domain, \ arch.hvm_domain.pl_time.vrtc)) #define vrtc_vcpu(x) (pt_global_vcpu_target(vrtc_domain(x))) +#define epoch_year 1900 +#define get_year(x) (x + epoch_year) static void rtc_periodic_cb(struct vcpu *v, void *opaque) { @@ -165,7 +167,7 @@ static void rtc_set_time(RTCState *s) ASSERT(spin_is_locked(&s->lock)); - before = mktime(tm->tm_year, tm->tm_mon + 1, tm->tm_mday, + before = mktime(get_year(tm->tm_year), tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]); @@ -179,7 +181,7 @@ static void rtc_set_time(RTCState *s) tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1; tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100; - after = mktime(tm->tm_year, tm->tm_mon + 1, tm->tm_mday, + after = mktime(get_year(tm->tm_year), tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); /* We use the guest's setting of the RTC to define the local-time @@ -257,7 +259,7 @@ static void rtc_next_second(RTCState *s) if ( (unsigned)tm->tm_wday >= 7 ) tm->tm_wday = 0; days_in_month = get_days_in_month(tm->tm_mon, - tm->tm_year + 1900); + get_year(tm->tm_year)); tm->tm_mday++; if ( tm->tm_mday < 1 ) { diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 4d637833d6..28a256197e 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -585,6 +585,8 @@ __gnttab_map_grant_ref( act->start = 0; act->length = PAGE_SIZE; act->is_sub_page = 0; + act->trans_domain = rd; + act->trans_gref = op->ref; } } diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index 249bb355dd..7cb16940c8 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -35,6 +35,7 @@ #include <xen/perfc.h> #include <xen/numa.h> #include <xen/nodemask.h> +#include <xen/event.h> #include <xen/tmem.h> #include <xen/tmem_xen.h> #include <public/sysctl.h> @@ -300,6 +301,107 @@ static unsigned long init_node_heap(int node, unsigned long mfn, return needed; } +/* Default to 64 MiB */ +#define DEFAULT_LOW_MEM_VIRQ (((paddr_t) 64) << 20) +#define MAX_LOW_MEM_VIRQ (((paddr_t) 1024) << 20) + +static paddr_t __read_mostly opt_low_mem_virq = ((paddr_t) -1); +size_param("low_mem_virq_limit", opt_low_mem_virq); + +/* Thresholds to control hysteresis. In pages */ +/* When memory grows above this threshold, reset hysteresis. + * -1 initially to not reset until at least one virq issued. */ +static unsigned long low_mem_virq_high = -1UL; +/* Threshold at which we issue virq */ +static unsigned long low_mem_virq_th = 0; +/* Original threshold after all checks completed */ +static unsigned long low_mem_virq_orig = 0; +/* Order for current threshold */ +static unsigned int low_mem_virq_th_order = 0; + +/* Perform bootstrapping checks and set bounds */ +static void __init setup_low_mem_virq(void) +{ + unsigned int order; + paddr_t threshold; + bool_t halve; + + /* If the user specifies zero, then he/she doesn't want this virq + * to ever trigger. */ + if ( opt_low_mem_virq == 0 ) + { + low_mem_virq_th = -1UL; + return; + } + + /* If the user did not specify a knob, remember that */ + halve = (opt_low_mem_virq == ((paddr_t) -1)); + threshold = halve ? DEFAULT_LOW_MEM_VIRQ : opt_low_mem_virq; + + /* Dom0 has already been allocated by now. So check we won't be + * complaining immediately with whatever's left of the heap. */ + threshold = min(threshold, + ((paddr_t) total_avail_pages) << PAGE_SHIFT); + + /* Then, cap to some predefined maximum */ + threshold = min(threshold, MAX_LOW_MEM_VIRQ); + + /* If the user specified no knob, and we are at the current available + * level, halve the threshold. */ + if ( halve && + (threshold == (((paddr_t) total_avail_pages) << PAGE_SHIFT)) ) + threshold >>= 1; + + /* Zero? Have to fire immediately */ + threshold = max(threshold, (paddr_t) PAGE_SIZE); + + /* Threshold bytes -> pages */ + low_mem_virq_th = threshold >> PAGE_SHIFT; + + /* Next, round the threshold down to the next order */ + order = get_order_from_pages(low_mem_virq_th); + if ( (1UL << order) > low_mem_virq_th ) + order--; + + /* Set bounds, ready to go */ + low_mem_virq_th = low_mem_virq_orig = 1UL << order; + low_mem_virq_th_order = order; + + printk("Initial low memory virq threshold set at 0x%lx pages.\n", + low_mem_virq_th); +} + +static void check_low_mem_virq(void) +{ + if ( unlikely(total_avail_pages <= low_mem_virq_th) ) + { + send_global_virq(VIRQ_ENOMEM); + + /* Update thresholds. Next warning will be when we drop below + * next order. However, we wait until we grow beyond one + * order above us to complain again at the current order */ + low_mem_virq_high = 1UL << (low_mem_virq_th_order + 1); + if ( low_mem_virq_th_order > 0 ) + low_mem_virq_th_order--; + low_mem_virq_th = 1UL << low_mem_virq_th_order; + return; + } + + if ( unlikely(total_avail_pages >= low_mem_virq_high) ) + { + /* Reset hysteresis. Bring threshold up one order. + * If we are back where originally set, set high + * threshold to -1 to avoid further growth of + * virq threshold. */ + low_mem_virq_th_order++; + low_mem_virq_th = 1UL << low_mem_virq_th_order; + if ( low_mem_virq_th == low_mem_virq_orig ) + low_mem_virq_high = -1UL; + else + low_mem_virq_high = 1UL << (low_mem_virq_th_order + 2); + } +} + /* Allocate 2^@order contiguous pages. */ static struct page_info *alloc_heap_pages( unsigned int zone_lo, unsigned int zone_hi, @@ -420,6 +522,8 @@ static struct page_info *alloc_heap_pages( total_avail_pages -= request; ASSERT(total_avail_pages >= 0); + check_low_mem_virq(); + if ( d != NULL ) d->last_alloc_node = node; @@ -1022,6 +1126,10 @@ void __init scrub_heap_pages(void) } printk("done.\n"); + + /* Now that the heap is initialized, run checks and set bounds + * for the low mem virq algorithm. */ + setup_low_mem_virq(); } diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c index e091fb2490..3ce0acbda4 100644 --- a/xen/drivers/passthrough/amd/iommu_init.c +++ b/xen/drivers/passthrough/amd/iommu_init.c @@ -367,6 +367,8 @@ static int iommu_read_log(struct amd_iommu *iommu, u32 tail, head, *entry, tail_offest, head_offset; BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log))); + + spin_lock(&log->lock); /* make sure there's an entry in the log */ tail_offest = ( log == &iommu->event_log ) ? @@ -396,6 +398,8 @@ static int iommu_read_log(struct amd_iommu *iommu, writel(head, iommu->mmio_base + head_offset); } + spin_unlock(&log->lock); + return 0; } @@ -618,11 +622,11 @@ static void iommu_check_event_log(struct amd_iommu *iommu) u32 entry; unsigned long flags; - spin_lock_irqsave(&iommu->lock, flags); - iommu_read_log(iommu, &iommu->event_log, sizeof(event_entry_t), parse_event_log_entry); + spin_lock_irqsave(&iommu->lock, flags); + /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); @@ -651,14 +655,10 @@ void parse_ppr_log_entry(struct amd_iommu *iommu, u32 entry[]) bus = PCI_BUS(device_id); devfn = PCI_DEVFN2(device_id); - local_irq_enable(); - spin_lock(&pcidevs_lock); pdev = pci_get_pdev(iommu->seg, bus, devfn); spin_unlock(&pcidevs_lock); - local_irq_disable(); - if ( pdev == NULL ) return; @@ -672,10 +672,10 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu) u32 entry; unsigned long flags; - spin_lock_irqsave(&iommu->lock, flags); - iommu_read_log(iommu, &iommu->ppr_log, sizeof(ppr_entry_t), parse_ppr_log_entry); + + spin_lock_irqsave(&iommu->lock, flags); /*check event overflow */ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); @@ -852,6 +852,8 @@ static void * __init allocate_ring_buffer(struct ring_buffer *ring_buf, ring_buf->head = 0; ring_buf->tail = 0; + spin_lock_init(&ring_buf->lock); + ring_buf->alloc_size = PAGE_SIZE << get_order_from_bytes(entries * entry_size); ring_buf->entries = ring_buf->alloc_size / entry_size; diff --git a/xen/include/asm-x86/amd-iommu.h b/xen/include/asm-x86/amd-iommu.h index 0a781c90d2..92423e21da 100644 --- a/xen/include/asm-x86/amd-iommu.h +++ b/xen/include/asm-x86/amd-iommu.h @@ -65,6 +65,7 @@ struct ring_buffer { unsigned long alloc_size; uint32_t tail; uint32_t head; + spinlock_t lock; /* protect buffer pointers */ }; typedef struct iommu_cap { diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 041ad0be33..b2f6c507b9 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -158,6 +158,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ #define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ #define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 |