aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Jackson <Ian.Jackson@eu.citrix.com>2012-03-01 16:48:36 +0000
committerIan Jackson <Ian.Jackson@eu.citrix.com>2012-03-01 16:48:36 +0000
commitff7c56c82c6f58d201c0bfcf57f58f574de7a57e (patch)
tree7e7859a9350125b56b6058821aaf9dbc6cf9b6b4
parente44490b9a512b955fcaf4dea3c35382990549e72 (diff)
parentc933085d093fe64a08a6b86829015e9788192d64 (diff)
downloadxen-ff7c56c82c6f58d201c0bfcf57f58f574de7a57e.tar.gz
xen-ff7c56c82c6f58d201c0bfcf57f58f574de7a57e.tar.bz2
xen-ff7c56c82c6f58d201c0bfcf57f58f574de7a57e.zip
Merge
-rw-r--r--.hgignore1
-rw-r--r--tools/libxc/xc_cpufeature.h2
-rw-r--r--tools/libxc/xc_cpuid_x86.c4
-rw-r--r--tools/misc/Makefile8
-rw-r--r--tools/misc/xen-lowmemd.c148
-rw-r--r--xen/arch/x86/hvm/rtc.c8
-rw-r--r--xen/common/grant_table.c2
-rw-r--r--xen/common/page_alloc.c108
-rw-r--r--xen/drivers/passthrough/amd/iommu_init.c18
-rw-r--r--xen/include/asm-x86/amd-iommu.h1
-rw-r--r--xen/include/public/xen.h1
11 files changed, 288 insertions, 13 deletions
diff --git a/.hgignore b/.hgignore
index 46655adff6..008e5fb27c 100644
--- a/.hgignore
+++ b/.hgignore
@@ -202,6 +202,7 @@
^tools/misc/xenperf$
^tools/misc/xenpm$
^tools/misc/xen-hvmctx$
+^tools/misc/xen-lowmemd$
^tools/misc/gtraceview$
^tools/misc/gtracestat$
^tools/misc/xenlockprof$
diff --git a/tools/libxc/xc_cpufeature.h b/tools/libxc/xc_cpufeature.h
index ff7c3d0042..e1772337ad 100644
--- a/tools/libxc/xc_cpufeature.h
+++ b/tools/libxc/xc_cpufeature.h
@@ -129,10 +129,12 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx) */
#define X86_FEATURE_FSGSBASE 0 /* {RD,WR}{FS,GS}BASE instructions */
#define X86_FEATURE_BMI1 3 /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE 4 /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 5 /* AVX2 instructions */
#define X86_FEATURE_SMEP 7 /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 8 /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS 9 /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID 10 /* Invalidate Process Context ID */
+#define X86_FEATURE_RTM 11 /* Restricted Transactional Memory */
#endif /* __LIBXC_CPUFEATURE_H */
diff --git a/tools/libxc/xc_cpuid_x86.c b/tools/libxc/xc_cpuid_x86.c
index d8a910a6e4..0882ce6554 100644
--- a/tools/libxc/xc_cpuid_x86.c
+++ b/tools/libxc/xc_cpuid_x86.c
@@ -363,11 +363,13 @@ static void xc_cpuid_hvm_policy(
case 0x00000007: /* Intel-defined CPU features */
if ( input[1] == 0 ) {
regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_HLE) |
bitmaskof(X86_FEATURE_AVX2) |
bitmaskof(X86_FEATURE_SMEP) |
bitmaskof(X86_FEATURE_BMI2) |
bitmaskof(X86_FEATURE_ERMS) |
bitmaskof(X86_FEATURE_INVPCID) |
+ bitmaskof(X86_FEATURE_RTM) |
bitmaskof(X86_FEATURE_FSGSBASE));
} else
regs[1] = 0;
@@ -496,9 +498,11 @@ static void xc_cpuid_pv_policy(
case 0x00000007:
if ( input[1] == 0 )
regs[1] &= (bitmaskof(X86_FEATURE_BMI1) |
+ bitmaskof(X86_FEATURE_HLE) |
bitmaskof(X86_FEATURE_AVX2) |
bitmaskof(X86_FEATURE_BMI2) |
bitmaskof(X86_FEATURE_ERMS) |
+ bitmaskof(X86_FEATURE_RTM) |
bitmaskof(X86_FEATURE_FSGSBASE));
else
regs[1] = 0;
diff --git a/tools/misc/Makefile b/tools/misc/Makefile
index 817398bc9b..2c691a4329 100644
--- a/tools/misc/Makefile
+++ b/tools/misc/Makefile
@@ -5,11 +5,12 @@ CFLAGS += -Werror
CFLAGS += $(CFLAGS_libxenctrl)
CFLAGS += $(CFLAGS_xeninclude)
+CFLAGS += $(CFLAGS_libxenstore)
HDRS = $(wildcard *.h)
TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd
-TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx xen-hvmcrash
+TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx xen-hvmcrash xen-lowmemd
TARGETS-$(CONFIG_MIGRATE) += xen-hptool
TARGETS := $(TARGETS-y)
@@ -21,7 +22,7 @@ INSTALL_BIN-y := xencons
INSTALL_BIN-$(CONFIG_X86) += xen-detect
INSTALL_BIN := $(INSTALL_BIN-y)
-INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd xen-ringwatch
+INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xenwatchdogd xen-ringwatch xen-lowmemd
INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx xen-hvmcrash
INSTALL_SBIN-$(CONFIG_MIGRATE) += xen-hptool
INSTALL_SBIN := $(INSTALL_SBIN-y)
@@ -70,6 +71,9 @@ xen-hptool: xen-hptool.o
xenwatchdogd: xenwatchdogd.o
$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(APPEND_LDFLAGS)
+xen-lowmemd: xen-lowmemd.o
+ $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS_libxenctrl) $(LDLIBS_libxenstore) $(APPEND_LDFLAGS)
+
gtraceview: gtraceview.o
$(CC) $(LDFLAGS) -o $@ $< $(CURSES_LIBS) $(APPEND_LDFLAGS)
diff --git a/tools/misc/xen-lowmemd.c b/tools/misc/xen-lowmemd.c
new file mode 100644
index 0000000000..42098e4d6f
--- /dev/null
+++ b/tools/misc/xen-lowmemd.c
@@ -0,0 +1,148 @@
+/*
+ * xen-lowmemd: demo VIRQ_ENOMEM
+ * Andres Lagar-Cavilla (GridCentric Inc.)
+ */
+
+#include <stdio.h>
+#include <xenctrl.h>
+#include <xs.h>
+#include <stdlib.h>
+#include <string.h>
+
+static evtchn_port_t virq_port = -1;
+static xc_evtchn *xce_handle = NULL;
+static xc_interface *xch = NULL;
+static struct xs_handle *xs_handle = NULL;
+
+void cleanup(void)
+{
+ if (virq_port > -1)
+ xc_evtchn_unbind(xce_handle, virq_port);
+ if (xce_handle)
+ xc_evtchn_close(xce_handle);
+ if (xch)
+ xc_interface_close(xch);
+ if (xs_handle)
+ xs_daemon_close(xs_handle);
+}
+
+/* Never shrink dom0 below 1 GiB */
+#define DOM0_FLOOR (1 << 30)
+#define DOM0_FLOOR_PG ((DOM0_FLOOR) >> 12)
+
+/* Act if free memory is less than 92 MiB */
+#define THRESHOLD (92 << 20)
+#define THRESHOLD_PG ((THRESHOLD) >> 12)
+
+#define BUFSZ 512
+void handle_low_mem(void)
+{
+ xc_dominfo_t dom0_info;
+ xc_physinfo_t info;
+ unsigned long long free_pages, dom0_pages, diff, dom0_target;
+ char data[BUFSZ], error[BUFSZ];
+
+ if (xc_physinfo(xch, &info) < 0)
+ {
+ perror("Getting physinfo failed");
+ return;
+ }
+
+ free_pages = (unsigned long long) info.free_pages;
+ printf("Available free pages: 0x%llx:%llu\n",
+ free_pages, free_pages);
+
+ /* Don't do anything if we have more than the threshold free */
+ if ( free_pages >= THRESHOLD_PG )
+ return;
+ diff = THRESHOLD_PG - free_pages;
+
+ if (xc_domain_getinfo(xch, 0, 1, &dom0_info) < 1)
+ {
+ perror("Failed to get dom0 info");
+ return;
+ }
+
+ dom0_pages = (unsigned long long) dom0_info.nr_pages;
+ printf("Dom0 pages: 0x%llx:%llu\n", dom0_pages, dom0_pages);
+ dom0_target = dom0_pages - diff;
+ if (dom0_target <= DOM0_FLOOR_PG)
+ return;
+
+ printf("Shooting for dom0 target 0x%llx:%llu\n",
+ dom0_target, dom0_target);
+
+ snprintf(data, BUFSZ, "%llu", dom0_target);
+ if (!xs_write(xs_handle, XBT_NULL,
+ "/local/domain/0/memory/target", data, strlen(data)))
+ {
+ snprintf(error, BUFSZ,"Failed to write target %s to xenstore", data);
+ perror(error);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int rc;
+
+ atexit(cleanup);
+
+ xch = xc_interface_open(NULL, NULL, 0);
+ if (xch == NULL)
+ {
+ perror("Failed to open xc interface");
+ return 1;
+ }
+
+ xce_handle = xc_evtchn_open(NULL, 0);
+ if (xce_handle == NULL)
+ {
+ perror("Failed to open evtchn device");
+ return 2;
+ }
+
+ xs_handle = xs_daemon_open();
+ if (xs_handle == NULL)
+ {
+ perror("Failed to open xenstore connection");
+ return 3;
+ }
+
+ if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_ENOMEM)) == -1)
+ {
+ perror("Failed to bind to domain exception virq port");
+ return 4;
+ }
+
+ virq_port = rc;
+
+ while(1)
+ {
+ evtchn_port_t port;
+
+ if ((port = xc_evtchn_pending(xce_handle)) == -1)
+ {
+ perror("Failed to listen for pending event channel");
+ return 5;
+ }
+
+ if (port != virq_port)
+ {
+ char data[BUFSZ];
+ snprintf(data, BUFSZ, "Wrong port, got %d expected %d", port, virq_port);
+ perror(data);
+ return 6;
+ }
+
+ if (xc_evtchn_unmask(xce_handle, port) == -1)
+ {
+ perror("Failed to unmask port");
+ return 7;
+ }
+
+ printf("Got a virq kick, time to get work\n");
+ handle_low_mem();
+ }
+
+ return 0;
+}
diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c
index 81a74a33c6..e1139da89f 100644
--- a/xen/arch/x86/hvm/rtc.c
+++ b/xen/arch/x86/hvm/rtc.c
@@ -33,6 +33,8 @@
#define vrtc_domain(x) (container_of((x), struct domain, \
arch.hvm_domain.pl_time.vrtc))
#define vrtc_vcpu(x) (pt_global_vcpu_target(vrtc_domain(x)))
+#define epoch_year 1900
+#define get_year(x) (x + epoch_year)
static void rtc_periodic_cb(struct vcpu *v, void *opaque)
{
@@ -165,7 +167,7 @@ static void rtc_set_time(RTCState *s)
ASSERT(spin_is_locked(&s->lock));
- before = mktime(tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+ before = mktime(get_year(tm->tm_year), tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
tm->tm_sec = from_bcd(s, s->hw.cmos_data[RTC_SECONDS]);
@@ -179,7 +181,7 @@ static void rtc_set_time(RTCState *s)
tm->tm_mon = from_bcd(s, s->hw.cmos_data[RTC_MONTH]) - 1;
tm->tm_year = from_bcd(s, s->hw.cmos_data[RTC_YEAR]) + 100;
- after = mktime(tm->tm_year, tm->tm_mon + 1, tm->tm_mday,
+ after = mktime(get_year(tm->tm_year), tm->tm_mon + 1, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
/* We use the guest's setting of the RTC to define the local-time
@@ -257,7 +259,7 @@ static void rtc_next_second(RTCState *s)
if ( (unsigned)tm->tm_wday >= 7 )
tm->tm_wday = 0;
days_in_month = get_days_in_month(tm->tm_mon,
- tm->tm_year + 1900);
+ get_year(tm->tm_year));
tm->tm_mday++;
if ( tm->tm_mday < 1 )
{
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 4d637833d6..28a256197e 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -585,6 +585,8 @@ __gnttab_map_grant_ref(
act->start = 0;
act->length = PAGE_SIZE;
act->is_sub_page = 0;
+ act->trans_domain = rd;
+ act->trans_gref = op->ref;
}
}
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 249bb355dd..7cb16940c8 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -35,6 +35,7 @@
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
+#include <xen/event.h>
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <public/sysctl.h>
@@ -300,6 +301,107 @@ static unsigned long init_node_heap(int node, unsigned long mfn,
return needed;
}
+/* Default to 64 MiB */
+#define DEFAULT_LOW_MEM_VIRQ (((paddr_t) 64) << 20)
+#define MAX_LOW_MEM_VIRQ (((paddr_t) 1024) << 20)
+
+static paddr_t __read_mostly opt_low_mem_virq = ((paddr_t) -1);
+size_param("low_mem_virq_limit", opt_low_mem_virq);
+
+/* Thresholds to control hysteresis. In pages */
+/* When memory grows above this threshold, reset hysteresis.
+ * -1 initially to not reset until at least one virq issued. */
+static unsigned long low_mem_virq_high = -1UL;
+/* Threshold at which we issue virq */
+static unsigned long low_mem_virq_th = 0;
+/* Original threshold after all checks completed */
+static unsigned long low_mem_virq_orig = 0;
+/* Order for current threshold */
+static unsigned int low_mem_virq_th_order = 0;
+
+/* Perform bootstrapping checks and set bounds */
+static void __init setup_low_mem_virq(void)
+{
+ unsigned int order;
+ paddr_t threshold;
+ bool_t halve;
+
+ /* If the user specifies zero, then he/she doesn't want this virq
+ * to ever trigger. */
+ if ( opt_low_mem_virq == 0 )
+ {
+ low_mem_virq_th = -1UL;
+ return;
+ }
+
+ /* If the user did not specify a knob, remember that */
+ halve = (opt_low_mem_virq == ((paddr_t) -1));
+ threshold = halve ? DEFAULT_LOW_MEM_VIRQ : opt_low_mem_virq;
+
+ /* Dom0 has already been allocated by now. So check we won't be
+ * complaining immediately with whatever's left of the heap. */
+ threshold = min(threshold,
+ ((paddr_t) total_avail_pages) << PAGE_SHIFT);
+
+ /* Then, cap to some predefined maximum */
+ threshold = min(threshold, MAX_LOW_MEM_VIRQ);
+
+ /* If the user specified no knob, and we are at the current available
+ * level, halve the threshold. */
+ if ( halve &&
+ (threshold == (((paddr_t) total_avail_pages) << PAGE_SHIFT)) )
+ threshold >>= 1;
+
+ /* Zero? Have to fire immediately */
+ threshold = max(threshold, (paddr_t) PAGE_SIZE);
+
+ /* Threshold bytes -> pages */
+ low_mem_virq_th = threshold >> PAGE_SHIFT;
+
+ /* Next, round the threshold down to the next order */
+ order = get_order_from_pages(low_mem_virq_th);
+ if ( (1UL << order) > low_mem_virq_th )
+ order--;
+
+ /* Set bounds, ready to go */
+ low_mem_virq_th = low_mem_virq_orig = 1UL << order;
+ low_mem_virq_th_order = order;
+
+ printk("Initial low memory virq threshold set at 0x%lx pages.\n",
+ low_mem_virq_th);
+}
+
+static void check_low_mem_virq(void)
+{
+ if ( unlikely(total_avail_pages <= low_mem_virq_th) )
+ {
+ send_global_virq(VIRQ_ENOMEM);
+
+ /* Update thresholds. Next warning will be when we drop below
+ * next order. However, we wait until we grow beyond one
+ * order above us to complain again at the current order */
+ low_mem_virq_high = 1UL << (low_mem_virq_th_order + 1);
+ if ( low_mem_virq_th_order > 0 )
+ low_mem_virq_th_order--;
+ low_mem_virq_th = 1UL << low_mem_virq_th_order;
+ return;
+ }
+
+ if ( unlikely(total_avail_pages >= low_mem_virq_high) )
+ {
+ /* Reset hysteresis. Bring threshold up one order.
+ * If we are back where originally set, set high
+ * threshold to -1 to avoid further growth of
+ * virq threshold. */
+ low_mem_virq_th_order++;
+ low_mem_virq_th = 1UL << low_mem_virq_th_order;
+ if ( low_mem_virq_th == low_mem_virq_orig )
+ low_mem_virq_high = -1UL;
+ else
+ low_mem_virq_high = 1UL << (low_mem_virq_th_order + 2);
+ }
+}
+
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
unsigned int zone_lo, unsigned int zone_hi,
@@ -420,6 +522,8 @@ static struct page_info *alloc_heap_pages(
total_avail_pages -= request;
ASSERT(total_avail_pages >= 0);
+ check_low_mem_virq();
+
if ( d != NULL )
d->last_alloc_node = node;
@@ -1022,6 +1126,10 @@ void __init scrub_heap_pages(void)
}
printk("done.\n");
+
+ /* Now that the heap is initialized, run checks and set bounds
+ * for the low mem virq algorithm. */
+ setup_low_mem_virq();
}
diff --git a/xen/drivers/passthrough/amd/iommu_init.c b/xen/drivers/passthrough/amd/iommu_init.c
index e091fb2490..3ce0acbda4 100644
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -367,6 +367,8 @@ static int iommu_read_log(struct amd_iommu *iommu,
u32 tail, head, *entry, tail_offest, head_offset;
BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log)));
+
+ spin_lock(&log->lock);
/* make sure there's an entry in the log */
tail_offest = ( log == &iommu->event_log ) ?
@@ -396,6 +398,8 @@ static int iommu_read_log(struct amd_iommu *iommu,
writel(head, iommu->mmio_base + head_offset);
}
+ spin_unlock(&log->lock);
+
return 0;
}
@@ -618,11 +622,11 @@ static void iommu_check_event_log(struct amd_iommu *iommu)
u32 entry;
unsigned long flags;
- spin_lock_irqsave(&iommu->lock, flags);
-
iommu_read_log(iommu, &iommu->event_log,
sizeof(event_entry_t), parse_event_log_entry);
+ spin_lock_irqsave(&iommu->lock, flags);
+
/*check event overflow */
entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -651,14 +655,10 @@ void parse_ppr_log_entry(struct amd_iommu *iommu, u32 entry[])
bus = PCI_BUS(device_id);
devfn = PCI_DEVFN2(device_id);
- local_irq_enable();
-
spin_lock(&pcidevs_lock);
pdev = pci_get_pdev(iommu->seg, bus, devfn);
spin_unlock(&pcidevs_lock);
- local_irq_disable();
-
if ( pdev == NULL )
return;
@@ -672,10 +672,10 @@ static void iommu_check_ppr_log(struct amd_iommu *iommu)
u32 entry;
unsigned long flags;
- spin_lock_irqsave(&iommu->lock, flags);
-
iommu_read_log(iommu, &iommu->ppr_log,
sizeof(ppr_entry_t), parse_ppr_log_entry);
+
+ spin_lock_irqsave(&iommu->lock, flags);
/*check event overflow */
entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
@@ -852,6 +852,8 @@ static void * __init allocate_ring_buffer(struct ring_buffer *ring_buf,
ring_buf->head = 0;
ring_buf->tail = 0;
+ spin_lock_init(&ring_buf->lock);
+
ring_buf->alloc_size = PAGE_SIZE << get_order_from_bytes(entries *
entry_size);
ring_buf->entries = ring_buf->alloc_size / entry_size;
diff --git a/xen/include/asm-x86/amd-iommu.h b/xen/include/asm-x86/amd-iommu.h
index 0a781c90d2..92423e21da 100644
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -65,6 +65,7 @@ struct ring_buffer {
unsigned long alloc_size;
uint32_t tail;
uint32_t head;
+ spinlock_t lock; /* protect buffer pointers */
};
typedef struct iommu_cap {
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index 041ad0be33..b2f6c507b9 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -158,6 +158,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */
#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
+#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
/* Architecture-specific VIRQ definitions. */
#define VIRQ_ARCH_0 16