Diffstat (limited to 'linux-2.6-xen-sparse')
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/Kconfig | 3
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/fixup.c | 3
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S | 3
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c | 38
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c | 40
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c | 8
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c | 16
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/init-xen.c | 10
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c | 10
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c | 27
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/oprofile/Makefile | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c | 545
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/Kconfig | 17
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/dig/setup.c | 23
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/Makefile | 62
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/kernel/setup.c | 4
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/Makefile | 3
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 375
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/util.c | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c | 303
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c | 319
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c | 656
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c | 263
-rw-r--r--  linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S | 21
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/Kconfig | 2
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S | 7
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c | 1
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c | 43
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c | 5
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c | 41
-rw-r--r--  linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile | 5
-rw-r--r--  linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c | 12
-rw-r--r--  linux-2.6-xen-sparse/drivers/char/tty_io.c | 14
-rw-r--r--  linux-2.6-xen-sparse/drivers/serial/Kconfig | 1
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/balloon/Makefile | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c | 201
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/balloon/common.h | 58
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c | 165
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c | 119
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkback/common.h | 7
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c | 5
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c | 23
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c | 74
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkfront/block.h | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c | 85
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c | 799
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c | 7
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/char/mem.c | 51
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/console/console.c | 36
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/Makefile | 2
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/features.c | 4
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/gnttab.c | 4
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c | 185
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/reboot.c | 210
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/core/skbuff.c | 7
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c | 3
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/common.h | 7
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/interface.c | 80
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/loopback.c | 62
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 216
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 123
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 249
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c | 195
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/tpmback/common.h | 8
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c | 19
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c | 14
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c | 4
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile | 1
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c | 9
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c | 11
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c | 5
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 297
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h | 74
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c | 271
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c | 11
-rw-r--r--  linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c | 500
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h | 10
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h | 13
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h | 56
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h | 21
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h | 1
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h | 1
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h | 4
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/xenoprof.h | 48
-rw-r--r--  linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h | 21
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/hypercall.h | 179
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h | 28
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/maddr.h | 20
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/xen/xcom_hcall.h | 76
-rw-r--r--  linux-2.6-xen-sparse/include/asm-ia64/xen/xencomm.h | 60
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h | 1
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h | 10
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h | 39
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h | 21
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h | 19
-rw-r--r--  linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h | 17
-rw-r--r--  linux-2.6-xen-sparse/include/linux/skbuff.h | 24
-rw-r--r--  linux-2.6-xen-sparse/include/xen/balloon.h | 20
-rw-r--r--  linux-2.6-xen-sparse/include/xen/gnttab.h | 5
-rw-r--r--  linux-2.6-xen-sparse/include/xen/public/evtchn.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/xen/xenbus.h | 1
-rw-r--r--  linux-2.6-xen-sparse/include/xen/xencons.h | 3
-rw-r--r--  linux-2.6-xen-sparse/include/xen/xenoprof.h | 42
-rw-r--r--  linux-2.6-xen-sparse/lib/Makefile | 2
-rw-r--r--  linux-2.6-xen-sparse/mm/Kconfig | 2
-rw-r--r--  linux-2.6-xen-sparse/mm/memory.c | 6
-rw-r--r--  linux-2.6-xen-sparse/mm/mmap.c | 17
-rw-r--r--  linux-2.6-xen-sparse/mm/page_alloc.c | 3
-rw-r--r--  linux-2.6-xen-sparse/net/core/skbuff.c | 107
117 files changed, 5600 insertions(+), 2389 deletions(-)
diff --git a/linux-2.6-xen-sparse/arch/i386/Kconfig b/linux-2.6-xen-sparse/arch/i386/Kconfig
index 661d0bbecd..b3ef1013c4 100644
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig
@@ -789,6 +789,9 @@ config DOUBLEFAULT
endmenu
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y
+ depends on HIGHMEM
menu "Power management options (ACPI, APM)"
depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c
index 20535e8fd4..2bf16fb732 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/fixup.c
@@ -46,6 +46,9 @@ fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
if (test_and_set_bit(0, &printed))
return;
+ if (current->tgid == 1) /* Ignore statically linked init */
+ return;
+
HYPERVISOR_vm_assist(
VMASST_CMD_disable, VMASST_TYPE_4gb_segments_notify);
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
index dcef669792..1d0278c69b 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S
@@ -9,7 +9,7 @@
#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
-#include <xen/interface/arch-x86_32.h>
+#include <xen/interface/xen.h>
#include <xen/interface/elfnote.h>
/*
@@ -192,6 +192,7 @@ ENTRY(cpu_gdt_table)
#endif /* !CONFIG_XEN_COMPAT_030002 */
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, startup_32)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page)
+ ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
#ifdef CONFIG_X86_PAE
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes")
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c
index 65d6ff995e..926ba175c3 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/microcode-xen.c
@@ -50,9 +50,6 @@ MODULE_LICENSE("GPL");
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DECLARE_MUTEX(microcode_sem);
-
-static void __user *user_buffer; /* user area microcode data buffer */
-static unsigned int user_buffer_size; /* it's size */
static int microcode_open (struct inode *unused1, struct file *unused2)
{
@@ -60,21 +57,26 @@ static int microcode_open (struct inode *unused1, struct file *unused2)
}
-static int do_microcode_update (void)
+static int do_microcode_update (const void __user *ubuf, size_t len)
{
int err;
- dom0_op_t op;
+ void *kbuf;
+
+ kbuf = vmalloc(len);
+ if (!kbuf)
+ return -ENOMEM;
- err = sys_mlock((unsigned long)user_buffer, user_buffer_size);
- if (err != 0)
- return err;
+ if (copy_from_user(kbuf, ubuf, len) == 0) {
+ dom0_op_t op;
- op.cmd = DOM0_MICROCODE;
- set_xen_guest_handle(op.u.microcode.data, user_buffer);
- op.u.microcode.length = user_buffer_size;
- err = HYPERVISOR_dom0_op(&op);
+ op.cmd = DOM0_MICROCODE;
+ set_xen_guest_handle(op.u.microcode.data, kbuf);
+ op.u.microcode.length = len;
+ err = HYPERVISOR_dom0_op(&op);
+ } else
+ err = -EFAULT;
- (void)sys_munlock((unsigned long)user_buffer, user_buffer_size);
+ vfree(kbuf);
return err;
}
@@ -88,17 +90,9 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
return -EINVAL;
}
- if ((len >> PAGE_SHIFT) > num_physpages) {
- printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
- return -EINVAL;
- }
-
down(&microcode_sem);
- user_buffer = (void __user *) buf;
- user_buffer_size = (int) len;
-
- ret = do_microcode_update();
+ ret = do_microcode_update(buf, len);
if (!ret)
ret = (ssize_t)len;
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
index 3457e31c8c..2586296dbb 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c
@@ -65,6 +65,7 @@
#include <xen/interface/physdev.h>
#include <xen/interface/memory.h>
#include <xen/features.h>
+#include <xen/xencons.h>
#include "setup_arch_pre.h"
#include <bios_ebda.h>
@@ -155,6 +156,9 @@ struct ist_info ist_info;
EXPORT_SYMBOL(ist_info);
#endif
struct e820map e820;
+#ifdef CONFIG_XEN
+struct e820map machine_e820;
+#endif
extern void early_cpu_init(void);
extern void generic_apic_probe(char *);
@@ -1450,7 +1454,6 @@ e820_setup_gap(struct e820entry *e820, int nr_map)
static void __init register_memory(void)
{
#ifdef CONFIG_XEN
- struct e820entry *machine_e820;
struct xen_memory_map memmap;
#endif
int i;
@@ -1460,14 +1463,14 @@ static void __init register_memory(void)
return;
#ifdef CONFIG_XEN
- machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
-
memmap.nr_entries = E820MAX;
- set_xen_guest_handle(memmap.buffer, machine_e820);
+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
- BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
+ BUG();
+ machine_e820.nr_map = memmap.nr_entries;
- legacy_init_iomem_resources(machine_e820, memmap.nr_entries,
+ legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
&code_resource, &data_resource);
#else
if (efi_enabled)
@@ -1485,8 +1488,7 @@ static void __init register_memory(void)
request_resource(&ioport_resource, &standard_io_resources[i]);
#ifdef CONFIG_XEN
- e820_setup_gap(machine_e820, memmap.nr_entries);
- free_bootmem(__pa(machine_e820), PAGE_SIZE);
+ e820_setup_gap(machine_e820.map, machine_e820.nr_map);
#else
e820_setup_gap(e820.map, e820.nr_map);
#endif
@@ -1665,33 +1667,15 @@ void __init setup_arch(char **cmdline_p)
screen_info.orig_video_cols = 80;
screen_info.orig_video_ega_bx = 3;
screen_info.orig_video_points = 16;
+ screen_info.orig_y = screen_info.orig_video_lines - 1;
if (xen_start_info->console.dom0.info_size >=
sizeof(struct dom0_vga_console_info)) {
const struct dom0_vga_console_info *info =
(struct dom0_vga_console_info *)(
(char *)xen_start_info +
xen_start_info->console.dom0.info_off);
- screen_info.orig_video_mode = info->txt_mode;
- screen_info.orig_video_isVGA = info->video_type;
- screen_info.orig_video_lines = info->video_height;
- screen_info.orig_video_cols = info->video_width;
- screen_info.orig_video_points = info->txt_points;
- screen_info.lfb_width = info->video_width;
- screen_info.lfb_height = info->video_height;
- screen_info.lfb_depth = info->lfb_depth;
- screen_info.lfb_base = info->lfb_base;
- screen_info.lfb_size = info->lfb_size;
- screen_info.lfb_linelength = info->lfb_linelen;
- screen_info.red_size = info->red_size;
- screen_info.red_pos = info->red_pos;
- screen_info.green_size = info->green_size;
- screen_info.green_pos = info->green_pos;
- screen_info.blue_size = info->blue_size;
- screen_info.blue_pos = info->blue_pos;
- screen_info.rsvd_size = info->rsvd_size;
- screen_info.rsvd_pos = info->rsvd_pos;
+ dom0_init_screen_info(info);
}
- screen_info.orig_y = screen_info.orig_video_lines - 1;
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
} else
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c
index 844c87e78c..f300bd159e 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/sysenter.c
@@ -60,7 +60,7 @@ int __init sysenter_setup(void)
#ifdef CONFIG_XEN
if (boot_cpu_has(X86_FEATURE_SEP)) {
- struct callback_register sysenter = {
+ static struct callback_register __initdata sysenter = {
.type = CALLBACKTYPE_sysenter,
.address = { __KERNEL_CS, (unsigned long)sysenter_entry },
};
diff --git a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
index cc9907901b..05f3c47e50 100644
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c
@@ -716,6 +716,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
rcu_check_callbacks(cpu, user_mode(regs));
scheduler_tick();
run_posix_cpu_timers(current);
+ profile_tick(CPU_PROFILING, regs);
return IRQ_HANDLED;
}
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
index 16a0155ecb..4939ab106e 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/fault-xen.c
@@ -282,12 +282,6 @@ static int spurious_fault(struct pt_regs *regs,
pmd_t *pmd;
pte_t *pte;
-#ifdef CONFIG_XEN
- /* Faults in hypervisor area are never spurious. */
- if (address >= HYPERVISOR_VIRT_START)
- return 0;
-#endif
-
/* Reserved-bit violation or user access to kernel space? */
if (error_code & 0x0c)
return 0;
@@ -372,7 +366,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
if (unlikely(address >= TASK_SIZE)) {
#ifdef CONFIG_XEN
/* Faults in hypervisor area can never be patched up. */
- if (address >= HYPERVISOR_VIRT_START)
+ if (address >= hypervisor_virt_start)
goto bad_area_nosemaphore;
#endif
if (!(error_code & 5))
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
index 5dc6646cf5..3c20351e92 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/hypervisor.c
@@ -99,18 +99,6 @@ void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
}
#endif /* CONFIG_X86_64 */
-void xen_machphys_update(unsigned long mfn, unsigned long pfn)
-{
- mmu_update_t u;
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- BUG_ON(pfn != mfn);
- return;
- }
- u.ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
- u.val = pfn;
- BUG_ON(HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF) < 0);
-}
-
void xen_pt_switch(unsigned long ptr)
{
struct mmuext_op op;
@@ -325,6 +313,7 @@ int xen_create_contiguous_region(
success = (exchange.nr_exchanged == (1UL << order));
BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
BUG_ON(success && (rc != 0));
+#ifdef CONFIG_XEN_COMPAT_030002
if (unlikely(rc == -ENOSYS)) {
/* Compatibility when XENMEM_exchange is unsupported. */
if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
@@ -341,6 +330,7 @@ int xen_create_contiguous_region(
BUG();
}
}
+#endif
/* 3. Map the new extent in place of old pages. */
for (i = 0; i < (1UL<<order); i++) {
@@ -419,6 +409,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
success = (exchange.nr_exchanged == 1);
BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
BUG_ON(success && (rc != 0));
+#ifdef CONFIG_XEN_COMPAT_030002
if (unlikely(rc == -ENOSYS)) {
/* Compatibility when XENMEM_exchange is unsupported. */
if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
@@ -429,6 +420,7 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
BUG();
success = 1;
}
+#endif
/* 4. Map new pages in place of old pages. */
for (i = 0; i < (1UL<<order); i++) {
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
index c0c0bb2fa8..4d2b33068f 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c
@@ -130,7 +130,7 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
- if (vaddr < HYPERVISOR_VIRT_START && pmd_none(*pmd))
+ if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
one_page_table_init(pmd);
vaddr += PMD_SIZE;
@@ -187,7 +187,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
pmd += pmd_idx;
for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn; pmd++, pmd_idx++) {
unsigned int address = pfn * PAGE_SIZE + PAGE_OFFSET;
- if (address >= HYPERVISOR_VIRT_START)
+ if (address >= hypervisor_virt_start)
continue;
/* Map with big pages if possible, otherwise create normal page tables. */
@@ -410,7 +410,7 @@ static void __init pagetable_init (void)
* created - mappings will be set by set_fixmap():
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
- page_table_range_init(vaddr, 0, pgd_base);
+ page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);
permanent_kmaps_init(pgd_base);
}
@@ -663,8 +663,8 @@ void __init mem_init(void)
totalram_pages += free_all_bootmem();
/* XEN: init and count low-mem pages outside initial allocation. */
for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
- ClearPageReserved(&mem_map[pfn]);
- set_page_count(&mem_map[pfn], 1);
+ ClearPageReserved(pfn_to_page(pfn));
+ set_page_count(pfn_to_page(pfn), 1);
totalram_pages++;
}
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
index 2fac26719c..b2e8832b85 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
@@ -29,6 +29,8 @@ static int direct_remap_area_pte_fn(pte_t *pte,
{
mmu_update_t **v = (mmu_update_t **)data;
+ BUG_ON(!pte_none(*pte));
+
(*v)->ptr = ((u64)pfn_to_mfn(page_to_pfn(pmd_page)) <<
PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
(*v)++;
@@ -110,12 +112,14 @@ int direct_remap_pfn_range(struct vm_area_struct *vma,
pgprot_t prot,
domid_t domid)
{
- /* Same as remap_pfn_range(). */
- vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return remap_pfn_range(vma, address, mfn, size, prot);
if (domid == DOMID_SELF)
return -EINVAL;
+ vma->vm_flags |= VM_IO | VM_RESERVED;
+
vma->vm_mm->context.has_foreign_mappings = 1;
return __direct_remap_pfn_range(
@@ -245,7 +249,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
return NULL;
area->phys_addr = phys_addr;
addr = (void __iomem *) area->addr;
- flags |= _PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+ flags |= _KERNPG_TABLE;
if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
phys_addr>>PAGE_SHIFT,
size, __pgprot(flags), domid)) {
diff --git a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
index 843c9d0fd3..0ff01f52f8 100644
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
@@ -102,8 +102,11 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
return;
}
pte = pte_offset_kernel(pmd, vaddr);
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte(pfn, flags));
+ if (pgprot_val(flags))
+ /* <pfn,flags> stored as-is, to permit clearing entries */
+ set_pte(pte, pfn_pte(pfn, flags));
+ else
+ pte_clear(&init_mm, vaddr, pte);
/*
* It's enough to flush this one mapping.
@@ -140,8 +143,11 @@ static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn,
return;
}
pte = pte_offset_kernel(pmd, vaddr);
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte_ma(pfn, flags));
+ if (pgprot_val(flags))
+ /* <pfn,flags> stored as-is, to permit clearing entries */
+ set_pte(pte, pfn_pte_ma(pfn, flags));
+ else
+ pte_clear(&init_mm, vaddr, pte);
/*
* It's enough to flush this one mapping.
@@ -186,9 +192,16 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
}
static int nr_fixmaps = 0;
+unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
EXPORT_SYMBOL(__FIXADDR_TOP);
+void __init set_fixaddr_top()
+{
+ BUG_ON(nr_fixmaps > 0);
+ __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
+}
+
void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
{
unsigned long address = __fix_to_virt(idx);
@@ -211,12 +224,6 @@ void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
nr_fixmaps++;
}
-void set_fixaddr_top(unsigned long top)
-{
- BUG_ON(nr_fixmaps > 0);
- __FIXADDR_TOP = top - PAGE_SIZE;
-}
-
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
diff --git a/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile
index e596c39c09..caaff108dc 100644
--- a/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/Makefile
@@ -7,7 +7,10 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
timer_int.o )
ifdef CONFIG_XEN
-oprofile-y := $(DRIVER_OBJS) xenoprof.o
+XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \
+ xenoprofile.o)
+oprofile-y := $(DRIVER_OBJS) \
+ $(XENOPROF_COMMON_OBJS) xenoprof.o
else
oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o \
diff --git a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
index ed648c72cd..cf6d463fe7 100644
--- a/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
+++ b/linux-2.6-xen-sparse/arch/i386/oprofile/xenoprof.c
@@ -9,249 +9,83 @@
* Modified by Aravind Menon and Jose Renato Santos for Xen
* These modifications are:
* Copyright (C) 2005 Hewlett-Packard Co.
+ *
+ * x86-specific part
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
*/
#include <linux/init.h>
-#include <linux/notifier.h>
-#include <linux/smp.h>
#include <linux/oprofile.h>
-#include <linux/sysdev.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/vmalloc.h>
-#include <asm/nmi.h>
-#include <asm/msr.h>
-#include <asm/apic.h>
+#include <linux/sched.h>
#include <asm/pgtable.h>
-#include <xen/evtchn.h>
-#include "op_counter.h"
#include <xen/driver_util.h>
#include <xen/interface/xen.h>
#include <xen/interface/xenoprof.h>
-#include <../../../drivers/oprofile/cpu_buffer.h>
-#include <../../../drivers/oprofile/event_buffer.h>
-
-#define MAX_XENOPROF_SAMPLES 16
-
-static int xenoprof_start(void);
-static void xenoprof_stop(void);
+#include <xen/xenoprof.h>
+#include "op_counter.h"
-static int xenoprof_enabled = 0;
static unsigned int num_events = 0;
-static int is_primary = 0;
-static int active_defined;
-/* sample buffers shared with Xen */
-xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
-/* Shared buffer area */
-char * shared_buffer = NULL;
-/* Number of buffers in shared area (one per VCPU) */
-int nbuf;
-/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
-int ovf_irq[NR_CPUS];
-/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
-char cpu_type[XENOPROF_CPU_TYPE_SIZE];
-
-/* Passive sample buffers shared with Xen */
-xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
-/* Passive shared buffer area */
-char *p_shared_buffer[MAX_OPROF_DOMAINS];
-
-#ifdef CONFIG_PM
-
-static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+void __init xenoprof_arch_init_counter(struct xenoprof_init *init)
{
- if (xenoprof_enabled == 1)
- xenoprof_stop();
- return 0;
-}
-
-
-static int xenoprof_resume(struct sys_device * dev)
-{
- if (xenoprof_enabled == 1)
- xenoprof_start();
- return 0;
-}
-
-
-static struct sysdev_class oprofile_sysclass = {
- set_kset_name("oprofile"),
- .resume = xenoprof_resume,
- .suspend = xenoprof_suspend
-};
-
-
-static struct sys_device device_oprofile = {
- .id = 0,
- .cls = &oprofile_sysclass,
-};
-
-
-static int __init init_driverfs(void)
-{
- int error;
- if (!(error = sysdev_class_register(&oprofile_sysclass)))
- error = sysdev_register(&device_oprofile);
- return error;
-}
-
-
-static void __exit exit_driverfs(void)
-{
- sysdev_unregister(&device_oprofile);
- sysdev_class_unregister(&oprofile_sysclass);
-}
-
-#else
-#define init_driverfs() do { } while (0)
-#define exit_driverfs() do { } while (0)
-#endif /* CONFIG_PM */
-
-unsigned long long oprofile_samples = 0;
-unsigned long long p_oprofile_samples = 0;
-
-unsigned int pdomains;
-struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
-
-static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
-{
- int head, tail, size;
-
- head = buf->event_head;
- tail = buf->event_tail;
- size = buf->event_size;
-
- if (tail > head) {
- while (tail < size) {
- oprofile_add_pc(buf->event_log[tail].eip,
- buf->event_log[tail].mode,
- buf->event_log[tail].event);
- if (!is_passive)
- oprofile_samples++;
- else
- p_oprofile_samples++;
- tail++;
- }
- tail = 0;
- }
- while (tail < head) {
- oprofile_add_pc(buf->event_log[tail].eip,
- buf->event_log[tail].mode,
- buf->event_log[tail].event);
- if (!is_passive)
- oprofile_samples++;
- else
- p_oprofile_samples++;
- tail++;
+ num_events = init->num_events;
+ /* just in case - make sure we do not overflow event list
+ (i.e. counter_config list) */
+ if (num_events > OP_MAX_COUNTER) {
+ num_events = OP_MAX_COUNTER;
+ init->num_events = num_events;
}
-
- buf->event_tail = tail;
}
-static void xenoprof_handle_passive(void)
+void xenoprof_arch_counter(void)
{
- int i, j;
- int flag_domain, flag_switch = 0;
-
- for (i = 0; i < pdomains; i++) {
- flag_domain = 0;
- for (j = 0; j < passive_domains[i].nbuf; j++) {
- xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
- if (buf->event_head == buf->event_tail)
- continue;
- if (!flag_domain) {
- if (!oprofile_add_domain_switch(passive_domains[i].
- domain_id))
- goto done;
- flag_domain = 1;
- }
- xenoprof_add_pc(buf, 1);
- flag_switch = 1;
- }
+ int i;
+ struct xenoprof_counter counter;
+
+ for (i=0; i<num_events; i++) {
+ counter.ind = i;
+ counter.count = (uint64_t)counter_config[i].count;
+ counter.enabled = (uint32_t)counter_config[i].enabled;
+ counter.event = (uint32_t)counter_config[i].event;
+ counter.kernel = (uint32_t)counter_config[i].kernel;
+ counter.user = (uint32_t)counter_config[i].user;
+ counter.unit_mask = (uint64_t)counter_config[i].unit_mask;
+ HYPERVISOR_xenoprof_op(XENOPROF_counter,
+ &counter);
}
-done:
- if (flag_switch)
- oprofile_add_domain_switch(COORDINATOR_DOMAIN);
}
-static irqreturn_t
-xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
+void xenoprof_arch_start(void)
{
- struct xenoprof_buf * buf;
- int cpu;
- static unsigned long flag;
-
- cpu = smp_processor_id();
- buf = xenoprof_buf[cpu];
-
- xenoprof_add_pc(buf, 0);
-
- if (is_primary && !test_and_set_bit(0, &flag)) {
- xenoprof_handle_passive();
- smp_mb__before_clear_bit();
- clear_bit(0, &flag);
- }
-
- return IRQ_HANDLED;
+ /* nothing */
}
-
-static void unbind_virq(void)
+void xenoprof_arch_stop(void)
{
- int i;
-
- for_each_cpu(i) {
- if (ovf_irq[i] >= 0) {
- unbind_from_irqhandler(ovf_irq[i], NULL);
- ovf_irq[i] = -1;
- }
- }
+ /* nothing */
}
-
-static int bind_virq(void)
+void xenoprof_arch_unmap_shared_buffer(struct xenoprof_shared_buffer * sbuf)
{
- int i, result;
-
- for_each_cpu(i) {
- result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
- i,
- xenoprof_ovf_interrupt,
- SA_INTERRUPT,
- "xenoprof",
- NULL);
-
- if (result < 0) {
- unbind_virq();
- return result;
- }
-
- ovf_irq[i] = result;
+ if (sbuf->buffer) {
+ vunmap(sbuf->buffer);
+ sbuf->buffer = NULL;
}
-
- return 0;
}
-
-static int map_xenoprof_buffer(int max_samples)
+int xenoprof_arch_map_shared_buffer(struct xenoprof_get_buffer * get_buffer,
+ struct xenoprof_shared_buffer * sbuf)
{
- struct xenoprof_get_buffer get_buffer;
- struct xenoprof_buf *buf;
- int npages, ret, i;
+ int npages, ret;
struct vm_struct *area;
- if ( shared_buffer )
- return 0;
-
- get_buffer.max_samples = max_samples;
-
- if ( (ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, &get_buffer)) )
+ sbuf->buffer = NULL;
+ if ( (ret = HYPERVISOR_xenoprof_op(XENOPROF_get_buffer, get_buffer)) )
return ret;
- nbuf = get_buffer.nbuf;
- npages = (get_buffer.bufsize * nbuf - 1) / PAGE_SIZE + 1;
+ npages = (get_buffer->bufsize * get_buffer->nbuf - 1) / PAGE_SIZE + 1;
area = alloc_vm_area(npages * PAGE_SIZE);
if (area == NULL)
@@ -259,231 +93,55 @@ static int map_xenoprof_buffer(int max_samples)
if ( (ret = direct_kernel_remap_pfn_range(
(unsigned long)area->addr,
- get_buffer.buf_maddr >> PAGE_SHIFT,
- npages * PAGE_SIZE, __pgprot(_KERNPG_TABLE), DOMID_SELF)) ) {
+ get_buffer->buf_gmaddr >> PAGE_SHIFT,
+ npages * PAGE_SIZE, __pgprot(_KERNPG_TABLE),
+ DOMID_SELF)) ) {
vunmap(area->addr);
return ret;
}
- shared_buffer = area->addr;
- for (i=0; i< nbuf; i++) {
- buf = (struct xenoprof_buf*)
- &shared_buffer[i * get_buffer.bufsize];
- BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
- xenoprof_buf[buf->vcpu_id] = buf;
- }
-
- return 0;
-}
-
-
-static int xenoprof_setup(void)
-{
- int ret;
- int i;
-
- if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
- return ret;
-
- if ( (ret = bind_virq()) )
- return ret;
-
- if (is_primary) {
- struct xenoprof_counter counter;
-
- /* Define dom0 as an active domain if not done yet */
- if (!active_defined) {
- domid_t domid;
- ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
- if (ret)
- goto err;
- domid = 0;
- ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
- if (ret)
- goto err;
- active_defined = 1;
- }
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
- if (ret)
- goto err;
- for (i=0; i<num_events; i++) {
- counter.ind = i;
- counter.count = (uint64_t)counter_config[i].count;
- counter.enabled = (uint32_t)counter_config[i].enabled;
- counter.event = (uint32_t)counter_config[i].event;
- counter.kernel = (uint32_t)counter_config[i].kernel;
- counter.user = (uint32_t)counter_config[i].user;
- counter.unit_mask = (uint64_t)counter_config[i].unit_mask;
- HYPERVISOR_xenoprof_op(XENOPROF_counter,
- &counter);
- }
- ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
-
- if (ret)
- goto err;
- }
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
- if (ret)
- goto err;
-
- xenoprof_enabled = 1;
- return 0;
- err:
- unbind_virq();
- return ret;
-}
-
-
-static void xenoprof_shutdown(void)
-{
- xenoprof_enabled = 0;
-
- HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL);
-
- if (is_primary) {
- HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL);
- active_defined = 0;
- }
-
- unbind_virq();
-
-}
-
-
-static int xenoprof_start(void)
-{
- int ret = 0;
-
- if (is_primary)
- ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
-
- return ret;
-}
-
-
-static void xenoprof_stop(void)
-{
- if (is_primary)
- HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);
-}
-
-
-static int xenoprof_set_active(int * active_domains,
- unsigned int adomains)
-{
- int ret = 0;
- int i;
- int set_dom0 = 0;
- domid_t domid;
-
- if (!is_primary)
- return 0;
-
- if (adomains > MAX_OPROF_DOMAINS)
- return -E2BIG;
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
- if (ret)
- return ret;
-
- for (i=0; i<adomains; i++) {
- domid = active_domains[i];
- if (domid != active_domains[i]) {
- ret = -EINVAL;
- goto out;
- }
- ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
- if (ret)
- goto out;
- if (active_domains[i] == 0)
- set_dom0 = 1;
- }
- /* dom0 must always be active but may not be in the list */
- if (!set_dom0) {
- domid = 0;
- ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
- }
-
-out:
- if (ret)
- HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
- active_defined = !ret;
+ sbuf->buffer = area->addr;
return ret;
}
-static int xenoprof_set_passive(int * p_domains,
- unsigned int pdoms)
+int xenoprof_arch_set_passive(struct xenoprof_passive * pdomain,
+ struct xenoprof_shared_buffer * sbuf)
{
int ret;
- int i, j;
int npages;
- struct xenoprof_buf *buf;
struct vm_struct *area;
pgprot_t prot = __pgprot(_KERNPG_TABLE);
- if (!is_primary)
- return 0;
-
- if (pdoms > MAX_OPROF_DOMAINS)
- return -E2BIG;
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
+ sbuf->buffer = NULL;
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive, pdomain);
if (ret)
- return ret;
-
- for (i = 0; i < pdoms; i++) {
- passive_domains[i].domain_id = p_domains[i];
- passive_domains[i].max_samples = 2048;
- ret = HYPERVISOR_xenoprof_op(XENOPROF_set_passive,
- &passive_domains[i]);
- if (ret)
- goto out;
-
- npages = (passive_domains[i].bufsize * passive_domains[i].nbuf - 1) / PAGE_SIZE + 1;
-
- area = alloc_vm_area(npages * PAGE_SIZE);
- if (area == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- ret = direct_kernel_remap_pfn_range(
- (unsigned long)area->addr,
- passive_domains[i].buf_maddr >> PAGE_SHIFT,
- npages * PAGE_SIZE, prot, DOMID_SELF);
- if (ret) {
- vunmap(area->addr);
- goto out;
- }
+ goto out;
- p_shared_buffer[i] = area->addr;
-
- for (j = 0; j < passive_domains[i].nbuf; j++) {
- buf = (struct xenoprof_buf *)
- &p_shared_buffer[i][j * passive_domains[i].bufsize];
- BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
- p_xenoprof_buf[i][buf->vcpu_id] = buf;
- }
+ npages = (pdomain->bufsize * pdomain->nbuf - 1) / PAGE_SIZE + 1;
+ area = alloc_vm_area(npages * PAGE_SIZE);
+ if (area == NULL) {
+ ret = -ENOMEM;
+ goto out;
}
- pdomains = pdoms;
- return 0;
-
-out:
- for (j = 0; j < i; j++) {
- vunmap(p_shared_buffer[j]);
- p_shared_buffer[j] = NULL;
+ ret = direct_kernel_remap_pfn_range(
+ (unsigned long)area->addr,
+ pdomain->buf_gmaddr >> PAGE_SHIFT,
+ npages * PAGE_SIZE, prot, DOMID_SELF);
+ if (ret) {
+ vunmap(area->addr);
+ goto out;
}
+ sbuf->buffer = area->addr;
- return ret;
+out:
+ return ret;
}
struct op_counter_config counter_config[OP_MAX_COUNTER];
-static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
+int xenoprof_create_files(struct super_block * sb, struct dentry * root)
{
unsigned int i;
@@ -510,75 +168,12 @@ static int xenoprof_create_files(struct super_block * sb, struct dentry * root)
return 0;
}
-
-struct oprofile_operations xenoprof_ops = {
- .create_files = xenoprof_create_files,
- .set_active = xenoprof_set_active,
- .set_passive = xenoprof_set_passive,
- .setup = xenoprof_setup,
- .shutdown = xenoprof_shutdown,
- .start = xenoprof_start,
- .stop = xenoprof_stop
-};
-
-
-/* in order to get driverfs right */
-static int using_xenoprof;
-
int __init oprofile_arch_init(struct oprofile_operations * ops)
{
- struct xenoprof_init init;
- int ret, i;
-
- ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
-
- if (!ret) {
- num_events = init.num_events;
- is_primary = init.is_primary;
-
- /* just in case - make sure we do not overflow event list
- (i.e. counter_config list) */
- if (num_events > OP_MAX_COUNTER)
- num_events = OP_MAX_COUNTER;
-
- /* cpu_type is detected by Xen */
- cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
- strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
- xenoprof_ops.cpu_type = cpu_type;
-
- init_driverfs();
- using_xenoprof = 1;
- *ops = xenoprof_ops;
-
- for (i=0; i<NR_CPUS; i++)
- ovf_irq[i] = -1;
-
- active_defined = 0;
- }
- printk(KERN_INFO "oprofile_arch_init: ret %d, events %d, "
- "is_primary %d\n", ret, num_events, is_primary);
- return ret;
+ return xenoprofile_init(ops);
}
-
-void __exit oprofile_arch_exit(void)
+void oprofile_arch_exit(void)
{
- int i;
-
- if (using_xenoprof)
- exit_driverfs();
-
- if (shared_buffer) {
- vunmap(shared_buffer);
- shared_buffer = NULL;
- }
- if (is_primary) {
- for (i = 0; i < pdomains; i++)
- if (p_shared_buffer[i]) {
- vunmap(p_shared_buffer[i]);
- p_shared_buffer[i] = NULL;
- }
- HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);
- }
-
+ xenoprofile_exit();
}
diff --git a/linux-2.6-xen-sparse/arch/ia64/Kconfig b/linux-2.6-xen-sparse/arch/ia64/Kconfig
index c32f4cee4c..4073a04638 100644
--- a/linux-2.6-xen-sparse/arch/ia64/Kconfig
+++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig
@@ -64,6 +64,20 @@ config XEN_IA64_VDSO_PARAVIRT
help
vDSO paravirtualization
+config XEN_IA64_EXPOSE_P2M
+ bool "Xen/IA64 exposure p2m table"
+ depends on XEN
+ default y
+ help
+ expose p2m from xen
+
+config XEN_IA64_EXPOSE_P2M_USE_DTR
+ bool "Xen/IA64 map p2m table with dtr"
+ depends on XEN_IA64_EXPOSE_P2M
+ default y
+ help
+ use dtr to map the exposed p2m table
+
config SCHED_NO_NO_OMIT_FRAME_POINTER
bool
default y
@@ -276,6 +290,9 @@ config HOTPLUG_CPU
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y
+
config SCHED_SMT
bool "SMT scheduler support"
depends on SMP
diff --git a/linux-2.6-xen-sparse/arch/ia64/dig/setup.c b/linux-2.6-xen-sparse/arch/ia64/dig/setup.c
index 90d6ab64fa..7f3826991a 100644
--- a/linux-2.6-xen-sparse/arch/ia64/dig/setup.c
+++ b/linux-2.6-xen-sparse/arch/ia64/dig/setup.c
@@ -25,6 +25,8 @@
#include <asm/machvec.h>
#include <asm/system.h>
+#include <xen/xencons.h>
+
void __init
dig_setup (char **cmdline_p)
{
@@ -78,27 +80,8 @@ dig_setup (char **cmdline_p)
(struct dom0_vga_console_info *)(
(char *)xen_start_info +
xen_start_info->console.dom0.info_off);
- screen_info.orig_video_mode = info->txt_mode;
- screen_info.orig_video_isVGA = info->video_type;
- screen_info.orig_video_lines = info->video_height;
- screen_info.orig_video_cols = info->video_width;
- screen_info.orig_video_points = info->txt_points;
- screen_info.lfb_width = info->video_width;
- screen_info.lfb_height = info->video_height;
- screen_info.lfb_depth = info->lfb_depth;
- screen_info.lfb_base = info->lfb_base;
- screen_info.lfb_size = info->lfb_size;
- screen_info.lfb_linelength = info->lfb_linelen;
- screen_info.red_size = info->red_size;
- screen_info.red_pos = info->red_pos;
- screen_info.green_size = info->green_size;
- screen_info.green_pos = info->green_pos;
- screen_info.blue_size = info->blue_size;
- screen_info.blue_pos = info->blue_pos;
- screen_info.rsvd_size = info->rsvd_size;
- screen_info.rsvd_pos = info->rsvd_pos;
+ dom0_init_screen_info(info);
}
- screen_info.orig_y = screen_info.orig_video_lines - 1;
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile b/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile
new file mode 100644
index 0000000000..003e9ee600
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/Makefile
@@ -0,0 +1,62 @@
+#
+# Makefile for the linux kernel.
+#
+
+extra-y := head.o init_task.o vmlinux.lds
+
+obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
+ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
+ salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ unwind.o mca.o mca_asm.o topology.o
+
+obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
+obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
+obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o
+obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
+
+ifneq ($(CONFIG_ACPI_PROCESSOR),)
+obj-y += acpi-processor.o
+endif
+
+obj-$(CONFIG_IA64_PALINFO) += palinfo.o
+obj-$(CONFIG_IOSAPIC) += iosapic.o
+obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o
+obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
+obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
+obj-$(CONFIG_CPU_FREQ) += cpufreq/
+obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
+obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o
+obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o
+mca_recovery-y += mca_drv.o mca_drv_asm.o
+
+# The gate DSO image is built using a special linker script.
+targets += gate.so gate-syms.o
+
+extra-y += gate.so gate-syms.o gate.lds gate.o
+
+# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
+CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
+
+CPPFLAGS_gate.lds := -P -C -U$(ARCH)
+
+quiet_cmd_gate = GATE $@
+ cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \
+ $(call ld-option, -Wl$(comma)--hash-style=sysv)
+$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+$(obj)/built-in.o: $(obj)/gate-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
+
+GATECFLAGS_gate-syms.o = -r
+$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
+ $(call if_changed,gate)
+
+# gate-data.o contains the gate DSO image as data in section .data.gate.
+# We must build gate.so before we can assemble it.
+# Note: kbuild does not track this dependency due to usage of .incbin
+$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
index 5f0163f0be..45377beaa4 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/gate.lds.S
@@ -13,6 +13,7 @@ SECTIONS
. = GATE_ADDR + SIZEOF_HEADERS;
.hash : { *(.hash) } :readable
+ .gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
diff --git a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
index f792900cf9..8f15b3001c 100644
--- a/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
+++ b/linux-2.6-xen-sparse/arch/ia64/kernel/setup.c
@@ -63,6 +63,7 @@
#include <asm/system.h>
#ifdef CONFIG_XEN
#include <asm/hypervisor.h>
+#include <asm/xen/xencomm.h>
#endif
#include <linux/dma-mapping.h>
@@ -433,6 +434,9 @@ setup_arch (char **cmdline_p)
#ifdef CONFIG_XEN
if (is_running_on_xen()) {
+ /* Must be done before any hypercall. */
+ xencomm_init();
+
setup_xen_features();
/* Register a call for panic conditions. */
notifier_chain_register(&panic_notifier_list, &xen_panic_block);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
index c2b4f94edd..36434aac72 100644
--- a/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/Makefile
@@ -3,6 +3,7 @@
#
obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \
- hypervisor.o pci-dma-xen.o util.o
+ hypervisor.o pci-dma-xen.o util.o xencomm.o xcom_hcall.o \
+ xcom_mini.o xcom_privcmd.o
pci-dma-xen-y := ../../i386/kernel/pci-dma-xen.o
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
index 0b286047b9..2a85caa0d5 100644
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
@@ -40,59 +40,11 @@ EXPORT_SYMBOL(xen_start_info);
int running_on_xen;
EXPORT_SYMBOL(running_on_xen);
-//XXX xen/ia64 copy_from_guest() is broken.
-// This is a temporal work around until it is fixed.
-// used by balloon.c netfront.c
-
-// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined
-// if the definition in arch-ia64.h is changed, this must be updated.
-#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
-
-int
-ia64_xenmem_reservation_op(unsigned long op,
- struct xen_memory_reservation* reservation__)
-{
- struct xen_memory_reservation reservation = *reservation__;
- unsigned long* frame_list;
- unsigned long nr_extents = reservation__->nr_extents;
- int ret = 0;
- get_xen_guest_handle(frame_list, reservation__->extent_start);
-
- BUG_ON(op != XENMEM_increase_reservation &&
- op != XENMEM_decrease_reservation &&
- op != XENMEM_populate_physmap);
-
- while (nr_extents > 0) {
- int tmp_ret;
- volatile unsigned long dummy;
-
- set_xen_guest_handle(reservation.extent_start, frame_list);
- reservation.nr_extents = nr_extents;
-
- dummy = frame_list[0];// re-install tlb entry before hypercall
- tmp_ret = ____HYPERVISOR_memory_op(op, &reservation);
- if (tmp_ret < 0) {
- if (ret == 0) {
- ret = tmp_ret;
- }
- break;
- }
- if (tmp_ret == 0) {
- //XXX dirty work around for skbuff_ctor()
- // of a non-privileged domain,
- if ((op == XENMEM_increase_reservation ||
- op == XENMEM_populate_physmap) &&
- !is_initial_xendomain() &&
- reservation.extent_order > 0)
- return ret;
- }
- frame_list += tmp_ret;
- nr_extents -= tmp_ret;
- ret += tmp_ret;
- }
- return ret;
-}
-EXPORT_SYMBOL(ia64_xenmem_reservation_op);
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
+static int p2m_expose_init(void);
+#else
+#define p2m_expose_init() (-ENOSYS)
+#endif
//XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear()
// move those to lib/contiguous_bitmap?
@@ -371,8 +323,6 @@ gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop)
int
HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
{
- __u64 va1, va2, pa1, pa2;
-
if (cmd == GNTTABOP_map_grant_ref) {
unsigned int i;
for (i = 0; i < count; i++) {
@@ -380,29 +330,7 @@ HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count)
(struct gnttab_map_grant_ref*)uop + i);
}
}
- va1 = (__u64)uop & PAGE_MASK;
- pa1 = pa2 = 0;
- if ((REGION_NUMBER(va1) == 5) &&
- ((va1 - KERNEL_START) >= KERNEL_TR_PAGE_SIZE)) {
- pa1 = ia64_tpa(va1);
- if (cmd <= GNTTABOP_transfer) {
- static uint32_t uop_size[GNTTABOP_transfer + 1] = {
- sizeof(struct gnttab_map_grant_ref),
- sizeof(struct gnttab_unmap_grant_ref),
- sizeof(struct gnttab_setup_table),
- sizeof(struct gnttab_dump_table),
- sizeof(struct gnttab_transfer),
- };
- va2 = (__u64)uop + (uop_size[cmd] * count) - 1;
- va2 &= PAGE_MASK;
- if (va1 != va2) {
- /* maximum size of uop is 2pages */
- BUG_ON(va2 > va1 + PAGE_SIZE);
- pa2 = ia64_tpa(va2);
- }
- }
- }
- return ____HYPERVISOR_grant_table_op(cmd, uop, count, pa1, pa2);
+ return xencomm_mini_hypercall_grant_table_op(cmd, uop, count);
}
EXPORT_SYMBOL(HYPERVISOR_grant_table_op);
@@ -526,6 +454,10 @@ out:
privcmd_resource_min, privcmd_resource_max,
(privcmd_resource_max - privcmd_resource_min) >> 20);
BUG_ON(privcmd_resource_min >= privcmd_resource_max);
+
+ // XXX this should be somewhere appropriate
+ (void)p2m_expose_init();
+
return 0;
}
late_initcall(xen_ia64_privcmd_init);
@@ -546,6 +478,7 @@ struct xen_ia64_privcmd_range {
};
struct xen_ia64_privcmd_vma {
+ int is_privcmd_mmapped;
struct xen_ia64_privcmd_range* range;
unsigned long num_entries;
@@ -684,12 +617,15 @@ __xen_ia64_privcmd_vma_open(struct vm_area_struct* vma,
static void
xen_ia64_privcmd_vma_open(struct vm_area_struct* vma)
{
+ struct xen_ia64_privcmd_vma* old_privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
struct xen_ia64_privcmd_vma* privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
struct xen_ia64_privcmd_range* privcmd_range = privcmd_vma->range;
atomic_inc(&privcmd_range->ref_count);
// vm_op->open() can't fail.
privcmd_vma = kmalloc(sizeof(*privcmd_vma), GFP_KERNEL | __GFP_NOFAIL);
+ // copy original value if necessary
+ privcmd_vma->is_privcmd_mmapped = old_privcmd_vma->is_privcmd_mmapped;
__xen_ia64_privcmd_vma_open(vma, privcmd_vma, privcmd_range);
}
@@ -725,6 +661,14 @@ xen_ia64_privcmd_vma_close(struct vm_area_struct* vma)
}
int
+privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+{
+ struct xen_ia64_privcmd_vma* privcmd_vma =
+ (struct xen_ia64_privcmd_vma *)vma->vm_private_data;
+ return (xchg(&privcmd_vma->is_privcmd_mmapped, 1) == 0);
+}
+
+int
privcmd_mmap(struct file * file, struct vm_area_struct * vma)
{
int error;
@@ -749,6 +693,8 @@ privcmd_mmap(struct file * file, struct vm_area_struct * vma)
if (privcmd_vma == NULL) {
goto out_enomem1;
}
+ privcmd_vma->is_privcmd_mmapped = 0;
+
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (res == NULL) {
goto out_enomem1;
@@ -831,3 +777,276 @@ time_resume(void)
/* Just trigger a tick. */
ia64_cpu_local_tick();
}
+
+///////////////////////////////////////////////////////////////////////////
+// expose p2m table
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
+#include <linux/cpu.h>
+#include <asm/uaccess.h>
+
+int p2m_initialized __read_mostly = 0;
+
+unsigned long p2m_min_low_pfn __read_mostly;
+unsigned long p2m_max_low_pfn __read_mostly;
+unsigned long p2m_convert_min_pfn __read_mostly;
+unsigned long p2m_convert_max_pfn __read_mostly;
+
+static struct resource p2m_resource = {
+ .name = "Xen p2m table",
+ .flags = IORESOURCE_MEM,
+};
+static unsigned long p2m_assign_start_pfn __read_mostly;
+static unsigned long p2m_assign_end_pfn __read_mostly;
+volatile const pte_t* p2m_pte __read_mostly;
+
+#define GRNULE_PFN PTRS_PER_PTE
+static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN;
+
+#define ROUNDDOWN(x, y) ((x) & ~((y) - 1))
+#define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1))
+
+#define P2M_PREFIX "Xen p2m: "
+
+static int xen_ia64_p2m_expose __read_mostly = 1;
+module_param(xen_ia64_p2m_expose, int, 0);
+MODULE_PARM_DESC(xen_ia64_p2m_expose,
+ "enable/disable xen/ia64 p2m exposure optimization\n");
+
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
+static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1;
+module_param(xen_ia64_p2m_expose_use_dtr, int, 0);
+MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr,
+ "use/unuse dtr to map exposed p2m table\n");
+
+static const int p2m_page_shifts[] = {
+ _PAGE_SIZE_4K,
+ _PAGE_SIZE_8K,
+ _PAGE_SIZE_16K,
+ _PAGE_SIZE_64K,
+ _PAGE_SIZE_256K,
+ _PAGE_SIZE_1M,
+ _PAGE_SIZE_4M,
+ _PAGE_SIZE_16M,
+ _PAGE_SIZE_64M,
+ _PAGE_SIZE_256M,
+};
+
+struct p2m_itr_arg {
+ unsigned long vaddr;
+ unsigned long pteval;
+ unsigned long log_page_size;
+};
+static struct p2m_itr_arg p2m_itr_arg __read_mostly;
+
+// This should be in asm-ia64/kregs.h
+#define IA64_TR_P2M_TABLE 3
+
+static void
+p2m_itr(void* info)
+{
+ struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info;
+ ia64_itr(0x2, IA64_TR_P2M_TABLE,
+ arg->vaddr, arg->pteval, arg->log_page_size);
+ ia64_srlz_d();
+}
+
+static int
+p2m_expose_dtr_call(struct notifier_block *self,
+ unsigned long event, void* ptr)
+{
+ unsigned int cpu = (unsigned int)(long)ptr;
+ if (event != CPU_ONLINE)
+ return 0;
+ if (!(p2m_initialized && xen_ia64_p2m_expose_use_dtr))
+ smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1);
+ return 0;
+}
+
+static struct notifier_block p2m_expose_dtr_hotplug_notifier = {
+ .notifier_call = p2m_expose_dtr_call,
+ .next = NULL,
+ .priority = 0
+};
+#endif
+
+static int
+p2m_expose_init(void)
+{
+ unsigned long num_pfn;
+ unsigned long size = 0;
+ unsigned long p2m_size = 0;
+ unsigned long align = ~0UL;
+ int error = 0;
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
+ int i;
+ unsigned long page_size;
+ unsigned long log_page_size = 0;
+#endif
+
+ if (!xen_ia64_p2m_expose)
+ return -ENOSYS;
+ if (p2m_initialized)
+ return 0;
+
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
+ error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
+ if (error < 0)
+ return error;
+#endif
+
+ lock_cpu_hotplug();
+ if (p2m_initialized)
+ goto out;
+
+#ifdef CONFIG_DISCONTIGMEM
+ p2m_min_low_pfn = min_low_pfn;
+ p2m_max_low_pfn = max_low_pfn;
+#else
+ p2m_min_low_pfn = 0;
+ p2m_max_low_pfn = max_pfn;
+#endif
+
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
+ if (xen_ia64_p2m_expose_use_dtr) {
+ unsigned long granule_pfn = 0;
+ p2m_size = p2m_max_low_pfn - p2m_min_low_pfn;
+ for (i = 0;
+ i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]);
+ i++) {
+ log_page_size = p2m_page_shifts[i];
+ page_size = 1UL << log_page_size;
+ if (page_size < p2m_size)
+ continue;
+
+ granule_pfn = max(page_size >> PAGE_SHIFT,
+ p2m_granule_pfn);
+ p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
+ granule_pfn);
+ p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn,
+ granule_pfn);
+ num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
+ size = num_pfn << PAGE_SHIFT;
+ p2m_size = num_pfn / PTRS_PER_PTE;
+ p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT);
+ if (p2m_size == page_size)
+ break;
+ }
+ if (p2m_size != page_size) {
+ printk(KERN_ERR "p2m_size != page_size\n");
+ error = -EINVAL;
+ goto out;
+ }
+ align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT);
+ } else
+#endif
+ {
+ BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1));
+ p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn,
+ p2m_granule_pfn);
+ p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn);
+ num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn;
+ size = num_pfn << PAGE_SHIFT;
+ p2m_size = num_pfn / PTRS_PER_PTE;
+ p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT);
+ align = max(privcmd_resource_align,
+ p2m_granule_pfn << PAGE_SHIFT);
+ }
+
+ // use privcmd region
+ error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size,
+ privcmd_resource_min, privcmd_resource_max,
+ align, NULL, NULL);
+ if (error) {
+ printk(KERN_ERR P2M_PREFIX
+ "can't allocate region for p2m exposure "
+ "[0x%016lx, 0x%016lx) 0x%016lx\n",
+ p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size);
+ goto out;
+ }
+
+ p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT;
+ p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT;
+
+ error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn,
+ p2m_assign_start_pfn,
+ size, p2m_granule_pfn);
+ if (error) {
+ printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n",
+ error);
+ printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx "
+ "size 0x%016lx granule 0x%016lx\n",
+ p2m_convert_min_pfn, p2m_assign_start_pfn,
+ size, p2m_granule_pfn);;
+ release_resource(&p2m_resource);
+ goto out;
+ }
+ p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn);
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
+ if (xen_ia64_p2m_expose_use_dtr) {
+ p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn
+ << PAGE_SHIFT);
+ p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn,
+ PAGE_KERNEL));
+ p2m_itr_arg.log_page_size = log_page_size;
+ smp_mb();
+ smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1);
+ p2m_itr(&p2m_itr_arg);
+ }
+#endif
+ smp_mb();
+ p2m_initialized = 1;
+ printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n",
+ p2m_convert_min_pfn << PAGE_SHIFT,
+ p2m_convert_max_pfn << PAGE_SHIFT);
+ printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n",
+ p2m_assign_start_pfn << PAGE_SHIFT,
+ p2m_assign_end_pfn << PAGE_SHIFT,
+ p2m_size / 1024);
+out:
+ unlock_cpu_hotplug();
+ return error;
+}
+
+#ifdef notyet
+void
+p2m_expose_cleanup(void)
+{
+ BUG_ON(!p2m_initialized);
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR
+ unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier);
+#endif
+ release_resource(&p2m_resource);
+}
+#endif
+
+// XXX: worth inlining this?
+unsigned long
+p2m_phystomach(unsigned long gpfn)
+{
+ volatile const pte_t* pte;
+ unsigned long mfn;
+ unsigned long pteval;
+
+ if (!p2m_initialized ||
+ gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn
+ /* || !pfn_valid(gpfn) */)
+ return INVALID_MFN;
+ pte = p2m_pte + (gpfn - p2m_convert_min_pfn);
+
+ mfn = INVALID_MFN;
+ if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 &&
+ pte_present(__pte(pteval)) &&
+ pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT)))
+ mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT;
+
+ return mfn;
+}
+
+EXPORT_SYMBOL_GPL(p2m_initialized);
+EXPORT_SYMBOL_GPL(p2m_min_low_pfn);
+EXPORT_SYMBOL_GPL(p2m_max_low_pfn);
+EXPORT_SYMBOL_GPL(p2m_convert_min_pfn);
+EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
+EXPORT_SYMBOL_GPL(p2m_pte);
+EXPORT_SYMBOL_GPL(p2m_phystomach);
+#endif
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/util.c b/linux-2.6-xen-sparse/arch/ia64/xen/util.c
index 02dfaabc66..7df0c5f72c 100644
--- a/linux-2.6-xen-sparse/arch/ia64/xen/util.c
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/util.c
@@ -28,6 +28,8 @@
#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <xen/driver_util.h>
+#include <xen/interface/memory.h>
+#include <asm/hypercall.h>
struct vm_struct *alloc_vm_area(unsigned long size)
{
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
new file mode 100644
index 0000000000..fda0a45ec5
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
@@ -0,0 +1,303 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Tristan Gingold <tristan.gingold@bull.net>
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/dom0_ops.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/xencomm.h>
+#include <xen/interface/version.h>
+#include <xen/interface/sched.h>
+#include <xen/interface/event_channel.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/callback.h>
+#include <xen/interface/acm_ops.h>
+#include <xen/interface/hvm/params.h>
+#include <asm/hypercall.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
+#include <asm/xen/xencomm.h>
+
+/* Xencomm notes:
+ * This file defines hypercalls to be used by xencomm. The hypercalls simply
+ * create inline descriptors for pointers and then call the raw arch hypercall
+ * xencomm_arch_hypercall_XXX.
+ *
+ * If the arch wants to use these hypercalls directly, simply define macros
+ * in asm/hypercall.h, e.g.:
+ * #define HYPERVISOR_sched_op xencomm_hypercall_sched_op
+ *
+ * The arch may also define HYPERVISOR_xxx as a function and do more operations
+ * before/after making the hypercall.
+ *
+ * Note: because only inline descriptors are created, these functions must only
+ * be called with parameters that reside in kernel memory.
+ */
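+
+/*
+ * For illustration only, a minimal sketch of the wrapper-function pattern
+ * described above (the arch_pre_sched_op/arch_post_sched_op hooks are
+ * hypothetical and not defined in this tree; only
+ * xencomm_hypercall_sched_op below is real):
+ *
+ *   int HYPERVISOR_sched_op(int cmd, void *arg)
+ *   {
+ *           int ret;
+ *
+ *           arch_pre_sched_op(cmd, arg);
+ *           ret = xencomm_hypercall_sched_op(cmd, arg);
+ *           arch_post_sched_op(cmd, arg, ret);
+ *           return ret;
+ *   }
+ */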
+
+int
+xencomm_hypercall_console_io(int cmd, int count, char *str)
+{
+ return xencomm_arch_hypercall_console_io
+ (cmd, count, xencomm_create_inline(str));
+}
+
+int
+xencomm_hypercall_event_channel_op(int cmd, void *op)
+{
+ return xencomm_arch_hypercall_event_channel_op
+ (cmd, xencomm_create_inline(op));
+}
+
+int
+xencomm_hypercall_xen_version(int cmd, void *arg)
+{
+ switch (cmd) {
+ case XENVER_version:
+ case XENVER_extraversion:
+ case XENVER_compile_info:
+ case XENVER_capabilities:
+ case XENVER_changeset:
+ case XENVER_platform_parameters:
+ case XENVER_pagesize:
+ case XENVER_get_features:
+ break;
+ default:
+ printk("%s: unknown version cmd %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ return xencomm_arch_hypercall_xen_version
+ (cmd, xencomm_create_inline(arg));
+}
+
+int
+xencomm_hypercall_physdev_op(int cmd, void *op)
+{
+ return xencomm_arch_hypercall_physdev_op
+ (cmd, xencomm_create_inline(op));
+}
+
+static void *
+xencommize_grant_table_op(unsigned int cmd, void *op, unsigned int count)
+{
+ switch (cmd) {
+ case GNTTABOP_map_grant_ref:
+ case GNTTABOP_unmap_grant_ref:
+ break;
+ case GNTTABOP_setup_table:
+ {
+ struct gnttab_setup_table *setup = op;
+ struct xencomm_handle *frame_list;
+
+ frame_list = xencomm_create_inline
+ (xen_guest_handle(setup->frame_list));
+
+ set_xen_guest_handle(setup->frame_list, (void *)frame_list);
+ break;
+ }
+ case GNTTABOP_dump_table:
+ case GNTTABOP_transfer:
+ case GNTTABOP_copy:
+ break;
+ default:
+ printk("%s: unknown grant table op %d\n", __func__, cmd);
+ BUG();
+ }
+
+ return xencomm_create_inline(op);
+}
+
+int
+xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, unsigned int count)
+{
+ void *desc = xencommize_grant_table_op(cmd, op, count);
+
+ return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
+}
+
+int
+xencomm_hypercall_sched_op(int cmd, void *arg)
+{
+ switch (cmd) {
+ case SCHEDOP_yield:
+ case SCHEDOP_block:
+ case SCHEDOP_shutdown:
+ case SCHEDOP_remote_shutdown:
+ break;
+ case SCHEDOP_poll:
+ {
+ sched_poll_t *poll = arg;
+ struct xencomm_handle *ports;
+
+ ports = xencomm_create_inline(xen_guest_handle(poll->ports));
+
+ set_xen_guest_handle(poll->ports, (void *)ports);
+ break;
+ }
+ default:
+ printk("%s: unknown sched op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ return xencomm_arch_hypercall_sched_op(cmd, xencomm_create_inline(arg));
+}
+
+int
+xencomm_hypercall_multicall(void *call_list, int nr_calls)
+{
+ int i;
+ multicall_entry_t *mce;
+
+ for (i = 0; i < nr_calls; i++) {
+ mce = (multicall_entry_t *)call_list + i;
+
+ switch (mce->op) {
+ case __HYPERVISOR_update_va_mapping:
+ case __HYPERVISOR_mmu_update:
+ /* No-op on ia64. */
+ break;
+ case __HYPERVISOR_grant_table_op:
+ mce->args[1] = (unsigned long)xencommize_grant_table_op
+ (mce->args[0], (void *)mce->args[1],
+ mce->args[2]);
+ break;
+ case __HYPERVISOR_memory_op:
+ default:
+ printk("%s: unhandled multicall op entry op %lu\n",
+ __func__, mce->op);
+ return -ENOSYS;
+ }
+ }
+
+ return xencomm_arch_hypercall_multicall
+ (xencomm_create_inline(call_list), nr_calls);
+}
+
+int
+xencomm_hypercall_callback_op(int cmd, void *arg)
+{
+ switch (cmd) {
+ case CALLBACKOP_register:
+ case CALLBACKOP_unregister:
+ break;
+ default:
+ printk("%s: unknown callback op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ return xencomm_arch_hypercall_callback_op
+ (cmd, xencomm_create_inline(arg));
+}
+
+static void
+xencommize_memory_reservation(xen_memory_reservation_t *mop)
+{
+ struct xencomm_handle *desc;
+
+ desc = xencomm_create_inline(xen_guest_handle(mop->extent_start));
+ set_xen_guest_handle(mop->extent_start, (void *)desc);
+}
+
+int
+xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
+{
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start_va[2];
+ xen_memory_reservation_t *xmr = NULL, *xme_in = NULL, *xme_out = NULL;
+ int rc;
+
+ switch (cmd) {
+ case XENMEM_increase_reservation:
+ case XENMEM_decrease_reservation:
+ case XENMEM_populate_physmap:
+ xmr = (xen_memory_reservation_t *)arg;
+ xen_guest_handle(extent_start_va[0]) =
+ xen_guest_handle(xmr->extent_start);
+ xencommize_memory_reservation((xen_memory_reservation_t *)arg);
+ break;
+
+ case XENMEM_maximum_ram_page:
+ break;
+
+ case XENMEM_exchange:
+ xme_in = &((xen_memory_exchange_t *)arg)->in;
+ xme_out = &((xen_memory_exchange_t *)arg)->out;
+ xen_guest_handle(extent_start_va[0]) =
+ xen_guest_handle(xme_in->extent_start);
+ xen_guest_handle(extent_start_va[1]) =
+ xen_guest_handle(xme_out->extent_start);
+ xencommize_memory_reservation
+ (&((xen_memory_exchange_t *)arg)->in);
+ xencommize_memory_reservation
+ (&((xen_memory_exchange_t *)arg)->out);
+ break;
+
+ default:
+ printk("%s: unknown memory op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ rc = xencomm_arch_hypercall_memory_op(cmd, xencomm_create_inline(arg));
+
+ switch (cmd) {
+ case XENMEM_increase_reservation:
+ case XENMEM_decrease_reservation:
+ case XENMEM_populate_physmap:
+ xen_guest_handle(xmr->extent_start) =
+ xen_guest_handle(extent_start_va[0]);
+ break;
+
+ case XENMEM_exchange:
+ xen_guest_handle(xme_in->extent_start) =
+ xen_guest_handle(extent_start_va[0]);
+ xen_guest_handle(xme_out->extent_start) =
+ xen_guest_handle(extent_start_va[1]);
+ break;
+ }
+
+ return rc;
+}
+
+unsigned long
+xencomm_hypercall_hvm_op(int cmd, void *arg)
+{
+ switch (cmd) {
+ case HVMOP_set_param:
+ case HVMOP_get_param:
+ break;
+ default:
+ printk("%s: unknown hvm op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ return xencomm_arch_hypercall_hvm_op(cmd, xencomm_create_inline(arg));
+}
+
+int
+xencomm_hypercall_suspend(unsigned long srec)
+{
+ struct sched_shutdown arg;
+
+ arg.reason = SHUTDOWN_suspend;
+
+ return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg));
+}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
new file mode 100644
index 0000000000..5adec0c325
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_mini.c
@@ -0,0 +1,319 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Tristan Gingold <tristan.gingold@bull.net>
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/dom0_ops.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/xencomm.h>
+#include <xen/interface/version.h>
+#include <xen/interface/event_channel.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/hvm/params.h>
+#ifdef CONFIG_VMX_GUEST
+#include <asm/hypervisor.h>
+#else
+#include <asm/hypercall.h>
+#endif
+#include <asm/xen/xencomm.h>
+
+int
+xencomm_mini_hypercall_event_channel_op(int cmd, void *op)
+{
+ struct xencomm_mini xc_area[2];
+ int nbr_area = 2;
+ struct xencomm_handle *desc;
+ int rc;
+
+ rc = xencomm_create_mini(xc_area, &nbr_area,
+ op, sizeof(evtchn_op_t), &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_event_channel_op(cmd, desc);
+}
+EXPORT_SYMBOL(xencomm_mini_hypercall_event_channel_op);
+
+static int
+xencommize_mini_grant_table_op(struct xencomm_mini *xc_area, int *nbr_area,
+ unsigned int cmd, void *op, unsigned int count,
+ struct xencomm_handle **desc)
+{
+ struct xencomm_handle *desc1;
+ unsigned int argsize;
+ int rc;
+
+ switch (cmd) {
+ case GNTTABOP_map_grant_ref:
+ argsize = sizeof(struct gnttab_map_grant_ref);
+ break;
+ case GNTTABOP_unmap_grant_ref:
+ argsize = sizeof(struct gnttab_unmap_grant_ref);
+ break;
+ case GNTTABOP_setup_table:
+ {
+ struct gnttab_setup_table *setup = op;
+
+ argsize = sizeof(*setup);
+
+ if (count != 1)
+ return -EINVAL;
+ rc = xencomm_create_mini
+ (xc_area, nbr_area,
+ xen_guest_handle(setup->frame_list),
+ setup->nr_frames
+ * sizeof(*xen_guest_handle(setup->frame_list)),
+ &desc1);
+ if (rc)
+ return rc;
+ set_xen_guest_handle(setup->frame_list, (void *)desc1);
+ break;
+ }
+ case GNTTABOP_dump_table:
+ argsize = sizeof(struct gnttab_dump_table);
+ break;
+ case GNTTABOP_transfer:
+ argsize = sizeof(struct gnttab_transfer);
+ break;
+ default:
+ printk("%s: unknown mini grant table op %d\n", __func__, cmd);
+ BUG();
+ }
+
+ rc = xencomm_create_mini(xc_area, nbr_area, op, count * argsize, desc);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+int
+xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op,
+ unsigned int count)
+{
+ int rc;
+ struct xencomm_handle *desc;
+ int nbr_area = 2;
+ struct xencomm_mini xc_area[2];
+
+ rc = xencommize_mini_grant_table_op(xc_area, &nbr_area,
+ cmd, op, count, &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_grant_table_op(cmd, desc, count);
+}
+EXPORT_SYMBOL(xencomm_mini_hypercall_grant_table_op);
+
+int
+xencomm_mini_hypercall_multicall(void *call_list, int nr_calls)
+{
+ int i;
+ multicall_entry_t *mce;
+ int nbr_area = 2 + nr_calls * 3;
+ struct xencomm_mini xc_area[nbr_area];
+ struct xencomm_handle *desc;
+ int rc;
+
+ for (i = 0; i < nr_calls; i++) {
+ mce = (multicall_entry_t *)call_list + i;
+
+ switch (mce->op) {
+ case __HYPERVISOR_update_va_mapping:
+ case __HYPERVISOR_mmu_update:
+ /* No-op on ia64. */
+ break;
+ case __HYPERVISOR_grant_table_op:
+ rc = xencommize_mini_grant_table_op
+ (xc_area, &nbr_area,
+ mce->args[0], (void *)mce->args[1],
+ mce->args[2], &desc);
+ if (rc)
+ return rc;
+ mce->args[1] = (unsigned long)desc;
+ break;
+ case __HYPERVISOR_memory_op:
+ default:
+ printk("%s: unhandled multicall op entry op %lu\n",
+ __func__, mce->op);
+ return -ENOSYS;
+ }
+ }
+
+ rc = xencomm_create_mini(xc_area, &nbr_area, call_list,
+ nr_calls * sizeof(multicall_entry_t), &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_multicall(desc, nr_calls);
+}
+EXPORT_SYMBOL(xencomm_mini_hypercall_multicall);
+
+static int
+xencommize_mini_memory_reservation(struct xencomm_mini *area, int *nbr_area,
+ xen_memory_reservation_t *mop)
+{
+ struct xencomm_handle *desc;
+ int rc;
+
+ rc = xencomm_create_mini
+ (area, nbr_area,
+ xen_guest_handle(mop->extent_start),
+ mop->nr_extents
+ * sizeof(*xen_guest_handle(mop->extent_start)),
+ &desc);
+ if (rc)
+ return rc;
+
+ set_xen_guest_handle(mop->extent_start, (void *)desc);
+
+ return 0;
+}
+
+int
+xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg)
+{
+ int nbr_area = 4;
+ struct xencomm_mini xc_area[4];
+ struct xencomm_handle *desc;
+ int rc;
+ unsigned int argsize;
+
+ switch (cmd) {
+ case XENMEM_increase_reservation:
+ case XENMEM_decrease_reservation:
+ case XENMEM_populate_physmap:
+ argsize = sizeof(xen_memory_reservation_t);
+ rc = xencommize_mini_memory_reservation
+ (xc_area, &nbr_area, (xen_memory_reservation_t *)arg);
+ if (rc)
+ return rc;
+ break;
+
+ case XENMEM_maximum_ram_page:
+ argsize = 0;
+ break;
+
+ case XENMEM_exchange:
+ argsize = sizeof(xen_memory_exchange_t);
+ rc = xencommize_mini_memory_reservation
+ (xc_area, &nbr_area,
+ &((xen_memory_exchange_t *)arg)->in);
+ if (rc)
+ return rc;
+ rc = xencommize_mini_memory_reservation
+ (xc_area, &nbr_area,
+ &((xen_memory_exchange_t *)arg)->out);
+ if (rc)
+ return rc;
+ break;
+
+ case XENMEM_add_to_physmap:
+ argsize = sizeof (xen_add_to_physmap_t);
+ break;
+
+ default:
+ printk("%s: unknown mini memory op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_memory_op(cmd, desc);
+}
+EXPORT_SYMBOL(xencomm_mini_hypercall_memory_op);
+
+unsigned long
+xencomm_mini_hypercall_hvm_op(int cmd, void *arg)
+{
+ struct xencomm_handle *desc;
+ int nbr_area = 2;
+ struct xencomm_mini xc_area[2];
+ unsigned int argsize;
+ int rc;
+
+ switch (cmd) {
+ case HVMOP_get_param:
+ case HVMOP_set_param:
+ argsize = sizeof(xen_hvm_param_t);
+ break;
+ default:
+ printk("%s: unknown HVMOP %d\n", __func__, cmd);
+ return -EINVAL;
+ }
+
+ rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_hvm_op(cmd, desc);
+}
+EXPORT_SYMBOL(xencomm_mini_hypercall_hvm_op);
+
+int
+xencomm_mini_hypercall_xen_version(int cmd, void *arg)
+{
+ struct xencomm_handle *desc;
+ int nbr_area = 2;
+ struct xencomm_mini xc_area[2];
+ unsigned int argsize;
+ int rc;
+
+ switch (cmd) {
+ case XENVER_version:
+ /* do not actually pass an argument */
+ return xencomm_arch_hypercall_xen_version(cmd, 0);
+ case XENVER_extraversion:
+ argsize = sizeof(xen_extraversion_t);
+ break;
+ case XENVER_compile_info:
+ argsize = sizeof(xen_compile_info_t);
+ break;
+ case XENVER_capabilities:
+ argsize = sizeof(xen_capabilities_info_t);
+ break;
+ case XENVER_changeset:
+ argsize = sizeof(xen_changeset_info_t);
+ break;
+ case XENVER_platform_parameters:
+ argsize = sizeof(xen_platform_parameters_t);
+ break;
+ case XENVER_pagesize:
+ argsize = (arg == NULL) ? 0 : sizeof(void *);
+ break;
+ case XENVER_get_features:
+ argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t);
+ break;
+
+ default:
+ printk("%s: unknown version op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc);
+ if (rc)
+ return rc;
+
+ return xencomm_arch_hypercall_xen_version(cmd, desc);
+}
+EXPORT_SYMBOL(xencomm_mini_hypercall_xen_version);
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
new file mode 100644
index 0000000000..51cbb9f4c0
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
@@ -0,0 +1,656 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Tristan Gingold <tristan.gingold@bull.net>
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/dom0_ops.h>
+#define __XEN__
+#include <xen/interface/domctl.h>
+#include <xen/interface/sysctl.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/version.h>
+#include <xen/interface/event_channel.h>
+#include <xen/interface/acm_ops.h>
+#include <xen/interface/hvm/params.h>
+#include <xen/public/privcmd.h>
+#include <asm/hypercall.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
+#include <asm/xen/xencomm.h>
+
+#define ROUND_DIV(v,s) (((v) + (s) - 1) / (s))
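+/*
+ * For example (as used for the shadow-op dirty bitmap and the vcpu affinity
+ * cpumap below), ROUND_DIV(nr_bits, 8) yields the buffer size in bytes,
+ * rounded up: ROUND_DIV(10, 8) == 2.
+ */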
+
+static int
+xencomm_privcmd_dom0_op(privcmd_hypercall_t *hypercall)
+{
+ dom0_op_t kern_op;
+ dom0_op_t __user *user_op = (dom0_op_t __user *)hypercall->arg[0];
+ struct xencomm_handle *op_desc;
+ struct xencomm_handle *desc = NULL;
+ int ret = 0;
+
+ if (copy_from_user(&kern_op, user_op, sizeof(dom0_op_t)))
+ return -EFAULT;
+
+ if (kern_op.interface_version != DOM0_INTERFACE_VERSION)
+ return -EACCES;
+
+ op_desc = xencomm_create_inline(&kern_op);
+
+ switch (kern_op.cmd) {
+ default:
+ printk("%s: unknown dom0 cmd %d\n", __func__, kern_op.cmd);
+ return -ENOSYS;
+ }
+
+ if (ret) {
+ /* error mapping the nested pointer */
+ return ret;
+ }
+
+ ret = xencomm_arch_hypercall_dom0_op(op_desc);
+
+ /* FIXME: should we restore the handle? */
+ if (copy_to_user(user_op, &kern_op, sizeof(dom0_op_t)))
+ ret = -EFAULT;
+
+ if (desc)
+ xencomm_free(desc);
+ return ret;
+}
+
+/*
+ * Temporarily disable the NUMA PHYSINFO code until the rest of the
+ * changes are upstream.
+ */
+#undef IA64_NUMA_PHYSINFO
+
+static int
+xencomm_privcmd_sysctl(privcmd_hypercall_t *hypercall)
+{
+ xen_sysctl_t kern_op;
+ xen_sysctl_t __user *user_op;
+ struct xencomm_handle *op_desc;
+ struct xencomm_handle *desc = NULL;
+ struct xencomm_handle *desc1 = NULL;
+ int ret = 0;
+
+ user_op = (xen_sysctl_t __user *)hypercall->arg[0];
+
+ if (copy_from_user(&kern_op, user_op, sizeof(xen_sysctl_t)))
+ return -EFAULT;
+
+ if (kern_op.interface_version != XEN_SYSCTL_INTERFACE_VERSION)
+ return -EACCES;
+
+ op_desc = xencomm_create_inline(&kern_op);
+
+ switch (kern_op.cmd) {
+ case XEN_SYSCTL_readconsole:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.readconsole.buffer),
+ kern_op.u.readconsole.count,
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.readconsole.buffer,
+ (void *)desc);
+ break;
+ case XEN_SYSCTL_tbuf_op:
+#ifndef IA64_NUMA_PHYSINFO
+ case XEN_SYSCTL_physinfo:
+#endif
+ case XEN_SYSCTL_sched_id:
+ break;
+ case XEN_SYSCTL_perfc_op:
+ {
+ struct xencomm_handle *tmp_desc;
+ xen_sysctl_t tmp_op = {
+ .cmd = XEN_SYSCTL_perfc_op,
+ .interface_version = XEN_SYSCTL_INTERFACE_VERSION,
+ .u.perfc_op = {
+ .cmd = XEN_SYSCTL_PERFCOP_query,
+ // .desc.p = NULL,
+ // .val.p = NULL,
+ },
+ };
+
+ if (xen_guest_handle(kern_op.u.perfc_op.desc) == NULL) {
+ if (xen_guest_handle(kern_op.u.perfc_op.val) != NULL)
+ return -EINVAL;
+ break;
+ }
+
+ /* query the buffer size for xencomm */
+ tmp_desc = xencomm_create_inline(&tmp_op);
+ ret = xencomm_arch_hypercall_sysctl(tmp_desc);
+ if (ret)
+ return ret;
+
+ ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.desc),
+ tmp_op.u.perfc_op.nr_counters *
+ sizeof(xen_sysctl_perfc_desc_t),
+ &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ set_xen_guest_handle(kern_op.u.perfc_op.desc, (void *)desc);
+
+ ret = xencomm_create(xen_guest_handle(kern_op.u.perfc_op.val),
+ tmp_op.u.perfc_op.nr_vals *
+ sizeof(xen_sysctl_perfc_val_t),
+ &desc1, GFP_KERNEL);
+ if (ret)
+ xencomm_free(desc);
+
+ set_xen_guest_handle(kern_op.u.perfc_op.val, (void *)desc1);
+ break;
+ }
+ case XEN_SYSCTL_getdomaininfolist:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.getdomaininfolist.buffer),
+ kern_op.u.getdomaininfolist.max_domains *
+ sizeof(xen_domctl_getdomaininfo_t),
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer,
+ (void *)desc);
+ break;
+#ifdef IA64_NUMA_PHYSINFO
+ case XEN_SYSCTL_physinfo:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.physinfo.memory_chunks),
+ PUBLIC_MAXCHUNKS * sizeof(node_data_t),
+ &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+ set_xen_guest_handle(kern_op.u.physinfo.memory_chunks,
+ (void *)desc);
+
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.physinfo.cpu_to_node),
+ PUBLIC_MAX_NUMNODES * sizeof(u64),
+ &desc1, GFP_KERNEL);
+ if (ret)
+ xencomm_free(desc);
+ set_xen_guest_handle(kern_op.u.physinfo.cpu_to_node,
+ (void *)desc1);
+ break;
+#endif
+ default:
+ printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd);
+ return -ENOSYS;
+ }
+
+ if (ret) {
+ /* error mapping the nested pointer */
+ return ret;
+ }
+
+ ret = xencomm_arch_hypercall_sysctl(op_desc);
+
+ /* FIXME: should we restore the handles? */
+ if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t)))
+ ret = -EFAULT;
+
+ if (desc)
+ xencomm_free(desc);
+ if (desc1)
+ xencomm_free(desc1);
+ return ret;
+}
+
+static int
+xencomm_privcmd_domctl(privcmd_hypercall_t *hypercall)
+{
+ xen_domctl_t kern_op;
+ xen_domctl_t __user *user_op;
+ struct xencomm_handle *op_desc;
+ struct xencomm_handle *desc = NULL;
+ int ret = 0;
+
+ user_op = (xen_domctl_t __user *)hypercall->arg[0];
+
+ if (copy_from_user(&kern_op, user_op, sizeof(xen_domctl_t)))
+ return -EFAULT;
+
+ if (kern_op.interface_version != XEN_DOMCTL_INTERFACE_VERSION)
+ return -EACCES;
+
+ op_desc = xencomm_create_inline(&kern_op);
+
+ switch (kern_op.cmd) {
+ case XEN_DOMCTL_createdomain:
+ case XEN_DOMCTL_destroydomain:
+ case XEN_DOMCTL_pausedomain:
+ case XEN_DOMCTL_unpausedomain:
+ case XEN_DOMCTL_getdomaininfo:
+ break;
+ case XEN_DOMCTL_getmemlist:
+ {
+ unsigned long nr_pages = kern_op.u.getmemlist.max_pfns;
+
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.getmemlist.buffer),
+ nr_pages * sizeof(unsigned long),
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.getmemlist.buffer,
+ (void *)desc);
+ break;
+ }
+ case XEN_DOMCTL_getpageframeinfo:
+ break;
+ case XEN_DOMCTL_getpageframeinfo2:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.getpageframeinfo2.array),
+ kern_op.u.getpageframeinfo2.num,
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.getpageframeinfo2.array,
+ (void *)desc);
+ break;
+ case XEN_DOMCTL_shadow_op:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap),
+ ROUND_DIV(kern_op.u.shadow_op.pages, 8),
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap,
+ (void *)desc);
+ break;
+ case XEN_DOMCTL_max_mem:
+ break;
+ case XEN_DOMCTL_setvcpucontext:
+ case XEN_DOMCTL_getvcpucontext:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.vcpucontext.ctxt),
+ sizeof(vcpu_guest_context_t),
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.vcpucontext.ctxt, (void *)desc);
+ break;
+ case XEN_DOMCTL_getvcpuinfo:
+ break;
+ case XEN_DOMCTL_setvcpuaffinity:
+ case XEN_DOMCTL_getvcpuaffinity:
+ ret = xencomm_create(
+ xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap),
+ ROUND_DIV(kern_op.u.vcpuaffinity.cpumap.nr_cpus, 8),
+ &desc, GFP_KERNEL);
+ set_xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap,
+ (void *)desc);
+ break;
+ case XEN_DOMCTL_max_vcpus:
+ case XEN_DOMCTL_scheduler_op:
+ case XEN_DOMCTL_setdomainhandle:
+ case XEN_DOMCTL_setdebugging:
+ case XEN_DOMCTL_irq_permission:
+ case XEN_DOMCTL_iomem_permission:
+ case XEN_DOMCTL_ioport_permission:
+ case XEN_DOMCTL_hypercall_init:
+ case XEN_DOMCTL_arch_setup:
+ case XEN_DOMCTL_settimeoffset:
+ break;
+ default:
+ printk("%s: unknown domctl cmd %d\n", __func__, kern_op.cmd);
+ return -ENOSYS;
+ }
+
+ if (ret) {
+ /* error mapping the nested pointer */
+ return ret;
+ }
+
+ ret = xencomm_arch_hypercall_domctl(op_desc);
+
+ /* FIXME: should we restore the handle? */
+ if (copy_to_user(user_op, &kern_op, sizeof(xen_domctl_t)))
+ ret = -EFAULT;
+
+ if (desc)
+ xencomm_free(desc);
+ return ret;
+}
+
+static int
+xencomm_privcmd_acm_op(privcmd_hypercall_t *hypercall)
+{
+ int cmd = hypercall->arg[0];
+ void __user *arg = (void __user *)hypercall->arg[1];
+ struct xencomm_handle *op_desc;
+ struct xencomm_handle *desc = NULL;
+ int ret;
+
+ switch (cmd) {
+ case ACMOP_getssid:
+ {
+ struct acm_getssid kern_arg;
+
+ if (copy_from_user(&kern_arg, arg, sizeof (kern_arg)))
+ return -EFAULT;
+
+ op_desc = xencomm_create_inline(&kern_arg);
+
+ ret = xencomm_create(xen_guest_handle(kern_arg.ssidbuf),
+ kern_arg.ssidbuf_size, &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ set_xen_guest_handle(kern_arg.ssidbuf, (void *)desc);
+
+ ret = xencomm_arch_hypercall_acm_op(cmd, op_desc);
+
+ xencomm_free(desc);
+
+ if (copy_to_user(arg, &kern_arg, sizeof (kern_arg)))
+ return -EFAULT;
+
+ return ret;
+ }
+ default:
+ printk("%s: unknown acm_op cmd %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ return ret;
+}
+
+static int
+xencomm_privcmd_memory_op(privcmd_hypercall_t *hypercall)
+{
+ const unsigned long cmd = hypercall->arg[0];
+ int ret = 0;
+
+ switch (cmd) {
+ case XENMEM_increase_reservation:
+ case XENMEM_decrease_reservation:
+ case XENMEM_populate_physmap:
+ {
+ xen_memory_reservation_t kern_op;
+ xen_memory_reservation_t __user *user_op;
+ struct xencomm_handle *desc = NULL;
+ struct xencomm_handle *desc_op;
+
+ user_op = (xen_memory_reservation_t __user *)hypercall->arg[1];
+ if (copy_from_user(&kern_op, user_op,
+ sizeof(xen_memory_reservation_t)))
+ return -EFAULT;
+ desc_op = xencomm_create_inline(&kern_op);
+
+ if (xen_guest_handle(kern_op.extent_start)) {
+ void *addr;
+
+ addr = xen_guest_handle(kern_op.extent_start);
+ ret = xencomm_create
+ (addr,
+ kern_op.nr_extents *
+ sizeof(*xen_guest_handle
+ (kern_op.extent_start)),
+ &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+ set_xen_guest_handle(kern_op.extent_start,
+ (void *)desc);
+ }
+
+ ret = xencomm_arch_hypercall_memory_op(cmd, desc_op);
+
+ if (desc)
+ xencomm_free(desc);
+
+ if (ret != 0)
+ return ret;
+
+ if (copy_to_user(user_op, &kern_op,
+ sizeof(xen_memory_reservation_t)))
+ return -EFAULT;
+
+ return ret;
+ }
+ case XENMEM_translate_gpfn_list:
+ {
+ xen_translate_gpfn_list_t kern_op;
+ xen_translate_gpfn_list_t __user *user_op;
+ struct xencomm_handle *desc_gpfn = NULL;
+ struct xencomm_handle *desc_mfn = NULL;
+ struct xencomm_handle *desc_op;
+ void *addr;
+
+ user_op = (xen_translate_gpfn_list_t __user *)
+ hypercall->arg[1];
+ if (copy_from_user(&kern_op, user_op,
+ sizeof(xen_translate_gpfn_list_t)))
+ return -EFAULT;
+ desc_op = xencomm_create_inline(&kern_op);
+
+ if (kern_op.nr_gpfns) {
+ /* gpfn_list. */
+ addr = xen_guest_handle(kern_op.gpfn_list);
+
+ ret = xencomm_create(addr, kern_op.nr_gpfns *
+ sizeof(*xen_guest_handle
+ (kern_op.gpfn_list)),
+ &desc_gpfn, GFP_KERNEL);
+ if (ret)
+ return ret;
+ set_xen_guest_handle(kern_op.gpfn_list,
+ (void *)desc_gpfn);
+
+ /* mfn_list. */
+ addr = xen_guest_handle(kern_op.mfn_list);
+
+ ret = xencomm_create(addr, kern_op.nr_gpfns *
+ sizeof(*xen_guest_handle
+ (kern_op.mfn_list)),
+ &desc_mfn, GFP_KERNEL);
+ if (ret)
+ return ret;
+ set_xen_guest_handle(kern_op.mfn_list,
+ (void *)desc_mfn);
+ }
+
+ ret = xencomm_arch_hypercall_memory_op(cmd, desc_op);
+
+ if (desc_gpfn)
+ xencomm_free(desc_gpfn);
+
+ if (desc_mfn)
+ xencomm_free(desc_mfn);
+
+ if (ret != 0)
+ return ret;
+
+ return ret;
+ }
+ default:
+ printk("%s: unknown memory op %lu\n", __func__, cmd);
+ ret = -ENOSYS;
+ }
+ return ret;
+}
+
+static int
+xencomm_privcmd_xen_version(privcmd_hypercall_t *hypercall)
+{
+ int cmd = hypercall->arg[0];
+ void __user *arg = (void __user *)hypercall->arg[1];
+ struct xencomm_handle *desc;
+ size_t argsize;
+ int rc;
+
+ switch (cmd) {
+ case XENVER_version:
+ /* do not actually pass an argument */
+ return xencomm_arch_hypercall_xen_version(cmd, 0);
+ case XENVER_extraversion:
+ argsize = sizeof(xen_extraversion_t);
+ break;
+ case XENVER_compile_info:
+ argsize = sizeof(xen_compile_info_t);
+ break;
+ case XENVER_capabilities:
+ argsize = sizeof(xen_capabilities_info_t);
+ break;
+ case XENVER_changeset:
+ argsize = sizeof(xen_changeset_info_t);
+ break;
+ case XENVER_platform_parameters:
+ argsize = sizeof(xen_platform_parameters_t);
+ break;
+ case XENVER_pagesize:
+ argsize = (arg == NULL) ? 0 : sizeof(void *);
+ break;
+ case XENVER_get_features:
+ argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t);
+ break;
+
+ default:
+ printk("%s: unknown version op %d\n", __func__, cmd);
+ return -ENOSYS;
+ }
+
+ rc = xencomm_create(arg, argsize, &desc, GFP_KERNEL);
+ if (rc)
+ return rc;
+
+ rc = xencomm_arch_hypercall_xen_version(cmd, desc);
+
+ xencomm_free(desc);
+
+ return rc;
+}
+
+static int
+xencomm_privcmd_event_channel_op(privcmd_hypercall_t *hypercall)
+{
+ int cmd = hypercall->arg[0];
+ struct xencomm_handle *desc;
+ unsigned int argsize;
+ int ret;
+
+ switch (cmd) {
+ case EVTCHNOP_alloc_unbound:
+ argsize = sizeof(evtchn_alloc_unbound_t);
+ break;
+
+ case EVTCHNOP_status:
+ argsize = sizeof(evtchn_status_t);
+ break;
+
+ default:
+ printk("%s: unknown EVTCHNOP %d\n", __func__, cmd);
+ return -EINVAL;
+ }
+
+ ret = xencomm_create((void *)hypercall->arg[1], argsize,
+ &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ ret = xencomm_arch_hypercall_event_channel_op(cmd, desc);
+
+ xencomm_free(desc);
+ return ret;
+}
+
+static int
+xencomm_privcmd_hvm_op(privcmd_hypercall_t *hypercall)
+{
+ int cmd = hypercall->arg[0];
+ struct xencomm_handle *desc;
+ unsigned int argsize;
+ int ret;
+
+ switch (cmd) {
+ case HVMOP_get_param:
+ case HVMOP_set_param:
+ argsize = sizeof(xen_hvm_param_t);
+ break;
+ case HVMOP_set_irq_level:
+ argsize = sizeof(xen_hvm_set_irq_level_t);
+ break;
+ default:
+ printk("%s: unknown HVMOP %d\n", __func__, cmd);
+ return -EINVAL;
+ }
+
+ ret = xencomm_create((void *)hypercall->arg[1], argsize,
+ &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ ret = xencomm_arch_hypercall_hvm_op(cmd, desc);
+
+ xencomm_free(desc);
+ return ret;
+}
+
+static int
+xencomm_privcmd_sched_op(privcmd_hypercall_t *hypercall)
+{
+ int cmd = hypercall->arg[0];
+ struct xencomm_handle *desc;
+ unsigned int argsize;
+ int ret;
+
+ switch (cmd) {
+ case SCHEDOP_remote_shutdown:
+ argsize = sizeof(sched_remote_shutdown_t);
+ break;
+ default:
+ printk("%s: unknown SCHEDOP %d\n", __func__, cmd);
+ return -EINVAL;
+ }
+
+ ret = xencomm_create((void *)hypercall->arg[1], argsize,
+ &desc, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ ret = xencomm_arch_hypercall_sched_op(cmd, desc);
+
+ xencomm_free(desc);
+ return ret;
+}
+
+int
+privcmd_hypercall(privcmd_hypercall_t *hypercall)
+{
+ switch (hypercall->op) {
+ case __HYPERVISOR_dom0_op:
+ return xencomm_privcmd_dom0_op(hypercall);
+ case __HYPERVISOR_domctl:
+ return xencomm_privcmd_domctl(hypercall);
+ case __HYPERVISOR_sysctl:
+ return xencomm_privcmd_sysctl(hypercall);
+ case __HYPERVISOR_acm_op:
+ return xencomm_privcmd_acm_op(hypercall);
+ case __HYPERVISOR_xen_version:
+ return xencomm_privcmd_xen_version(hypercall);
+ case __HYPERVISOR_memory_op:
+ return xencomm_privcmd_memory_op(hypercall);
+ case __HYPERVISOR_event_channel_op:
+ return xencomm_privcmd_event_channel_op(hypercall);
+ case __HYPERVISOR_hvm_op:
+ return xencomm_privcmd_hvm_op(hypercall);
+ case __HYPERVISOR_sched_op:
+ return xencomm_privcmd_sched_op(hypercall);
+ default:
+ printk("%s: unknown hcall (%ld)\n", __func__, hypercall->op);
+ return -ENOSYS;
+ }
+}
+
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c b/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c
new file mode 100644
index 0000000000..367b6b32de
--- /dev/null
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xencomm.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <xen/interface/xen.h>
+#include <asm/page.h>
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+#include <asm/xen/xencomm.h>
+
+static int xencomm_debug = 0;
+
+static unsigned long kernel_start_pa;
+
+void
+xencomm_init(void)
+{
+ kernel_start_pa = KERNEL_START - ia64_tpa(KERNEL_START);
+}
+
+/* Translate virtual address to physical address. */
+unsigned long
+xencomm_vaddr_to_paddr(unsigned long vaddr)
+{
+#ifndef CONFIG_VMX_GUEST
+ struct page *page;
+ struct vm_area_struct *vma;
+#endif
+
+ if (vaddr == 0)
+ return 0;
+
+#ifdef __ia64__
+ if (REGION_NUMBER(vaddr) == 5) {
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep;
+
+ /* On ia64, TASK_SIZE depends on current, which is not yet
+ initialized during boot.
+ Furthermore, the kernel is relocatable and __pa() doesn't
+ work on such addresses. */
+ if (vaddr >= KERNEL_START
+ && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) {
+ return vaddr - kernel_start_pa;
+ }
+
+ /* In kernel area -- virtually mapped. */
+ pgd = pgd_offset_k(vaddr);
+ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ return ~0UL;
+
+ pud = pud_offset(pgd, vaddr);
+ if (pud_none(*pud) || pud_bad(*pud))
+ return ~0UL;
+
+ pmd = pmd_offset(pud, vaddr);
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
+ return ~0UL;
+
+ ptep = pte_offset_kernel(pmd, vaddr);
+ if (!ptep)
+ return ~0UL;
+
+ return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK);
+ }
+#endif
+
+ if (vaddr > TASK_SIZE) {
+ /* kernel address */
+ return __pa(vaddr);
+ }
+
+#ifdef CONFIG_VMX_GUEST
+ /* No privcmd within vmx guest. */
+ return ~0UL;
+#else
+ /* XXX double-check (lack of) locking */
+ vma = find_extend_vma(current->mm, vaddr);
+ if (!vma)
+ return ~0UL;
+
+ /* We assume the page is modified. */
+ page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH);
+ if (!page)
+ return ~0UL;
+
+ return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
+#endif
+}
+
+static int
+xencomm_init_desc(struct xencomm_desc *desc, void *buffer, unsigned long bytes)
+{
+ unsigned long recorded = 0;
+ int i = 0;
+
+ BUG_ON((buffer == NULL) && (bytes > 0));
+
+ /* record the physical pages used */
+ if (buffer == NULL)
+ desc->nr_addrs = 0;
+
+ while ((recorded < bytes) && (i < desc->nr_addrs)) {
+ unsigned long vaddr = (unsigned long)buffer + recorded;
+ unsigned long paddr;
+ int offset;
+ int chunksz;
+
+ offset = vaddr % PAGE_SIZE; /* handle partial pages */
+ chunksz = min(PAGE_SIZE - offset, bytes - recorded);
+
+ paddr = xencomm_vaddr_to_paddr(vaddr);
+ if (paddr == ~0UL) {
+ printk("%s: couldn't translate vaddr %lx\n",
+ __func__, vaddr);
+ return -EINVAL;
+ }
+
+ desc->address[i++] = paddr;
+ recorded += chunksz;
+ }
+
+ if (recorded < bytes) {
+ printk("%s: could only translate %ld of %ld bytes\n",
+ __func__, recorded, bytes);
+ return -ENOSPC;
+ }
+
+ /* mark remaining addresses invalid (just for safety) */
+ while (i < desc->nr_addrs)
+ desc->address[i++] = XENCOMM_INVALID;
+
+ desc->magic = XENCOMM_MAGIC;
+
+ return 0;
+}
+
+static struct xencomm_desc *
+xencomm_alloc(gfp_t gfp_mask)
+{
+ struct xencomm_desc *desc;
+
+ desc = (struct xencomm_desc *)__get_free_page(gfp_mask);
+ if (desc == NULL)
+ panic("%s: page allocation failed\n", __func__);
+
+ desc->nr_addrs = (PAGE_SIZE - sizeof(struct xencomm_desc)) /
+ sizeof(*desc->address);
+
+ return desc;
+}
+
+void
+xencomm_free(struct xencomm_handle *desc)
+{
+ if (desc)
+ free_page((unsigned long)__va(desc));
+}
+
+int
+xencomm_create(void *buffer, unsigned long bytes,
+ struct xencomm_handle **ret, gfp_t gfp_mask)
+{
+ struct xencomm_desc *desc;
+ struct xencomm_handle *handle;
+ int rc;
+
+ if (xencomm_debug)
+ printk("%s: %p[%ld]\n", __func__, buffer, bytes);
+
+ if (buffer == NULL || bytes == 0) {
+ *ret = (struct xencomm_handle *)NULL;
+ return 0;
+ }
+
+ desc = xencomm_alloc(gfp_mask);
+ if (!desc) {
+ printk("%s failure\n", "xencomm_alloc");
+ return -ENOMEM;
+ }
+ handle = (struct xencomm_handle *)__pa(desc);
+
+ rc = xencomm_init_desc(desc, buffer, bytes);
+ if (rc) {
+ printk("%s failure: %d\n", "xencomm_init_desc", rc);
+ xencomm_free(handle);
+ return rc;
+ }
+
+ *ret = handle;
+ return 0;
+}
+
+/* "mini" routines, for stack-based communications: */
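+
+/*
+ * Typical usage, sketched from the callers in xcom_mini.c: the caller keeps
+ * a small array of struct xencomm_mini on its stack and lets
+ * xencomm_create_mini() carve a descriptor out of it, so no page allocation
+ * is needed on the hypercall path. Roughly (op points at the kernel-memory
+ * argument being passed to the hypervisor):
+ *
+ *   struct xencomm_mini xc_area[2];
+ *   int nbr_area = 2;
+ *   struct xencomm_handle *desc;
+ *
+ *   rc = xencomm_create_mini(xc_area, &nbr_area, op, sizeof(evtchn_op_t),
+ *                            &desc);
+ *   if (rc == 0)
+ *           rc = xencomm_arch_hypercall_event_channel_op(cmd, desc);
+ */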
+
+static void *
+xencomm_alloc_mini(struct xencomm_mini *area, int *nbr_area)
+{
+ unsigned long base;
+ unsigned int pageoffset;
+
+ while (*nbr_area >= 0) {
+ /* Allocate an area. */
+ (*nbr_area)--;
+
+ base = (unsigned long)(area + *nbr_area);
+ pageoffset = base % PAGE_SIZE;
+
+ /* If the area does not cross a page, use it. */
+ if ((PAGE_SIZE - pageoffset) >= sizeof(struct xencomm_mini))
+ return &area[*nbr_area];
+ }
+ /* No more area. */
+ return NULL;
+}
+
+int
+xencomm_create_mini(struct xencomm_mini *area, int *nbr_area,
+ void *buffer, unsigned long bytes,
+ struct xencomm_handle **ret)
+{
+ struct xencomm_desc *desc;
+ int rc;
+ unsigned long res;
+
+ desc = xencomm_alloc_mini(area, nbr_area);
+ if (!desc)
+ return -ENOMEM;
+ desc->nr_addrs = XENCOMM_MINI_ADDRS;
+
+ rc = xencomm_init_desc(desc, buffer, bytes);
+ if (rc)
+ return rc;
+
+ res = xencomm_vaddr_to_paddr((unsigned long)desc);
+ if (res == ~0UL)
+ return -EINVAL;
+
+ *ret = (struct xencomm_handle *)res;
+ return 0;
+}
diff --git a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
index 918622918e..e761278670 100644
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S
@@ -23,12 +23,11 @@ GLOBAL_ENTRY(early_xen_setup)
mov cr.iva=r10
-#if XSI_BASE != 0xf100000000000000UL
- /* Backward compatibility. */
-(isBP) mov r2=0x600
+ /* Set xsi base. */
+#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600
+(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA
(isBP) movl r28=XSI_BASE;;
(isBP) break 0x1000;;
-#endif
br.ret.sptk.many rp
;;
@@ -38,18 +37,18 @@ END(early_xen_setup)
/* Stub for suspend.
Just force the stacked registers to be written in memory. */
-GLOBAL_ENTRY(HYPERVISOR_suspend)
+GLOBAL_ENTRY(xencomm_arch_hypercall_suspend)
+ mov r15=r32
+ ;;
alloc r20=ar.pfs,0,0,0,0
- mov r14=2
- mov r15=r12
- ;;
+ mov r2=__HYPERVISOR_sched_op
+ ;;
/* We don't want to deal with RSE. */
flushrs
- mov r2=__HYPERVISOR_sched_op
- st4 [r12]=r14
+ mov r14=2 // SCHEDOP_shutdown
;;
break 0x1000
;;
mov ar.pfs=r20
br.ret.sptk.many b0
-END(HYPERVISOR_suspend)
+END(xencomm_arch_hypercall_suspend)
diff --git a/linux-2.6-xen-sparse/arch/x86_64/Kconfig b/linux-2.6-xen-sparse/arch/x86_64/Kconfig
index 45d8302cb4..79bd0044fa 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig
@@ -368,6 +368,8 @@ config HOTPLUG_CPU
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y
config HPET_TIMER
bool
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile b/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
index aa84f6eb98..57b7fe1c11 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/acpi/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += processor.o
+processor-y := ../../../i386/kernel/acpi/processor.o ../../../i386/kernel/acpi/cstate.o
endif
boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
index 7edd8d5cb6..687c486878 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S
@@ -316,12 +316,7 @@ tracesys:
ja 1f
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
-1: SAVE_REST
- movq %rsp,%rdi
- call syscall_trace_leave
- RESTORE_TOP_OF_STACK %rbx
- RESTORE_REST
+1: movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
jmp int_ret_from_sys_call
CFI_ENDPROC
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
index ca96708d1c..65ba83c625 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c
@@ -350,7 +350,6 @@ static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
struct user_desc ud = {
.base_addr = addr,
.limit = 0xfffff,
- .contents = (3 << 3), /* user */
.seg_32bit = 1,
.limit_in_pages = 1,
.useable = 1,
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
index 8fe13ee2b0..711ce5d198 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c
@@ -74,6 +74,7 @@
#include <asm/hypervisor.h>
#include <xen/interface/nmi.h>
#include <xen/features.h>
+#include <xen/xencons.h>
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
#include <asm/mach-xen/setup_arch_post.h>
@@ -143,6 +144,9 @@ struct sys_desc_table_struct {
struct edid_info edid_info;
struct e820map e820;
+#ifdef CONFIG_XEN
+struct e820map machine_e820;
+#endif
extern int root_mountflags;
@@ -625,7 +629,6 @@ static void __init reserve_ebda_region(void)
void __init setup_arch(char **cmdline_p)
{
unsigned long kernel_end;
- struct e820entry *machine_e820;
struct xen_memory_map memmap;
#ifdef CONFIG_XEN
@@ -645,33 +648,15 @@ void __init setup_arch(char **cmdline_p)
screen_info.orig_video_cols = 80;
screen_info.orig_video_ega_bx = 3;
screen_info.orig_video_points = 16;
+ screen_info.orig_y = screen_info.orig_video_lines - 1;
if (xen_start_info->console.dom0.info_size >=
sizeof(struct dom0_vga_console_info)) {
const struct dom0_vga_console_info *info =
(struct dom0_vga_console_info *)(
(char *)xen_start_info +
xen_start_info->console.dom0.info_off);
- screen_info.orig_video_mode = info->txt_mode;
- screen_info.orig_video_isVGA = info->video_type;
- screen_info.orig_video_lines = info->video_height;
- screen_info.orig_video_cols = info->video_width;
- screen_info.orig_video_points = info->txt_points;
- screen_info.lfb_width = info->video_width;
- screen_info.lfb_height = info->video_height;
- screen_info.lfb_depth = info->lfb_depth;
- screen_info.lfb_base = info->lfb_base;
- screen_info.lfb_size = info->lfb_size;
- screen_info.lfb_linelength = info->lfb_linelen;
- screen_info.red_size = info->red_size;
- screen_info.red_pos = info->red_pos;
- screen_info.green_size = info->green_size;
- screen_info.green_pos = info->green_pos;
- screen_info.blue_size = info->blue_size;
- screen_info.blue_pos = info->blue_pos;
- screen_info.rsvd_size = info->rsvd_size;
- screen_info.rsvd_pos = info->rsvd_pos;
+ dom0_init_screen_info(info);
}
- screen_info.orig_y = screen_info.orig_video_lines - 1;
xen_start_info->console.domU.mfn = 0;
xen_start_info->console.domU.evtchn = 0;
} else
@@ -936,14 +921,14 @@ void __init setup_arch(char **cmdline_p)
probe_roms();
#ifdef CONFIG_XEN
if (is_initial_xendomain()) {
- machine_e820 = alloc_bootmem_low_pages(PAGE_SIZE);
-
memmap.nr_entries = E820MAX;
- set_xen_guest_handle(memmap.buffer, machine_e820);
+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
- BUG_ON(HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap));
+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
+ BUG();
+ machine_e820.nr_map = memmap.nr_entries;
- e820_reserve_resources(machine_e820, memmap.nr_entries);
+ e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
}
#else
e820_reserve_resources(e820.map, e820.nr_map);
@@ -959,10 +944,8 @@ void __init setup_arch(char **cmdline_p)
}
#ifdef CONFIG_XEN
- if (is_initial_xendomain()) {
- e820_setup_gap(machine_e820, memmap.nr_entries);
- free_bootmem(__pa(machine_e820), PAGE_SIZE);
- }
+ if (is_initial_xendomain())
+ e820_setup_gap(machine_e820.map, machine_e820.nr_map);
#else
e820_setup_gap(e820.map, e820.nr_map);
#endif
diff --git a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
index 9fd0b94cb5..653af67c08 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/traps-xen.c
@@ -30,6 +30,7 @@
#include <linux/moduleparam.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>
+#include <linux/kexec.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -438,6 +439,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
printk(KERN_ALERT "RIP ");
printk_address(regs->rip);
printk(" RSP <%016lx>\n", regs->rsp);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
}
void die(const char * str, struct pt_regs * regs, long err)
@@ -461,6 +464,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
*/
printk(str, safe_smp_processor_id());
show_registers(regs);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
if (panic_on_timeout || panic_on_oops)
panic("nmi watchdog");
printk("console shuts up ...\n");
diff --git a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
index d3ce58355f..032c31e532 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
+++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c
@@ -56,6 +56,11 @@
struct dma_mapping_ops* dma_ops;
EXPORT_SYMBOL(dma_ops);
+#ifdef CONFIG_XEN_COMPAT_030002
+unsigned int __kernel_page_user;
+EXPORT_SYMBOL(__kernel_page_user);
+#endif
+
extern unsigned long *contiguous_bitmap;
static unsigned long dma_reserve __initdata;
@@ -260,7 +265,10 @@ static void set_pte_phys(unsigned long vaddr,
return;
}
}
- new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
+ if (pgprot_val(prot))
+ new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
+ else
+ new_pte = __pte(0);
pte = pte_offset_kernel(pmd, vaddr);
if (!pte_none(*pte) &&
@@ -524,6 +532,33 @@ void __init xen_init_pt(void)
addr = page[pud_index(__START_KERNEL_map)];
addr_to_page(addr, page);
+#ifdef CONFIG_XEN_COMPAT_030002
+ /* On Xen 3.0.2 and older we may need to explicitly specify _PAGE_USER
+ in kernel PTEs. We check that here. */
+ if (HYPERVISOR_xen_version(XENVER_version, NULL) <= 0x30000) {
+ unsigned long *pg;
+ pte_t pte;
+
+ /* Mess with the initial mapping of page 0. It's not needed. */
+ BUILD_BUG_ON(__START_KERNEL <= __START_KERNEL_map);
+ addr = page[pmd_index(__START_KERNEL_map)];
+ addr_to_page(addr, pg);
+ pte.pte = pg[pte_index(__START_KERNEL_map)];
+ BUG_ON(!(pte.pte & _PAGE_PRESENT));
+
+ /* If _PAGE_USER isn't set, we obviously do not need it. */
+ if (pte.pte & _PAGE_USER) {
+ /* _PAGE_USER is needed, but is it set implicitly? */
+ pte.pte &= ~_PAGE_USER;
+ if ((HYPERVISOR_update_va_mapping(__START_KERNEL_map,
+ pte, 0) != 0) ||
+ !(pg[pte_index(__START_KERNEL_map)] & _PAGE_USER))
+ /* We need to explicitly specify _PAGE_USER. */
+ __kernel_page_user = _PAGE_USER;
+ }
+ }
+#endif
+
/* Construct mapping of initial pte page in our own directories. */
init_level4_pgt[pgd_index(__START_KERNEL_map)] =
mk_kernel_pgd(__pa_symbol(level3_kernel_pgt));
@@ -913,8 +948,8 @@ void __init mem_init(void)
#endif
/* XEN: init and count pages outside initial allocation. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
- ClearPageReserved(&mem_map[pfn]);
- set_page_count(&mem_map[pfn], 1);
+ ClearPageReserved(pfn_to_page(pfn));
+ set_page_count(pfn_to_page(pfn), 1);
totalram_pages++;
}
reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
diff --git a/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile
index 589a7966e8..cc3b9939b0 100644
--- a/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile
+++ b/linux-2.6-xen-sparse/arch/x86_64/oprofile/Makefile
@@ -12,6 +12,8 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
timer_int.o )
ifdef CONFIG_XEN
+XENOPROF_COMMON_OBJS = $(addprefix ../../../drivers/xen/xenoprof/, \
+ xenoprofile.o)
OPROFILE-y := xenoprof.o
else
OPROFILE-y := init.o backtrace.o
@@ -19,4 +21,5 @@ OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
op_model_ppro.o
OPROFILE-$(CONFIG_X86_IO_APIC) += nmi_timer_int.o
endif
-oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
+oprofile-y = $(DRIVER_OBJS) $(XENOPROF_COMMON_OBJS) \
+ $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
index 71c7dd3a00..adf016ba90 100644
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
@@ -41,6 +41,7 @@
#include <xen/evtchn.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/tpmif.h>
+#include <xen/gnttab.h>
#include <xen/xenbus.h>
#include "tpm.h"
#include "tpm_vtpm.h"
@@ -343,6 +344,7 @@ static void backend_changed(struct xenbus_device *dev,
case XenbusStateInitialising:
case XenbusStateInitWait:
case XenbusStateInitialised:
+ case XenbusStateUnknown:
break;
case XenbusStateConnected:
@@ -351,13 +353,14 @@ static void backend_changed(struct xenbus_device *dev,
case XenbusStateClosing:
tpmif_set_connected_state(tp, 0);
+ xenbus_frontend_closed(dev);
break;
- case XenbusStateUnknown:
case XenbusStateClosed:
+ tpmif_set_connected_state(tp, 0);
if (tp->is_suspended == 0)
device_unregister(&dev->dev);
- xenbus_switch_state(dev, XenbusStateClosed);
+ xenbus_frontend_closed(dev);
break;
}
}
@@ -419,9 +422,10 @@ static int tpmfront_suspend(struct xenbus_device *dev)
mutex_lock(&suspend_lock);
tp->is_suspended = 1;
- for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 25; ctr++) {
+ for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) {
if ((ctr % 10) == 0)
- printk("TPM-FE [INFO]: Waiting for outstanding request.\n");
+ printk("TPM-FE [INFO]: Waiting for outstanding "
+ "request.\n");
/*
* Wait for a request to be responded to.
*/
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c
index f6f0689771..0372d93bca 100644
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -2761,7 +2761,7 @@ static void flush_to_ldisc(void *private_)
struct tty_struct *tty = (struct tty_struct *) private_;
unsigned long flags;
struct tty_ldisc *disc;
- struct tty_buffer *tbuf;
+ struct tty_buffer *tbuf, *head;
int count;
char *char_buf;
unsigned char *flag_buf;
@@ -2778,7 +2778,9 @@ static void flush_to_ldisc(void *private_)
goto out;
}
spin_lock_irqsave(&tty->buf.lock, flags);
- while((tbuf = tty->buf.head) != NULL) {
+ head = tty->buf.head;
+ tty->buf.head = NULL;
+ while((tbuf = head) != NULL) {
while ((count = tbuf->commit - tbuf->read) != 0) {
char_buf = tbuf->char_buf_ptr + tbuf->read;
flag_buf = tbuf->flag_buf_ptr + tbuf->read;
@@ -2787,10 +2789,12 @@ static void flush_to_ldisc(void *private_)
disc->receive_buf(tty, char_buf, flag_buf, count);
spin_lock_irqsave(&tty->buf.lock, flags);
}
- if (tbuf->active)
+ if (tbuf->active) {
+ tty->buf.head = head;
break;
- tty->buf.head = tbuf->next;
- if (tty->buf.head == NULL)
+ }
+ head = tbuf->next;
+ if (head == NULL)
tty->buf.tail = NULL;
tty_buffer_free(tty, tbuf);
}
diff --git a/linux-2.6-xen-sparse/drivers/serial/Kconfig b/linux-2.6-xen-sparse/drivers/serial/Kconfig
index fa1fdb0b37..c6be86d83e 100644
--- a/linux-2.6-xen-sparse/drivers/serial/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig
@@ -821,6 +821,7 @@ config SERIAL_ICOM
tristate "IBM Multiport Serial Adapter"
depends on PCI && (PPC_ISERIES || PPC_PSERIES)
select SERIAL_CORE
+ select FW_LOADER
help
This driver is for a family of multiport serial adapters
including 2 port RVX, 2 port internal modem, 4 port internal
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile b/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile
index 0e3a3485c4..3fc3d0bae5 100644
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile
@@ -1,2 +1,2 @@
-obj-y += balloon.o
+obj-y := balloon.o sysfs.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
index a6a8396c05..b621d76383 100644
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
@@ -53,10 +53,8 @@
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <linux/list.h>
-
#include <xen/xenbus.h>
-
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#include "common.h"
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *balloon_pde;
@@ -71,9 +69,7 @@ static DECLARE_MUTEX(balloon_mutex);
*/
DEFINE_SPINLOCK(balloon_lock);
-/* We aim for 'current allocation' == 'target allocation'. */
-static unsigned long current_pages;
-static unsigned long target_pages;
+struct balloon_stats balloon_stats;
/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
@@ -81,18 +77,8 @@ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
/* VM /proc information for memory */
extern unsigned long totalram_pages;
-/* We may hit the hard limit in Xen. If we do then we remember it. */
-static unsigned long hard_limit;
-
-/*
- * Drivers may alter the memory reservation independently, but they must
- * inform the balloon driver so that we can avoid hitting the hard limit.
- */
-static unsigned long driver_pages;
-
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
-static unsigned long balloon_low, balloon_high;
/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
@@ -124,10 +110,10 @@ static void balloon_append(struct page *page)
/* Lowmem is re-populated first, so highmem pages go at list tail. */
if (PageHighMem(page)) {
list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
- balloon_high++;
+ bs.balloon_high++;
} else {
list_add(PAGE_TO_LIST(page), &ballooned_pages);
- balloon_low++;
+ bs.balloon_low++;
}
}
@@ -143,9 +129,9 @@ static struct page *balloon_retrieve(void)
UNLIST_PAGE(page);
if (PageHighMem(page))
- balloon_high--;
+ bs.balloon_high--;
else
- balloon_low--;
+ bs.balloon_low--;
return page;
}
@@ -172,9 +158,9 @@ static void balloon_alarm(unsigned long unused)
static unsigned long current_target(void)
{
- unsigned long target = min(target_pages, hard_limit);
- if (target > (current_pages + balloon_low + balloon_high))
- target = current_pages + balloon_low + balloon_high;
+ unsigned long target = min(bs.target_pages, bs.hard_limit);
+ if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
+ target = bs.current_pages + bs.balloon_low + bs.balloon_high;
return target;
}
@@ -216,7 +202,8 @@ static int increase_reservation(unsigned long nr_pages)
BUG_ON(ret != rc);
}
if (rc >= 0)
- hard_limit = current_pages + rc - driver_pages;
+ bs.hard_limit = (bs.current_pages + rc -
+ bs.driver_pages);
goto out;
}
@@ -228,9 +215,7 @@ static int increase_reservation(unsigned long nr_pages)
BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
phys_to_machine_mapping_valid(pfn));
- /* Update P->M and M->P tables. */
set_phys_to_machine(pfn, frame_list[i]);
- xen_machphys_update(frame_list[i], pfn);
/* Link back into the page tables if not highmem. */
if (pfn < max_low_pfn) {
@@ -248,8 +233,8 @@ static int increase_reservation(unsigned long nr_pages)
__free_page(page);
}
- current_pages += nr_pages;
- totalram_pages = current_pages;
+ bs.current_pages += nr_pages;
+ totalram_pages = bs.current_pages;
out:
balloon_unlock(flags);
@@ -317,8 +302,8 @@ static int decrease_reservation(unsigned long nr_pages)
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
BUG_ON(ret != nr_pages);
- current_pages -= nr_pages;
- totalram_pages = current_pages;
+ bs.current_pages -= nr_pages;
+ totalram_pages = bs.current_pages;
balloon_unlock(flags);
@@ -339,7 +324,7 @@ static void balloon_process(void *unused)
down(&balloon_mutex);
do {
- credit = current_target() - current_pages;
+ credit = current_target() - bs.current_pages;
if (credit > 0)
need_sleep = (increase_reservation(credit) != 0);
if (credit < 0)
@@ -352,18 +337,18 @@ static void balloon_process(void *unused)
} while ((credit != 0) && !need_sleep);
/* Schedule more work if there is some still to be done. */
- if (current_target() != current_pages)
+ if (current_target() != bs.current_pages)
mod_timer(&balloon_timer, jiffies + HZ);
up(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
-static void set_new_target(unsigned long target)
+void balloon_set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
- hard_limit = ~0UL;
- target_pages = target;
+ bs.hard_limit = ~0UL;
+ bs.target_pages = target;
schedule_work(&balloon_worker);
}
@@ -388,7 +373,7 @@ static void watch_target(struct xenbus_watch *watch,
/* The given memory/target value is in KiB, so it needs converting to
* pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
*/
- set_new_target(new_target >> (PAGE_SHIFT - 10));
+ balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
static int balloon_init_watcher(struct notifier_block *notifier,
@@ -424,7 +409,7 @@ static int balloon_write(struct file *file, const char __user *buffer,
memstring[sizeof(memstring)-1] = '\0';
target_bytes = memparse(memstring, &endchar);
- set_new_target(target_bytes >> PAGE_SHIFT);
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
return count;
}
@@ -442,12 +427,13 @@ static int balloon_read(char *page, char **start, off_t off,
"High-mem balloon: %8lu kB\n"
"Driver pages: %8lu kB\n"
"Xen hard limit: ",
- PAGES2KB(current_pages), PAGES2KB(target_pages),
- PAGES2KB(balloon_low), PAGES2KB(balloon_high),
- PAGES2KB(driver_pages));
+ PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
+ PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
+ PAGES2KB(bs.driver_pages));
- if (hard_limit != ~0UL)
- len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit));
+ if (bs.hard_limit != ~0UL)
+ len += sprintf(page + len, "%8lu kB\n",
+ PAGES2KB(bs.hard_limit));
else
len += sprintf(page + len, " ??? kB\n");
@@ -468,13 +454,13 @@ static int __init balloon_init(void)
IPRINTK("Initialising balloon driver.\n");
- current_pages = min(xen_start_info->nr_pages, max_pfn);
- totalram_pages = current_pages;
- target_pages = current_pages;
- balloon_low = 0;
- balloon_high = 0;
- driver_pages = 0UL;
- hard_limit = ~0UL;
+ bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ totalram_pages = bs.current_pages;
+ bs.target_pages = bs.current_pages;
+ bs.balloon_low = 0;
+ bs.balloon_high = 0;
+ bs.driver_pages = 0UL;
+ bs.hard_limit = ~0UL;
init_timer(&balloon_timer);
balloon_timer.data = 0;
@@ -489,6 +475,7 @@ static int __init balloon_init(void)
balloon_pde->read_proc = balloon_read;
balloon_pde->write_proc = balloon_write;
#endif
+ balloon_sysfs_init();
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
@@ -512,7 +499,7 @@ void balloon_update_driver_allowance(long delta)
unsigned long flags;
balloon_lock(flags);
- driver_pages += delta;
+ bs.driver_pages += delta;
balloon_unlock(flags);
}
@@ -534,75 +521,87 @@ static int dealloc_pte_fn(
return 0;
}
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
- unsigned long vstart, flags;
- unsigned int order = get_order(nr_pages * PAGE_SIZE);
- int ret;
- unsigned long i;
- struct page *page;
+ unsigned long vaddr, flags;
+ struct page *page, **pagevec;
+ int i, ret;
- vstart = __get_free_pages(GFP_KERNEL, order);
- if (vstart == 0)
+ pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+ if (pagevec == NULL)
return NULL;
- scrub_pages(vstart, 1 << order);
-
- balloon_lock(flags);
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = order,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &gmfn);
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- if (ret == -ENOSYS)
- goto err;
- BUG_ON(ret != 1);
- } else {
- ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order,
- dealloc_pte_fn, NULL);
- if (ret == -ENOSYS)
+ for (i = 0; i < nr_pages; i++) {
+ page = pagevec[i] = alloc_page(GFP_KERNEL);
+ if (page == NULL)
goto err;
- BUG_ON(ret);
- }
- current_pages -= 1UL << order;
- totalram_pages = current_pages;
- balloon_unlock(flags);
- schedule_work(&balloon_worker);
+ vaddr = (unsigned long)page_address(page);
- flush_tlb_all();
+ scrub_pages(vaddr, 1);
- page = virt_to_page(vstart);
+ balloon_lock(flags);
- for (i = 0; i < (1UL << order); i++)
- set_page_count(page + i, 1);
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ unsigned long gmfn = page_to_pfn(page);
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ set_xen_guest_handle(reservation.extent_start, &gmfn);
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ if (ret == 1)
+ ret = 0; /* success */
+ } else {
+ ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
+ dealloc_pte_fn, NULL);
+ }
- return page;
+ if (ret != 0) {
+ balloon_unlock(flags);
+ __free_page(page);
+ goto err;
+ }
+
+ totalram_pages = --bs.current_pages;
+
+ balloon_unlock(flags);
+ }
+
+ out:
+ schedule_work(&balloon_worker);
+ flush_tlb_all();
+ return pagevec;
err:
- free_pages(vstart, order);
+ balloon_lock(flags);
+ while (--i >= 0)
+ balloon_append(pagevec[i]);
balloon_unlock(flags);
- return NULL;
+ kfree(pagevec);
+ pagevec = NULL;
+ goto out;
}
-void balloon_dealloc_empty_page_range(
- struct page *page, unsigned long nr_pages)
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
{
- unsigned long i, flags;
- unsigned int order = get_order(nr_pages * PAGE_SIZE);
+ unsigned long flags;
+ int i;
+
+ if (pagevec == NULL)
+ return;
balloon_lock(flags);
- for (i = 0; i < (1UL << order); i++) {
- BUG_ON(page_count(page + i) != 1);
- balloon_append(page + i);
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page_count(pagevec[i]) != 1);
+ balloon_append(pagevec[i]);
}
balloon_unlock(flags);
+ kfree(pagevec);
+
schedule_work(&balloon_worker);
}
@@ -612,15 +611,15 @@ void balloon_release_driver_page(struct page *page)
balloon_lock(flags);
balloon_append(page);
- driver_pages--;
+ bs.driver_pages--;
balloon_unlock(flags);
schedule_work(&balloon_worker);
}
EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
-EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(balloon_release_driver_page);
MODULE_LICENSE("Dual BSD/GPL");
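
Note: the old balloon_alloc_empty_page_range()/balloon_dealloc_empty_page_range() pair handed out a single physically and virtually contiguous order-sized region, while the new alloc_empty_pages_and_pagevec()/free_empty_pages_and_pagevec() pair returns an array of independent ballooned-out pages. A minimal sketch of a backend caller adopting the new interface, mirroring what blkif_init() does further down (identifiers other than the two exported functions and the standard mm helpers are illustrative, and the header location is an assumption):

#include <linux/mm.h>
#include <xen/balloon.h>        /* assumed location of the new prototypes */

static struct page **foreign_pages;
static int nr_foreign_pages;

static int backend_init(int nr_pages)
{
    nr_foreign_pages = nr_pages;
    foreign_pages = alloc_empty_pages_and_pagevec(nr_pages);
    if (foreign_pages == NULL)
        return -ENOMEM;
    return 0;
}

static void backend_exit(void)
{
    /* NULL-safe; hands every page back to the balloon. */
    free_empty_pages_and_pagevec(foreign_pages, nr_foreign_pages);
    foreign_pages = NULL;
}

/* The pages are no longer virtually contiguous, so kernel addresses
 * are derived per page instead of from a single mmap_vstart: */
static unsigned long foreign_page_vaddr(int i)
{
    return (unsigned long)pfn_to_kaddr(page_to_pfn(foreign_pages[i]));
}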
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/common.h b/linux-2.6-xen-sparse/drivers/xen/balloon/common.h
new file mode 100644
index 0000000000..4496d215e2
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/common.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+ * balloon/common.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_BALLOON_COMMON_H__
+#define __XEN_BALLOON_COMMON_H__
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* We may hit the hard limit in Xen. If we do then we remember it. */
+ unsigned long hard_limit;
+ /*
+ * Drivers may alter the memory reservation independently, but they
+ * must inform the balloon driver so we avoid hitting the hard limit.
+ */
+ unsigned long driver_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+};
+
+extern struct balloon_stats balloon_stats;
+#define bs balloon_stats
+
+int balloon_sysfs_init(void);
+void balloon_sysfs_exit(void);
+
+void balloon_set_new_target(unsigned long target);
+
+#endif /* __XEN_BALLOON_COMMON_H__ */
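
The PAGES2KB() macro and its inverse (the ">> (PAGE_SHIFT - 10)" conversion used by watch_target() and store_target_kb()) are easy to sanity-check in isolation. A tiny worked example, assuming 4 KiB pages (PAGE_SHIFT == 12):

#include <assert.h>

#define EX_PAGE_SHIFT   12                  /* assumption: 4 KiB pages */
#define EX_PAGES2KB(p)  ((p) << (EX_PAGE_SHIFT - 10))
#define EX_KB2PAGES(kb) ((kb) >> (EX_PAGE_SHIFT - 10))

int main(void)
{
    assert(EX_PAGES2KB(1UL) == 4UL);            /* one page is 4 KiB     */
    assert(EX_KB2PAGES(262144UL) == 65536UL);   /* 256 MiB -> 64Ki pages */
    assert(EX_KB2PAGES(EX_PAGES2KB(123UL)) == 123UL);
    return 0;
}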
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c b/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c
new file mode 100644
index 0000000000..a4ed8a6f1e
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c
@@ -0,0 +1,165 @@
+/******************************************************************************
+ * balloon/sysfs.c
+ *
+ * Xen balloon driver - sysfs interfaces.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/capability.h>
+#include <linux/stat.h>
+#include <linux/sysdev.h>
+#include "common.h"
+
+#define BALLOON_CLASS_NAME "memory"
+
+#define BALLOON_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct sys_device *dev, \
+ char *buf) \
+ { \
+ return sprintf(buf, format, ##args); \
+ } \
+ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
+BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
+BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
+BALLOON_SHOW(hard_limit_kb,
+ (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n",
+ (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0);
+BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
+
+static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
+}
+
+static ssize_t store_target_kb(struct sys_device *dev,
+ const char *buf,
+ size_t count)
+{
+ char memstring[64], *endchar;
+ unsigned long long target_bytes;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (count <= 1)
+ return -EBADMSG; /* runt */
+ if (count > sizeof(memstring))
+ return -EFBIG; /* too long */
+ strcpy(memstring, buf);
+
+ target_bytes = memparse(memstring, &endchar);
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+ return count;
+}
+
+static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+ show_target_kb, store_target_kb);
+
+static struct sysdev_attribute *balloon_attrs[] = {
+ &attr_target_kb,
+};
+
+static struct attribute *balloon_info_attrs[] = {
+ &attr_current_kb.attr,
+ &attr_low_kb.attr,
+ &attr_high_kb.attr,
+ &attr_hard_limit_kb.attr,
+ &attr_driver_kb.attr,
+ NULL
+};
+
+static struct attribute_group balloon_info_group = {
+ .name = "info",
+ .attrs = balloon_info_attrs,
+};
+
+static struct sysdev_class balloon_sysdev_class = {
+ set_kset_name(BALLOON_CLASS_NAME),
+};
+
+static struct sys_device balloon_sysdev;
+
+static int register_balloon(struct sys_device *sysdev)
+{
+ int i, error;
+
+ error = sysdev_class_register(&balloon_sysdev_class);
+ if (error)
+ return error;
+
+ sysdev->id = 0;
+ sysdev->cls = &balloon_sysdev_class;
+
+ error = sysdev_register(sysdev);
+ if (error) {
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
+ error = sysdev_create_file(sysdev, balloon_attrs[i]);
+ if (error)
+ goto fail;
+ }
+
+ error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+ if (error)
+ goto fail;
+
+ return 0;
+
+ fail:
+ while (--i >= 0)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+}
+
+static void unregister_balloon(struct sys_device *sysdev)
+{
+ int i;
+
+ sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+}
+
+int balloon_sysfs_init(void)
+{
+ return register_balloon(&balloon_sysdev);
+}
+
+void balloon_sysfs_exit(void)
+{
+ unregister_balloon(&balloon_sysdev);
+}
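
For reference, each BALLOON_SHOW() invocation above boils down to an ordinary sysdev show routine plus a read-only attribute. Roughly what BALLOON_SHOW(current_kb, ...) expands to (a sketch of the preprocessor output; the exact SYSDEV_ATTR layout depends on the kernel version):

static ssize_t show_current_kb(struct sys_device *dev, char *buf)
{
    return sprintf(buf, "%lu\n", PAGES2KB(bs.current_pages));
}

static struct sysdev_attribute attr_current_kb = {
    .attr  = { .name = "current_kb", .mode = S_IRUGO },
    .show  = show_current_kb,
    .store = NULL,
};

With the usual sysdev layout these nodes should appear under /sys/devices/system/memory/memory0/, with target_kb writable by root and the read-only counters grouped in the info/ subdirectory.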
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
index 416f7bc18c..e8df9e0346 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
@@ -56,8 +56,6 @@ static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-static int mmap_pages;
-
/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
@@ -87,8 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
#define BLKBACK_INVALID_HANDLE (~0)
-static unsigned long mmap_vstart;
-static unsigned long *pending_vaddrs;
+static struct page **pending_pages;
static grant_handle_t *pending_grant_handles;
static inline int vaddr_pagenr(pending_req_t *req, int seg)
@@ -98,7 +95,8 @@ static inline int vaddr_pagenr(pending_req_t *req, int seg)
static inline unsigned long vaddr(pending_req_t *req, int seg)
{
- return pending_vaddrs[vaddr_pagenr(req, seg)];
+ unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
+ return (unsigned long)pfn_to_kaddr(pfn);
}
#define pending_handle(_req, _seg) \
@@ -191,9 +189,9 @@ static void fast_flush_area(pending_req_t *req)
static void print_stats(blkif_t *blkif)
{
- printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req);
+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
blkif->st_wr_req = 0;
@@ -243,11 +241,17 @@ int blkif_schedule(void *arg)
* COMPLETION CALLBACK -- Called as bh->b_end_io()
*/
-static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+static void __end_block_io_op(pending_req_t *pending_req, int error)
{
/* An error fails the entire request. */
- if (!uptodate) {
- DPRINTK("Buffer not up-to-date at end of operation\n");
+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+ (error == -EOPNOTSUPP)) {
+ DPRINTK("blkback: write barrier op failed, not supported\n");
+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+ } else if (error) {
+ DPRINTK("Buffer not up-to-date at end of operation, "
+ "error=%d\n", error);
pending_req->status = BLKIF_RSP_ERROR;
}
@@ -264,7 +268,7 @@ static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
if (bio->bi_size != 0)
return 1;
- __end_block_io_op(bio->bi_private, !error);
+ __end_block_io_op(bio->bi_private, error);
bio_put(bio);
return error;
}
@@ -295,7 +299,7 @@ irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
static int do_block_io_op(blkif_t *blkif)
{
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
- blkif_request_t *req;
+ blkif_request_t req;
pending_req_t *pending_req;
RING_IDX rc, rp;
int more_to_do = 0;
@@ -313,22 +317,25 @@ static int do_block_io_op(blkif_t *blkif)
break;
}
- req = RING_GET_REQUEST(blk_ring, rc);
+ memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req));
blk_ring->req_cons = ++rc; /* before make_response() */
- switch (req->operation) {
+ switch (req.operation) {
case BLKIF_OP_READ:
blkif->st_rd_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
+ case BLKIF_OP_WRITE_BARRIER:
+ blkif->st_br_req++;
+ /* fall through */
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
default:
DPRINTK("error: unknown block io operation [%d]\n",
- req->operation);
- make_response(blkif, req->id, req->operation,
+ req.operation);
+ make_response(blkif, req.id, req.operation,
BLKIF_RSP_ERROR);
free_req(pending_req);
break;
@@ -342,7 +349,6 @@ static void dispatch_rw_block_io(blkif_t *blkif,
pending_req_t *pending_req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
- int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct phys_req preq;
struct {
@@ -351,6 +357,22 @@ static void dispatch_rw_block_io(blkif_t *blkif,
unsigned int nseg;
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int ret, i, nbio = 0;
+ int operation;
+
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ operation = READ;
+ break;
+ case BLKIF_OP_WRITE:
+ operation = WRITE;
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ operation = WRITE_BARRIER;
+ break;
+ default:
+ operation = 0; /* make gcc happy */
+ BUG();
+ }
/* Check that number of segments is sane. */
nseg = req->nr_segments;
@@ -366,7 +388,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
pending_req->blkif = blkif;
pending_req->id = req->id;
- pending_req->operation = operation;
+ pending_req->operation = req->operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
@@ -377,12 +399,12 @@ static void dispatch_rw_block_io(blkif_t *blkif,
req->seg[i].first_sect + 1;
if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
- (seg[i].nsec <= 0))
+ (req->seg[i].last_sect < req->seg[i].first_sect))
goto fail_response;
preq.nr_sects += seg[i].nsec;
flags = GNTMAP_host_map;
- if ( operation == WRITE )
+ if (operation != READ)
flags |= GNTMAP_readonly;
gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
req->seg[i].gref, blkif->domid);
@@ -394,10 +416,15 @@ static void dispatch_rw_block_io(blkif_t *blkif,
for (i = 0; i < nseg; i++) {
if (unlikely(map[i].status != 0)) {
DPRINTK("invalid buffer -- could not remap it\n");
- goto fail_flush;
+ map[i].handle = BLKBACK_INVALID_HANDLE;
+ ret |= 1;
}
pending_handle(pending_req, i) = map[i].handle;
+
+ if (ret)
+ continue;
+
set_phys_to_machine(__pa(vaddr(
pending_req, i)) >> PAGE_SHIFT,
FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
@@ -405,6 +432,9 @@ static void dispatch_rw_block_io(blkif_t *blkif,
(req->seg[i].first_sect << 9);
}
+ if (ret)
+ goto fail_flush;
+
if (vbd_translate(&preq, blkif, operation) != 0) {
DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
operation == READ ? "read" : "write",
@@ -506,52 +536,43 @@ static void make_response(blkif_t *blkif, unsigned long id,
static int __init blkif_init(void)
{
- struct page *page;
- int i;
+ int i, mmap_pages;
if (!is_running_on_xen())
return -ENODEV;
- mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
- page = balloon_alloc_empty_page_range(mmap_pages);
- if (page == NULL)
- return -ENOMEM;
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
blkif_reqs, GFP_KERNEL);
pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
mmap_pages, GFP_KERNEL);
- pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
- mmap_pages, GFP_KERNEL);
- if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
- kfree(pending_reqs);
- kfree(pending_grant_handles);
- kfree(pending_vaddrs);
- printk("%s: out of memory\n", __FUNCTION__);
- return -ENOMEM;
- }
+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
- blkif_interface_init();
-
- printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
- __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
- BUG_ON(mmap_vstart == 0);
- for (i = 0; i < mmap_pages; i++) {
- pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
+ goto out_of_memory;
+
+ for (i = 0; i < mmap_pages; i++)
pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
- }
+
+ blkif_interface_init();
memset(pending_reqs, 0, sizeof(pending_reqs));
INIT_LIST_HEAD(&pending_free);
for (i = 0; i < blkif_reqs; i++)
list_add_tail(&pending_reqs[i].free_list, &pending_free);
-
+
blkif_xenbus_init();
return 0;
+
+ out_of_memory:
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ free_empty_pages_and_pagevec(pending_pages, mmap_pages);
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
}
module_init(blkif_init);
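
Two small but load-bearing changes in the blkback hunks above: requests are now memcpy'd out of the shared ring before being parsed, and a failed barrier bio is reported back as BLKIF_RSP_EOPNOTSUPP rather than a generic error. The first is the usual snapshot-before-validate defence against a frontend that keeps writing to the ring after the backend has started checking a request. A self-contained sketch of the pattern (types and names are illustrative, not the blkif ones):

#include <string.h>

#define MAX_SEGMENTS 11     /* stand-in for BLKIF_MAX_SEGMENTS_PER_REQUEST */

struct request_slot {
    unsigned int op;
    unsigned int nr_segments;
};

/* 'shared' is memory the untrusted producer can rewrite at any time. */
static int take_request(volatile struct request_slot *shared,
                        struct request_slot *out)
{
    /* Copy once; every later check and use refers to the snapshot,
     * so the producer cannot invalidate a check after it has passed. */
    memcpy(out, (const void *)shared, sizeof(*out));

    if (out->nr_segments > MAX_SEGMENTS)
        return -1;          /* reject based on the snapshot */
    return 0;
}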
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
index 38cb756964..1b5b6a427e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
@@ -44,6 +44,7 @@
#include <xen/interface/io/ring.h>
#include <xen/gnttab.h>
#include <xen/driver_util.h>
+#include <xen/xenbus.h>
#define DPRINTK(_f, _a...) \
pr_debug("(file=%s, line=%d) " _f, \
@@ -87,6 +88,7 @@ typedef struct blkif_st {
int st_rd_req;
int st_wr_req;
int st_oo_req;
+ int st_br_req;
wait_queue_head_t waiting_to_free;
@@ -111,7 +113,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
unsigned minor, int readonly);
void vbd_free(struct vbd *vbd);
-unsigned long vbd_size(struct vbd *vbd);
+unsigned long long vbd_size(struct vbd *vbd);
unsigned int vbd_info(struct vbd *vbd);
unsigned long vbd_secsize(struct vbd *vbd);
@@ -131,4 +133,7 @@ void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
int blkif_schedule(void *arg);
+int blkback_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
+
#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
index a809b04cd1..34048b32c4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
@@ -31,12 +31,11 @@
*/
#include "common.h"
-#include <xen/xenbus.h>
#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
(_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
-unsigned long vbd_size(struct vbd *vbd)
+unsigned long long vbd_size(struct vbd *vbd)
{
return vbd_sz(vbd);
}
@@ -104,7 +103,7 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
struct vbd *vbd = &blkif->vbd;
int rc = -EACCES;
- if ((operation == WRITE) && vbd->readonly)
+ if ((operation != READ) && vbd->readonly)
goto out;
if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
index 02f90a6803..349ae64d0f 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
@@ -20,7 +20,6 @@
#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
-#include <xen/xenbus.h>
#include "common.h"
#undef DPRINTK
@@ -91,11 +90,13 @@ static void update_blkif_status(blkif_t *blkif)
VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
static struct attribute *vbdstat_attrs[] = {
&dev_attr_oo_req.attr,
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
+ &dev_attr_br_req.attr,
NULL
};
@@ -165,6 +166,19 @@ static int blkback_remove(struct xenbus_device *dev)
return 0;
}
+int blkback_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err;
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+ return err;
+}
/**
* Entry point to this code when a new device is created. Allocate the basic
@@ -366,13 +380,16 @@ static void connect(struct backend_info *be)
/* Supply the information about the device the frontend needs */
again:
err = xenbus_transaction_start(&xbt);
-
if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
return;
}
- err = xenbus_printf(xbt, dev->nodename, "sectors", "%lu",
+ err = blkback_barrier(xbt, be, 1);
+ if (err)
+ goto abort;
+
+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
vbd_size(&be->blkif->vbd));
if (err) {
xenbus_dev_fatal(dev, err, "writing %s/sectors",
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index 4c44d7608d..95cff46ff9 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -48,6 +48,10 @@
#include <asm/hypervisor.h>
#include <asm/maddr.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED 1
#define BLKIF_STATE_SUSPENDED 2
@@ -134,10 +138,10 @@ static int blkfront_resume(struct xenbus_device *dev)
DPRINTK("blkfront_resume: %s\n", dev->nodename);
- blkif_free(info, 1);
+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
err = talk_to_backend(dev, info);
- if (!err)
+ if (info->connected == BLKIF_STATE_SUSPENDED && !err)
blkif_recover(info);
return err;
@@ -273,7 +277,7 @@ static void backend_changed(struct xenbus_device *dev,
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
down(&bd->bd_sem);
- if (info->users > 0 && system_state == SYSTEM_RUNNING)
+ if (info->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
@@ -294,7 +298,8 @@ static void backend_changed(struct xenbus_device *dev,
*/
static void connect(struct blkfront_info *info)
{
- unsigned long sectors, sector_size;
+ unsigned long long sectors;
+ unsigned long sector_size;
unsigned int binfo;
int err;
@@ -305,7 +310,7 @@ static void connect(struct blkfront_info *info)
DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- "sectors", "%lu", &sectors,
+ "sectors", "%llu", &sectors,
"info", "%u", &binfo,
"sector-size", "%lu", &sector_size,
NULL);
@@ -316,6 +321,12 @@ static void connect(struct blkfront_info *info)
return;
}
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-barrier", "%lu", &info->feature_barrier,
+ NULL);
+ if (err)
+ info->feature_barrier = 0;
+
err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -355,9 +366,11 @@ static void blkfront_closing(struct xenbus_device *dev)
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- flush_scheduled_work();
spin_unlock_irqrestore(&blkif_io_lock, flags);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
xlvbd_del(info);
xenbus_frontend_closed(dev);
@@ -466,6 +479,27 @@ int blkif_ioctl(struct inode *inode, struct file *filep,
command, (long)argument, inode->i_rdev);
switch (command) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+ case HDIO_GETGEO: {
+ struct block_device *bd = inode->i_bdev;
+ struct hd_geometry geo;
+ int ret;
+
+ if (!argument)
+ return -EINVAL;
+
+ geo.start = get_start_sect(bd);
+ ret = blkif_getgeo(bd, &geo);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
+ sizeof(geo)))
+ return -EFAULT;
+
+ return 0;
+ }
+#endif
case CDROMMULTISESSION:
DPRINTK("FIXME: support multisession CDs later\n");
for (i = 0; i < sizeof(struct cdrom_multisession); i++)
@@ -542,11 +576,14 @@ static int blkif_queue_request(struct request *req)
info->shadow[id].request = (unsigned long)req;
ring_req->id = id;
- ring_req->operation = rq_data_dir(req) ?
- BLKIF_OP_WRITE : BLKIF_OP_READ;
ring_req->sector_number = (blkif_sector_t)req->sector;
ring_req->handle = info->handle;
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+ if (blk_barrier_rq(req))
+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+
ring_req->nr_segments = 0;
rq_for_each_bio (bio, req) {
bio_for_each_segment (bvec, bio, idx) {
@@ -643,6 +680,7 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
RING_IDX i, rp;
unsigned long flags;
struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ int uptodate;
spin_lock_irqsave(&blkif_io_lock, flags);
@@ -667,19 +705,27 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
ADD_ID_TO_FREELIST(info, id);
+ uptodate = (bret->status == BLKIF_RSP_OKAY);
switch (bret->operation) {
+ case BLKIF_OP_WRITE_BARRIER:
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ printk("blkfront: %s: write barrier op failed\n",
+ info->gd->disk_name);
+ uptodate = -EOPNOTSUPP;
+ info->feature_barrier = 0;
+ xlvbd_barrier(info);
+ }
+ /* fall through */
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
if (unlikely(bret->status != BLKIF_RSP_OKAY))
DPRINTK("Bad return from blkdev data "
"request: %x\n", bret->status);
- ret = end_that_request_first(
- req, (bret->status == BLKIF_RSP_OKAY),
+ ret = end_that_request_first(req, uptodate,
req->hard_nr_sectors);
BUG_ON(ret);
- end_that_request_last(
- req, (bret->status == BLKIF_RSP_OKAY));
+ end_that_request_last(req, uptodate);
break;
default:
BUG();
@@ -714,9 +760,11 @@ static void blkif_free(struct blkfront_info *info, int suspend)
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- flush_scheduled_work();
spin_unlock_irq(&blkif_io_lock);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
/* Free resources associated with old device channel. */
if (info->ring_ref != GRANT_INVALID_REF) {
gnttab_end_foreign_access(info->ring_ref, 0,
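
The compat HDIO_GETGEO branch added above simply packages what blkif_getgeo() computes, for kernels that predate the .getgeo block_device_operations hook (before 2.6.16). blkif_getgeo() itself is not part of this hunk; a plausible shape for such a routine, fabricating a geometry from the disk capacity, is sketched below (illustrative only; the real function elsewhere in blkfront.c may differ):

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>

static int example_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
    sector_t nsect = get_capacity(bd->bd_disk);
    sector_t cylinders = nsect;

    hg->heads = 0xff;
    hg->sectors = 0x3f;
    sector_div(cylinders, hg->heads * hg->sectors);
    hg->cylinders = cylinders;
    if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
        hg->cylinders = 0xffff;   /* capacity exceeds CHS addressing */
    return 0;
}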
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
index 5ba3d1ebc3..b86360f405 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
@@ -126,6 +126,7 @@ struct blkfront_info
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
+ int feature_barrier;
/**
* The number of people holding this device open. We won't allow a
@@ -152,5 +153,6 @@ extern void do_blkif_request (request_queue_t *rq);
int xlvbd_add(blkif_sector_t capacity, int device,
u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
void xlvbd_del(struct blkfront_info *info);
+int xlvbd_barrier(struct blkfront_info *info);
#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
index 8aa453d3a0..f040a2b7e3 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -36,6 +36,10 @@
#include <linux/blkdev.h>
#include <linux/list.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)
@@ -46,7 +50,7 @@
*/
#define NUM_IDE_MAJORS 10
-#define NUM_SCSI_MAJORS 9
+#define NUM_SCSI_MAJORS 17
#define NUM_VBD_MAJORS 1
static struct xlbd_type_info xlbd_ide_type = {
@@ -91,7 +95,9 @@ static struct block_device_operations xlvbd_block_fops =
.open = blkif_open,
.release = blkif_release,
.ioctl = blkif_ioctl,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.getgeo = blkif_getgeo
+#endif
};
DEFINE_SPINLOCK(blkif_io_lock);
@@ -159,8 +165,11 @@ xlbd_get_major_info(int vdevice)
case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
index = 11 + major - SCSI_DISK1_MAJOR;
break;
- case SCSI_CDROM_MAJOR: index = 18; break;
- default: index = 19; break;
+ case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
+ index = 18 + major - SCSI_DISK8_MAJOR;
+ break;
+ case SCSI_CDROM_MAJOR: index = 26; break;
+ default: index = 27; break;
}
mi = ((major_info[index] != NULL) ? major_info[index] :
@@ -186,7 +195,11 @@ xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
if (rq == NULL)
return -1;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
elevator_init(rq, "noop");
+#else
+ elevator_init(rq, &elevator_noop);
+#endif
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_hardsect_size(rq, sector_size);
@@ -217,6 +230,7 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
struct xlbd_major_info *mi;
int nr_minors = 1;
int err = -ENODEV;
+ unsigned int offset;
BUG_ON(info->gd != NULL);
BUG_ON(info->mi != NULL);
@@ -234,15 +248,33 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
if (gd == NULL)
goto out;
- if (nr_minors > 1)
- sprintf(gd->disk_name, "%s%c", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift));
- else
- sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift),
- minor & ((1 << mi->type->partn_shift) - 1));
+ offset = mi->index * mi->type->disks_per_major +
+ (minor >> mi->type->partn_shift);
+ if (nr_minors > 1) {
+ if (offset < 26) {
+ sprintf(gd->disk_name, "%s%c",
+ mi->type->diskname, 'a' + offset );
+ }
+ else {
+ sprintf(gd->disk_name, "%s%c%c",
+ mi->type->diskname,
+ 'a' + ((offset/26)-1), 'a' + (offset%26) );
+ }
+ }
+ else {
+ if (offset < 26) {
+ sprintf(gd->disk_name, "%s%c%d",
+ mi->type->diskname,
+ 'a' + offset,
+ minor & ((1 << mi->type->partn_shift) - 1));
+ }
+ else {
+ sprintf(gd->disk_name, "%s%c%c%d",
+ mi->type->diskname,
+ 'a' + ((offset/26)-1), 'a' + (offset%26),
+ minor & ((1 << mi->type->partn_shift) - 1));
+ }
+ }
gd->major = mi->major;
gd->first_minor = minor;
@@ -257,6 +289,10 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
}
info->rq = gd->queue;
+ info->gd = gd;
+
+ if (info->feature_barrier)
+ xlvbd_barrier(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -267,8 +303,6 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
if (vdisk_info & VDISK_CDROM)
gd->flags |= GENHD_FL_CD;
- info->gd = gd;
-
return 0;
out:
@@ -316,3 +350,26 @@ xlvbd_del(struct blkfront_info *info)
blk_cleanup_queue(info->rq);
info->rq = NULL;
}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+int
+xlvbd_barrier(struct blkfront_info *info)
+{
+ int err;
+
+ err = blk_queue_ordered(info->rq,
+ info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
+ if (err)
+ return err;
+ printk("blkfront: %s: barriers %s\n",
+ info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
+ return 0;
+}
+#else
+int
+xlvbd_barrier(struct blkfront_info *info)
+{
+ printk("blkfront: %s: barriers disabled\n", info->gd->disk_name);
+ return -ENOSYS;
+}
+#endif
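
The gendisk naming hunk above extends the single-letter suffix to a two-letter scheme once the computed offset reaches 26, so a device type named "xvd" would run xvdz, xvdaa, xvdab, and so on. A user-space check of exactly that arithmetic:

#include <stdio.h>

static void disk_suffix(unsigned int offset, char out[3])
{
    if (offset < 26) {
        out[0] = 'a' + offset;
        out[1] = '\0';
    } else {
        out[0] = 'a' + (offset / 26) - 1;
        out[1] = 'a' + (offset % 26);
        out[2] = '\0';
    }
}

int main(void)
{
    unsigned int samples[] = { 0, 25, 26, 27, 51, 52 };
    unsigned int i;
    char s[3];

    /* expected suffixes: a z aa ab az ba */
    for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        disk_suffix(samples[i], s);
        printf("offset %2u -> xvd%s\n", samples[i], s);
    }
    return 0;
}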
diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
index a6f1379c27..e0d898ab98 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
@@ -10,6 +10,9 @@
*
* Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
*
+ * Clean ups and fix ups:
+ * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
@@ -44,7 +47,6 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
-#include <linux/miscdevice.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/gfp.h>
@@ -52,9 +54,33 @@
#include <asm/tlbflush.h>
#include <linux/devfs_fs_kernel.h>
-#define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */
+#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
+
+struct class *xen_class;
+EXPORT_SYMBOL_GPL(xen_class);
+
+/*
+ * Set up the xen class. This should probably go in another file, but
+ * since blktap is the only user of it so far, it gets to keep it.
+ */
+int setup_xen_class(void)
+{
+ int ret;
+
+ if (xen_class)
+ return 0;
+
+ xen_class = class_create(THIS_MODULE, "xen");
+ if ((ret = IS_ERR(xen_class))) {
+ xen_class = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
/*
* The maximum number of requests that can be outstanding at any time
* is determined by
@@ -67,8 +93,9 @@
* mmap_alloc is initialised to 2 and should be adjustable on the fly via
* sysfs.
*/
-#define MAX_DYNAMIC_MEM 64
-#define MAX_PENDING_REQS 64
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+#define MAX_DYNAMIC_MEM BLK_RING_SIZE
+#define MAX_PENDING_REQS BLK_RING_SIZE
#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define MMAP_VADDR(_start, _req,_seg) \
(_start + \
@@ -82,6 +109,12 @@ static int mmap_pages = MMAP_PAGES;
* memory rings.
*/
+/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
+typedef struct domid_translate {
+ unsigned short domid;
+ unsigned short busid;
+} domid_translate_t ;
+
/*Data struct associated with each of the tapdisk devices*/
typedef struct tap_blkif {
struct vm_area_struct *vma; /*Shared memory area */
@@ -100,22 +133,11 @@ typedef struct tap_blkif {
unsigned long *idx_map; /*Record the user ring id to kern
[req id, idx] tuple */
blkif_t *blkif; /*Associate blkif with tapdev */
+ struct domid_translate trans; /*Translation from domid to bus. */
} tap_blkif_t;
-/*Private data struct associated with the inode*/
-typedef struct private_info {
- int idx;
-} private_info_t;
-
-/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
-typedef struct domid_translate {
- unsigned short domid;
- unsigned short busid;
-} domid_translate_t ;
-
-
-static domid_translate_t translate_domid[MAX_TAP_DEV];
-static tap_blkif_t *tapfds[MAX_TAP_DEV];
+static struct tap_blkif *tapfds[MAX_TAP_DEV];
+static int blktap_next_minor;
static int __init set_blkif_reqs(char *str)
{
@@ -168,16 +190,18 @@ static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
#define BLKBACK_INVALID_HANDLE (~0)
-typedef struct mmap_page {
- unsigned long start;
- struct page *mpage;
-} mmap_page_t;
+static struct page **foreign_pages[MAX_DYNAMIC_MEM];
+static inline unsigned long idx_to_kaddr(
+ unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
+{
+ unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx;
+ unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]);
+ return (unsigned long)pfn_to_kaddr(pfn);
+}
-static mmap_page_t mmap_start[MAX_DYNAMIC_MEM];
static unsigned short mmap_alloc = 0;
static unsigned short mmap_lock = 0;
static unsigned short mmap_inuse = 0;
-static unsigned long *pending_addrs[MAX_DYNAMIC_MEM];
/******************************************************************
* GRANT HANDLES
@@ -192,6 +216,7 @@ struct grant_handle_pair
grant_handle_t kernel;
grant_handle_t user;
};
+#define INVALID_GRANT_HANDLE 0xFFFF
static struct grant_handle_pair
pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
@@ -200,15 +225,13 @@ static struct grant_handle_pair
+ (_i)])
-static int blktap_read_ufe_ring(int idx); /*local prototypes*/
+static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
-#define BLKTAP_MINOR 0 /*/dev/xen/blktap resides at device number
- major=254, minor numbers begin at 0 */
-#define BLKTAP_DEV_MAJOR 254 /* TODO: Make major number dynamic *
- * and create devices in the kernel *
- */
+#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */
#define BLKTAP_DEV_DIR "/dev/xen"
+static int blktap_major;
+
/* blktap IOCTLs: */
#define BLKTAP_IOCTL_KICK_FE 1
#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */
@@ -264,17 +287,19 @@ static inline int GET_NEXT_REQ(unsigned long *idx_map)
{
int i;
for (i = 0; i < MAX_PENDING_REQS; i++)
- if (idx_map[i] == INVALID_REQ) return i;
+ if (idx_map[i] == INVALID_REQ)
+ return i;
return INVALID_REQ;
}
#define BLKTAP_INVALID_HANDLE(_g) \
- (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
+ (((_g->kernel) == INVALID_GRANT_HANDLE) && \
+ ((_g->user) == INVALID_GRANT_HANDLE))
#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
- (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+ (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \
} while(0)
@@ -303,7 +328,7 @@ struct vm_operations_struct blktap_vm_ops = {
*/
/*Function Declarations*/
-static int get_next_free_dev(void);
+static tap_blkif_t *get_next_free_dev(void);
static int blktap_open(struct inode *inode, struct file *filp);
static int blktap_release(struct inode *inode, struct file *filp);
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -311,8 +336,6 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
static unsigned int blktap_poll(struct file *file, poll_table *wait);
-struct miscdevice *set_misc(int minor, char *name, int dev);
-
static struct file_operations blktap_fops = {
.owner = THIS_MODULE,
.poll = blktap_poll,
@@ -323,41 +346,96 @@ static struct file_operations blktap_fops = {
};
-static int get_next_free_dev(void)
+static tap_blkif_t *get_next_free_dev(void)
{
tap_blkif_t *info;
- int i = 0, ret = -1;
- unsigned long flags;
+ int minor;
- spin_lock_irqsave(&pending_free_lock, flags);
-
- while (i < MAX_TAP_DEV) {
- info = tapfds[i];
- if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
- && (info->dev_pending == 0) ) {
+ /*
+ * This is called only from the ioctl, which
+ * means we should always have interrupts enabled.
+ */
+ BUG_ON(irqs_disabled());
+
+ spin_lock_irq(&pending_free_lock);
+
+ /* tapfds[0] is always NULL */
+
+ for (minor = 1; minor < blktap_next_minor; minor++) {
+ info = tapfds[minor];
+ /* we could have failed a previous attempt. */
+ if (!info ||
+ ((info->dev_inuse == 0) &&
+ (info->dev_pending == 0)) ) {
info->dev_pending = 1;
- ret = i;
- goto done;
+ goto found;
}
- i++;
}
-
-done:
- spin_unlock_irqrestore(&pending_free_lock, flags);
- return ret;
+ info = NULL;
+ minor = -1;
+
+ /*
+ * We didn't find a free device. If we can still allocate
+ * more, then we grab the next device minor that is
+ * available. This is done while we are still under
+ * the protection of the pending_free_lock.
+ */
+ if (blktap_next_minor < MAX_TAP_DEV)
+ minor = blktap_next_minor++;
+found:
+ spin_unlock_irq(&pending_free_lock);
+
+ if (!info && minor > 0) {
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (unlikely(!info)) {
+ /*
+ * If we failed here, try to put back
+ * the next minor number. But if one
+ * was just taken, then we just lose this
+ * minor. We can try to allocate this
+ * minor again later.
+ */
+ spin_lock_irq(&pending_free_lock);
+ if (blktap_next_minor == minor+1)
+ blktap_next_minor--;
+ spin_unlock_irq(&pending_free_lock);
+ goto out;
+ }
+
+ info->minor = minor;
+ /*
+ * Make sure that we have a minor before others can
+ * see us.
+ */
+ wmb();
+ tapfds[minor] = info;
+
+ class_device_create(xen_class, NULL,
+ MKDEV(blktap_major, minor), NULL,
+ "blktap%d", minor);
+ devfs_mk_cdev(MKDEV(blktap_major, minor),
+ S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor);
+ }
+
+out:
+ return info;
}
int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
{
+ tap_blkif_t *info;
int i;
-
- for (i = 0; i < MAX_TAP_DEV; i++)
- if ( (translate_domid[i].domid == domid)
- && (translate_domid[i].busid == xenbus_id) ) {
- tapfds[i]->blkif = blkif;
- tapfds[i]->status = RUNNING;
+
+ for (i = 1; i < blktap_next_minor; i++) {
+ info = tapfds[i];
+ if ( info &&
+ (info->trans.domid == domid) &&
+ (info->trans.busid == xenbus_id) ) {
+ info->blkif = blkif;
+ info->status = RUNNING;
return i;
}
+ }
return -1;
}
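
In the hunk above, get_next_free_dev() now reserves a minor under pending_free_lock, performs the blocking kzalloc() with the lock dropped, and publishes the new tap_blkif_t only after it is fully initialised (hence the wmb() before the tapfds[] store). A user-space sketch of that reserve/allocate/publish pattern (identifiers are illustrative; a GCC builtin stands in for the write barrier):

#include <pthread.h>
#include <stdlib.h>

#define MAX_DEVS 256

struct dev_info { int minor; int in_use; };

static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dev_info *devs[MAX_DEVS];     /* slot 0 reserved (control dev) */
static int next_minor = 1;

static struct dev_info *get_free_dev(void)
{
    struct dev_info *info = NULL;
    int minor = -1, i;

    pthread_mutex_lock(&slot_lock);
    for (i = 1; i < next_minor; i++) {
        if (devs[i] && !devs[i]->in_use) {  /* reuse an idle slot */
            devs[i]->in_use = 1;
            info = devs[i];
            break;
        }
    }
    if (!info && next_minor < MAX_DEVS)
        minor = next_minor++;               /* reserve a fresh minor */
    pthread_mutex_unlock(&slot_lock);

    if (!info && minor > 0) {
        info = calloc(1, sizeof(*info));    /* blocking alloc, lock dropped */
        if (!info)
            return NULL;                    /* reserved minor retried later */
        info->minor = minor;
        info->in_use = 1;
        __sync_synchronize();               /* publish only a finished object */
        devs[minor] = info;
    }
    return info;
}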
@@ -367,13 +445,16 @@ void signal_tapdisk(int idx)
struct task_struct *ptask;
info = tapfds[idx];
- if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
+ return;
+
+ if (info->pid > 0) {
ptask = find_task_by_pid(info->pid);
- if (ptask) {
+ if (ptask)
info->status = CLEANSHUTDOWN;
- }
}
info->blkif = NULL;
+
return;
}
@@ -382,18 +463,22 @@ static int blktap_open(struct inode *inode, struct file *filp)
blkif_sring_t *sring;
int idx = iminor(inode) - BLKTAP_MINOR;
tap_blkif_t *info;
- private_info_t *prv;
int i;
- if (tapfds[idx] == NULL) {
+ /* ctrl device, treat differently */
+ if (!idx)
+ return 0;
+
+ info = tapfds[idx];
+
+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
WPRINTK("Unable to open device /dev/xen/blktap%d\n",
- idx);
- return -ENOMEM;
+ idx);
+ return -ENODEV;
}
+
DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
- info = tapfds[idx];
-
/*Only one process can access device at a time*/
if (test_and_set_bit(0, &info->dev_inuse))
return -EBUSY;
@@ -410,9 +495,7 @@ static int blktap_open(struct inode *inode, struct file *filp)
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
- prv = kzalloc(sizeof(private_info_t),GFP_KERNEL);
- prv->idx = idx;
- filp->private_data = prv;
+ filp->private_data = info;
info->vma = NULL;
info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
@@ -433,17 +516,14 @@ static int blktap_open(struct inode *inode, struct file *filp)
static int blktap_release(struct inode *inode, struct file *filp)
{
- int idx = iminor(inode) - BLKTAP_MINOR;
- tap_blkif_t *info;
+ tap_blkif_t *info = filp->private_data;
- if (tapfds[idx] == NULL) {
- WPRINTK("Trying to free device that doesn't exist "
- "[/dev/xen/blktap%d]\n",idx);
- return -1;
- }
- info = tapfds[idx];
+ /* check for control device */
+ if (!info)
+ return 0;
+
info->dev_inuse = 0;
- DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx);
+ DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
/* Free the ring page. */
ClearPageReserved(virt_to_page(info->ufe_ring.sring));
@@ -457,11 +537,11 @@ static int blktap_release(struct inode *inode, struct file *filp)
info->vma = NULL;
}
- if (filp->private_data) kfree(filp->private_data);
-
if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
- kthread_stop(info->blkif->xenblkd);
- info->blkif->xenblkd = NULL;
+ if (info->blkif->xenblkd != NULL) {
+ kthread_stop(info->blkif->xenblkd);
+ info->blkif->xenblkd = NULL;
+ }
info->status = CLEANSHUTDOWN;
}
return 0;
@@ -491,16 +571,12 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
int size;
struct page **map;
int i;
- private_info_t *prv;
- tap_blkif_t *info;
+ tap_blkif_t *info = filp->private_data;
- /*Retrieve the dev info*/
- prv = (private_info_t *)filp->private_data;
- if (prv == NULL) {
+ if (info == NULL) {
WPRINTK("blktap: mmap, retrieving idx failed\n");
return -ENOMEM;
}
- info = tapfds[prv->idx];
vma->vm_flags |= VM_RESERVED;
vma->vm_ops = &blktap_vm_ops;
@@ -517,8 +593,6 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
/* Map the ring pages to the start of the region and reserve it. */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
if (remap_pfn_range(vma, vma->vm_start,
__pa(info->ufe_ring.sring) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot)) {
@@ -556,20 +630,17 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
static int blktap_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- int idx = iminor(inode) - BLKTAP_MINOR;
+ tap_blkif_t *info = filp->private_data;
+
switch(cmd) {
case BLKTAP_IOCTL_KICK_FE:
{
/* There are fe messages to process. */
- return blktap_read_ufe_ring(idx);
+ return blktap_read_ufe_ring(info);
}
case BLKTAP_IOCTL_SETMODE:
{
- tap_blkif_t *info = tapfds[idx];
-
- if ( (idx > 0) && (idx < MAX_TAP_DEV)
- && (tapfds[idx] != NULL) )
- {
+ if (info) {
if (BLKTAP_MODE_VALID(arg)) {
info->mode = arg;
/* XXX: may need to flush rings here. */
@@ -582,11 +653,7 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
}
case BLKTAP_IOCTL_PRINT_IDXS:
{
- tap_blkif_t *info = tapfds[idx];
-
- if ( (idx > 0) && (idx < MAX_TAP_DEV)
- && (tapfds[idx] != NULL) )
- {
+ if (info) {
printk("User Rings: \n-----------\n");
printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
"| req_prod: %2d, rsp_prod: %2d\n",
@@ -599,11 +666,7 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
}
case BLKTAP_IOCTL_SENDPID:
{
- tap_blkif_t *info = tapfds[idx];
-
- if ( (idx > 0) && (idx < MAX_TAP_DEV)
- && (tapfds[idx] != NULL) )
- {
+ if (info) {
info->pid = (pid_t)arg;
DPRINTK("blktap: pid received %d\n",
info->pid);
@@ -614,43 +677,49 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
{
uint64_t val = (uint64_t)arg;
domid_translate_t *tr = (domid_translate_t *)&val;
- int newdev;
DPRINTK("NEWINTF Req for domid %d and bus id %d\n",
tr->domid, tr->busid);
- newdev = get_next_free_dev();
- if (newdev < 1) {
+ info = get_next_free_dev();
+ if (!info) {
WPRINTK("Error initialising /dev/xen/blktap - "
"No more devices\n");
return -1;
}
- translate_domid[newdev].domid = tr->domid;
- translate_domid[newdev].busid = tr->busid;
- return newdev;
+ info->trans.domid = tr->domid;
+ info->trans.busid = tr->busid;
+ return info->minor;
}
case BLKTAP_IOCTL_FREEINTF:
{
unsigned long dev = arg;
- tap_blkif_t *info = NULL;
+ unsigned long flags;
+
+ info = tapfds[dev];
- if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
+ if ((dev > MAX_TAP_DEV) || !info)
+ return 0; /* should this be an error? */
- if ( (info != NULL) && (info->dev_pending) )
+ spin_lock_irqsave(&pending_free_lock, flags);
+ if (info->dev_pending)
info->dev_pending = 0;
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+
return 0;
}
case BLKTAP_IOCTL_MINOR:
{
unsigned long dev = arg;
- tap_blkif_t *info = NULL;
-
- if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
-
- if (info != NULL) return info->minor;
- else return -1;
+
+ info = tapfds[dev];
+
+ if ((dev > MAX_TAP_DEV) || !info)
+ return -EINVAL;
+
+ return info->minor;
}
case BLKTAP_IOCTL_MAJOR:
- return BLKTAP_DEV_MAJOR;
+ return blktap_major;
case BLKTAP_QUERY_ALLOC_REQS:
{
@@ -662,25 +731,16 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
return -ENOIOCTLCMD;
}
-static unsigned int blktap_poll(struct file *file, poll_table *wait)
+static unsigned int blktap_poll(struct file *filp, poll_table *wait)
{
- private_info_t *prv;
- tap_blkif_t *info;
+ tap_blkif_t *info = filp->private_data;
- /*Retrieve the dev info*/
- prv = (private_info_t *)file->private_data;
- if (prv == NULL) {
- WPRINTK(" poll, retrieving idx failed\n");
+ /* do not work on the control device */
+ if (!info)
return 0;
- }
-
- if (prv->idx == 0) return 0;
-
- info = tapfds[prv->idx];
-
- poll_wait(file, &info->wait, wait);
+
+ poll_wait(filp, &info->wait, wait);
if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
- flush_tlb_all();
RING_PUSH_REQUESTS(&info->ufe_ring);
return POLLIN | POLLRDNORM;
}
@@ -691,11 +751,13 @@ void blktap_kick_user(int idx)
{
tap_blkif_t *info;
- if (idx == 0) return;
-
info = tapfds[idx];
-
- if (info != NULL) wake_up_interruptible(&info->wait);
+
+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
+ return;
+
+ wake_up_interruptible(&info->wait);
+
return;
}
@@ -712,66 +774,21 @@ static void make_response(blkif_t *blkif, unsigned long id,
static int req_increase(void)
{
int i, j;
- struct page *page;
- unsigned long flags;
- int ret;
- spin_lock_irqsave(&pending_free_lock, flags);
-
- ret = -EINVAL;
if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock)
- goto done;
-
-#ifdef __ia64__
- extern unsigned long alloc_empty_foreign_map_page_range(
- unsigned long pages);
- mmap_start[mmap_alloc].start = (unsigned long)
- alloc_empty_foreign_map_page_range(mmap_pages);
-#else /* ! ia64 */
- page = balloon_alloc_empty_page_range(mmap_pages);
- ret = -ENOMEM;
- if (page == NULL) {
- printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__);
- goto done;
- }
-
- /* Pin all of the pages. */
- for (i=0; i<mmap_pages; i++)
- get_page(&page[i]);
-
- mmap_start[mmap_alloc].start =
- (unsigned long)pfn_to_kaddr(page_to_pfn(page));
- mmap_start[mmap_alloc].mpage = page;
-
-#endif
-
- pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) *
- blkif_reqs, GFP_KERNEL);
- pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) *
- mmap_pages, GFP_KERNEL);
-
- ret = -ENOMEM;
- if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
- kfree(pending_reqs[mmap_alloc]);
- kfree(pending_addrs[mmap_alloc]);
- WPRINTK("%s: out of memory\n", __FUNCTION__);
- ret = -ENOMEM;
- goto done;
- }
-
- ret = 0;
+ return -EINVAL;
- DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
- __FUNCTION__, blkif_reqs, mmap_pages,
- mmap_start[mmap_alloc].start);
+ pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t)
+ * blkif_reqs, GFP_KERNEL);
+ foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);
- BUG_ON(mmap_start[mmap_alloc].start == 0);
+ if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
+ goto out_of_memory;
- for (i = 0; i < mmap_pages; i++)
- pending_addrs[mmap_alloc][i] =
- mmap_start[mmap_alloc].start + (i << PAGE_SHIFT);
+ DPRINTK("%s: reqs=%d, pages=%d\n",
+ __FUNCTION__, blkif_reqs, mmap_pages);
- for (i = 0; i < MAX_PENDING_REQS ; i++) {
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
list_add_tail(&pending_reqs[mmap_alloc][i].free_list,
&pending_free);
pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
@@ -782,67 +799,30 @@ static int req_increase(void)
mmap_alloc++;
DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
- done:
- spin_unlock_irqrestore(&pending_free_lock, flags);
- return ret;
+ return 0;
+
+ out_of_memory:
+ free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
+ kfree(pending_reqs[mmap_alloc]);
+ WPRINTK("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
}
static void mmap_req_del(int mmap)
{
- int i;
- struct page *page;
+ BUG_ON(!spin_is_locked(&pending_free_lock));
- /*Spinlock already acquired*/
kfree(pending_reqs[mmap]);
- kfree(pending_addrs[mmap]);
-
-#ifdef __ia64__
- /*Not sure what goes here yet!*/
-#else
-
- /* Unpin all of the pages. */
- page = mmap_start[mmap].mpage;
- for (i=0; i<mmap_pages; i++)
- put_page(&page[i]);
+ pending_reqs[mmap] = NULL;
- balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages);
-#endif
+	free_empty_pages_and_pagevec(foreign_pages[mmap], mmap_pages);
+ foreign_pages[mmap] = NULL;
mmap_lock = 0;
DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
mmap_alloc--;
}
-/*N.B. Currently unused - will be accessed via sysfs*/
-static void req_decrease(void)
-{
- pending_req_t *req;
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&pending_free_lock, flags);
-
- DPRINTK("Req decrease called.\n");
- if (mmap_lock || mmap_alloc == 1)
- goto done;
-
- mmap_lock = 1;
- mmap_inuse = MAX_PENDING_REQS;
-
- /*Go through reqs and remove any that aren't in use*/
- for (i = 0; i < MAX_PENDING_REQS ; i++) {
- req = &pending_reqs[mmap_alloc-1][i];
- if (req->inuse == 0) {
- list_del(&req->free_list);
- mmap_inuse--;
- }
- }
- if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
- done:
- spin_unlock_irqrestore(&pending_free_lock, flags);
- return;
-}
-
static pending_req_t* alloc_req(void)
{
pending_req_t *req = NULL;
@@ -888,8 +868,8 @@ static void free_req(pending_req_t *req)
wake_up(&pending_free_wq);
}
-static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int
- tapidx)
+static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
+ int tapidx)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
unsigned int i, invcount = 0;
@@ -897,49 +877,65 @@ static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int
uint64_t ptep;
int ret, mmap_idx;
unsigned long kvaddr, uvaddr;
-
- tap_blkif_t *info = tapfds[tapidx];
+ tap_blkif_t *info;
- if (info == NULL) {
+
+ info = tapfds[tapidx];
+
+ if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
WPRINTK("fast_flush: Couldn't get info!\n");
return;
}
+
+ if (info->vma != NULL &&
+ xen_feature(XENFEAT_auto_translated_physmap)) {
+ down_write(&info->vma->vm_mm->mmap_sem);
+ zap_page_range(info->vma,
+ MMAP_VADDR(info->user_vstart, u_idx, 0),
+ req->nr_pages << PAGE_SHIFT, NULL);
+ up_write(&info->vma->vm_mm->mmap_sem);
+ }
+
mmap_idx = req->mem_idx;
for (i = 0; i < req->nr_pages; i++) {
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
khandle = &pending_handle(mmap_idx, k_idx, i);
- if (BLKTAP_INVALID_HANDLE(khandle)) {
- WPRINTK("BLKTAP_INVALID_HANDLE\n");
- continue;
+
+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
+ gnttab_set_unmap_op(&unmap[invcount],
+ idx_to_kaddr(mmap_idx, k_idx, i),
+ GNTMAP_host_map, khandle->kernel);
+ invcount++;
}
- gnttab_set_unmap_op(&unmap[invcount],
- MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i),
- GNTMAP_host_map, khandle->kernel);
- invcount++;
-
- if (create_lookup_pte_addr(
- info->vma->vm_mm,
- MMAP_VADDR(info->user_vstart, u_idx, i),
- &ptep) !=0) {
- WPRINTK("Couldn't get a pte addr!\n");
- return;
+
+ if (khandle->user != INVALID_GRANT_HANDLE) {
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+ if (create_lookup_pte_addr(
+ info->vma->vm_mm,
+ MMAP_VADDR(info->user_vstart, u_idx, i),
+ &ptep) !=0) {
+ WPRINTK("Couldn't get a pte addr!\n");
+ return;
+ }
+
+ gnttab_set_unmap_op(&unmap[invcount], ptep,
+ GNTMAP_host_map
+ | GNTMAP_application_map
+ | GNTMAP_contains_pte,
+ khandle->user);
+ invcount++;
}
- gnttab_set_unmap_op(&unmap[invcount],
- ptep, GNTMAP_host_map,
- khandle->user);
- invcount++;
-
BLKTAP_INVALIDATE_HANDLE(khandle);
}
ret = HYPERVISOR_grant_table_op(
GNTTABOP_unmap_grant_ref, unmap, invcount);
BUG_ON(ret);
- if (info->vma != NULL)
+ if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap))
zap_page_range(info->vma,
MMAP_VADDR(info->user_vstart, u_idx, 0),
req->nr_pages << PAGE_SHIFT, NULL);
@@ -1002,7 +998,7 @@ int tap_blkif_schedule(void *arg)
* COMPLETION CALLBACK -- Called by user level ioctl()
*/
-static int blktap_read_ufe_ring(int idx)
+static int blktap_read_ufe_ring(tap_blkif_t *info)
{
/* This is called to read responses from the UFE ring. */
RING_IDX i, j, rp;
@@ -1010,12 +1006,9 @@ static int blktap_read_ufe_ring(int idx)
blkif_t *blkif=NULL;
int pending_idx, usr_idx, mmap_idx;
pending_req_t *pending_req;
- tap_blkif_t *info;
- info = tapfds[idx];
- if (info == NULL) {
+ if (!info)
return 0;
- }
/* We currently only forward packets in INTERCEPT_FE mode. */
if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
@@ -1026,11 +1019,14 @@ static int blktap_read_ufe_ring(int idx)
rmb();
for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
+ blkif_response_t res;
resp = RING_GET_RESPONSE(&info->ufe_ring, i);
+ memcpy(&res, resp, sizeof(res));
+ mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
++info->ufe_ring.rsp_cons;
/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
- usr_idx = (int)resp->id;
+ usr_idx = (int)res.id;
pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
@@ -1053,9 +1049,8 @@ static int blktap_read_ufe_ring(int idx)
struct page *pg;
int offset;
- uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, j);
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j);
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
ClearPageReserved(pg);
@@ -1063,10 +1058,10 @@ static int blktap_read_ufe_ring(int idx)
>> PAGE_SHIFT;
map[offset] = NULL;
}
- fast_flush_area(pending_req, pending_idx, usr_idx, idx);
- make_response(blkif, pending_req->id, resp->operation,
- resp->status);
+ fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
info->idx_map[usr_idx] = INVALID_REQ;
+ make_response(blkif, pending_req->id, res.operation,
+ res.status);
blkif_put(pending_req->blkif);
free_req(pending_req);
}
@@ -1100,7 +1095,7 @@ static int print_dbug = 1;
static int do_block_io_op(blkif_t *blkif)
{
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
- blkif_request_t *req;
+ blkif_request_t req;
pending_req_t *pending_req;
RING_IDX rc, rp;
int more_to_do = 0;
@@ -1111,7 +1106,7 @@ static int do_block_io_op(blkif_t *blkif)
rmb(); /* Ensure we see queued requests up to 'rp'. */
/*Check blkif has corresponding UE ring*/
- if (blkif->dev_num == -1) {
+ if (blkif->dev_num < 0) {
/*oops*/
if (print_dbug) {
WPRINTK("Corresponding UE "
@@ -1122,7 +1117,8 @@ static int do_block_io_op(blkif_t *blkif)
}
info = tapfds[blkif->dev_num];
- if (info == NULL || !info->dev_inuse) {
+
+ if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
if (print_dbug) {
WPRINTK("Can't get UE info!\n");
print_dbug = 0;
@@ -1152,24 +1148,24 @@ static int do_block_io_op(blkif_t *blkif)
break;
}
- req = RING_GET_REQUEST(blk_ring, rc);
+ memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req));
blk_ring->req_cons = ++rc; /* before make_response() */
- switch (req->operation) {
+ switch (req.operation) {
case BLKIF_OP_READ:
blkif->st_rd_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
default:
WPRINTK("unknown operation [%d]\n",
- req->operation);
- make_response(blkif, req->id, req->operation,
+ req.operation);
+ make_response(blkif, req.id, req.operation,
BLKIF_RSP_ERROR);
free_req(pending_req);
break;
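
The hunk above stops dereferencing descriptors in place on the shared ring: do_block_io_op() now memcpy()s each request into a local blkif_request_t (and the earlier blktap_read_ufe_ring hunk does the same for responses) before any field is inspected, so a misbehaving frontend cannot rewrite the descriptor between validation and use. A minimal standalone sketch of this snapshot-then-validate pattern, using an invented descriptor layout rather than the real blkif ABI:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Illustrative stand-in for a descriptor living in peer-shared memory. */
    struct demo_req {
        uint8_t  operation;
        uint8_t  nr_segments;
        uint64_t id;
    };

    #define DEMO_MAX_SEGMENTS 11

    /* 'shared' may be modified concurrently by an untrusted peer, so take a
     * private snapshot first and validate/act only on the snapshot. */
    static int handle_request(volatile struct demo_req *shared)
    {
        struct demo_req req;

        memcpy(&req, (const void *)shared, sizeof(req));

        if (req.nr_segments == 0 || req.nr_segments > DEMO_MAX_SEGMENTS)
            return -1;              /* reject; never re-read 'shared' */

        printf("op=%u segs=%u id=%llu\n", req.operation, req.nr_segments,
               (unsigned long long)req.id);
        return 0;
    }

    int main(void)
    {
        struct demo_req r = { .operation = 1, .nr_segments = 3, .id = 42 };
        return handle_request(&r) ? 1 : 0;
    }
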
@@ -1190,17 +1186,27 @@ static void dispatch_rw_block_io(blkif_t *blkif,
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
unsigned int nseg;
int ret, i;
- tap_blkif_t *info = tapfds[blkif->dev_num];
+ tap_blkif_t *info;
uint64_t sector;
-
blkif_request_t *target;
int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
- int usr_idx = GET_NEXT_REQ(info->idx_map);
+ int usr_idx;
uint16_t mmap_idx = pending_req->mem_idx;
- /*Check we have space on user ring - should never fail*/
- if(usr_idx == INVALID_REQ) goto fail_flush;
-
+ if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
+ goto fail_response;
+
+ info = tapfds[blkif->dev_num];
+ if (info == NULL)
+ goto fail_response;
+
+ /* Check we have space on user ring - should never fail. */
+ usr_idx = GET_NEXT_REQ(info->idx_map);
+ if (usr_idx == INVALID_REQ) {
+ BUG();
+ goto fail_response;
+ }
+
/* Check that number of segments is sane. */
nseg = req->nr_segments;
if ( unlikely(nseg == 0) ||
@@ -1233,15 +1239,12 @@ static void dispatch_rw_block_io(blkif_t *blkif,
unsigned long uvaddr;
unsigned long kvaddr;
uint64_t ptep;
- struct page *page;
uint32_t flags;
uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, i);
- page = virt_to_page(kvaddr);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
- sector = req->sector_number + (8*i);
+ sector = req->sector_number + ((PAGE_SIZE / 512) * i);
if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
WPRINTK("BLKTAP: Sector request greater"
"than size\n");
@@ -1251,7 +1254,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
BLKIF_OP_WRITE ? "WRITE" : "READ"),
(long long unsigned) sector,
(long long unsigned) sector>>9,
- blkif->sectors);
+ (long long unsigned) blkif->sectors);
}
flags = GNTMAP_host_map;
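
The replaced sector computation derives the number of 512-byte sectors per segment from PAGE_SIZE instead of hard-coding 8, which was only correct for 4kB pages. A trivial sketch of the arithmetic, assuming a 4096-byte page purely for illustration:

    #include <stdio.h>

    int main(void)
    {
        const unsigned long page_size = 4096;     /* illustrative */
        const unsigned long sectors_per_page = page_size / 512;
        unsigned long long sector_number = 1000;  /* first sector of the request */

        for (int i = 0; i < 3; i++)
            printf("segment %d starts at sector %llu\n", i,
                   sector_number + sectors_per_page * i);
        return 0;
    }
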
@@ -1261,71 +1264,123 @@ static void dispatch_rw_block_io(blkif_t *blkif,
req->seg[i].gref, blkif->domid);
op++;
- /* Now map it to user. */
- ret = create_lookup_pte_addr(info->vma->vm_mm,
- uvaddr, &ptep);
- if (ret) {
- WPRINTK("Couldn't get a pte addr!\n");
- fast_flush_area(pending_req, pending_idx, usr_idx,
- blkif->dev_num);
- goto fail_flush;
- }
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Now map it to user. */
+ ret = create_lookup_pte_addr(info->vma->vm_mm,
+ uvaddr, &ptep);
+ if (ret) {
+ WPRINTK("Couldn't get a pte addr!\n");
+ goto fail_flush;
+ }
- flags = GNTMAP_host_map | GNTMAP_application_map
- | GNTMAP_contains_pte;
- if (operation == WRITE)
- flags |= GNTMAP_readonly;
- gnttab_set_map_op(&map[op], ptep, flags,
- req->seg[i].gref, blkif->domid);
- op++;
+ flags = GNTMAP_host_map | GNTMAP_application_map
+ | GNTMAP_contains_pte;
+ if (operation == WRITE)
+ flags |= GNTMAP_readonly;
+ gnttab_set_map_op(&map[op], ptep, flags,
+ req->seg[i].gref, blkif->domid);
+ op++;
+ }
}
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
BUG_ON(ret);
- for (i = 0; i < (nseg*2); i+=2) {
- unsigned long uvaddr;
- unsigned long kvaddr;
- unsigned long offset;
- struct page *pg;
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ for (i = 0; i < (nseg*2); i+=2) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ struct page *pg;
- uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, i/2);
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2);
- if (unlikely(map[i].status != 0)) {
- WPRINTK("invalid kernel buffer -- "
- "could not remap it\n");
- goto fail_flush;
- }
+ if (unlikely(map[i].status != 0)) {
+ WPRINTK("invalid kernel buffer -- "
+ "could not remap it\n");
+ ret |= 1;
+ map[i].handle = INVALID_GRANT_HANDLE;
+ }
- if (unlikely(map[i+1].status != 0)) {
- WPRINTK("invalid user buffer -- "
- "could not remap it\n");
- goto fail_flush;
+ if (unlikely(map[i+1].status != 0)) {
+ WPRINTK("invalid user buffer -- "
+ "could not remap it\n");
+ ret |= 1;
+ map[i+1].handle = INVALID_GRANT_HANDLE;
+ }
+
+ pending_handle(mmap_idx, pending_idx, i/2).kernel
+ = map[i].handle;
+ pending_handle(mmap_idx, pending_idx, i/2).user
+ = map[i+1].handle;
+
+ if (ret)
+ continue;
+
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
+ FOREIGN_FRAME(map[i].dev_bus_addr
+ >> PAGE_SHIFT));
+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+ ((struct page **)info->vma->vm_private_data)[offset] =
+ pg;
}
+ } else {
+ for (i = 0; i < nseg; i++) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ struct page *pg;
- pending_handle(mmap_idx, pending_idx, i/2).kernel
- = map[i].handle;
- pending_handle(mmap_idx, pending_idx, i/2).user
- = map[i+1].handle;
- set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
- FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
- offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
- pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- ((struct page **)info->vma->vm_private_data)[offset] =
- pg;
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
+
+ if (unlikely(map[i].status != 0)) {
+ WPRINTK("invalid kernel buffer -- "
+ "could not remap it\n");
+ ret |= 1;
+ map[i].handle = INVALID_GRANT_HANDLE;
+ }
+
+ pending_handle(mmap_idx, pending_idx, i).kernel
+ = map[i].handle;
+
+ if (ret)
+ continue;
+
+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+ ((struct page **)info->vma->vm_private_data)[offset] =
+ pg;
+ }
}
+
+ if (ret)
+ goto fail_flush;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ down_write(&info->vma->vm_mm->mmap_sem);
/* Mark mapped pages as reserved: */
for (i = 0; i < req->nr_segments; i++) {
unsigned long kvaddr;
struct page *pg;
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
SetPageReserved(pg);
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = vm_insert_page(info->vma,
+ MMAP_VADDR(info->user_vstart,
+ usr_idx, i), pg);
+ if (ret) {
+ up_write(&info->vma->vm_mm->mmap_sem);
+ goto fail_flush;
+ }
+ }
}
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ up_write(&info->vma->vm_mm->mmap_sem);
/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
@@ -1336,6 +1391,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
info->ufe_ring.req_prod_pvt);
memcpy(target, req, sizeof(*req));
target->id = usr_idx;
+ wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
info->ufe_ring.req_prod_pvt++;
return;
@@ -1393,7 +1449,6 @@ static void make_response(blkif_t *blkif, unsigned long id,
static int __init blkif_init(void)
{
int i,ret,blktap_dir;
- tap_blkif_t *info;
if (!is_running_on_xen())
return -ENODEV;
@@ -1413,10 +1468,8 @@ static int __init blkif_init(void)
tap_blkif_xenbus_init();
- /*Create the blktap devices, but do not map memory or waitqueue*/
- for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
-
- ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops);
+ /* Dynamically allocate a major for this device */
+ ret = register_chrdev(0, "blktap", &blktap_fops);
blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
if ( (ret < 0)||(blktap_dir < 0) ) {
@@ -1424,22 +1477,36 @@ static int __init blkif_init(void)
return -ENOMEM;
}
- for(i = 0; i < MAX_TAP_DEV; i++ ) {
- info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
- if(tapfds[i] == NULL) return -ENOMEM;
- info->minor = i;
- info->pid = 0;
- info->blkif = NULL;
+ blktap_major = ret;
- ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i),
- S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
+ /* tapfds[0] is always NULL */
+ blktap_next_minor++;
- if(ret != 0) return -ENOMEM;
- info->dev_pending = info->dev_inuse = 0;
+ ret = devfs_mk_cdev(MKDEV(blktap_major, i),
+ S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
- DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+ if(ret != 0)
+ return -ENOMEM;
+
+ DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+
+ /* Make sure the xen class exists */
+ if (!setup_xen_class()) {
+ /*
+ * This will allow udev to create the blktap ctrl device.
+ * We only want to create blktap0 first. We don't want
+ * to flood the sysfs system with needless blktap devices.
+ * We only create the device when a request of a new device is
+ * made.
+ */
+ class_device_create(xen_class, NULL,
+ MKDEV(blktap_major, 0), NULL,
+ "blktap0");
+ } else {
+ /* this is bad, but not fatal */
+ WPRINTK("blktap: sysfs xen_class not created\n");
}
-
+
DPRINTK("Blktap device successfully created\n");
return 0;
diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
index 6c16a2e60b..553ad45c48 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
@@ -189,7 +189,7 @@ static int blktap_probe(struct xenbus_device *dev,
return 0;
fail:
- DPRINTK("blktap probe failed");
+ DPRINTK("blktap probe failed\n");
blktap_remove(dev);
return err;
}
@@ -243,7 +243,7 @@ static void tap_frontend_changed(struct xenbus_device *dev,
struct backend_info *be = dev->dev.driver_data;
int err;
- DPRINTK("");
+ DPRINTK("\n");
switch (frontend_state) {
case XenbusStateInitialising:
@@ -273,7 +273,6 @@ static void tap_frontend_changed(struct xenbus_device *dev,
kthread_stop(be->blkif->xenblkd);
be->blkif->xenblkd = NULL;
}
- tap_blkif_unmap(be->blkif);
xenbus_switch_state(dev, XenbusStateClosing);
break;
@@ -319,7 +318,7 @@ static int connect_ring(struct backend_info *be)
unsigned int evtchn;
int err;
- DPRINTK("%s", dev->otherend);
+ DPRINTK("%s\n", dev->otherend);
err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
&ring_ref, "event-channel", "%u", &evtchn, NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/char/mem.c b/linux-2.6-xen-sparse/drivers/xen/char/mem.c
index 6576135c99..ac85c8dbb2 100644
--- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c
+++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c
@@ -28,13 +28,12 @@
#include <asm/io.h>
#include <asm/hypervisor.h>
-static inline int uncached_access(struct file *file)
+#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
+static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
{
- if (file->f_flags & O_SYNC)
- return 1;
- /* Xen sets correct MTRR type on non-RAM for us. */
- return 0;
+ return 1;
}
+#endif
/*
 * This function reads the *physical* memory. The f_pos points directly to the
@@ -47,6 +46,9 @@ static ssize_t read_mem(struct file * file, char __user * buf,
ssize_t read = 0, sz;
void __iomem *v;
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+
while (count > 0) {
/*
* Handle first page in case it's not aligned
@@ -58,13 +60,15 @@ static ssize_t read_mem(struct file * file, char __user * buf,
sz = min_t(unsigned long, sz, count);
- if ((v = ioremap(p, sz)) == NULL) {
+ v = xlate_dev_mem_ptr(p, sz);
+ if (IS_ERR(v) || v == NULL) {
/*
- * Some programs (e.g., dmidecode) groove off into weird RAM
- * areas where no tables can possibly exist (because Xen will
- * have stomped on them!). These programs get rather upset if
- * we let them know that Xen failed their access, so we fake
- * out a read of all zeroes. :-)
+ * Some programs (e.g., dmidecode) groove off into
+ * weird RAM areas where no tables can possibly exist
+ * (because Xen will have stomped on them!). These
+ * programs get rather upset if we let them know that
+ * Xen failed their access, so we fake out a read of
+ * all zeroes.
*/
if (clear_user(buf, count))
return -EFAULT;
@@ -73,7 +77,7 @@ static ssize_t read_mem(struct file * file, char __user * buf,
}
ignored = copy_to_user(buf, v, sz);
- iounmap(v);
+ xlate_dev_mem_ptr_unmap(v);
if (ignored)
return -EFAULT;
buf += sz;
@@ -93,6 +97,9 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
ssize_t written = 0, sz;
void __iomem *v;
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+
while (count > 0) {
/*
* Handle first page in case it's not aligned
@@ -104,11 +111,17 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
sz = min_t(unsigned long, sz, count);
- if ((v = ioremap(p, sz)) == NULL)
+ v = xlate_dev_mem_ptr(p, sz);
+ if (v == NULL)
break;
+ if (IS_ERR(v)) {
+ if (written == 0)
+ return PTR_ERR(v);
+ break;
+ }
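
In this write_mem() hunk, xlate_dev_mem_ptr() can return NULL (stop copying quietly) or an ERR_PTR-encoded errno (return it if nothing was written yet). A small userspace sketch of that pointer-encoded-error convention; the helpers below mirror the usual IS_ERR/PTR_ERR idea but are local stand-ins for illustration, not the kernel headers:

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    /* Pretend mapping helper: NULL means "stop quietly", ERR_PTR means "fail". */
    static void *demo_map(int variant)
    {
        static char page[4096];
        switch (variant) {
        case 0: return page;
        case 1: return NULL;
        default: return ERR_PTR(-EFAULT);
        }
    }

    int main(void)
    {
        for (int v = 0; v < 3; v++) {
            void *p = demo_map(v);
            if (p == NULL)
                printf("variant %d: no mapping, stop\n", v);
            else if (IS_ERR(p))
                printf("variant %d: error %ld\n", v, PTR_ERR(p));
            else
                printf("variant %d: mapped at %p\n", v, p);
        }
        return 0;
    }
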
ignored = copy_from_user(v, buf, sz);
- iounmap(v);
+ xlate_dev_mem_ptr_unmap(v);
if (ignored) {
written += sz - ignored;
if (written)
@@ -125,6 +138,15 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
return written;
}
+#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
+static inline int uncached_access(struct file *file)
+{
+ if (file->f_flags & O_SYNC)
+ return 1;
+ /* Xen sets correct MTRR type on non-RAM for us. */
+ return 0;
+}
+
static int mmap_mem(struct file * file, struct vm_area_struct * vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -136,6 +158,7 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
size, vma->vm_page_prot, DOMID_IO);
}
+#endif
/*
* The memory devices use the full 32/64 bits of the offset, and so we cannot
diff --git a/linux-2.6-xen-sparse/drivers/xen/console/console.c b/linux-2.6-xen-sparse/drivers/xen/console/console.c
index a45d21a69c..ff3e2a411e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c
@@ -49,6 +49,7 @@
#include <linux/console.h>
#include <linux/bootmem.h>
#include <linux/sysrq.h>
+#include <linux/screen_info.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/uaccess.h>
@@ -266,6 +267,41 @@ void xencons_force_flush(void)
}
+void dom0_init_screen_info(const struct dom0_vga_console_info *info)
+{
+ switch (info->video_type) {
+ case XEN_VGATYPE_TEXT_MODE_3:
+ screen_info.orig_video_mode = 3;
+ screen_info.orig_video_ega_bx = 3;
+ screen_info.orig_video_isVGA = 1;
+ screen_info.orig_video_lines = info->u.text_mode_3.rows;
+ screen_info.orig_video_cols = info->u.text_mode_3.columns;
+ screen_info.orig_x = info->u.text_mode_3.cursor_x;
+ screen_info.orig_y = info->u.text_mode_3.cursor_y;
+ screen_info.orig_video_points =
+ info->u.text_mode_3.font_height;
+ break;
+ case XEN_VGATYPE_VESA_LFB:
+ screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
+ screen_info.lfb_width = info->u.vesa_lfb.width;
+ screen_info.lfb_height = info->u.vesa_lfb.height;
+ screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel;
+ screen_info.lfb_base = info->u.vesa_lfb.lfb_base;
+ screen_info.lfb_size = info->u.vesa_lfb.lfb_size;
+ screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line;
+ screen_info.red_size = info->u.vesa_lfb.red_size;
+ screen_info.red_pos = info->u.vesa_lfb.red_pos;
+ screen_info.green_size = info->u.vesa_lfb.green_size;
+ screen_info.green_pos = info->u.vesa_lfb.green_pos;
+ screen_info.blue_size = info->u.vesa_lfb.blue_size;
+ screen_info.blue_pos = info->u.vesa_lfb.blue_pos;
+ screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size;
+ screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+ break;
+ }
+}
+
+
/******************** User-space console driver (/dev/console) ************/
#define DRV(_d) (_d)
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/Makefile b/linux-2.6-xen-sparse/drivers/xen/core/Makefile
index c1b0c1bd51..6154454339 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile
@@ -9,5 +9,5 @@ obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
obj-$(CONFIG_XEN_SKBUFF) += skbuff.o
-obj-$(CONFIG_XEN_REBOOT) += reboot.o
+obj-$(CONFIG_XEN_REBOOT) += reboot.o machine_reboot.o
obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/features.c b/linux-2.6-xen-sparse/drivers/xen/core/features.c
index 4d50caf50b..a76f58c04d 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/features.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c
@@ -11,6 +11,10 @@
#include <asm/hypervisor.h>
#include <xen/features.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
EXPORT_SYMBOL(xen_features);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
index 3195279a87..c5132c13bb 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
@@ -44,6 +44,10 @@
#include <asm/io.h>
#include <xen/interface/memory.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
new file mode 100644
index 0000000000..02ee7f4728
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
@@ -0,0 +1,185 @@
+#define __KERNEL_SYSCALLS__
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stringify.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+#include <xen/interface/dom0_ops.h>
+#include <xen/xenbus.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+#include <xen/gnttab.h>
+#include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void machine_emergency_restart(void)
+{
+ /* We really want to get pending console data out before we die. */
+ xencons_force_flush();
+ HYPERVISOR_shutdown(SHUTDOWN_reboot);
+}
+
+void machine_restart(char * __unused)
+{
+ machine_emergency_restart();
+}
+
+void machine_halt(void)
+{
+ machine_power_off();
+}
+
+void machine_power_off(void)
+{
+ /* We really want to get pending console data out before we die. */
+ xencons_force_flush();
+ if (pm_power_off)
+ pm_power_off();
+ HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+int reboot_thru_bios = 0; /* for dmi_scan.c */
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+
+/* Ensure we run on the idle task page tables so that we will
+ switch page tables before running user space. This is needed
+ on architectures with separate kernel and user page tables
+ because the user page table pointer is not saved/restored. */
+static void switch_idle_mm(void)
+{
+ struct mm_struct *mm = current->active_mm;
+
+ if (mm == &init_mm)
+ return;
+
+ atomic_inc(&init_mm.mm_count);
+ switch_mm(mm, &init_mm, current);
+ current->active_mm = &init_mm;
+ mmdrop(mm);
+}
+
+static void pre_suspend(void)
+{
+ HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+ clear_fixmap(FIX_SHARED_INFO);
+
+ xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+ xen_start_info->console.domU.mfn =
+ mfn_to_pfn(xen_start_info->console.domU.mfn);
+}
+
+static void post_suspend(void)
+{
+ int i, j, k, fpp;
+ extern unsigned long max_pfn;
+ extern unsigned long *pfn_to_mfn_frame_list_list;
+ extern unsigned long *pfn_to_mfn_frame_list[];
+
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+ virt_to_mfn(pfn_to_mfn_frame_list_list);
+
+ fpp = PAGE_SIZE/sizeof(unsigned long);
+ for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
+ if ((j % fpp) == 0) {
+ k++;
+ pfn_to_mfn_frame_list_list[k] =
+ virt_to_mfn(pfn_to_mfn_frame_list[k]);
+ j = 0;
+ }
+ pfn_to_mfn_frame_list[k][j] =
+ virt_to_mfn(&phys_to_machine_mapping[i]);
+ }
+ HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+}
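
post_suspend() republishes the pfn-to-mfn table to the hypervisor as a two-level structure: each frame of the p2m table covers fpp pfns, each frame-list page holds fpp such frame pointers, and the top-level list holds one entry per frame-list page. A standalone sketch of the same chunked indexing; fpp and max_pfn below are toy values chosen only to make the layout visible:

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative sizes: 4 entries per frame instead of PAGE_SIZE/8. */
        const unsigned long fpp = 4;
        const unsigned long max_pfn = 40;
        unsigned long i, j, k;

        for (i = 0, j = 0, k = (unsigned long)-1; i < max_pfn; i += fpp, j++) {
            if ((j % fpp) == 0) {
                k++;            /* start a new top-level (frame-list-list) entry */
                j = 0;
            }
            printf("p2m frame covering pfns %lu..%lu -> frame_list[%lu][%lu]\n",
                   i, i + fpp - 1, k, j);
        }
        return 0;
    }
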
+
+#else /* !(defined(__i386__) || defined(__x86_64__)) */
+
+#define switch_idle_mm() ((void)0)
+#define mm_pin_all() ((void)0)
+#define pre_suspend() ((void)0)
+#define post_suspend() ((void)0)
+
+#endif
+
+int __xen_suspend(void)
+{
+ int err;
+
+ extern void time_resume(void);
+
+ BUG_ON(smp_processor_id() != 0);
+ BUG_ON(in_interrupt());
+
+#if defined(__i386__) || defined(__x86_64__)
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ printk(KERN_WARNING "Cannot suspend in "
+ "auto_translated_physmap mode.\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
+ err = smp_suspend();
+ if (err)
+ return err;
+
+ xenbus_suspend();
+
+ preempt_disable();
+
+ mm_pin_all();
+ local_irq_disable();
+ preempt_enable();
+
+ gnttab_suspend();
+
+ pre_suspend();
+
+ /*
+ * We'll stop somewhere inside this hypercall. When it returns,
+ * we'll start resuming after the restore.
+ */
+ HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+ post_suspend();
+
+ gnttab_resume();
+
+ irq_resume();
+
+ time_resume();
+
+ switch_idle_mm();
+
+ local_irq_enable();
+
+ xencons_resume();
+
+ xenbus_resume();
+
+ smp_resume();
+
+ return err;
+}
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
index 34c3930961..af3fe3a15c 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
@@ -1,25 +1,15 @@
#define __KERNEL_SYSCALLS__
#include <linux/version.h>
#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
-#include <linux/stringify.h>
-#include <asm/irq.h>
-#include <asm/mmu_context.h>
-#include <xen/evtchn.h>
#include <asm/hypervisor.h>
-#include <xen/interface/dom0_ops.h>
#include <xen/xenbus.h>
-#include <linux/cpu.h>
#include <linux/kthread.h>
-#include <xen/gnttab.h>
-#include <xen/xencons.h>
-#include <xen/cpu_hotplug.h>
-extern void ctrl_alt_del(void);
+MODULE_LICENSE("Dual BSD/GPL");
#define SHUTDOWN_INVALID -1
#define SHUTDOWN_POWEROFF 0
@@ -31,186 +21,18 @@ extern void ctrl_alt_del(void);
*/
#define SHUTDOWN_HALT 4
-#if defined(__i386__) || defined(__x86_64__)
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-void machine_emergency_restart(void)
-{
- /* We really want to get pending console data out before we die. */
- xencons_force_flush();
- HYPERVISOR_shutdown(SHUTDOWN_reboot);
-}
-
-void machine_restart(char * __unused)
-{
- machine_emergency_restart();
-}
-
-void machine_halt(void)
-{
- machine_power_off();
-}
-
-void machine_power_off(void)
-{
- /* We really want to get pending console data out before we die. */
- xencons_force_flush();
- if (pm_power_off)
- pm_power_off();
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-
-int reboot_thru_bios = 0; /* for dmi_scan.c */
-EXPORT_SYMBOL(machine_restart);
-EXPORT_SYMBOL(machine_halt);
-EXPORT_SYMBOL(machine_power_off);
-
-#endif /* defined(__i386__) || defined(__x86_64__) */
-
-/******************************************************************************
- * Stop/pickle callback handling.
- */
-
/* Ignore multiple shutdown requests. */
static int shutting_down = SHUTDOWN_INVALID;
+
static void __shutdown_handler(void *unused);
static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-#if defined(__i386__) || defined(__x86_64__)
-
-/* Ensure we run on the idle task page tables so that we will
- switch page tables before running user space. This is needed
- on architectures with separate kernel and user page tables
- because the user page table pointer is not saved/restored. */
-static void switch_idle_mm(void)
-{
- struct mm_struct *mm = current->active_mm;
-
- if (mm == &init_mm)
- return;
-
- atomic_inc(&init_mm.mm_count);
- switch_mm(mm, &init_mm, current);
- current->active_mm = &init_mm;
- mmdrop(mm);
-}
-
-static void pre_suspend(void)
-{
- HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
- clear_fixmap(FIX_SHARED_INFO);
-
- xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
- xen_start_info->console.domU.mfn =
- mfn_to_pfn(xen_start_info->console.domU.mfn);
-}
-
-static void post_suspend(void)
-{
- int i, j, k, fpp;
- extern unsigned long max_pfn;
- extern unsigned long *pfn_to_mfn_frame_list_list;
- extern unsigned long *pfn_to_mfn_frame_list[];
-
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-
- memset(empty_zero_page, 0, PAGE_SIZE);
-
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- virt_to_mfn(pfn_to_mfn_frame_list_list);
-
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
- if ((j % fpp) == 0) {
- k++;
- pfn_to_mfn_frame_list_list[k] =
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- pfn_to_mfn_frame_list[k][j] =
- virt_to_mfn(&phys_to_machine_mapping[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-}
-
-#else /* !(defined(__i386__) || defined(__x86_64__)) */
-
-#define switch_idle_mm() ((void)0)
-#define mm_pin_all() ((void)0)
-#define pre_suspend() ((void)0)
-#define post_suspend() ((void)0)
-
-#endif
-
-static int __do_suspend(void *ignore)
-{
- int err;
-
- extern void time_resume(void);
-
- BUG_ON(smp_processor_id() != 0);
- BUG_ON(in_interrupt());
-
-#if defined(__i386__) || defined(__x86_64__)
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- printk(KERN_WARNING "Cannot suspend in "
- "auto_translated_physmap mode.\n");
- return -EOPNOTSUPP;
- }
+#ifdef CONFIG_XEN
+int __xen_suspend(void);
+#else
+#define __xen_suspend() (void)0
#endif
- err = smp_suspend();
- if (err)
- return err;
-
- xenbus_suspend();
-
- preempt_disable();
-
- mm_pin_all();
- local_irq_disable();
- preempt_enable();
-
- gnttab_suspend();
-
- pre_suspend();
-
- /*
- * We'll stop somewhere inside this hypercall. When it returns,
- * we'll start resuming after the restore.
- */
- HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
-
- shutting_down = SHUTDOWN_INVALID;
-
- post_suspend();
-
- gnttab_resume();
-
- irq_resume();
-
- time_resume();
-
- switch_idle_mm();
-
- local_irq_enable();
-
- xencons_resume();
-
- xenbus_resume();
-
- smp_resume();
-
- return err;
-}
-
static int shutdown_process(void *__unused)
{
static char *envp[] = { "HOME=/", "TERM=linux",
@@ -222,11 +44,13 @@ static int shutdown_process(void *__unused)
if ((shutting_down == SHUTDOWN_POWEROFF) ||
(shutting_down == SHUTDOWN_HALT)) {
- if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) {
+ if (call_usermodehelper("/sbin/poweroff", poweroff_argv, envp, 0) < 0) {
+#ifdef CONFIG_XEN
sys_reboot(LINUX_REBOOT_MAGIC1,
LINUX_REBOOT_MAGIC2,
LINUX_REBOOT_CMD_POWER_OFF,
NULL);
+#endif /* CONFIG_XEN */
}
}
@@ -235,6 +59,13 @@ static int shutdown_process(void *__unused)
return 0;
}
+static int xen_suspend(void *__unused)
+{
+ __xen_suspend();
+ shutting_down = SHUTDOWN_INVALID;
+ return 0;
+}
+
static int kthread_create_on_cpu(int (*f)(void *arg),
void *arg,
const char *name,
@@ -257,7 +88,7 @@ static void __shutdown_handler(void *unused)
err = kernel_thread(shutdown_process, NULL,
CLONE_FS | CLONE_FILES);
else
- err = kthread_create_on_cpu(__do_suspend, NULL, "suspend", 0);
+ err = kthread_create_on_cpu(xen_suspend, NULL, "suspend", 0);
if (err < 0) {
printk(KERN_WARNING "Error creating shutdown process (%d): "
@@ -298,7 +129,7 @@ static void shutdown_handler(struct xenbus_watch *watch,
if (strcmp(str, "poweroff") == 0)
shutting_down = SHUTDOWN_POWEROFF;
else if (strcmp(str, "reboot") == 0)
- ctrl_alt_del();
+ kill_proc(1, SIGINT, 1); /* interrupt init */
else if (strcmp(str, "suspend") == 0)
shutting_down = SHUTDOWN_SUSPEND;
else if (strcmp(str, "halt") == 0)
@@ -364,10 +195,14 @@ static int setup_shutdown_watcher(struct notifier_block *notifier,
err = register_xenbus_watch(&shutdown_watch);
if (err)
printk(KERN_ERR "Failed to set shutdown watcher\n");
+ else
+ xenbus_write(XBT_NIL, "control", "feature-reboot", "1");
err = register_xenbus_watch(&sysrq_watch);
if (err)
printk(KERN_ERR "Failed to set sysrq watcher\n");
+ else
+ xenbus_write(XBT_NIL, "control", "feature-sysrq", "1");
return NOTIFY_DONE;
}
@@ -378,6 +213,7 @@ static int __init setup_shutdown_event(void)
.notifier_call = setup_shutdown_watcher
};
register_xenstore_notifier(&xenstore_notifier);
+
return 0;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
index a4a2e4edce..2fa88069c4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
@@ -18,7 +18,12 @@
/*static*/ kmem_cache_t *skbuff_cachep;
EXPORT_SYMBOL(skbuff_cachep);
-#define MAX_SKBUFF_ORDER 4
+/* Allow up to 64kB or page-sized packets (whichever is greater). */
+#if PAGE_SHIFT < 16
+#define MAX_SKBUFF_ORDER (16 - PAGE_SHIFT)
+#else
+#define MAX_SKBUFF_ORDER 0
+#endif
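
The new MAX_SKBUFF_ORDER definition picks the allocation order needed for a 64kB buffer on the current page size: order 4 with 4kB pages, down to order 0 once a single page is already 64kB or larger. A quick sketch of the same computation for a few page shifts, purely for illustration:

    #include <stdio.h>

    int main(void)
    {
        int shifts[] = { 12, 13, 14, 16 };   /* 4kB, 8kB, 16kB, 64kB pages */

        for (int i = 0; i < 4; i++) {
            int page_shift = shifts[i];
            int order = page_shift < 16 ? 16 - page_shift : 0;
            printf("PAGE_SHIFT=%d -> MAX_SKBUFF_ORDER=%d (%d bytes)\n",
                   page_shift, order, (1 << order) << page_shift);
        }
        return 0;
    }
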
static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
static struct {
diff --git a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
index 76bfab82e6..32f8de5bff 100644
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
@@ -419,10 +419,9 @@ static struct file_operations evtchn_fops = {
};
static struct miscdevice evtchn_miscdev = {
- .minor = EVTCHN_MINOR,
+ .minor = MISC_DYNAMIC_MINOR,
.name = "evtchn",
.fops = &evtchn_fops,
- .devfs_name = "misc/evtchn",
};
static int __init evtchn_init(void)
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/common.h b/linux-2.6-xen-sparse/drivers/xen/netback/common.h
index 434ff6bcf2..367c008d3b 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h
@@ -92,6 +92,9 @@ typedef struct netif_st {
unsigned long remaining_credit;
struct timer_list credit_timeout;
+ /* Enforce draining of the transmit queue. */
+ struct timer_list tx_queue_timeout;
+
/* Miscellaneous private stuff. */
struct list_head list; /* scheduling list */
atomic_t refcnt;
@@ -106,7 +109,7 @@ typedef struct netif_st {
void netif_disconnect(netif_t *netif);
-netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
+netif_t *netif_alloc(domid_t domid, unsigned int handle);
int netif_map(netif_t *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
@@ -119,6 +122,8 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
void netif_xenbus_init(void);
+#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev))
+
void netif_schedule_work(netif_t *netif);
void netif_deschedule_work(netif_t *netif);
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
index d60b23b0f2..9fae954bd2 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
@@ -34,6 +34,23 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+/*
+ * Module parameter 'queue_length':
+ *
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
+ * unbounded time. This is bad if those packets hold onto foreign resources.
+ * For example, consider a packet that holds onto resources belonging to the
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
+ * vif1.1 which is not activated in the guest): in this situation the guest
+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
+ * blocked.
+ */
+static unsigned long netbk_queue_length = 32;
+module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
static void __netif_up(netif_t *netif)
{
enable_irq(netif->irq);
@@ -107,9 +124,9 @@ static struct ethtool_ops network_ethtool_ops =
.get_link = ethtool_op_get_link,
};
-netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
+netif_t *netif_alloc(domid_t domid, unsigned int handle)
{
- int err = 0, i;
+ int err = 0;
struct net_device *dev;
netif_t *netif;
char name[IFNAMSIZ] = {};
@@ -134,6 +151,10 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
netif->credit_bytes = netif->remaining_credit = ~0UL;
netif->credit_usec = 0UL;
init_timer(&netif->credit_timeout);
+ /* Initialize 'expires' now: it's used to track the credit window. */
+ netif->credit_timeout.expires = jiffies;
+
+ init_timer(&netif->tx_queue_timeout);
dev->hard_start_xmit = netif_be_start_xmit;
dev->get_stats = netif_be_get_stats;
@@ -144,26 +165,16 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+ dev->tx_queue_len = netbk_queue_length;
+
/*
- * Reduce default TX queuelen so that each guest interface only
- * allows it to eat around 6.4MB of host memory.
- */
- dev->tx_queue_len = 100;
-
- for (i = 0; i < ETH_ALEN; i++)
- if (be_mac[i] != 0)
- break;
- if (i == ETH_ALEN) {
- /*
- * Initialise a dummy MAC address. We choose the numerically
- * largest non-broadcast address to prevent the address getting
- * stolen by an Ethernet bridge for STP purposes.
- * (FE:FF:FF:FF:FF:FF)
- */
- memset(dev->dev_addr, 0xFF, ETH_ALEN);
- dev->dev_addr[0] &= ~0x01;
- } else
- memcpy(dev->dev_addr, be_mac, ETH_ALEN);
+ * Initialise a dummy MAC address. We choose the numerically
+ * largest non-broadcast address to prevent the address getting
+ * stolen by an Ethernet bridge for STP purposes.
+ * (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
rtnl_lock();
err = register_netdevice(dev);
@@ -306,11 +317,23 @@ err_rx:
return err;
}
-static void netif_free(netif_t *netif)
+void netif_disconnect(netif_t *netif)
{
+ if (netif_carrier_ok(netif->dev)) {
+ rtnl_lock();
+ netif_carrier_off(netif->dev);
+ if (netif_running(netif->dev))
+ __netif_down(netif);
+ rtnl_unlock();
+ netif_put(netif);
+ }
+
atomic_dec(&netif->refcnt);
wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+ del_timer_sync(&netif->credit_timeout);
+ del_timer_sync(&netif->tx_queue_timeout);
+
if (netif->irq)
unbind_from_irqhandler(netif->irq, netif);
@@ -324,16 +347,3 @@ static void netif_free(netif_t *netif)
free_netdev(netif->dev);
}
-
-void netif_disconnect(netif_t *netif)
-{
- if (netif_carrier_ok(netif->dev)) {
- rtnl_lock();
- netif_carrier_off(netif->dev);
- if (netif_running(netif->dev))
- __netif_down(netif);
- rtnl_unlock();
- netif_put(netif);
- }
- netif_free(netif);
-}
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
index 391ace8a02..d021c9689a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
@@ -53,8 +53,10 @@
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <net/dst.h>
+#include <net/xfrm.h> /* secpath_reset() */
+#include <asm/hypervisor.h> /* is_initial_xendomain() */
-static int nloopbacks = 8;
+static int nloopbacks = -1;
module_param(nloopbacks, int, 0);
MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
@@ -77,10 +79,60 @@ static int loopback_close(struct net_device *dev)
return 0;
}
+#ifdef CONFIG_X86
+static int is_foreign(unsigned long pfn)
+{
+ /* NB. Play it safe for auto-translation mode. */
+ return (xen_feature(XENFEAT_auto_translated_physmap) ||
+ (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
+}
+#else
+/* How to detect a foreign mapping? Play it safe. */
+#define is_foreign(pfn) (1)
+#endif
+
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+ struct page *page;
+ unsigned long pfn;
+ int i, off;
+ char *vaddr;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+ if (!is_foreign(pfn))
+ continue;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!page))
+ return 0;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ off = skb_shinfo(skb)->frags[i].page_offset;
+ memcpy(page_address(page) + off,
+ vaddr + off,
+ skb_shinfo(skb)->frags[i].size);
+ kunmap_skb_frag(vaddr);
+
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->frags[i].page = page;
+ }
+
+ return 1;
+}
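
skb_remove_foreign_references() walks the fragment list and, for any fragment that still maps a foreign (granted) page, copies the data into a freshly allocated local page and swaps the fragment over, so the loopback path never holds a reference to another domain's memory. A userspace sketch of the copy-and-swap step, with a made-up fragment structure standing in for skb_frag_t:

    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    struct demo_frag {
        char *page;        /* backing buffer */
        size_t offset;
        size_t size;
        int foreign;       /* stand-in for the FOREIGN_FRAME test */
    };

    /* Replace a foreign-backed fragment with a private copy; returns 0 on OOM. */
    static int localise_frag(struct demo_frag *f, size_t page_size)
    {
        char *copy;

        if (!f->foreign)
            return 1;

        copy = malloc(page_size);
        if (!copy)
            return 0;

        memcpy(copy + f->offset, f->page + f->offset, f->size);
        /* In the kernel the old page is released with put_page(); here the
         * demo simply forgets it, since it does not own that memory. */
        f->page = copy;
        f->foreign = 0;
        return 1;
    }

    int main(void)
    {
        char shared[4096] = "data that lives in a foreign page";
        struct demo_frag f = { shared, 0, 34, 1 };

        if (!localise_frag(&f, sizeof(shared)))
            return 1;
        printf("now local: %s\n", f.page);
        free(f.page);
        return 0;
    }
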
+
static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_private *np = netdev_priv(dev);
+ if (!skb_remove_foreign_references(skb)) {
+ np->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+ }
+
dst_release(skb->dst);
skb->dst = NULL;
@@ -110,6 +162,11 @@ static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb->protocol = eth_type_trans(skb, dev);
skb->dev = dev;
dev->last_rx = jiffies;
+
+ /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
+ nf_reset(skb);
+ secpath_reset(skb);
+
netif_rx(skb);
return 0;
@@ -239,6 +296,9 @@ static int __init loopback_init(void)
{
int i, err = 0;
+ if (nloopbacks == -1)
+ nloopbacks = is_initial_xendomain() ? 4 : 0;
+
for (i = 0; i < nloopbacks; i++)
if ((err = make_loopback(i)) != 0)
break;
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
index ad8236c82f..1d24fc9b88 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
@@ -70,14 +70,15 @@ static struct timer_list net_timer;
static struct sk_buff_head rx_queue;
-static unsigned long mmap_vstart;
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-
-static void *rx_mmap_area;
+static struct page **mmap_pages;
+static inline unsigned long idx_to_kaddr(unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
+}
#define PKT_PROT_LEN 64
-static struct {
+static struct pending_tx_info {
netif_tx_request_t req;
netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
@@ -186,7 +187,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
if (unlikely(!nskb))
goto err;
- skb_reserve(nskb, 16);
+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
headlen = nskb->end - nskb->data;
if (headlen > skb_headlen(skb))
headlen = skb_headlen(skb);
@@ -217,7 +218,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
- page = alloc_page(GFP_ATOMIC | zero);
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
if (unlikely(!page))
goto err_free;
@@ -263,6 +264,13 @@ static inline int netbk_queue_full(netif_t *netif)
((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}
+static void tx_queue_callback(unsigned long data)
+{
+ netif_t *netif = (netif_t *)data;
+ if (netif_schedulable(netif->dev))
+ netif_wake_queue(netif->dev);
+}
+
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
@@ -270,20 +278,13 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(skb->dev != dev);
/* Drop the packet if the target domain has no receive buffers. */
- if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
+ if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif)))
goto drop;
- if (unlikely(netbk_queue_full(netif))) {
- /* Not a BUG_ON() -- misbehaving netfront can trigger this. */
- if (netbk_can_queue(dev))
- DPRINTK("Queue full but not stopped!\n");
- goto drop;
- }
-
- /* Copy the packet here if it's destined for a flipping
- interface but isn't flippable (e.g. extra references to
- data)
- */
+ /*
+ * Copy the packet here if it's destined for a flipping interface
+ * but isn't flippable (e.g. extra references to data).
+ */
if (!netif->copying_receiver && !is_flippable_skb(skb)) {
struct sk_buff *nskb = netbk_copy_skb(skb);
if ( unlikely(nskb == NULL) )
@@ -304,8 +305,19 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif->rx.sring->req_event = netif->rx_req_cons_peek +
netbk_max_required_rx_slots(netif);
mb(); /* request notification /then/ check & stop the queue */
- if (netbk_queue_full(netif))
+ if (netbk_queue_full(netif)) {
netif_stop_queue(dev);
+ /*
+ * Schedule 500ms timeout to restart the queue, thus
+ * ensuring that an inactive queue will be drained.
+ * Packets will be immediately be dropped until more
+			 * Packets will be dropped immediately until more
+ * netbk_queue_full() check above).
+ */
+ netif->tx_queue_timeout.data = (unsigned long)netif;
+ netif->tx_queue_timeout.function = tx_queue_callback;
+ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ }
}
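
As the comment in the hunk above explains, when the receive ring is full the interface queue is stopped and tx_queue_timeout is armed for HZ/2, so a guest that never posts receive buffers cannot leave packets parked in the queue indefinitely. A rough userspace sketch of the stop-with-deadline idea; the struct, field names, and the "jiffies" values are invented for illustration:

    #include <stdio.h>
    #include <stdbool.h>

    struct demo_queue {
        bool stopped;
        unsigned long restart_deadline;   /* in "jiffies" */
    };

    static void stop_with_deadline(struct demo_queue *q, unsigned long now,
                                   unsigned long hz)
    {
        q->stopped = true;
        q->restart_deadline = now + hz / 2;   /* drain after 500ms at the latest */
    }

    static void tick(struct demo_queue *q, unsigned long now)
    {
        if (q->stopped && now >= q->restart_deadline) {
            q->stopped = false;               /* tx_queue_callback analogue */
            printf("queue restarted at %lu\n", now);
        }
    }

    int main(void)
    {
        struct demo_queue q = { false, 0 };
        unsigned long hz = 100;

        stop_with_deadline(&q, 1000, hz);
        for (unsigned long t = 1000; t <= 1060; t += 10)
            tick(&q, t);
        return 0;
    }
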
skb_queue_tail(&rx_queue, skb);
@@ -373,14 +385,22 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
flipped. */
meta->copy = 1;
copy_gop = npo->copy + npo->copy_prod++;
- copy_gop->source.domid = DOMID_SELF;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (PageForeign(page)) {
+ struct pending_tx_info *src_pend =
+ &pending_tx_info[page->index];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = old_mfn;
+ }
copy_gop->source.offset = offset;
- copy_gop->source.u.gmfn = old_mfn;
copy_gop->dest.domid = netif->domid;
copy_gop->dest.offset = 0;
copy_gop->dest.u.ref = req->gref;
copy_gop->len = size;
- copy_gop->flags = GNTCOPY_dest_gref;
} else {
meta->copy = 0;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -474,7 +494,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
copy_op = npo->copy + npo->copy_cons++;
if (copy_op->status != GNTST_okay) {
DPRINTK("Bad status %d from copy to DOM%d.\n",
- gop->status, domid);
+ copy_op->status, domid);
status = NETIF_RSP_ERROR;
}
} else {
@@ -697,6 +717,7 @@ static void net_rx_action(unsigned long unused)
}
if (netif_queue_stopped(netif->dev) &&
+ netif_schedulable(netif->dev) &&
!netbk_queue_full(netif))
netif_wake_queue(netif->dev);
@@ -754,8 +775,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
spin_lock_irq(&net_schedule_list_lock);
if (!__on_net_schedule_list(netif) &&
- likely(netif_running(netif->dev) &&
- netif_carrier_ok(netif->dev))) {
+ likely(netif_schedulable(netif->dev))) {
list_add_tail(&netif->list, &net_schedule_list);
netif_get(netif);
}
@@ -792,10 +812,30 @@ void netif_deschedule_work(netif_t *netif)
}
+static void tx_add_credit(netif_t *netif)
+{
+ unsigned long max_burst, max_credit;
+
+ /*
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+ max_burst = min(max_burst, 131072UL);
+ max_burst = max(max_burst, netif->credit_bytes);
+
+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+ max_credit = netif->remaining_credit + netif->credit_bytes;
+ if (max_credit < netif->remaining_credit)
+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+
+ netif->remaining_credit = min(max_credit, max_burst);
+}
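
tx_add_credit() tops up the transmit credit by one credit_bytes quantum, guards against unsigned wrap-around, and caps the result at a burst size derived from the packet at the head of the ring (at most 128kB). A standalone sketch of the same clamping arithmetic with illustrative numbers:

    #include <stdio.h>
    #include <limits.h>

    static unsigned long add_credit(unsigned long remaining,
                                    unsigned long credit_bytes,
                                    unsigned long next_packet_size)
    {
        unsigned long max_burst, max_credit;

        /* Burst: at least one credit quantum, at most 128kB, and enough for
         * the packet at the head of the ring. */
        max_burst = next_packet_size;
        if (max_burst > 131072UL)
            max_burst = 131072UL;
        if (max_burst < credit_bytes)
            max_burst = credit_bytes;

        max_credit = remaining + credit_bytes;
        if (max_credit < remaining)          /* wrapped */
            max_credit = ULONG_MAX;

        return max_credit < max_burst ? max_credit : max_burst;
    }

    int main(void)
    {
        printf("%lu\n", add_credit(0, 10000, 1500));             /* one quantum */
        printf("%lu\n", add_credit(500, 10000, 60000));          /* larger burst */
        printf("%lu\n", add_credit(ULONG_MAX - 1, 10000, 1500)); /* wrap clamp */
        return 0;
    }
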
+
static void tx_credit_callback(unsigned long data)
{
netif_t *netif = (netif_t *)data;
- netif->remaining_credit = netif->credit_bytes;
+ tx_add_credit(netif);
netif_schedule_work(netif);
}
@@ -819,7 +859,7 @@ inline static void net_tx_action_dealloc(void)
gop = tx_unmap_ops;
while (dc != dp) {
pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
GNTMAP_host_map,
grant_tx_handle[pending_idx]);
gop++;
@@ -857,20 +897,28 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
netif_put(netif);
}
-static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
- int work_to_do)
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+ netif_tx_request_t *txp, int work_to_do)
{
- netif_tx_request_t *first = txp;
RING_IDX cons = netif->tx.req_cons;
int frags = 0;
- while (txp->flags & NETTXF_more_data) {
+ if (!(first->flags & NETTXF_more_data))
+ return 0;
+
+ do {
if (frags >= work_to_do) {
DPRINTK("Need more frags\n");
return -frags;
}
- txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
+ DPRINTK("Too many frags\n");
+ return -frags;
+ }
+
+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ sizeof(*txp));
if (txp->size > first->size) {
DPRINTK("Frags galore\n");
return -frags;
@@ -884,30 +932,28 @@ static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
txp->offset, txp->size);
return -frags;
}
- }
+ } while ((txp++)->flags & NETTXF_more_data);
return frags;
}
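
The reworked netbk_count_requests() snapshots each chained descriptor into the caller's txfrags[] array and bails out once MAX_SKB_FRAGS or the remaining work budget is exceeded, instead of trusting the chain length advertised by the frontend. A simplified sketch of the bounded walk, with a toy descriptor type and limits standing in for the real ring structures:

    #include <stdio.h>
    #include <string.h>

    #define DEMO_MORE_DATA  0x1
    #define DEMO_MAX_FRAGS  4

    struct demo_txreq {
        unsigned int size;
        unsigned int flags;
    };

    /* Copy the chain that follows 'first' into 'out'; return the number of
     * extra fragments, or -1 if the chain is malformed or too long. */
    static int count_chain(const struct demo_txreq *first,
                           const struct demo_txreq *ring,
                           struct demo_txreq *out, int work_to_do)
    {
        int frags = 0;

        if (!(first->flags & DEMO_MORE_DATA))
            return 0;

        do {
            if (frags >= work_to_do || frags >= DEMO_MAX_FRAGS)
                return -1;
            memcpy(&out[frags], &ring[frags], sizeof(*out));
            if (out[frags].size > first->size)
                return -1;        /* fragment larger than the whole request */
            frags++;
        } while (out[frags - 1].flags & DEMO_MORE_DATA);

        return frags;
    }

    int main(void)
    {
        struct demo_txreq first = { 3000, DEMO_MORE_DATA };
        struct demo_txreq ring[] = { { 1000, DEMO_MORE_DATA }, { 1000, 0 } };
        struct demo_txreq out[DEMO_MAX_FRAGS];

        printf("frags = %d\n", count_chain(&first, ring, out, 8));
        return 0;
    }
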
static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
struct sk_buff *skb,
+ netif_tx_request_t *txp,
gnttab_map_grant_ref_t *mop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
- netif_tx_request_t *txp;
unsigned long pending_idx = *((u16 *)skb->data);
- RING_IDX cons = netif->tx.req_cons;
int i, start;
/* Skip first skb fragment if it is on same page as header fragment. */
start = ((unsigned long)shinfo->frags[0].page == pending_idx);
- for (i = start; i < shinfo->nr_frags; i++) {
- txp = RING_GET_REQUEST(&netif->tx, cons++);
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
- gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txp->gref, netif->domid);
@@ -940,7 +986,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
netif_put(netif);
} else {
set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
grant_tx_handle[pending_idx] = mop->handle;
}
@@ -957,7 +1003,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
newerr = (++mop)->status;
if (likely(!newerr)) {
set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
grant_tx_handle[pending_idx] = mop->handle;
/* Had a previous error? Invalidate this fragment. */
@@ -1005,7 +1051,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
pending_idx = (unsigned long)frag->page;
txp = &pending_tx_info[pending_idx].req;
- frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
frag->size = txp->size;
frag->page_offset = txp->offset;
@@ -1018,7 +1064,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
int work_to_do)
{
- struct netif_extra_info *extra;
+ struct netif_extra_info extra;
RING_IDX cons = netif->tx.req_cons;
do {
@@ -1027,18 +1073,18 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
return -EBADR;
}
- extra = (struct netif_extra_info *)
- RING_GET_REQUEST(&netif->tx, cons);
- if (unlikely(!extra->type ||
- extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+ sizeof(extra));
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
netif->tx.req_cons = ++cons;
- DPRINTK("Invalid extra type: %d\n", extra->type);
+ DPRINTK("Invalid extra type: %d\n", extra.type);
return -EINVAL;
}
- memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
netif->tx.req_cons = ++cons;
- } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
return work_to_do;
}
@@ -1073,6 +1119,7 @@ static void net_tx_action(unsigned long unused)
struct sk_buff *skb;
netif_t *netif;
netif_tx_request_t txreq;
+ netif_tx_request_t txfrags[MAX_SKB_FRAGS];
struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
u16 pending_idx;
RING_IDX i;
@@ -1101,6 +1148,7 @@ static void net_tx_action(unsigned long unused)
i = netif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
/* Credit-based scheduling. */
if (txreq.size > netif->remaining_credit) {
unsigned long now = jiffies;
@@ -1109,25 +1157,27 @@ static void net_tx_action(unsigned long unused)
msecs_to_jiffies(netif->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&netif->credit_timeout))
- break;
+ if (timer_pending(&netif->credit_timeout)) {
+ netif_put(netif);
+ continue;
+ }
/* Passed the point where we can replenish credit? */
if (time_after_eq(now, next_credit)) {
netif->credit_timeout.expires = now;
- netif->remaining_credit = netif->credit_bytes;
+ tx_add_credit(netif);
}
/* Still too big to send right now? Set a callback. */
if (txreq.size > netif->remaining_credit) {
- netif->remaining_credit = 0;
netif->credit_timeout.data =
(unsigned long)netif;
netif->credit_timeout.function =
tx_credit_callback;
__mod_timer(&netif->credit_timeout,
next_credit);
- break;
+ netif_put(netif);
+ continue;
}
}
netif->remaining_credit -= txreq.size;
@@ -1146,19 +1196,13 @@ static void net_tx_action(unsigned long unused)
}
}
- ret = netbk_count_requests(netif, &txreq, work_to_do);
+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0)) {
netbk_tx_err(netif, &txreq, i - ret);
continue;
}
i += ret;
- if (unlikely(ret > MAX_SKB_FRAGS)) {
- DPRINTK("Too many frags\n");
- netbk_tx_err(netif, &txreq, i);
- continue;
- }
-
if (unlikely(txreq.size < ETH_HLEN)) {
DPRINTK("Bad packet size: %d\n", txreq.size);
netbk_tx_err(netif, &txreq, i);
@@ -1180,7 +1224,7 @@ static void net_tx_action(unsigned long unused)
ret < MAX_SKB_FRAGS) ?
PKT_PROT_LEN : txreq.size;
- skb = alloc_skb(data_len+16, GFP_ATOMIC);
+ skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
DPRINTK("Can't allocate a skb in start_xmit.\n");
netbk_tx_err(netif, &txreq, i);
@@ -1188,7 +1232,7 @@ static void net_tx_action(unsigned long unused)
}
/* Packets passed to netif_rx() must have some headroom. */
- skb_reserve(skb, 16);
+ skb_reserve(skb, 16 + NET_IP_ALIGN);
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
struct netif_extra_info *gso;
@@ -1201,7 +1245,7 @@ static void net_tx_action(unsigned long unused)
}
}
- gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
+ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txreq.gref, netif->domid);
mop++;
@@ -1227,7 +1271,7 @@ static void net_tx_action(unsigned long unused)
pending_cons++;
- mop = netbk_get_requests(netif, skb, mop);
+ mop = netbk_get_requests(netif, skb, txfrags, mop);
netif->tx.req_cons = i;
netif_schedule_work(netif);
@@ -1260,8 +1304,8 @@ static void net_tx_action(unsigned long unused)
}
data_len = skb->len;
- memcpy(skb->data,
- (void *)(MMAP_VADDR(pending_idx)|txp->offset),
+ memcpy(skb->data,
+ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
data_len);
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
@@ -1315,18 +1359,10 @@ static void netif_idx_release(u16 pending_idx)
static void netif_page_release(struct page *page)
{
- u16 pending_idx = page - virt_to_page(mmap_vstart);
-
/* Ready for next use. */
set_page_count(page, 1);
- netif_idx_release(pending_idx);
-}
-
-static void netif_rx_page_release(struct page *page)
-{
- /* Ready for next use. */
- set_page_count(page, 1);
+ netif_idx_release(page->index);
}
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
@@ -1336,7 +1372,7 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
- if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
+ if (netif_schedulable(netif->dev) && !netbk_queue_full(netif))
netif_wake_queue(netif->dev);
return IRQ_HANDLED;
@@ -1446,27 +1482,17 @@ static int __init netback_init(void)
init_timer(&net_timer);
net_timer.data = 0;
net_timer.function = net_alarm;
-
- page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
- if (page == NULL)
- return -ENOMEM;
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (mmap_pages == NULL) {
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
+ }
for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = virt_to_page(MMAP_VADDR(i));
- set_page_count(page, 1);
+ page = mmap_pages[i];
SetPageForeign(page, netif_page_release);
- }
-
- page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
- BUG_ON(page == NULL);
- rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
-
- for (i = 0; i < NET_RX_RING_SIZE; i++) {
- page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
- set_page_count(page, 1);
- SetPageForeign(page, netif_rx_page_release);
+ page->index = i;
}
pending_cons = 0;
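The hunk above drops the contiguous MMAP_VADDR() virtual range in favour of a page vector from alloc_empty_pages_and_pagevec(), so the kernel address backing a pending request now has to be derived from its struct page. A minimal sketch of such an idx_to_kaddr()-style helper, assuming a module-level mmap_pages array filled as in the hunk above (the helper shape mirrors the tpmback version later in this series):

#include <linux/mm.h>
#include <asm/page.h>

static struct page **mmap_pages;	/* one empty page per pending request */

/* Kernel virtual address backing pending request 'idx'. */
static inline unsigned long idx_to_kaddr(unsigned int idx)
{
	return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
}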
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
index 6da614fc0c..7d301965f4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
@@ -28,29 +28,20 @@
printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#endif
-struct backend_info
-{
+struct backend_info {
struct xenbus_device *dev;
netif_t *netif;
- struct xenbus_watch backend_watch;
enum xenbus_state frontend_state;
};
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
-static void maybe_connect(struct backend_info *);
-static void backend_changed(struct xenbus_watch *, const char **,
- unsigned int);
+static void backend_create_netif(struct backend_info *be);
static int netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev->dev.driver_data;
- if (be->backend_watch.node) {
- unregister_xenbus_watch(&be->backend_watch);
- kfree(be->backend_watch.node);
- be->backend_watch.node = NULL;
- }
if (be->netif) {
netif_disconnect(be->netif);
be->netif = NULL;
@@ -63,8 +54,7 @@ static int netback_remove(struct xenbus_device *dev)
/**
* Entry point to this code when a new device is created. Allocate the basic
- * structures, and watch the store waiting for the hotplug scripts to tell us
- * the device's handle. Switch to InitWait.
+ * structures and switch to InitWait.
*/
static int netback_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
@@ -83,11 +73,6 @@ static int netback_probe(struct xenbus_device *dev,
be->dev = dev;
dev->dev.driver_data = be;
- err = xenbus_watch_path2(dev, dev->nodename, "handle",
- &be->backend_watch, backend_changed);
- if (err)
- goto fail;
-
do {
err = xenbus_transaction_start(&xbt);
if (err) {
@@ -108,9 +93,22 @@ static int netback_probe(struct xenbus_device *dev,
goto abort_transaction;
}
- err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1);
+ /* We support rx-copy path. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-copy", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-copy";
+ goto abort_transaction;
+ }
+
+ /*
+ * We don't support rx-flip path (except old guests who don't
+ * grok this feature flag).
+ */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-flip", "%d", 0);
if (err) {
- message = "writing feature-copying";
+ message = "writing feature-rx-flip";
goto abort_transaction;
}
@@ -123,9 +121,11 @@ static int netback_probe(struct xenbus_device *dev,
}
err = xenbus_switch_state(dev, XenbusStateInitWait);
- if (err) {
+ if (err)
goto fail;
- }
+
+ /* This kicks hotplug scripts, so do it immediately. */
+ backend_create_netif(be);
return 0;
@@ -175,48 +175,30 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp,
}
-/**
- * Callback received when the hotplug scripts have placed the handle node.
- * Read it, and create a netif structure. If the frontend is ready, connect.
- */
-static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+static void backend_create_netif(struct backend_info *be)
{
int err;
long handle;
- struct backend_info *be
- = container_of(watch, struct backend_info, backend_watch);
struct xenbus_device *dev = be->dev;
- DPRINTK("");
+ if (be->netif != NULL)
+ return;
err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
- if (XENBUS_EXIST_ERR(err)) {
- /* Since this watch will fire once immediately after it is
- registered, we expect this. Ignore it, and wait for the
- hotplug scripts. */
- return;
- }
if (err != 1) {
xenbus_dev_fatal(dev, err, "reading handle");
return;
}
- if (be->netif == NULL) {
- u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
-
- be->netif = netif_alloc(dev->otherend_id, handle, be_mac);
- if (IS_ERR(be->netif)) {
- err = PTR_ERR(be->netif);
- be->netif = NULL;
- xenbus_dev_fatal(dev, err, "creating interface");
- return;
- }
-
- kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
-
- maybe_connect(be);
+ be->netif = netif_alloc(dev->otherend_id, handle);
+ if (IS_ERR(be->netif)) {
+ err = PTR_ERR(be->netif);
+ be->netif = NULL;
+ xenbus_dev_fatal(dev, err, "creating interface");
+ return;
}
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
}
@@ -249,11 +231,9 @@ static void frontend_changed(struct xenbus_device *dev,
break;
case XenbusStateConnected:
- if (!be->netif) {
- /* reconnect: setup be->netif */
- backend_changed(&be->backend_watch, NULL, 0);
- }
- maybe_connect(be);
+ backend_create_netif(be);
+ if (be->netif)
+ connect(be);
break;
case XenbusStateClosing:
@@ -279,15 +259,6 @@ static void frontend_changed(struct xenbus_device *dev,
}
-/* ** Connection ** */
-
-
-static void maybe_connect(struct backend_info *be)
-{
- if (be->netif && (be->frontend_state == XenbusStateConnected))
- connect(be);
-}
-
static void xen_net_read_rate(struct xenbus_device *dev,
unsigned long *bytes, unsigned long *usec)
{
@@ -366,6 +337,10 @@ static void connect(struct backend_info *be)
be->netif->remaining_credit = be->netif->credit_bytes;
xenbus_switch_state(dev, XenbusStateConnected);
+
+ /* May not get a kick from the frontend, so start the tx_queue now. */
+ if (!netbk_can_queue(be->netif->dev))
+ netif_wake_queue(be->netif->dev);
}
@@ -403,14 +378,16 @@ static int connect_rings(struct backend_info *be)
}
be->netif->copying_receiver = !!rx_copy;
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
- &val) < 0)
- val = 0;
- if (val)
- be->netif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- be->netif->dev->tx_queue_len = 1;
+ if (be->netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
+ be->netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+ be->netif->dev->tx_queue_len = 1;
+ }
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
val = 0;
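The backend advertises its receive-path capabilities as boolean keys under its own xenstore node and reads the frontend's optional flags from the other end, treating a missing key as "not supported". A minimal sketch of that pattern, with key names taken from the hunks above and the helper names purely illustrative:

/* Sketch: advertise a capability inside an open xenbus transaction. */
static int advertise_rx_copy(struct xenbus_transaction xbt,
			     struct xenbus_device *dev)
{
	/* "1" = supported, "0" = unsupported; an absent key means an old backend. */
	return xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1);
}

/* Sketch: read an optional feature flag from the other end, defaulting to 0. */
static int read_feature_flag(struct xenbus_device *dev, const char *key)
{
	int val;

	if (xenbus_scanf(XBT_NIL, dev->otherend, key, "%d", &val) < 0)
		val = 0;
	return val;
}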
diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
index a257cb6064..da22d45bf6 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
@@ -47,6 +47,7 @@
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/io.h>
+#include <linux/moduleparam.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
@@ -63,20 +64,76 @@
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+/*
+ * Mutually-exclusive module options to select receive data path:
+ * rx_copy : Packets are copied by network backend into local memory
+ * rx_flip : Page containing packet data is transferred to our ownership
+ * For fully-virtualised guests there is no option - copying must be used.
+ * For paravirtualised guests, flipping is the default.
+ */
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
#define RX_COPY_THRESHOLD 256
/* If we don't have GSO, fake things up so that we never try to use it. */
-#ifndef NETIF_F_GSO
-#define netif_needs_gso(dev, skb) 0
-#define dev_disable_gso_features(dev) ((void)0)
-#else
+#if defined(NETIF_F_GSO)
#define HAVE_GSO 1
+#define HAVE_TSO 1 /* TSO is a subset of GSO */
static inline void dev_disable_gso_features(struct net_device *dev)
{
/* Turn off all GSO bits except ROBUST. */
dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
dev->features |= NETIF_F_GSO_ROBUST;
}
+#elif defined(NETIF_F_TSO)
+#define HAVE_TSO 1
+
+/* Some older kernels cannot cope with incorrect checksums,
+ * particularly in netfilter. I'm not sure there is 100% correlation
+ * with the presence of NETIF_F_TSO but it appears to be a good first
+ * approximation.
+ */
+#define HAVE_NO_CSUM_OFFLOAD 1
+
+#define gso_size tso_size
+#define gso_segs tso_segs
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+ /* Turn off all TSO bits. */
+ dev->features &= ~NETIF_F_TSO;
+}
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->tso_size;
+}
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+ return (features & NETIF_F_TSO);
+}
+
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+ return skb_is_gso(skb) &&
+ (!skb_gso_ok(skb, dev->features) ||
+ unlikely(skb->ip_summed != CHECKSUM_HW));
+}
+#else
+#define netif_needs_gso(dev, skb) 0
+#define dev_disable_gso_features(dev) ((void)0)
#endif
#define GRANT_INVALID_REF 0
@@ -96,7 +153,6 @@ struct netfront_info {
spinlock_t tx_lock;
spinlock_t rx_lock;
- unsigned int handle;
unsigned int evtchn, irq;
unsigned int copying_receiver;
@@ -120,7 +176,7 @@ struct netfront_info {
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
grant_ref_t gref_rx_head;
- grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
struct xenbus_device *xbdev;
int tx_ring_ref;
@@ -185,9 +241,8 @@ static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "netfront: " fmt, ##args)
-static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
static int setup_device(struct xenbus_device *, struct netfront_info *);
-static struct net_device *create_netdev(int, int, struct xenbus_device *);
+static struct net_device *create_netdev(struct xenbus_device *);
static void netfront_closing(struct xenbus_device *);
@@ -195,9 +250,8 @@ static void end_access(int, void *);
static void netif_disconnect_backend(struct netfront_info *);
static int open_netdev(struct netfront_info *);
static void close_netdev(struct netfront_info *);
-static void netif_free(struct netfront_info *);
-static void network_connect(struct net_device *);
+static int network_connect(struct net_device *);
static void network_tx_buf_gc(struct net_device *);
static void network_alloc_rx_buffers(struct net_device *);
static int send_fake_arp(struct net_device *);
@@ -220,8 +274,7 @@ static inline int xennet_can_sg(struct net_device *dev)
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffers for communication with the backend, and
- * inform the backend of the appropriate details for those. Switch to
- * Connected state.
+ * inform the backend of the appropriate details for those.
*/
static int __devinit netfront_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
@@ -229,31 +282,8 @@ static int __devinit netfront_probe(struct xenbus_device *dev,
int err;
struct net_device *netdev;
struct netfront_info *info;
- unsigned int handle;
- unsigned feature_rx_copy;
-
- err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
- if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading handle");
- return err;
- }
-
-#ifndef CONFIG_XEN
- err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
- &feature_rx_copy);
- if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
- return err;
- }
- if (!feature_rx_copy) {
- xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
- return -EINVAL;
- }
-#else
- feature_rx_copy = 0;
-#endif
- netdev = create_netdev(handle, feature_rx_copy, dev);
+ netdev = create_netdev(dev);
if (IS_ERR(netdev)) {
err = PTR_ERR(netdev);
xenbus_dev_fatal(dev, err, "creating netdev");
@@ -263,20 +293,13 @@ static int __devinit netfront_probe(struct xenbus_device *dev,
info = netdev_priv(netdev);
dev->dev.driver_data = info;
- err = talk_to_backend(dev, info);
- if (err)
- goto fail_backend;
-
err = open_netdev(info);
if (err)
- goto fail_open;
+ goto fail;
return 0;
- fail_open:
- xennet_sysfs_delif(info->netdev);
- unregister_netdev(netdev);
- fail_backend:
+ fail:
free_netdev(netdev);
dev->dev.driver_data = NULL;
return err;
@@ -296,7 +319,7 @@ static int netfront_resume(struct xenbus_device *dev)
DPRINTK("%s\n", dev->nodename);
netif_disconnect_backend(info);
- return talk_to_backend(dev, info);
+ return 0;
}
static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
@@ -379,13 +402,21 @@ again:
goto abort_transaction;
}
+#ifdef HAVE_NO_CSUM_OFFLOAD
+ err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1);
+ if (err) {
+ message = "writing feature-no-csum-offload";
+ goto abort_transaction;
+ }
+#endif
+
err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
if (err) {
message = "writing feature-sg";
goto abort_transaction;
}
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
if (err) {
message = "writing feature-gso-tcpv4";
@@ -407,12 +438,11 @@ again:
xenbus_transaction_end(xbt, 1);
xenbus_dev_fatal(dev, err, "%s", message);
destroy_ring:
- netif_free(info);
+ netif_disconnect_backend(info);
out:
return err;
}
-
static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
{
struct netif_tx_sring *txs;
@@ -472,11 +502,9 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
return 0;
fail:
- netif_free(info);
return err;
}
-
/**
* Callback received when the backend's state changes.
*/
@@ -497,7 +525,8 @@ static void backend_changed(struct xenbus_device *dev,
break;
case XenbusStateInitWait:
- network_connect(netdev);
+ if (network_connect(netdev) != 0)
+ break;
xenbus_switch_state(dev, XenbusStateConnected);
(void)send_fake_arp(netdev);
break;
@@ -508,7 +537,6 @@ static void backend_changed(struct xenbus_device *dev,
}
}
-
/** Send a packet on a net device to encourage switches to learn the
* MAC. We send a fake ARP request.
*
@@ -537,7 +565,6 @@ static int send_fake_arp(struct net_device *dev)
return dev_queue_xmit(skb);
}
-
static int network_open(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
@@ -629,14 +656,12 @@ static void network_tx_buf_gc(struct net_device *dev)
network_maybe_wake_tx(dev);
}
-
static void rx_refill_timeout(unsigned long data)
{
struct net_device *dev = (struct net_device *)data;
netif_rx_schedule(dev);
}
-
static void network_alloc_rx_buffers(struct net_device *dev)
{
unsigned short id;
@@ -669,7 +694,7 @@ static void network_alloc_rx_buffers(struct net_device *dev)
* necessary here.
* 16 bytes added as necessary headroom for netif_receive_skb.
*/
- skb = alloc_skb(RX_COPY_THRESHOLD + 16,
+ skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
goto no_skb;
@@ -687,7 +712,7 @@ no_skb:
break;
}
- skb_reserve(skb, 16); /* mimic dev_alloc_skb() */
+ skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
skb_shinfo(skb)->frags[0].page = page;
skb_shinfo(skb)->nr_frags = 1;
__skb_queue_tail(&np->rx_batch, skb);
@@ -742,7 +767,7 @@ no_skb:
} else {
gnttab_grant_foreign_access_ref(ref,
np->xbdev->otherend_id,
- pfn,
+ pfn_to_mfn(pfn),
0);
}
@@ -917,7 +942,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx->flags |= NETTXF_data_validated;
#endif
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
if (skb_shinfo(skb)->gso_size) {
struct netif_extra_info *gso = (struct netif_extra_info *)
RING_GET_REQUEST(&np->tx, ++i);
@@ -1071,6 +1096,7 @@ static int xennet_get_responses(struct netfront_info *np,
if (net_ratelimit())
WPRINTK("rx->offset: %x, size: %u\n",
rx->offset, rx->status);
+ xennet_move_rx_slot(np, skb, ref);
err = -EINVAL;
goto next;
}
@@ -1081,7 +1107,8 @@ static int xennet_get_responses(struct netfront_info *np,
* situation to the system controller to reboot the backend.
*/
if (ref == GRANT_INVALID_REF) {
- WPRINTK("Bad rx response id %d.\n", rx->id);
+ if (net_ratelimit())
+ WPRINTK("Bad rx response id %d.\n", rx->id);
err = -EINVAL;
goto next;
}
@@ -1153,6 +1180,9 @@ next:
err = -E2BIG;
}
+ if (unlikely(err))
+ np->rx.rsp_cons = cons + frags;
+
*pages_flipped_p = pages_flipped;
return err;
@@ -1205,12 +1235,14 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
return -EINVAL;
}
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
skb_shinfo(skb)->gso_size = gso->u.gso.size;
+#ifdef HAVE_GSO
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+#endif
skb_shinfo(skb)->gso_segs = 0;
return 0;
@@ -1255,9 +1287,9 @@ static int netif_poll(struct net_device *dev, int *pbudget)
rp = np->rx.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for (i = np->rx.rsp_cons, work_done = 0;
- (i != rp) && (work_done < budget);
- np->rx.rsp_cons = ++i, work_done++) {
+ i = np->rx.rsp_cons;
+ work_done = 0;
+ while ((i != rp) && (work_done < budget)) {
memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
memset(extras, 0, sizeof(extras));
@@ -1265,12 +1297,11 @@ static int netif_poll(struct net_device *dev, int *pbudget)
&pages_flipped);
if (unlikely(err)) {
-err:
- i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
- work_done--;
+err:
while ((skb = __skb_dequeue(&tmpq)))
__skb_queue_tail(&errq, skb);
np->stats.rx_errors++;
+ i = np->rx.rsp_cons;
continue;
}
@@ -1282,6 +1313,7 @@ err:
if (unlikely(xennet_set_skb_gso(skb, gso))) {
__skb_queue_head(&tmpq, skb);
+ np->rx.rsp_cons += skb_queue_len(&tmpq);
goto err;
}
}
@@ -1345,6 +1377,9 @@ err:
np->stats.rx_bytes += skb->len;
__skb_queue_tail(&rxq, skb);
+
+ np->rx.rsp_cons = ++i;
+ work_done++;
}
if (pages_flipped) {
@@ -1561,7 +1596,7 @@ static int xennet_set_sg(struct net_device *dev, u32 data)
static int xennet_set_tso(struct net_device *dev, u32 data)
{
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
if (data) {
struct netfront_info *np = netdev_priv(dev);
int val;
@@ -1588,20 +1623,53 @@ static void xennet_set_features(struct net_device *dev)
if (!(dev->features & NETIF_F_IP_CSUM))
return;
- if (!xennet_set_sg(dev, 1))
- xennet_set_tso(dev, 1);
+ if (xennet_set_sg(dev, 1))
+ return;
+
+ /* Before 2.6.9 TSO seems to be unreliable so do not enable it
+ * on older kernels.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+ xennet_set_tso(dev, 1);
+#endif
+
}
-static void network_connect(struct net_device *dev)
+static int network_connect(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
- int i, requeue_idx;
+ int i, requeue_idx, err;
struct sk_buff *skb;
grant_ref_t ref;
netif_rx_request_t *req;
+ unsigned int feature_rx_copy, feature_rx_flip;
+
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-copy", "%u", &feature_rx_copy);
+ if (err != 1)
+ feature_rx_copy = 0;
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-flip", "%u", &feature_rx_flip);
+ if (err != 1)
+ feature_rx_flip = 1;
+
+ /*
+ * Copy packets on receive path if:
+ * (a) This was requested by user, and the backend supports it; or
+ * (b) Flipping was requested, but this is unsupported by the backend.
+ */
+ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
+ (MODPARM_rx_flip && !feature_rx_flip));
+
+ err = talk_to_backend(np->xbdev, np);
+ if (err)
+ return err;
xennet_set_features(dev);
+ IPRINTK("device %s has %sing receive path.\n",
+ dev->name, np->copying_receiver ? "copy" : "flipp");
+
spin_lock_irq(&np->tx_lock);
spin_lock(&np->rx_lock);
@@ -1632,7 +1700,8 @@ static void network_connect(struct net_device *dev)
} else {
gnttab_grant_foreign_access_ref(
ref, np->xbdev->otherend_id,
- page_to_pfn(skb_shinfo(skb)->frags->page),
+ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
+ frags->page)),
0);
}
req->gref = ref;
@@ -1656,6 +1725,8 @@ static void network_connect(struct net_device *dev)
spin_unlock(&np->rx_lock);
spin_unlock_irq(&np->tx_lock);
+
+ return 0;
}
static void netif_uninit(struct net_device *dev)
@@ -1821,8 +1892,7 @@ static void network_set_multicast_list(struct net_device *dev)
{
}
-static struct net_device * __devinit
-create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
+static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
{
int i, err = 0;
struct net_device *netdev = NULL;
@@ -1836,9 +1906,7 @@ create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
}
np = netdev_priv(netdev);
- np->handle = handle;
np->xbdev = dev;
- np->copying_receiver = copying_receiver;
netif_carrier_off(netdev);
@@ -1969,10 +2037,12 @@ static int open_netdev(struct netfront_info *info)
err = xennet_sysfs_addif(info->netdev);
if (err) {
- /* This can be non-fatal: it only means no tuning parameters */
+ unregister_netdev(info->netdev);
printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
__FUNCTION__, err);
+ return err;
}
+
return 0;
}
@@ -2007,14 +2077,6 @@ static void netif_disconnect_backend(struct netfront_info *info)
}
-static void netif_free(struct netfront_info *info)
-{
- close_netdev(info);
- netif_disconnect_backend(info);
- free_netdev(info->netdev);
-}
-
-
static void end_access(int ref, void *page)
{
if (ref != GRANT_INVALID_REF)
@@ -2053,6 +2115,16 @@ static int __init netif_init(void)
if (!is_running_on_xen())
return -ENODEV;
+#ifdef CONFIG_XEN
+ if (MODPARM_rx_flip && MODPARM_rx_copy) {
+ WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
+ return -EINVAL;
+ }
+
+ if (!MODPARM_rx_flip && !MODPARM_rx_copy)
+ MODPARM_rx_flip = 1; /* Default is to flip. */
+#endif
+
if (is_initial_xendomain())
return 0;
@@ -2067,6 +2139,9 @@ module_init(netif_init);
static void __exit netif_exit(void)
{
+ if (is_initial_xendomain())
+ return;
+
unregister_inetaddr_notifier(&notifier_inetdev);
return xenbus_unregister_driver(&netfront);
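The receive-path selection introduced above reduces to a small truth table: copy when copying was requested and the backend supports it, or when flipping was requested but the backend cannot flip. A sketch of that decision in isolation, with names following the network_connect() hunk above:

/*
 * Sketch: decide whether the frontend should use the copying receive path.
 * rx_copy / rx_flip mirror the module parameters; feature_* mirror what the
 * backend advertised in xenstore (a missing "feature-rx-flip" key is treated
 * as "flip supported", for old backends that predate the flag).
 */
static int use_copying_receiver(int rx_copy, int rx_flip,
				int feature_rx_copy, int feature_rx_flip)
{
	return (rx_copy && feature_rx_copy) ||
	       (rx_flip && !feature_rx_flip);
}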
diff --git a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
index a1c4b6f68e..d159e4ac74 100644
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
@@ -35,6 +35,10 @@
static struct proc_dir_entry *privcmd_intf;
static struct proc_dir_entry *capabilities_intf;
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
+#endif
+
static int privcmd_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long data)
{
@@ -49,6 +53,8 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
return -EFAULT;
#if defined(__i386__)
+ if (hypercall.op >= (PAGE_SIZE >> 5))
+ break;
__asm__ __volatile__ (
"pushl %%ebx; pushl %%ecx; pushl %%edx; "
"pushl %%esi; pushl %%edi; "
@@ -65,45 +71,36 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
"popl %%ecx; popl %%ebx"
: "=a" (ret) : "0" (&hypercall) : "memory" );
#elif defined (__x86_64__)
- {
+ if (hypercall.op < (PAGE_SIZE >> 5)) {
long ign1, ign2, ign3;
__asm__ __volatile__ (
"movq %8,%%r10; movq %9,%%r8;"
- "shlq $5,%%rax ;"
+ "shll $5,%%eax ;"
"addq $hypercall_page,%%rax ;"
"call *%%rax"
: "=a" (ret), "=D" (ign1),
"=S" (ign2), "=d" (ign3)
- : "0" ((unsigned long)hypercall.op),
- "1" ((unsigned long)hypercall.arg[0]),
- "2" ((unsigned long)hypercall.arg[1]),
- "3" ((unsigned long)hypercall.arg[2]),
- "g" ((unsigned long)hypercall.arg[3]),
- "g" ((unsigned long)hypercall.arg[4])
+ : "0" ((unsigned int)hypercall.op),
+ "1" (hypercall.arg[0]),
+ "2" (hypercall.arg[1]),
+ "3" (hypercall.arg[2]),
+ "g" (hypercall.arg[3]),
+ "g" (hypercall.arg[4])
: "r8", "r10", "memory" );
}
#elif defined (__ia64__)
- __asm__ __volatile__ (
- ";; mov r14=%2; mov r15=%3; "
- "mov r16=%4; mov r17=%5; mov r18=%6;"
- "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
- : "=r" (ret)
- : "r" (hypercall.op),
- "r" (hypercall.arg[0]),
- "r" (hypercall.arg[1]),
- "r" (hypercall.arg[2]),
- "r" (hypercall.arg[3]),
- "r" (hypercall.arg[4])
- : "r14","r15","r16","r17","r18","r2","r8","memory");
+ ret = privcmd_hypercall(&hypercall);
#endif
}
break;
case IOCTL_PRIVCMD_MMAP: {
-#define PRIVCMD_MMAP_SZ 32
privcmd_mmap_t mmapcmd;
- privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
+ privcmd_mmap_entry_t msg;
privcmd_mmap_entry_t __user *p;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long va;
int i, rc;
if (!is_initial_xendomain())
@@ -113,85 +110,92 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
return -EFAULT;
p = mmapcmd.entry;
+ if (copy_from_user(&msg, p, sizeof(msg)))
+ return -EFAULT;
- for (i = 0; i < mmapcmd.num;
- i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
- int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
- PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
-
- if (copy_from_user(&msg, p,
- n*sizeof(privcmd_mmap_entry_t)))
- return -EFAULT;
-
- for (j = 0; j < n; j++) {
- struct vm_area_struct *vma =
- find_vma( current->mm, msg[j].va );
-
- if (!vma)
- return -EINVAL;
-
- if (msg[j].va > PAGE_OFFSET)
- return -EINVAL;
-
- if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
- > vma->vm_end )
- return -EINVAL;
-
- if ((rc = direct_remap_pfn_range(
- vma,
- msg[j].va&PAGE_MASK,
- msg[j].mfn,
- msg[j].npages<<PAGE_SHIFT,
- vma->vm_page_prot,
- mmapcmd.dom)) < 0)
- return rc;
- }
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, msg.va);
+ rc = -EINVAL;
+ if (!vma || (msg.va != vma->vm_start) ||
+ !privcmd_enforce_singleshot_mapping(vma))
+ goto mmap_out;
+
+ va = vma->vm_start;
+
+ for (i = 0; i < mmapcmd.num; i++) {
+ rc = -EFAULT;
+ if (copy_from_user(&msg, p, sizeof(msg)))
+ goto mmap_out;
+
+ /* Do not allow range to wrap the address space. */
+ rc = -EINVAL;
+ if ((msg.npages > (LONG_MAX >> PAGE_SHIFT)) ||
+ ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
+ goto mmap_out;
+
+ /* Range chunks must be contiguous in va space. */
+ if ((msg.va != va) ||
+ ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
+ goto mmap_out;
+
+ if ((rc = direct_remap_pfn_range(
+ vma,
+ msg.va & PAGE_MASK,
+ msg.mfn,
+ msg.npages << PAGE_SHIFT,
+ vma->vm_page_prot,
+ mmapcmd.dom)) < 0)
+ goto mmap_out;
+
+ p++;
+ va += msg.npages << PAGE_SHIFT;
}
- ret = 0;
+
+ rc = 0;
+
+ mmap_out:
+ up_read(&mm->mmap_sem);
+ ret = rc;
}
break;
case IOCTL_PRIVCMD_MMAPBATCH: {
privcmd_mmapbatch_t m;
- struct vm_area_struct *vma = NULL;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
xen_pfn_t __user *p;
- unsigned long addr, mfn;
+ unsigned long addr, mfn, nr_pages;
int i;
if (!is_initial_xendomain())
return -EPERM;
- if (copy_from_user(&m, udata, sizeof(m))) {
- ret = -EFAULT;
- goto batch_err;
- }
-
- if (m.dom == DOMID_SELF) {
- ret = -EINVAL;
- goto batch_err;
- }
+ if (copy_from_user(&m, udata, sizeof(m)))
+ return -EFAULT;
- vma = find_vma(current->mm, m.addr);
- if (!vma) {
- ret = -EINVAL;
- goto batch_err;
- }
+ nr_pages = m.num;
+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
+ return -EINVAL;
- if (m.addr > PAGE_OFFSET) {
- ret = -EFAULT;
- goto batch_err;
- }
+ down_read(&mm->mmap_sem);
- if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
- ret = -EFAULT;
- goto batch_err;
+ vma = find_vma(mm, m.addr);
+ if (!vma ||
+ (m.addr != vma->vm_start) ||
+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
+ !privcmd_enforce_singleshot_mapping(vma)) {
+ up_read(&mm->mmap_sem);
+ return -EINVAL;
}
p = m.arr;
addr = m.addr;
- for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
- if (get_user(mfn, p))
+ for (i = 0; i < nr_pages; i++, addr += PAGE_SIZE, p++) {
+ if (get_user(mfn, p)) {
+ up_read(&mm->mmap_sem);
return -EFAULT;
+ }
ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
mfn, PAGE_SIZE,
@@ -200,15 +204,8 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
put_user(0xF0000000 | mfn, p);
}
+ up_read(&mm->mmap_sem);
ret = 0;
- break;
-
- batch_err:
- printk("batch_err ret=%d vma=%p addr=%lx "
- "num=%d arr=%p %lx-%lx\n",
- ret, vma, (unsigned long)m.addr, m.num, m.arr,
- vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
- break;
}
break;
@@ -221,13 +218,35 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
}
#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static struct page *privcmd_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ return NOPAGE_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+ .nopage = privcmd_nopage
+};
+
static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
{
+ /* Unsupported for auto-translate guests. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
/* DONTCOPY is essential for Xen as copy_page_range is broken. */
vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+ vma->vm_ops = &privcmd_vm_ops;
+ vma->vm_private_data = NULL;
return 0;
}
+
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+{
+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+}
#endif
static struct file_operations privcmd_file_ops = {
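The new privcmd_enforce_singleshot_mapping() uses an atomic exchange on vma->vm_private_data as a one-time latch, so a given VMA can be populated by at most one MMAP/MMAPBATCH call. A stripped-down sketch of the same latch, independent of the ioctl plumbing (the caller is assumed to hold mmap_sem around the find_vma() lookup, as the hunks above do):

#include <linux/mm.h>
#include <asm/system.h>	/* xchg() on this kernel generation */

/*
 * Sketch: returns non-zero exactly once per VMA.  The first caller swaps a
 * sentinel into vm_private_data; every later caller sees it and is refused.
 */
static int enforce_singleshot_mapping(struct vm_area_struct *vma)
{
	return xchg(&vma->vm_private_data, (void *)1) == NULL;
}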
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
index 27b8fd283a..b209b4f583 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
@@ -46,11 +46,10 @@ typedef struct tpmif_st {
atomic_t refcnt;
struct backend_info *bi;
- unsigned long mmap_vstart;
grant_handle_t shmem_handle;
grant_ref_t shmem_ref;
- struct page *pagerange;
+ struct page **mmap_pages;
char devname[20];
} tpmif_t;
@@ -80,6 +79,9 @@ int vtpm_release_packets(tpmif_t * tpmif, int send_msgs);
extern int num_frontends;
-#define MMAP_VADDR(t,_req) ((t)->mmap_vstart + ((_req) * PAGE_SIZE))
+static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx]));
+}
#endif /* __TPMIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
index 0105bd93bf..2614aa5126 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
@@ -25,8 +25,8 @@ static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
tpmif_t *tpmif;
tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
- if (!tpmif)
- return ERR_PTR(-ENOMEM);
+ if (tpmif == NULL)
+ goto out_of_memory;
memset(tpmif, 0, sizeof (*tpmif));
tpmif->domid = domid;
@@ -35,22 +35,27 @@ static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid);
atomic_set(&tpmif->refcnt, 1);
- tpmif->pagerange = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE);
- BUG_ON(tpmif->pagerange == NULL);
- tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(
- page_to_pfn(tpmif->pagerange));
+ tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE);
+ if (tpmif->mmap_pages == NULL)
+ goto out_of_memory;
list_add(&tpmif->tpmif_list, &tpmif_list);
num_frontends++;
return tpmif;
+
+ out_of_memory:
+ if (tpmif != NULL)
+ kmem_cache_free(tpmif_cachep, tpmif);
+ printk("%s: out of memory\n", __FUNCTION__);
+ return ERR_PTR(-ENOMEM);
}
static void free_tpmif(tpmif_t * tpmif)
{
num_frontends--;
list_del(&tpmif->tpmif_list);
- balloon_dealloc_empty_page_range(tpmif->pagerange, TPMIF_TX_RING_SIZE);
+ free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE);
kmem_cache_free(tpmif_cachep, tpmif);
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
index 466c3ee581..701a5ad03e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
@@ -253,7 +253,7 @@ int _packet_write(struct packet *pak,
return 0;
}
- gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, tx->ref, tpmif->domid);
if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
@@ -270,7 +270,7 @@ int _packet_write(struct packet *pak,
tocopy = min_t(size_t, size - offset, PAGE_SIZE);
- if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) |
+ if (copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) |
(tx->addr & ~PAGE_MASK)),
&data[offset], tocopy, isuserbuffer)) {
tpmif_put(tpmif);
@@ -278,7 +278,7 @@ int _packet_write(struct packet *pak,
}
tx->size = tocopy;
- gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, handle);
if (unlikely
@@ -391,7 +391,7 @@ static int packet_read_shmem(struct packet *pak,
tx = &tpmif->tx->ring[i].req;
- gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, tx->ref, tpmif->domid);
if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
@@ -414,10 +414,10 @@ static int packet_read_shmem(struct packet *pak,
}
DPRINTK("Copying from mapped memory at %08lx\n",
- (unsigned long)(MMAP_VADDR(tpmif, i) |
+ (unsigned long)(idx_to_kaddr(tpmif, i) |
(tx->addr & ~PAGE_MASK)));
- src = (void *)(MMAP_VADDR(tpmif, i) |
+ src = (void *)(idx_to_kaddr(tpmif, i) |
((tx->addr & ~PAGE_MASK) + pg_offset));
if (copy_to_buffer(&buffer[offset],
src, to_copy, isuserbuffer)) {
@@ -428,7 +428,7 @@ static int packet_read_shmem(struct packet *pak,
tpmif->domid, buffer[offset], buffer[offset + 1],
buffer[offset + 2], buffer[offset + 3]);
- gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, handle);
if (unlikely
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
index 4ee5c5bbfe..f48b0e3726 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
@@ -157,10 +157,12 @@ static void frontend_changed(struct xenbus_device *dev,
case XenbusStateClosing:
be->instance = -1;
+ xenbus_switch_state(dev, XenbusStateClosing);
break;
- case XenbusStateUnknown:
+ case XenbusStateUnknown: /* keep it here */
case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
device_unregister(&be->dev->dev);
tpmback_remove(dev);
break;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
index d7c7d05172..ce5acc2457 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
@@ -9,4 +9,5 @@ xenbus-objs += xenbus_client.o
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o
+obj-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
index 9b389ec06b..0111e8e3a2 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
@@ -35,8 +35,9 @@
#include <xen/xenbus.h>
#include <xen/driver_util.h>
-/* xenbus_probe.c */
-extern char *kasprintf(const char *fmt, ...);
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
@@ -84,7 +85,7 @@ int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
const char **, unsigned int))
{
int err;
- char *state = kasprintf("%s/%s", path, path2);
+ char *state = kasprintf(GFP_KERNEL, "%s/%s", path, path2);
if (!state) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
return -ENOMEM;
@@ -152,7 +153,7 @@ EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
*/
static char *error_path(struct xenbus_device *dev)
{
- return kasprintf("error/%s", dev->nodename);
+ return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
index 38da320b67..f0e42ba715 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
@@ -30,15 +30,22 @@
* IN THE SOFTWARE.
*/
-#include <asm/hypervisor.h>
-#include <xen/evtchn.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/ptrace.h>
+#include <xen/evtchn.h>
#include <xen/xenbus.h>
+
+#include <asm/hypervisor.h>
+
#include "xenbus_comms.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
static int xenbus_irq;
extern void xenbus_probe(void *);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
index bbe4a8c5a8..ba37e61856 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
@@ -40,6 +40,7 @@
#include <linux/wait.h>
#include <linux/fs.h>
#include <linux/poll.h>
+#include <linux/mutex.h>
#include "xenbus_comms.h"
@@ -49,6 +50,10 @@
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
struct xenbus_dev_transaction {
struct list_head list;
struct xenbus_transaction handle;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
index bcd1f6df06..5320368443 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
@@ -42,6 +42,7 @@
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
+#include <linux/mutex.h>
#include <asm/io.h>
#include <asm/page.h>
@@ -55,6 +56,11 @@
#include <xen/hvm.h>
#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
int xen_store_evtchn;
struct xenstore_domain_interface *xen_store_interface;
@@ -67,12 +73,7 @@ static struct notifier_block *xenstore_chain;
static void wait_for_devices(struct xenbus_driver *xendrv);
static int xenbus_probe_frontend(const char *type, const char *name);
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
-static int xenbus_dev_probe(struct device *_dev);
-static int xenbus_dev_remove(struct device *_dev);
static void xenbus_dev_shutdown(struct device *_dev);
/* If something in array of ids matches this device, return it. */
@@ -86,7 +87,7 @@ match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
return NULL;
}
-static int xenbus_match(struct device *_dev, struct device_driver *_drv)
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
{
struct xenbus_driver *drv = to_xenbus_driver(_drv);
@@ -96,17 +97,6 @@ static int xenbus_match(struct device *_dev, struct device_driver *_drv)
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
-struct xen_bus_type
-{
- char *root;
- unsigned int levels;
- int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
- int (*probe)(const char *type, const char *dir);
- struct bus_type bus;
- struct device dev;
-};
-
-
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
{
@@ -143,7 +133,7 @@ static void free_otherend_watch(struct xenbus_device *dev)
}
-static int read_otherend_details(struct xenbus_device *xendev,
+int read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node)
{
int err = xenbus_gather(XBT_NIL, xendev->nodename,
@@ -176,12 +166,6 @@ static int read_backend_details(struct xenbus_device *xendev)
}
-static int read_frontend_details(struct xenbus_device *xendev)
-{
- return read_otherend_details(xendev, "frontend-id", "frontend");
-}
-
-
/* Bus type for frontend drivers. */
static struct xen_bus_type xenbus_frontend = {
.root = "device",
@@ -191,115 +175,17 @@ static struct xen_bus_type xenbus_frontend = {
.bus = {
.name = "xen",
.match = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
+#endif
},
.dev = {
.bus_id = "xen",
},
};
-/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
-static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
-{
- int domid, err;
- const char *devid, *type, *frontend;
- unsigned int typelen;
-
- type = strchr(nodename, '/');
- if (!type)
- return -EINVAL;
- type++;
- typelen = strcspn(type, "/");
- if (!typelen || type[typelen] != '/')
- return -EINVAL;
-
- devid = strrchr(nodename, '/') + 1;
-
- err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
- "frontend", NULL, &frontend,
- NULL);
- if (err)
- return err;
- if (strlen(frontend) == 0)
- err = -ERANGE;
- if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
- err = -ENOENT;
-
- kfree(frontend);
-
- if (err)
- return err;
-
- if (snprintf(bus_id, BUS_ID_SIZE,
- "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
- return -ENOSPC;
- return 0;
-}
-
-static struct xen_bus_type xenbus_backend = {
- .root = "backend",
- .levels = 3, /* backend/type/<frontend>/<id> */
- .get_bus_id = backend_bus_id,
- .probe = xenbus_probe_backend,
- .bus = {
- .name = "xen-backend",
- .match = xenbus_match,
- .probe = xenbus_dev_probe,
- .remove = xenbus_dev_remove,
-// .shutdown = xenbus_dev_shutdown,
- .uevent = xenbus_uevent_backend,
- },
- .dev = {
- .bus_id = "xen-backend",
- },
-};
-
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size)
-{
- struct xenbus_device *xdev;
- struct xenbus_driver *drv;
- int i = 0;
- int length = 0;
-
- DPRINTK("");
-
- if (dev == NULL)
- return -ENODEV;
-
- xdev = to_xenbus_device(dev);
- if (xdev == NULL)
- return -ENODEV;
-
- /* stuff we want to pass to /sbin/hotplug */
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_TYPE=%s", xdev->devicetype);
-
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_PATH=%s", xdev->nodename);
-
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_BASE_PATH=%s", xenbus_backend.root);
-
- /* terminate, set to next free slot, shrink available space */
- envp[i] = NULL;
- envp = &envp[i];
- num_envp -= i;
- buffer = &buffer[length];
- buffer_size -= length;
-
- if (dev->driver) {
- drv = to_xenbus_driver(dev->driver);
- if (drv && drv->uevent)
- return drv->uevent(xdev, envp, num_envp, buffer,
- buffer_size);
- }
-
- return 0;
-}
-
static void otherend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
@@ -322,6 +208,20 @@ static void otherend_changed(struct xenbus_watch *watch,
DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+ /*
+ * Ignore xenbus transitions during shutdown. This prevents us doing
+ * work that can fail e.g., when the rootfs is gone.
+ */
+ if (system_state > SYSTEM_RUNNING) {
+ struct xen_bus_type *bus = bus;
+ bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+ /* If we're frontend, drive the state machine to Closed. */
+ /* This should cause the backend to release our resources. */
+ if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+ xenbus_frontend_closed(dev);
+ return;
+ }
+
if (drv->otherend_changed)
drv->otherend_changed(dev, state);
}
@@ -345,7 +245,7 @@ static int watch_otherend(struct xenbus_device *dev)
}
-static int xenbus_dev_probe(struct device *_dev)
+int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -392,7 +292,7 @@ fail:
return -ENODEV;
}
-static int xenbus_dev_remove(struct device *_dev)
+int xenbus_dev_remove(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -430,14 +330,21 @@ static void xenbus_dev_shutdown(struct device *_dev)
put_device(&dev->dev);
}
-static int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus)
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus)
{
int ret;
drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
drv->driver.owner = drv->owner;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+ drv->driver.probe = xenbus_dev_probe;
+ drv->driver.remove = xenbus_dev_remove;
+ drv->driver.shutdown = xenbus_dev_shutdown;
+#endif
mutex_lock(&xenwatch_mutex);
ret = driver_register(&drv->driver);
@@ -462,14 +369,6 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
}
EXPORT_SYMBOL_GPL(xenbus_register_frontend);
-int xenbus_register_backend(struct xenbus_driver *drv)
-{
- drv->read_otherend_details = read_frontend_details;
-
- return xenbus_register_driver_common(drv, &xenbus_backend);
-}
-EXPORT_SYMBOL_GPL(xenbus_register_backend);
-
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
driver_unregister(&drv->driver);
@@ -545,45 +444,30 @@ static void xenbus_dev_release(struct device *dev)
kfree(to_xenbus_device(dev));
}
-/* Simplified asprintf. */
-char *kasprintf(const char *fmt, ...)
-{
- va_list ap;
- unsigned int len;
- char *p, dummy[1];
-
- va_start(ap, fmt);
- /* FIXME: vsnprintf has a bug, NULL should work */
- len = vsnprintf(dummy, 0, fmt, ap);
- va_end(ap);
-
- p = kmalloc(len + 1, GFP_KERNEL);
- if (!p)
- return NULL;
- va_start(ap, fmt);
- vsprintf(p, fmt, ap);
- va_end(ap);
- return p;
-}
-
static ssize_t xendev_show_nodename(struct device *dev,
- struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+ struct device_attribute *attr,
+#endif
+ char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
static ssize_t xendev_show_devtype(struct device *dev,
- struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+ struct device_attribute *attr,
+#endif
+ char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
-static int xenbus_probe_node(struct xen_bus_type *bus,
- const char *type,
- const char *nodename)
+int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename)
{
int err;
struct xenbus_device *xendev;
@@ -642,7 +526,7 @@ static int xenbus_probe_frontend(const char *type, const char *name)
char *nodename;
int err;
- nodename = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name);
+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
if (!nodename)
return -ENOMEM;
@@ -653,55 +537,6 @@ static int xenbus_probe_frontend(const char *type, const char *name)
return err;
}
-/* backend/<typename>/<frontend-uuid>/<name> */
-static int xenbus_probe_backend_unit(const char *dir,
- const char *type,
- const char *name)
-{
- char *nodename;
- int err;
-
- nodename = kasprintf("%s/%s", dir, name);
- if (!nodename)
- return -ENOMEM;
-
- DPRINTK("%s\n", nodename);
-
- err = xenbus_probe_node(&xenbus_backend, type, nodename);
- kfree(nodename);
- return err;
-}
-
-/* backend/<typename>/<frontend-domid> */
-static int xenbus_probe_backend(const char *type, const char *domid)
-{
- char *nodename;
- int err = 0;
- char **dir;
- unsigned int i, dir_n = 0;
-
- DPRINTK("");
-
- nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
- if (!nodename)
- return -ENOMEM;
-
- dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
- if (IS_ERR(dir)) {
- kfree(nodename);
- return PTR_ERR(dir);
- }
-
- for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_backend_unit(nodename, type, dir[i]);
- if (err)
- break;
- }
- kfree(dir);
- kfree(nodename);
- return err;
-}
-
static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
{
int err = 0;
@@ -722,7 +557,7 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
return err;
}
-static int xenbus_probe_devices(struct xen_bus_type *bus)
+int xenbus_probe_devices(struct xen_bus_type *bus)
{
int err = 0;
char **dir;
@@ -764,7 +599,7 @@ static int strsep_len(const char *str, char c, unsigned int len)
return (len == 0) ? i : -ERANGE;
}
-static void dev_changed(const char *node, struct xen_bus_type *bus)
+void dev_changed(const char *node, struct xen_bus_type *bus)
{
int exists, rootlen;
struct xenbus_device *dev;
@@ -788,7 +623,7 @@ static void dev_changed(const char *node, struct xen_bus_type *bus)
rootlen = strsep_len(node, '/', bus->levels);
if (rootlen < 0)
return;
- root = kasprintf("%.*s", rootlen, node);
+ root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
if (!root)
return;
@@ -809,25 +644,12 @@ static void frontend_changed(struct xenbus_watch *watch,
dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
}
-static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
-{
- DPRINTK("");
-
- dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
-}
-
/* We watch for devices appearing and vanishing. */
static struct xenbus_watch fe_watch = {
.node = "device",
.callback = frontend_changed,
};
-static struct xenbus_watch be_watch = {
- .node = "backend",
- .callback = backend_changed,
-};
-
static int suspend_dev(struct device *dev, void *data)
{
int err = 0;
@@ -898,7 +720,7 @@ void xenbus_suspend(void)
DPRINTK("");
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
- bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+ xenbus_backend_suspend(suspend_dev);
xs_suspend();
}
EXPORT_SYMBOL_GPL(xenbus_suspend);
@@ -908,7 +730,7 @@ void xenbus_resume(void)
xb_init_comms();
xs_resume();
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
- bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
+ xenbus_backend_resume(resume_dev);
}
EXPORT_SYMBOL_GPL(xenbus_resume);
@@ -941,20 +763,17 @@ void xenbus_probe(void *unused)
{
BUG_ON((xenstored_ready <= 0));
- /* Enumerate devices in xenstore. */
+ /* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
- xenbus_probe_devices(&xenbus_backend);
-
- /* Watch for changes. */
register_xenbus_watch(&fe_watch);
- register_xenbus_watch(&be_watch);
+ xenbus_backend_probe_and_watch();
/* Notify others that xenstore is up */
notifier_call_chain(&xenstore_chain, 0, NULL);
}
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
static struct file_operations xsd_kva_fops;
static struct proc_dir_entry *xsd_kva_intf;
static struct proc_dir_entry *xsd_port_intf;
@@ -1006,7 +825,7 @@ static int __init xenbus_probe_init(void)
/* Register ourselves with the kernel bus subsystem */
bus_register(&xenbus_frontend.bus);
- bus_register(&xenbus_backend.bus);
+ xenbus_backend_bus_register();
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -1035,7 +854,7 @@ static int __init xenbus_probe_init(void)
xen_store_evtchn = xen_start_info->store_evtchn =
alloc_unbound.port;
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
/* And finally publish the above info in /proc/xen */
xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
if (xsd_kva_intf) {
@@ -1077,7 +896,7 @@ static int __init xenbus_probe_init(void)
/* Register ourselves with the kernel device subsystem */
device_register(&xenbus_frontend.dev);
- device_register(&xenbus_backend.dev);
+ xenbus_backend_device_register();
if (!is_initial_xendomain())
xenbus_probe(NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 0000000000..2d2e567826
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,74 @@
+/******************************************************************************
+ * xenbus_probe.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_PROBE_H
+#define _XENBUS_PROBE_H
+
+#ifdef CONFIG_XEN_BACKEND
+extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
+extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
+extern void xenbus_backend_probe_and_watch(void);
+extern void xenbus_backend_bus_register(void);
+extern void xenbus_backend_device_register(void);
+#else
+static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_probe_and_watch(void) {}
+static inline void xenbus_backend_bus_register(void) {}
+static inline void xenbus_backend_device_register(void) {}
+#endif
+
+struct xen_bus_type
+{
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+ struct device dev;
+};
+
+extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
+extern int xenbus_dev_probe(struct device *_dev);
+extern int xenbus_dev_remove(struct device *_dev);
+extern int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus);
+extern int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename);
+extern int xenbus_probe_devices(struct xen_bus_type *bus);
+
+extern void dev_changed(const char *node, struct xen_bus_type *bus);
+
+#endif
+
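With the backend half split out, the generic probe path stays backend-agnostic: when CONFIG_XEN_BACKEND is unset, the hooks above collapse to empty inlines and a frontend-only build still links. A minimal sketch of the resulting call sequence (illustrative only, not part of the patch):

	/* Sketch: generic probe path with the backend hooks compiled out. */
	void example_probe_path(void)
	{
		xenbus_probe_devices(&xenbus_frontend);	/* enumerate frontend devices */
		register_xenbus_watch(&fe_watch);	/* watch for device changes */
		xenbus_backend_probe_and_watch();	/* empty inline without CONFIG_XEN_BACKEND */
	}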
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
new file mode 100644
index 0000000000..934e79732d
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -0,0 +1,271 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have (backend half).
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define DPRINTK(fmt, args...) \
+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/maddr.h>
+#include <asm/pgtable.h>
+#include <asm/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/xen_proc.h>
+#include <xen/evtchn.h>
+#include <xen/features.h>
+#include <xen/hvm.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+extern int read_otherend_details(struct xenbus_device *xendev,
+ char *id_node, char *path_node);
+
+static int read_frontend_details(struct xenbus_device *xendev)
+{
+ return read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ int domid, err;
+ const char *devid, *type, *frontend;
+ unsigned int typelen;
+
+ type = strchr(nodename, '/');
+ if (!type)
+ return -EINVAL;
+ type++;
+ typelen = strcspn(type, "/");
+ if (!typelen || type[typelen] != '/')
+ return -EINVAL;
+
+ devid = strrchr(nodename, '/') + 1;
+
+ err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
+ "frontend", NULL, &frontend,
+ NULL);
+ if (err)
+ return err;
+ if (strlen(frontend) == 0)
+ err = -ERANGE;
+ if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
+ err = -ENOENT;
+ kfree(frontend);
+
+ if (err)
+ return err;
+
+ if (snprintf(bus_id, BUS_ID_SIZE,
+ "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+ return -ENOSPC;
+ return 0;
+}
+
+static struct xen_bus_type xenbus_backend = {
+ .root = "backend",
+ .levels = 3, /* backend/type/<frontend>/<id> */
+ .get_bus_id = backend_bus_id,
+ .probe = xenbus_probe_backend,
+ .bus = {
+ .name = "xen-backend",
+ .match = xenbus_match,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+// .shutdown = xenbus_dev_shutdown,
+ .uevent = xenbus_uevent_backend,
+ },
+ .dev = {
+ .bus_id = "xen-backend",
+ },
+};
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size)
+{
+ struct xenbus_device *xdev;
+ struct xenbus_driver *drv;
+ int i = 0;
+ int length = 0;
+
+ DPRINTK("");
+
+ if (dev == NULL)
+ return -ENODEV;
+
+ xdev = to_xenbus_device(dev);
+ if (xdev == NULL)
+ return -ENODEV;
+
+ /* stuff we want to pass to /sbin/hotplug */
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_TYPE=%s", xdev->devicetype);
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_PATH=%s", xdev->nodename);
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_BASE_PATH=%s", xenbus_backend.root);
+
+ /* terminate, set to next free slot, shrink available space */
+ envp[i] = NULL;
+ envp = &envp[i];
+ num_envp -= i;
+ buffer = &buffer[length];
+ buffer_size -= length;
+
+ if (dev->driver) {
+ drv = to_xenbus_driver(dev->driver);
+ if (drv && drv->uevent)
+ return drv->uevent(xdev, envp, num_envp, buffer,
+ buffer_size);
+ }
+
+ return 0;
+}
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+ drv->read_otherend_details = read_frontend_details;
+
+ return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
+
+/* backend/<typename>/<frontend-uuid>/<name> */
+static int xenbus_probe_backend_unit(const char *dir,
+ const char *type,
+ const char *name)
+{
+ char *nodename;
+ int err;
+
+ nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
+ if (!nodename)
+ return -ENOMEM;
+
+ DPRINTK("%s\n", nodename);
+
+ err = xenbus_probe_node(&xenbus_backend, type, nodename);
+ kfree(nodename);
+ return err;
+}
+
+/* backend/<typename>/<frontend-domid> */
+static int xenbus_probe_backend(const char *type, const char *domid)
+{
+ char *nodename;
+ int err = 0;
+ char **dir;
+ unsigned int i, dir_n = 0;
+
+ DPRINTK("");
+
+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid);
+ if (!nodename)
+ return -ENOMEM;
+
+ dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
+ if (IS_ERR(dir)) {
+ kfree(nodename);
+ return PTR_ERR(dir);
+ }
+
+ for (i = 0; i < dir_n; i++) {
+ err = xenbus_probe_backend_unit(nodename, type, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ kfree(nodename);
+ return err;
+}
+
+static void backend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ DPRINTK("");
+
+ dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
+}
+
+static struct xenbus_watch be_watch = {
+ .node = "backend",
+ .callback = backend_changed,
+};
+
+void xenbus_backend_suspend(int (*fn)(struct device *, void *))
+{
+ DPRINTK("");
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_resume(int (*fn)(struct device *, void *))
+{
+ DPRINTK("");
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_probe_and_watch(void)
+{
+ xenbus_probe_devices(&xenbus_backend);
+ register_xenbus_watch(&be_watch);
+}
+
+void xenbus_backend_bus_register(void)
+{
+ bus_register(&xenbus_backend.bus);
+}
+
+void xenbus_backend_device_register(void)
+{
+ device_register(&xenbus_backend.dev);
+}
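For context, a backend driver attaches to this bus through xenbus_register_backend(). The sketch below uses a hypothetical "mydev" driver and callbacks (not taken from the patch) to show the usual shape:

	/* Hypothetical backend driver registration (illustrative sketch). */
	static struct xenbus_device_id mydev_ids[] = {
		{ "mydev" },
		{ "" }
	};

	static struct xenbus_driver mydev_backend = {
		.name	= "mydev",
		.owner	= THIS_MODULE,
		.ids	= mydev_ids,
		.probe	= mydev_probe,		/* invoked via xenbus_dev_probe() */
		.remove	= mydev_remove,
	};

	static int __init mydev_init(void)
	{
		return xenbus_register_backend(&mydev_backend);
	}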
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
index 190fa1e794..4c5052d13a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
@@ -42,11 +42,14 @@
#include <linux/fcntl.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
#include <xen/xenbus.h>
#include "xenbus_comms.h"
-/* xenbus_probe.c */
-extern char *kasprintf(const char *fmt, ...);
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
struct xs_stored_msg {
struct list_head list;
@@ -289,9 +292,9 @@ static char *join(const char *dir, const char *name)
char *buffer;
if (strlen(name) == 0)
- buffer = kasprintf("%s", dir);
+ buffer = kasprintf(GFP_KERNEL, "%s", dir);
else
- buffer = kasprintf("%s/%s", dir, name);
+ buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c b/linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c
new file mode 100644
index 0000000000..382a50f647
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c
@@ -0,0 +1,500 @@
+/**
+ * @file xenoprofile.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon and Jose Renato Santos for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
+ * Separated out arch-generic part
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <xen/evtchn.h>
+#include <xen/xenoprof.h>
+#include <xen/driver_util.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/xenoprof.h>
+#include "../../../drivers/oprofile/cpu_buffer.h"
+#include "../../../drivers/oprofile/event_buffer.h"
+
+#define MAX_XENOPROF_SAMPLES 16
+
+/* sample buffers shared with Xen */
+xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
+/* Shared buffer area */
+struct xenoprof_shared_buffer shared_buffer;
+
+/* Passive sample buffers shared with Xen */
+xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
+/* Passive shared buffer area */
+struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS];
+
+static int xenoprof_start(void);
+static void xenoprof_stop(void);
+
+static int xenoprof_enabled = 0;
+static int xenoprof_is_primary = 0;
+static int active_defined;
+
+/* Number of buffers in shared area (one per VCPU) */
+int nbuf;
+/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
+int ovf_irq[NR_CPUS];
+/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
+char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+
+#ifdef CONFIG_PM
+
+static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+{
+ if (xenoprof_enabled == 1)
+ xenoprof_stop();
+ return 0;
+}
+
+
+static int xenoprof_resume(struct sys_device * dev)
+{
+ if (xenoprof_enabled == 1)
+ xenoprof_start();
+ return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+ set_kset_name("oprofile"),
+ .resume = xenoprof_resume,
+ .suspend = xenoprof_suspend
+};
+
+
+static struct sys_device device_oprofile = {
+ .id = 0,
+ .cls = &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+ int error;
+ if (!(error = sysdev_class_register(&oprofile_sysclass)))
+ error = sysdev_register(&device_oprofile);
+ return error;
+}
+
+
+static void exit_driverfs(void)
+{
+ sysdev_unregister(&device_oprofile);
+ sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+unsigned long long p_oprofile_samples = 0;
+
+unsigned int pdomains;
+struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
+
+static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
+{
+ int head, tail, size;
+
+ head = buf->event_head;
+ tail = buf->event_tail;
+ size = buf->event_size;
+
+ if (tail > head) {
+ while (tail < size) {
+ oprofile_add_pc(buf->event_log[tail].eip,
+ buf->event_log[tail].mode,
+ buf->event_log[tail].event);
+ if (!is_passive)
+ oprofile_samples++;
+ else
+ p_oprofile_samples++;
+ tail++;
+ }
+ tail = 0;
+ }
+ while (tail < head) {
+ oprofile_add_pc(buf->event_log[tail].eip,
+ buf->event_log[tail].mode,
+ buf->event_log[tail].event);
+ if (!is_passive)
+ oprofile_samples++;
+ else
+ p_oprofile_samples++;
+ tail++;
+ }
+
+ buf->event_tail = tail;
+}
+
+static void xenoprof_handle_passive(void)
+{
+ int i, j;
+ int flag_domain, flag_switch = 0;
+
+ for (i = 0; i < pdomains; i++) {
+ flag_domain = 0;
+ for (j = 0; j < passive_domains[i].nbuf; j++) {
+ xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
+ if (buf->event_head == buf->event_tail)
+ continue;
+ if (!flag_domain) {
+ if (!oprofile_add_domain_switch(passive_domains[i].
+ domain_id))
+ goto done;
+ flag_domain = 1;
+ }
+ xenoprof_add_pc(buf, 1);
+ flag_switch = 1;
+ }
+ }
+done:
+ if (flag_switch)
+ oprofile_add_domain_switch(COORDINATOR_DOMAIN);
+}
+
+static irqreturn_t
+xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
+{
+ struct xenoprof_buf * buf;
+ int cpu;
+ static unsigned long flag;
+
+ cpu = smp_processor_id();
+ buf = xenoprof_buf[cpu];
+
+ xenoprof_add_pc(buf, 0);
+
+ if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) {
+ xenoprof_handle_passive();
+ smp_mb__before_clear_bit();
+ clear_bit(0, &flag);
+ }
+
+ return IRQ_HANDLED;
+}
+
+
+static void unbind_virq(void)
+{
+ int i;
+
+ for_each_online_cpu(i) {
+ if (ovf_irq[i] >= 0) {
+ unbind_from_irqhandler(ovf_irq[i], NULL);
+ ovf_irq[i] = -1;
+ }
+ }
+}
+
+
+static int bind_virq(void)
+{
+ int i, result;
+
+ for_each_online_cpu(i) {
+ result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
+ i,
+ xenoprof_ovf_interrupt,
+ SA_INTERRUPT,
+ "xenoprof",
+ NULL);
+
+ if (result < 0) {
+ unbind_virq();
+ return result;
+ }
+
+ ovf_irq[i] = result;
+ }
+
+ return 0;
+}
+
+
+static void unmap_passive_list(void)
+{
+ int i;
+ for (i = 0; i < pdomains; i++)
+ xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
+ pdomains = 0;
+}
+
+
+static int map_xenoprof_buffer(int max_samples)
+{
+ struct xenoprof_get_buffer get_buffer;
+ struct xenoprof_buf *buf;
+ int ret, i;
+
+ if ( shared_buffer.buffer )
+ return 0;
+
+ get_buffer.max_samples = max_samples;
+ ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer);
+ if (ret)
+ return ret;
+ nbuf = get_buffer.nbuf;
+
+ for (i=0; i< nbuf; i++) {
+ buf = (struct xenoprof_buf*)
+ &shared_buffer.buffer[i * get_buffer.bufsize];
+ BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
+ xenoprof_buf[buf->vcpu_id] = buf;
+ }
+
+ return 0;
+}
+
+
+static int xenoprof_setup(void)
+{
+ int ret;
+
+ if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
+ return ret;
+
+ if ( (ret = bind_virq()) )
+ return ret;
+
+ if (xenoprof_is_primary) {
+ /* Define dom0 as an active domain if not done yet */
+ if (!active_defined) {
+ domid_t domid;
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
+ if (ret)
+ goto err;
+ domid = 0;
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
+ if (ret)
+ goto err;
+ active_defined = 1;
+ }
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
+ if (ret)
+ goto err;
+ xenoprof_arch_counter();
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
+
+ if (ret)
+ goto err;
+ }
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
+ if (ret)
+ goto err;
+
+ xenoprof_enabled = 1;
+ return 0;
+ err:
+ unbind_virq();
+ return ret;
+}
+
+
+static void xenoprof_shutdown(void)
+{
+ xenoprof_enabled = 0;
+
+ HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL);
+
+ if (xenoprof_is_primary) {
+ HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL);
+ active_defined = 0;
+ }
+
+ unbind_virq();
+
+ xenoprof_arch_unmap_shared_buffer(&shared_buffer);
+ if (xenoprof_is_primary)
+ unmap_passive_list();
+}
+
+
+static int xenoprof_start(void)
+{
+ int ret = 0;
+
+ if (xenoprof_is_primary)
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
+ if (!ret)
+ xenoprof_arch_start();
+ return ret;
+}
+
+
+static void xenoprof_stop(void)
+{
+ if (xenoprof_is_primary)
+ HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);
+ xenoprof_arch_stop();
+}
+
+
+static int xenoprof_set_active(int * active_domains,
+ unsigned int adomains)
+{
+ int ret = 0;
+ int i;
+ int set_dom0 = 0;
+ domid_t domid;
+
+ if (!xenoprof_is_primary)
+ return 0;
+
+ if (adomains > MAX_OPROF_DOMAINS)
+ return -E2BIG;
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
+ if (ret)
+ return ret;
+
+ for (i=0; i<adomains; i++) {
+ domid = active_domains[i];
+ if (domid != active_domains[i]) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
+ if (ret)
+ goto out;
+ if (active_domains[i] == 0)
+ set_dom0 = 1;
+ }
+ /* dom0 must always be active but may not be in the list */
+ if (!set_dom0) {
+ domid = 0;
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
+ }
+
+out:
+ if (ret)
+ HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
+ active_defined = !ret;
+ return ret;
+}
+
+static int xenoprof_set_passive(int * p_domains,
+ unsigned int pdoms)
+{
+ int ret;
+ int i, j;
+ struct xenoprof_buf *buf;
+
+ if (!xenoprof_is_primary)
+ return 0;
+
+ if (pdoms > MAX_OPROF_DOMAINS)
+ return -E2BIG;
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
+ if (ret)
+ return ret;
+ unmap_passive_list();
+
+ for (i = 0; i < pdoms; i++) {
+ passive_domains[i].domain_id = p_domains[i];
+ passive_domains[i].max_samples = 2048;
+ ret = xenoprof_arch_set_passive(&passive_domains[i],
+ &p_shared_buffer[i]);
+ if (ret)
+ goto out;
+ for (j = 0; j < passive_domains[i].nbuf; j++) {
+ buf = (struct xenoprof_buf *)
+ &p_shared_buffer[i].buffer[j * passive_domains[i].bufsize];
+ BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
+ p_xenoprof_buf[i][buf->vcpu_id] = buf;
+ }
+ }
+
+ pdomains = pdoms;
+ return 0;
+
+out:
+ for (j = 0; j < i; j++)
+ xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[j]);
+
+ return ret;
+}
+
+struct oprofile_operations xenoprof_ops = {
+#ifdef HAVE_XENOPROF_CREATE_FILES
+ .create_files = xenoprof_create_files,
+#endif
+ .set_active = xenoprof_set_active,
+ .set_passive = xenoprof_set_passive,
+ .setup = xenoprof_setup,
+ .shutdown = xenoprof_shutdown,
+ .start = xenoprof_start,
+ .stop = xenoprof_stop
+};
+
+
+/* in order to get driverfs right */
+static int using_xenoprof;
+
+int __init xenoprofile_init(struct oprofile_operations * ops)
+{
+ struct xenoprof_init init;
+ int ret, i;
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
+ if (!ret) {
+ xenoprof_arch_init_counter(&init);
+ xenoprof_is_primary = init.is_primary;
+
+ /* cpu_type is detected by Xen */
+ cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
+ strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
+ xenoprof_ops.cpu_type = cpu_type;
+
+ init_driverfs();
+ using_xenoprof = 1;
+ *ops = xenoprof_ops;
+
+ for (i=0; i<NR_CPUS; i++)
+ ovf_irq[i] = -1;
+
+ active_defined = 0;
+ }
+ printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n",
+ __func__, ret, init.num_events, xenoprof_is_primary);
+ return ret;
+}
+
+
+void xenoprofile_exit(void)
+{
+ if (using_xenoprof)
+ exit_driverfs();
+
+ xenoprof_arch_unmap_shared_buffer(&shared_buffer);
+ if (xenoprof_is_primary) {
+ unmap_passive_list();
+ HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);
+ }
+}
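The arch-specific oprofile glue is expected to hand its operations table to this generic layer. A minimal sketch of that wiring, assuming the conventional entry points (mirroring the i386 side of the patch, not a verbatim copy):

	/* Sketch: arch glue delegating to the shared Xen oprofile core. */
	int __init oprofile_arch_init(struct oprofile_operations *ops)
	{
		return xenoprofile_init(ops);	/* non-zero: caller falls back to native setup */
	}

	void oprofile_arch_exit(void)
	{
		xenoprofile_exit();
	}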
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h
index a6f3e9ea79..a9c3cc28fd 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/fixmap.h
@@ -27,7 +27,6 @@ extern unsigned long __FIXADDR_TOP;
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
-#include <xen/gnttab.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -99,7 +98,7 @@ enum fixed_addresses {
extern void __set_fixmap(enum fixed_addresses idx,
maddr_t phys, pgprot_t flags);
-extern void set_fixaddr_top(unsigned long top);
+extern void set_fixaddr_top(void);
#define set_fixmap(idx, phys) \
__set_fixmap(idx, phys, PAGE_KERNEL)
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
index 2e6d1fa596..a12e349016 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
@@ -260,6 +260,8 @@ HYPERVISOR_event_channel_op(
int cmd, void *arg)
{
int rc = _hypercall2(int, event_channel_op, cmd, arg);
+
+#ifdef CONFIG_XEN_COMPAT_030002
if (unlikely(rc == -ENOSYS)) {
struct evtchn_op op;
op.cmd = cmd;
@@ -267,6 +269,8 @@ HYPERVISOR_event_channel_op(
rc = _hypercall1(int, event_channel_op_compat, &op);
memcpy(arg, &op.u, sizeof(op.u));
}
+#endif
+
return rc;
}
@@ -296,6 +300,8 @@ HYPERVISOR_physdev_op(
int cmd, void *arg)
{
int rc = _hypercall2(int, physdev_op, cmd, arg);
+
+#ifdef CONFIG_XEN_COMPAT_030002
if (unlikely(rc == -ENOSYS)) {
struct physdev_op op;
op.cmd = cmd;
@@ -303,6 +309,8 @@ HYPERVISOR_physdev_op(
rc = _hypercall1(int, physdev_op_compat, &op);
memcpy(arg, &op.u, sizeof(op.u));
}
+#endif
+
return rc;
}
@@ -350,9 +358,11 @@ HYPERVISOR_suspend(
int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
&sched_shutdown, srec);
+#ifdef CONFIG_XEN_COMPAT_030002
if (rc == -ENOSYS)
rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
SHUTDOWN_suspend, srec);
+#endif
return rc;
}
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
index 47586d22f9..f7904ac0b0 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypervisor.h
@@ -56,6 +56,10 @@
extern shared_info_t *HYPERVISOR_shared_info;
+#ifdef CONFIG_X86_32
+extern unsigned long hypervisor_virt_start;
+#endif
+
/* arch/xen/i386/kernel/setup.c */
extern start_info_t *xen_start_info;
#ifdef CONFIG_XEN_PRIVILEGED_GUEST
@@ -94,7 +98,6 @@ void xen_pgd_pin(unsigned long ptr);
void xen_pgd_unpin(unsigned long ptr);
void xen_set_ldt(unsigned long ptr, unsigned long bytes);
-void xen_machphys_update(unsigned long mfn, unsigned long pfn);
#ifdef CONFIG_SMP
#include <linux/cpumask.h>
@@ -131,8 +134,10 @@ HYPERVISOR_yield(
{
int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+#ifdef CONFIG_XEN_COMPAT_030002
if (rc == -ENOSYS)
rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0);
+#endif
return rc;
}
@@ -143,8 +148,10 @@ HYPERVISOR_block(
{
int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL);
+#ifdef CONFIG_XEN_COMPAT_030002
if (rc == -ENOSYS)
rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0);
+#endif
return rc;
}
@@ -159,8 +166,10 @@ HYPERVISOR_shutdown(
int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+#ifdef CONFIG_XEN_COMPAT_030002
if (rc == -ENOSYS)
rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason);
+#endif
return rc;
}
@@ -177,8 +186,10 @@ HYPERVISOR_poll(
set_xen_guest_handle(sched_poll.ports, ports);
rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll);
+#ifdef CONFIG_XEN_COMPAT_030002
if (rc == -ENOSYS)
rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0);
+#endif
return rc;
}
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h
index 7f9b7cdd36..ed5203a573 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/io.h
@@ -54,7 +54,8 @@
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
*/
-#define xlate_dev_mem_ptr(p) __va(p)
+#define xlate_dev_mem_ptr(p, sz) ioremap(p, sz)
+#define xlate_dev_mem_ptr_unmap(p) iounmap(p)
/*
* Convert a virtual cached pointer to an uncached pointer
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h
index b467320d5c..f805d6ea60 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/maddr.h
@@ -9,6 +9,15 @@
#define FOREIGN_FRAME_BIT (1UL<<31)
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+/* Definitions for machine and pseudophysical addresses. */
+#ifdef CONFIG_X86_PAE
+typedef unsigned long long paddr_t;
+typedef unsigned long long maddr_t;
+#else
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+#endif
+
#ifdef CONFIG_XEN
extern unsigned long *phys_to_machine_mapping;
@@ -101,32 +110,13 @@ static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
phys_to_machine_mapping[pfn] = mfn;
}
-
-#else /* !CONFIG_XEN */
-
-#define pfn_to_mfn(pfn) (pfn)
-#define mfn_to_pfn(mfn) (mfn)
-#define mfn_to_local_pfn(mfn) (mfn)
-#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn))
-#define phys_to_machine_mapping_valid(pfn) (1)
-
-#endif /* !CONFIG_XEN */
-
-/* Definitions for machine and pseudophysical addresses. */
-#ifdef CONFIG_X86_PAE
-typedef unsigned long long paddr_t;
-typedef unsigned long long maddr_t;
-#else
-typedef unsigned long paddr_t;
-typedef unsigned long maddr_t;
-#endif
-
static inline maddr_t phys_to_machine(paddr_t phys)
{
maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
machine = (machine << PAGE_SHIFT) | (phys & ~PAGE_MASK);
return machine;
}
+
static inline paddr_t machine_to_phys(maddr_t machine)
{
paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
@@ -134,6 +124,32 @@ static inline paddr_t machine_to_phys(maddr_t machine)
return phys;
}
+static inline paddr_t pte_machine_to_phys(maddr_t machine)
+{
+ /*
+ * In PAE mode, the NX bit needs to be dealt with in the value
+ * passed to mfn_to_pfn(). On x86_64, we need to mask it off,
+ * but for i386 the conversion to ulong for the argument will
+ * clip it off.
+ */
+ paddr_t phys = mfn_to_pfn(machine >> PAGE_SHIFT);
+ phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK);
+ return phys;
+}
+
+#else /* !CONFIG_XEN */
+
+#define pfn_to_mfn(pfn) (pfn)
+#define mfn_to_pfn(mfn) (mfn)
+#define mfn_to_local_pfn(mfn) (mfn)
+#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn))
+#define phys_to_machine_mapping_valid(pfn) (1)
+#define phys_to_machine(phys) ((maddr_t)(phys))
+#define machine_to_phys(mach) ((paddr_t)(mach))
+#define pte_machine_to_phys(mach) ((paddr_t)(mach))
+
+#endif /* !CONFIG_XEN */
+
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(__pa(v)))
#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
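To see what pte_machine_to_phys() buys under PAE, a worked example with illustrative numbers (not from the patch):

	/* A PAE PTE value of 0x8000000012345067 carries NX in bit 63.  The
	 * i386 ulong conversion of the mfn_to_pfn() argument clips that bit,
	 * so the m2p lookup uses mfn 0x12345, while "machine &
	 * ~PHYSICAL_PAGE_MASK" re-attaches both the low flags (0x067) and
	 * the high bits, keeping NX intact in the returned value.
	 */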
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
index 0f829c8cd3..ff183db39e 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h
@@ -6,6 +6,16 @@
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
+#ifdef CONFIG_X86_PAE
+#define __PHYSICAL_MASK_SHIFT 36
+#define __PHYSICAL_MASK ((1ULL << __PHYSICAL_MASK_SHIFT) - 1)
+#define PHYSICAL_PAGE_MASK (~((1ULL << PAGE_SHIFT) - 1) & __PHYSICAL_MASK)
+#else
+#define __PHYSICAL_MASK_SHIFT 32
+#define __PHYSICAL_MASK (~0UL)
+#define PHYSICAL_PAGE_MASK (PAGE_MASK & __PHYSICAL_MASK)
+#endif
+
#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
@@ -85,7 +95,7 @@ static inline unsigned long long pte_val(pte_t x)
if (x.pte_low) {
ret = x.pte_low | (unsigned long long)x.pte_high << 32;
- ret = machine_to_phys(ret) | 1;
+ ret = pte_machine_to_phys(ret) | 1;
} else {
ret = 0;
}
@@ -94,13 +104,13 @@ static inline unsigned long long pte_val(pte_t x)
static inline unsigned long long pmd_val(pmd_t x)
{
unsigned long long ret = x.pmd;
- if (ret) ret = machine_to_phys(ret) | 1;
+ if (ret) ret = pte_machine_to_phys(ret) | 1;
return ret;
}
static inline unsigned long long pgd_val(pgd_t x)
{
unsigned long long ret = x.pgd;
- if (ret) ret = machine_to_phys(ret) | 1;
+ if (ret) ret = pte_machine_to_phys(ret) | 1;
return ret;
}
static inline unsigned long long pte_val_ma(pte_t x)
@@ -115,7 +125,8 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define pgprot_val(x) ((x).pgprot)
#include <asm/maddr.h>
#define boot_pte_t pte_t /* or would you rather have a typedef */
-#define pte_val(x) (((x).pte_low & 1) ? machine_to_phys((x).pte_low) : \
+#define pte_val(x) (((x).pte_low & 1) ? \
+ pte_machine_to_phys((x).pte_low) : \
(x).pte_low)
#define pte_val_ma(x) ((x).pte_low)
#define __pte(x) ({ unsigned long _x = (x); \
@@ -125,7 +136,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
static inline unsigned long pgd_val(pgd_t x)
{
unsigned long ret = x.pgd;
- if (ret) ret = machine_to_phys(ret) | 1;
+ if (ret) ret = pte_machine_to_phys(ret) | 1;
return ret;
}
#define HPAGE_SHIFT 22
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h
index 3791d2de39..bd6346f410 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-2level-defs.h
@@ -9,7 +9,6 @@
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-#define PTRS_PER_PGD_NO_HV (HYPERVISOR_VIRT_START >> PGDIR_SHIFT)
/*
* the i386 is two-level, so we don't really have any
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h
index 10445c142c..148c8d9e78 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/pgtable-3level-defs.h
@@ -8,7 +8,6 @@
*/
#define PGDIR_SHIFT 30
#define PTRS_PER_PGD 4
-#define PTRS_PER_PGD_NO_HV 4
/*
* PMD_SHIFT determines the size of the area a middle-level
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
index 6a4e5e4508..807ca388c5 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/synch_bitops.h
@@ -9,6 +9,10 @@
#include <linux/config.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define ADDR (*(volatile long *) addr)
static __inline__ void synch_set_bit(int nr, volatile void * addr)
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/xenoprof.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/xenoprof.h
new file mode 100644
index 0000000000..2733e00ee4
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/xenoprof.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ * asm-i386/mach-xen/asm/xenoprof.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#ifndef __ASM_XENOPROF_H__
+#define __ASM_XENOPROF_H__
+#ifdef CONFIG_XEN
+
+struct super_block;
+struct dentry;
+int xenoprof_create_files(struct super_block * sb, struct dentry * root);
+#define HAVE_XENOPROF_CREATE_FILES
+
+struct xenoprof_init;
+void xenoprof_arch_init_counter(struct xenoprof_init *init);
+void xenoprof_arch_counter(void);
+void xenoprof_arch_start(void);
+void xenoprof_arch_stop(void);
+
+struct xenoprof_arch_shared_buffer {
+ /* nothing */
+};
+struct xenoprof_shared_buffer;
+void xenoprof_arch_unmap_shared_buffer(struct xenoprof_shared_buffer* sbuf);
+struct xenoprof_get_buffer;
+int xenoprof_arch_map_shared_buffer(struct xenoprof_get_buffer* get_buffer, struct xenoprof_shared_buffer* sbuf);
+struct xenoprof_passive;
+int xenoprof_arch_set_passive(struct xenoprof_passive* pdomain, struct xenoprof_shared_buffer* sbuf);
+
+#endif /* CONFIG_XEN */
+#endif /* __ASM_XENOPROF_H__ */
diff --git a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
index 0f1caa0604..bed1e1d211 100644
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/setup_arch_post.h
@@ -56,15 +56,15 @@ static void __init machine_specific_arch_setup(void)
struct xen_machphys_mapping mapping;
unsigned long machine_to_phys_nr_ents;
struct xen_platform_parameters pp;
- struct callback_register event = {
+ static struct callback_register __initdata event = {
.type = CALLBACKTYPE_event,
.address = { __KERNEL_CS, (unsigned long)hypervisor_callback },
};
- struct callback_register failsafe = {
+ static struct callback_register __initdata failsafe = {
.type = CALLBACKTYPE_failsafe,
.address = { __KERNEL_CS, (unsigned long)failsafe_callback },
};
- struct callback_register nmi_cb = {
+ static struct callback_register __initdata nmi_cb = {
.type = CALLBACKTYPE_nmi,
.address = { __KERNEL_CS, (unsigned long)nmi },
};
@@ -72,23 +72,30 @@ static void __init machine_specific_arch_setup(void)
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
if (ret == 0)
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
+#ifdef CONFIG_XEN_COMPAT_030002
if (ret == -ENOSYS)
ret = HYPERVISOR_set_callbacks(
event.address.cs, event.address.eip,
failsafe.address.cs, failsafe.address.eip);
+#endif
BUG_ON(ret);
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
+#ifdef CONFIG_XEN_COMPAT_030002
if (ret == -ENOSYS) {
- struct xennmi_callback cb;
+ static struct xennmi_callback __initdata cb = {
+ .handler_address = (unsigned long)nmi
+ };
- cb.handler_address = nmi_cb.address.eip;
HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
}
+#endif
if (HYPERVISOR_xen_version(XENVER_platform_parameters,
- &pp) == 0)
- set_fixaddr_top(pp.virt_start - PAGE_SIZE);
+ &pp) == 0) {
+ hypervisor_virt_start = pp.virt_start;
+ set_fixaddr_top();
+ }
machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
index 8375336941..7a522be483 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
@@ -33,12 +33,13 @@
#ifndef __HYPERCALL_H__
#define __HYPERCALL_H__
-#include <linux/string.h> /* memcpy() */
-
#ifndef __HYPERVISOR_H__
# error "please don't include this file directly"
#endif
+#include <asm/xen/xcom_hcall.h>
+struct xencomm_handle;
+
/*
* Assembler stubs for hyper-calls.
*/
@@ -157,157 +158,117 @@
(type)__res; \
})
-static inline int
-HYPERVISOR_sched_op_compat(
- int cmd, unsigned long arg)
-{
- return _hypercall2(int, sched_op_compat, cmd, arg);
-}
static inline int
-HYPERVISOR_sched_op(
- int cmd, void *arg)
+xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg)
{
return _hypercall2(int, sched_op, cmd, arg);
}
static inline long
-HYPERVISOR_set_timer_op(
- u64 timeout)
+HYPERVISOR_set_timer_op(u64 timeout)
{
- unsigned long timeout_hi = (unsigned long)(timeout>>32);
- unsigned long timeout_lo = (unsigned long)timeout;
- return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
+ unsigned long timeout_hi = (unsigned long)(timeout >> 32);
+ unsigned long timeout_lo = (unsigned long)timeout;
+ return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
}
static inline int
-HYPERVISOR_dom0_op(
- dom0_op_t *dom0_op)
+xencomm_arch_hypercall_dom0_op(struct xencomm_handle *op)
{
- dom0_op->interface_version = DOM0_INTERFACE_VERSION;
- return _hypercall1(int, dom0_op, dom0_op);
+ return _hypercall1(int, dom0_op, op);
}
static inline int
-HYPERVISOR_multicall(
- void *call_list, int nr_calls)
+xencomm_arch_hypercall_sysctl(struct xencomm_handle *op)
{
- return _hypercall2(int, multicall, call_list, nr_calls);
+ return _hypercall1(int, sysctl, op);
}
-//XXX xen/ia64 copy_from_guest() is broken.
-// This is a temporal work around until it is fixed.
static inline int
-____HYPERVISOR_memory_op(
- unsigned int cmd, void *arg)
+xencomm_arch_hypercall_domctl(struct xencomm_handle *op)
{
- return _hypercall2(int, memory_op, cmd, arg);
+ return _hypercall1(int, domctl, op);
}
-#include <xen/interface/memory.h>
-#ifdef CONFIG_VMX_GUEST
-# define ia64_xenmem_reservation_op(op, xmr) (0)
-#else
-int ia64_xenmem_reservation_op(unsigned long op,
- struct xen_memory_reservation* reservation__);
-#endif
static inline int
-HYPERVISOR_memory_op(
- unsigned int cmd, void *arg)
+xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list,
+ int nr_calls)
{
- switch (cmd) {
- case XENMEM_increase_reservation:
- case XENMEM_decrease_reservation:
- case XENMEM_populate_physmap:
- return ia64_xenmem_reservation_op(cmd,
- (struct xen_memory_reservation*)arg);
- default:
- return ____HYPERVISOR_memory_op(cmd, arg);
- }
- /* NOTREACHED */
+ return _hypercall2(int, multicall, call_list, nr_calls);
}
static inline int
-HYPERVISOR_event_channel_op(
- int cmd, void *arg)
+xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg)
{
- int rc = _hypercall2(int, event_channel_op, cmd, arg);
- if (unlikely(rc == -ENOSYS)) {
- struct evtchn_op op;
- op.cmd = cmd;
- memcpy(&op.u, arg, sizeof(op.u));
- rc = _hypercall1(int, event_channel_op_compat, &op);
- }
- return rc;
+ return _hypercall2(int, memory_op, cmd, arg);
}
static inline int
-HYPERVISOR_acm_op(
- unsigned int cmd, void *arg)
+xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall2(int, acm_op, cmd, arg);
+ return _hypercall2(int, event_channel_op, cmd, arg);
}
static inline int
-HYPERVISOR_xen_version(
- int cmd, void *arg)
+xencomm_arch_hypercall_acm_op(unsigned int cmd, struct xencomm_handle *arg)
{
- return _hypercall2(int, xen_version, cmd, arg);
+ return _hypercall2(int, acm_op, cmd, arg);
}
static inline int
-HYPERVISOR_console_io(
- int cmd, int count, char *str)
+xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg)
{
- return _hypercall3(int, console_io, cmd, count, str);
+ return _hypercall2(int, xen_version, cmd, arg);
}
static inline int
-HYPERVISOR_physdev_op(
- int cmd, void *arg)
+xencomm_arch_hypercall_console_io(int cmd, int count,
+ struct xencomm_handle *str)
{
- int rc = _hypercall2(int, physdev_op, cmd, arg);
- if (unlikely(rc == -ENOSYS)) {
- struct physdev_op op;
- op.cmd = cmd;
- memcpy(&op.u, arg, sizeof(op.u));
- rc = _hypercall1(int, physdev_op_compat, &op);
- }
- return rc;
+ return _hypercall3(int, console_io, cmd, count, str);
}
-//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant.
static inline int
-____HYPERVISOR_grant_table_op(
- unsigned int cmd, void *uop, unsigned int count,
- unsigned long pa1, unsigned long pa2)
+xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall5(int, grant_table_op, cmd, uop, count, pa1, pa2);
+ return _hypercall2(int, physdev_op, cmd, arg);
+}
+
+static inline int
+xencomm_arch_hypercall_grant_table_op(unsigned int cmd,
+ struct xencomm_handle *uop,
+ unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
}
int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
+extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg);
+
static inline int
-HYPERVISOR_vcpu_op(
- int cmd, int vcpuid, void *extra_args)
+xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg)
{
- return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
+ return _hypercall2(int, callback_op, cmd, arg);
}
-extern int HYPERVISOR_suspend(unsigned long srec);
-
static inline unsigned long
-HYPERVISOR_hvm_op(
- int cmd, void *arg)
+xencomm_arch_hypercall_hvm_op(int cmd, void *arg)
{
return _hypercall2(unsigned long, hvm_op, cmd, arg);
}
static inline int
-HYPERVISOR_callback_op(
- int cmd, void *arg)
+HYPERVISOR_physdev_op(int cmd, void *arg)
{
- return _hypercall2(int, callback_op, cmd, arg);
+ switch (cmd) {
+ case PHYSDEVOP_eoi:
+ return _hypercall1(int, ia64_fast_eoi,
+ ((struct physdev_eoi *)arg)->irq);
+ default:
+ return xencomm_hypercall_physdev_op(cmd, arg);
+ }
}
extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
@@ -322,6 +283,9 @@ static inline void exit_idle(void) {}
#ifdef CONFIG_XEN
#include <asm/xen/privop.h>
#endif /* CONFIG_XEN */
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
static inline unsigned long
__HYPERVISOR_ioremap(unsigned long ioaddr, unsigned long size)
@@ -417,7 +381,42 @@ HYPERVISOR_add_physmap(unsigned long gpfn, unsigned long mfn,
return ret;
}
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
+static inline unsigned long
+HYPERVISOR_expose_p2m(unsigned long conv_start_gpfn,
+ unsigned long assign_start_gpfn,
+ unsigned long expose_size, unsigned long granule_pfn)
+{
+ return _hypercall5(unsigned long, ia64_dom0vp_op,
+ IA64_DOM0VP_expose_p2m, conv_start_gpfn,
+ assign_start_gpfn, expose_size, granule_pfn);
+}
+#endif
+
// for balloon driver
#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0)
+/* Use xencomm to do hypercalls. */
+#ifdef MODULE
+#define HYPERVISOR_sched_op xencomm_mini_hypercall_sched_op
+#define HYPERVISOR_event_channel_op xencomm_mini_hypercall_event_channel_op
+#define HYPERVISOR_callback_op xencomm_mini_hypercall_callback_op
+#define HYPERVISOR_multicall xencomm_mini_hypercall_multicall
+#define HYPERVISOR_xen_version xencomm_mini_hypercall_xen_version
+#define HYPERVISOR_console_io xencomm_mini_hypercall_console_io
+#define HYPERVISOR_hvm_op xencomm_mini_hypercall_hvm_op
+#define HYPERVISOR_memory_op xencomm_mini_hypercall_memory_op
+#else
+#define HYPERVISOR_sched_op xencomm_hypercall_sched_op
+#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op
+#define HYPERVISOR_callback_op xencomm_hypercall_callback_op
+#define HYPERVISOR_multicall xencomm_hypercall_multicall
+#define HYPERVISOR_xen_version xencomm_hypercall_xen_version
+#define HYPERVISOR_console_io xencomm_hypercall_console_io
+#define HYPERVISOR_hvm_op xencomm_hypercall_hvm_op
+#define HYPERVISOR_memory_op xencomm_hypercall_memory_op
+#endif
+
+#define HYPERVISOR_suspend xencomm_hypercall_suspend
+
#endif /* __HYPERCALL_H__ */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
index 7b1a9a7fc9..083884c130 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypervisor.h
@@ -75,9 +75,6 @@ HYPERVISOR_yield(
{
int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
- if (rc == -ENOSYS)
- rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0);
-
return rc;
}
@@ -87,9 +84,6 @@ HYPERVISOR_block(
{
int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL);
- if (rc == -ENOSYS)
- rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0);
-
return rc;
}
@@ -103,9 +97,6 @@ HYPERVISOR_shutdown(
int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
- if (rc == -ENOSYS)
- rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason);
-
return rc;
}
@@ -122,8 +113,6 @@ HYPERVISOR_poll(
set_xen_guest_handle(sched_poll.ports, ports);
rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll);
- if (rc == -ENOSYS)
- rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0);
return rc;
}
@@ -138,6 +127,7 @@ int direct_remap_pfn_range(struct vm_area_struct *vma,
pgprot_t prot,
domid_t domid);
struct file;
+int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
int privcmd_mmap(struct file * file, struct vm_area_struct * vma);
#define HAVE_ARCH_PRIVCMD_MMAP
@@ -201,6 +191,22 @@ MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd,
mcl->args[2] = count;
}
+/*
+ * for blktap.c
+ * int create_lookup_pte_addr(struct mm_struct *mm,
+ * unsigned long address,
+ * uint64_t *ptep);
+ */
+#define create_lookup_pte_addr(mm, address, ptep) \
+ ({ \
+ printk(KERN_EMERG \
+ "%s:%d " \
+ "create_lookup_pte_addr() isn't supported.\n", \
+ __func__, __LINE__); \
+ BUG(); \
+ (-ENOSYS); \
+ })
+
// for debug
asmlinkage int xprintk(const char *fmt, ...);
#define xprintd(fmt, ...) xprintk("%s:%d " fmt, __func__, __LINE__, \
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/maddr.h b/linux-2.6-xen-sparse/include/asm-ia64/maddr.h
index 55c6f94d10..cbdef5a96e 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/maddr.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/maddr.h
@@ -10,11 +10,26 @@
#define INVALID_P2M_ENTRY (~0UL)
+#ifdef CONFIG_XEN_IA64_EXPOSE_P2M
+extern int p2m_initialized;
+extern unsigned long p2m_min_low_pfn;
+extern unsigned long p2m_max_low_pfn;
+extern unsigned long p2m_convert_min_pfn;
+extern unsigned long p2m_convert_max_pfn;
+extern volatile const pte_t* p2m_pte;
+unsigned long p2m_phystomach(unsigned long gpfn);
+#else
+#define p2m_initialized (0)
+#define p2m_phystomach(gpfn) INVALID_MFN
+#endif
+
/* XXX xen page size != page size */
static inline unsigned long
pfn_to_mfn_for_dma(unsigned long pfn)
{
unsigned long mfn;
+ if (p2m_initialized)
+ return p2m_phystomach(pfn);
mfn = HYPERVISOR_phystomach(pfn);
BUG_ON(mfn == 0); // XXX
BUG_ON(mfn == INVALID_P2M_ENTRY); // XXX
@@ -81,11 +96,6 @@ mfn_to_local_pfn(unsigned long mfn)
#define virt_to_machine(virt) __pa(virt) // for tpmfront.c
#define set_phys_to_machine(pfn, mfn) do { } while (0)
-#ifdef CONFIG_VMX_GUEST
-extern void xen_machphys_update(unsigned long mfn, unsigned long pfn);
-#else /* CONFIG_VMX_GUEST */
-#define xen_machphys_update(mfn, pfn) do { } while (0)
-#endif /* CONFIG_VMX_GUEST */
typedef unsigned long maddr_t; // to compile netback, netfront
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
index 073b3a2a77..6f3c20a8ed 100644
--- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h
@@ -14,12 +14,9 @@
#define IA64_PARAVIRTUALIZED
-#if 0
-#undef XSI_BASE
/* At 1 MB, before per-cpu space but still addressable using addl instead
of movl. */
#define XSI_BASE 0xfffffffffff00000
-#endif
/* Address of mapped regs. */
#define XMAPPEDREGS_BASE (XSI_BASE + XSI_SIZE)
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/xen/xcom_hcall.h b/linux-2.6-xen-sparse/include/asm-ia64/xen/xcom_hcall.h
new file mode 100644
index 0000000000..3c073a71cd
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/xcom_hcall.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2006 Tristan Gingold <tristan.gingold@bull.net>, Bull SAS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_XENCOMM_HCALL_H_
+#define _LINUX_XENCOMM_HCALL_H_
+
+/* These functions create an inline descriptor for the parameters and
+   call the corresponding xencomm_arch_hypercall_X.
+   Architectures should define HYPERVISOR_xxx as xencomm_hypercall_xxx unless
+   they want to use their own wrapper. */
+extern int xencomm_hypercall_console_io(int cmd, int count, char *str);
+
+extern int xencomm_hypercall_event_channel_op(int cmd, void *op);
+
+extern int xencomm_hypercall_xen_version(int cmd, void *arg);
+
+extern int xencomm_hypercall_physdev_op(int cmd, void *op);
+
+extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op,
+ unsigned int count);
+
+extern int xencomm_hypercall_sched_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_multicall(void *call_list, int nr_calls);
+
+extern int xencomm_hypercall_callback_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg);
+
+extern unsigned long xencomm_hypercall_hvm_op(int cmd, void *arg);
+
+extern int xencomm_hypercall_suspend(unsigned long srec);
+
+/* Using mini xencomm. */
+extern int xencomm_mini_hypercall_console_io(int cmd, int count, char *str);
+
+extern int xencomm_mini_hypercall_event_channel_op(int cmd, void *op);
+
+extern int xencomm_mini_hypercall_xen_version(int cmd, void *arg);
+
+extern int xencomm_mini_hypercall_physdev_op(int cmd, void *op);
+
+extern int xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op,
+ unsigned int count);
+
+extern int xencomm_mini_hypercall_sched_op(int cmd, void *arg);
+
+extern int xencomm_mini_hypercall_multicall(void *call_list, int nr_calls);
+
+extern int xencomm_mini_hypercall_callback_op(int cmd, void *arg);
+
+extern int xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg);
+
+extern unsigned long xencomm_mini_hypercall_hvm_op(int cmd, void *arg);
+
+/* For privcmd.  Declare the argument type locally to avoid an include
+   storm; type coherency is checked within privcmd.c. */
+struct privcmd_hypercall;
+extern int privcmd_hypercall(struct privcmd_hypercall *hypercall);
+
+#endif /* _LINUX_XENCOMM_HCALL_H_ */
diff --git a/linux-2.6-xen-sparse/include/asm-ia64/xen/xencomm.h b/linux-2.6-xen-sparse/include/asm-ia64/xen/xencomm.h
new file mode 100644
index 0000000000..eae11369f1
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/xencomm.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2006 Hollis Blanchard <hollisb@us.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_XENCOMM_H_
+#define _LINUX_XENCOMM_H_
+
+#include <xen/interface/xencomm.h>
+
+#define XENCOMM_MINI_ADDRS 3
+struct xencomm_mini {
+ struct xencomm_desc _desc;
+ uint64_t address[XENCOMM_MINI_ADDRS];
+};
+
+/* Must be called before any hypercall. */
+extern void xencomm_init (void);
+
+/* To avoid additional virt-to-phys conversions, an opaque structure is
+   presented. */
+struct xencomm_handle;
+
+extern int xencomm_create(void *buffer, unsigned long bytes,
+ struct xencomm_handle **desc, gfp_t type);
+extern void xencomm_free(struct xencomm_handle *desc);
+
+extern int xencomm_create_mini(struct xencomm_mini *area, int *nbr_area,
+ void *buffer, unsigned long bytes,
+ struct xencomm_handle **ret);
+
+/* Translate virtual address to physical address. */
+extern unsigned long xencomm_vaddr_to_paddr(unsigned long vaddr);
+
+/* Inline version. To be used only on linear space (kernel space). */
+static inline struct xencomm_handle *
+xencomm_create_inline(void *buffer)
+{
+ unsigned long paddr;
+
+ paddr = xencomm_vaddr_to_paddr((unsigned long)buffer);
+ return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
+}
+
+#define xen_guest_handle(hnd) ((hnd).p)
+
+#endif /* _LINUX_XENCOMM_H_ */
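Taken together with the hypercall.h changes, every ia64 hypercall argument now reaches Xen as a xencomm descriptor (a list of physical addresses) rather than a guest-virtual pointer, with modules using the stack-based "mini" variants. A rough usage sketch of the primitives declared above (illustrative only; str and count are placeholders, error handling trimmed):

	/* Sketch: describe a kernel buffer to Xen through a xencomm
	 * descriptor before issuing a console_io hypercall. */
	struct xencomm_handle *desc;
	int rc = xencomm_create(str, count, &desc, GFP_KERNEL);
	if (rc == 0) {
		rc = xencomm_arch_hypercall_console_io(CONSOLEIO_write, count, desc);
		xencomm_free(desc);
	}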
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h
index 64ae42e1aa..29beed9512 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h
@@ -14,7 +14,6 @@
#include <linux/config.h>
#include <linux/kernel.h>
#include <asm/apicdef.h>
-#include <xen/gnttab.h>
#include <asm/page.h>
#include <asm/vsyscall.h>
#include <asm/vsyscall32.h>
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
index 14fb01d3d2..956a4c4b0d 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/hypercall.h
@@ -258,6 +258,8 @@ HYPERVISOR_event_channel_op(
int cmd, void *arg)
{
int rc = _hypercall2(int, event_channel_op, cmd, arg);
+
+#ifdef CONFIG_XEN_COMPAT_030002
if (unlikely(rc == -ENOSYS)) {
struct evtchn_op op;
op.cmd = cmd;
@@ -265,6 +267,8 @@ HYPERVISOR_event_channel_op(
rc = _hypercall1(int, event_channel_op_compat, &op);
memcpy(arg, &op.u, sizeof(op.u));
}
+#endif
+
return rc;
}
@@ -294,6 +298,8 @@ HYPERVISOR_physdev_op(
int cmd, void *arg)
{
int rc = _hypercall2(int, physdev_op, cmd, arg);
+
+#ifdef CONFIG_XEN_COMPAT_030002
if (unlikely(rc == -ENOSYS)) {
struct physdev_op op;
op.cmd = cmd;
@@ -301,6 +307,8 @@ HYPERVISOR_physdev_op(
rc = _hypercall1(int, physdev_op_compat, &op);
memcpy(arg, &op.u, sizeof(op.u));
}
+#endif
+
return rc;
}
@@ -351,9 +359,11 @@ HYPERVISOR_suspend(
int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
&sched_shutdown, srec);
+#ifdef CONFIG_XEN_COMPAT_030002
if (rc == -ENOSYS)
rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
SHUTDOWN_suspend, srec);
+#endif
return rc;
}
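The hunks above all follow the same compatibility pattern; the sketch below restates
it with hypothetical helper names (do_new_style_op()/do_old_style_op() are stand-ins,
not real hypercall wrappers):

/* Illustration only: issue the new multiplexed hypercall first and fall
 * back to the pre-3.0.3 interface only when the hypervisor reports
 * -ENOSYS, and only when CONFIG_XEN_COMPAT_030002 is enabled. */
static inline int example_compat_op(int cmd, void *arg)
{
	int rc = do_new_style_op(cmd, arg);	/* hypothetical helper */

#ifdef CONFIG_XEN_COMPAT_030002
	if (unlikely(rc == -ENOSYS))
		rc = do_old_style_op(cmd, arg);	/* hypothetical helper */
#endif
	return rc;
}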
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
index 1ae9c89ba9..4d13b1b9c7 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h
@@ -346,7 +346,8 @@ extern int iommu_bio_merge;
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
*/
-#define xlate_dev_mem_ptr(p) __va(p)
+#define xlate_dev_mem_ptr(p, sz) ioremap(p, sz)
+#define xlate_dev_mem_ptr_unmap(p) iounmap(p)
/*
* Convert a virtual cached pointer to an uncached pointer
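Under Xen, /dev/mem may reference machine frames that are not in the direct map, hence
the switch from __va() to an ioremap()/iounmap() pair. The caller below is a hedged
sketch loosely modelled on a /dev/mem read path, not the actual drivers/char/mem.c code:

/* Sketch: map the physical range, copy it out, then unmap it again. */
static ssize_t example_read_phys(unsigned long p, char __user *buf, size_t sz)
{
	void *ptr = xlate_dev_mem_ptr(p, sz);
	ssize_t ret = sz;

	if (!ptr)
		return -EFAULT;
	if (copy_to_user(buf, ptr, sz))
		ret = -EFAULT;
	xlate_dev_mem_ptr_unmap(ptr);
	return ret;
}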
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h
index 0104de8082..77544b3ca2 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/maddr.h
@@ -9,6 +9,10 @@
#define FOREIGN_FRAME_BIT (1UL<<63)
#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT)
+/* Definitions for machine and pseudophysical addresses. */
+typedef unsigned long paddr_t;
+typedef unsigned long maddr_t;
+
#ifdef CONFIG_XEN
extern unsigned long *phys_to_machine_mapping;
@@ -99,20 +103,6 @@ static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
phys_to_machine_mapping[pfn] = mfn;
}
-#else /* !CONFIG_XEN */
-
-#define pfn_to_mfn(pfn) (pfn)
-#define mfn_to_pfn(mfn) (mfn)
-#define mfn_to_local_pfn(mfn) (mfn)
-#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn))
-#define phys_to_machine_mapping_valid(pfn) (1)
-
-#endif /* !CONFIG_XEN */
-
-/* Definitions for machine and pseudophysical addresses. */
-typedef unsigned long paddr_t;
-typedef unsigned long maddr_t;
-
static inline maddr_t phys_to_machine(paddr_t phys)
{
maddr_t machine = pfn_to_mfn(phys >> PAGE_SHIFT);
@@ -127,6 +117,27 @@ static inline paddr_t machine_to_phys(maddr_t machine)
return phys;
}
+static inline paddr_t pte_machine_to_phys(maddr_t machine)
+{
+ paddr_t phys;
+ phys = mfn_to_pfn((machine & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT);
+ phys = (phys << PAGE_SHIFT) | (machine & ~PHYSICAL_PAGE_MASK);
+ return phys;
+}
+
+#else /* !CONFIG_XEN */
+
+#define pfn_to_mfn(pfn) (pfn)
+#define mfn_to_pfn(mfn) (mfn)
+#define mfn_to_local_pfn(mfn) (mfn)
+#define set_phys_to_machine(pfn, mfn) BUG_ON((pfn) != (mfn))
+#define phys_to_machine_mapping_valid(pfn) (1)
+#define phys_to_machine(phys) ((maddr_t)(phys))
+#define machine_to_phys(mach) ((paddr_t)(mach))
+#define pte_machine_to_phys(mach) ((paddr_t)(mach))
+
+#endif /* !CONFIG_XEN */
+
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(__pa(v)))
#define virt_to_mfn(v) (pfn_to_mfn(__pa(v) >> PAGE_SHIFT))
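The new pte_machine_to_phys() exists because a raw PTE carries flag bits above the
physical address field (notably _PAGE_NX at bit 63); masking with PHYSICAL_PAGE_MASK
keeps those bits out of the mfn-to-pfn lookup and re-attaches them afterwards. A small
illustration, not part of the patch:

/* Translate a present raw PTE value back to pseudo-physical form.
 * machine_to_phys() would feed the flag bits into mfn_to_pfn();
 * pte_machine_to_phys() does not. */
static inline unsigned long example_pte_to_pseudophys(unsigned long pte)
{
	if (!(pte & _PAGE_PRESENT))
		return pte;		/* not present: nothing to translate */
	return pte_machine_to_phys(pte);
}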
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
index cd23862b05..7573cce405 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/page.h
@@ -33,6 +33,13 @@
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
+
+/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+#define __PHYSICAL_MASK_SHIFT 46
+#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
+#define __VIRTUAL_MASK_SHIFT 48
+#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
+
#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
#define THREAD_ORDER 1
@@ -90,28 +97,28 @@ typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
-#define pte_val(x) (((x).pte & 1) ? machine_to_phys((x).pte) : \
+#define pte_val(x) (((x).pte & 1) ? pte_machine_to_phys((x).pte) : \
(x).pte)
#define pte_val_ma(x) ((x).pte)
static inline unsigned long pmd_val(pmd_t x)
{
unsigned long ret = x.pmd;
- if (ret) ret = machine_to_phys(ret);
+ if (ret) ret = pte_machine_to_phys(ret);
return ret;
}
static inline unsigned long pud_val(pud_t x)
{
unsigned long ret = x.pud;
- if (ret) ret = machine_to_phys(ret);
+ if (ret) ret = pte_machine_to_phys(ret);
return ret;
}
static inline unsigned long pgd_val(pgd_t x)
{
unsigned long ret = x.pgd;
- if (ret) ret = machine_to_phys(ret);
+ if (ret) ret = pte_machine_to_phys(ret);
return ret;
}
@@ -163,12 +170,6 @@ static inline pgd_t __pgd(unsigned long x)
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
-#define __PHYSICAL_MASK_SHIFT 46
-#define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1)
-#define __VIRTUAL_MASK_SHIFT 48
-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
-
#define KERNEL_TEXT_SIZE (40UL*1024*1024)
#define KERNEL_TEXT_START 0xffffffff80000000UL
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
index ff6d94f9e0..0c4d0a888e 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h
@@ -205,8 +205,14 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
#define _PAGE_PROTNONE 0x080 /* If not present */
#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
+#ifdef CONFIG_XEN_COMPAT_030002
+extern unsigned int __kernel_page_user;
+#else
+#define __kernel_page_user 0
+#endif
+
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -219,13 +225,13 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_NX)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define __PAGE_KERNEL \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user)
#define __PAGE_KERNEL_EXEC \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user)
#define __PAGE_KERNEL_NOCACHE \
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX)
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user)
#define __PAGE_KERNEL_RO \
- (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX)
+ (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_NX | __kernel_page_user)
#define __PAGE_KERNEL_VSYSCALL \
(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define __PAGE_KERNEL_VSYSCALL_NOCACHE \
@@ -422,7 +428,8 @@ static inline pud_t *pud_offset_k(pgd_t *pgd, unsigned long address)
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
-#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
+#define pmd_bad(x) ((pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \
+ != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT)))
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
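For context on the __kernel_page_user bit introduced earlier in this file: on 64-bit
Xen the guest kernel does not run in ring 0, and hypervisors as old as 3.0.2 need
kernel mappings to carry _PAGE_USER. How the variable actually gets its value lies
outside these hunks; the sketch below is an assumption about the boot-time selection,
not the patch's code:

#ifdef CONFIG_XEN_COMPAT_030002
unsigned int __kernel_page_user;

/* Hypothetical boot-time choice: old hypervisors get user-visible kernel
 * pages, newer ones keep kernel mappings supervisor-only. */
static void __init example_select_kernel_page_user(int old_hypervisor)
{
	__kernel_page_user = old_hypervisor ? _PAGE_USER : 0;
}
#endif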
diff --git a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h
index e21d4ee6f2..5244e2855e 100644
--- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h
+++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/setup_arch_post.h
@@ -15,20 +15,20 @@ extern void nmi(void);
static void __init machine_specific_arch_setup(void)
{
int ret;
- struct callback_register event = {
+ static struct callback_register __initdata event = {
.type = CALLBACKTYPE_event,
.address = (unsigned long) hypervisor_callback,
};
- struct callback_register failsafe = {
+ static struct callback_register __initdata failsafe = {
.type = CALLBACKTYPE_failsafe,
.address = (unsigned long)failsafe_callback,
};
- struct callback_register syscall = {
+ static struct callback_register __initdata syscall = {
.type = CALLBACKTYPE_syscall,
.address = (unsigned long)system_call,
};
#ifdef CONFIG_X86_LOCAL_APIC
- struct callback_register nmi_cb = {
+ static struct callback_register __initdata nmi_cb = {
.type = CALLBACKTYPE_nmi,
.address = (unsigned long)nmi,
};
@@ -39,20 +39,25 @@ static void __init machine_specific_arch_setup(void)
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
if (ret == 0)
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
+#ifdef CONFIG_XEN_COMPAT_030002
if (ret == -ENOSYS)
ret = HYPERVISOR_set_callbacks(
event.address,
failsafe.address,
syscall.address);
+#endif
BUG_ON(ret);
#ifdef CONFIG_X86_LOCAL_APIC
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
+#ifdef CONFIG_XEN_COMPAT_030002
if (ret == -ENOSYS) {
- struct xennmi_callback cb;
+ static struct xennmi_callback __initdata cb = {
+ .handler_address = (unsigned long)nmi
+ };
- cb.handler_address = nmi_cb.address;
HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
}
#endif
+#endif
}
diff --git a/linux-2.6-xen-sparse/include/linux/skbuff.h b/linux-2.6-xen-sparse/include/linux/skbuff.h
index 9ea3924ab6..07b8f3036d 100644
--- a/linux-2.6-xen-sparse/include/linux/skbuff.h
+++ b/linux-2.6-xen-sparse/include/linux/skbuff.h
@@ -974,15 +974,16 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
#define NET_IP_ALIGN 2
#endif
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
{
- if (!skb->data_len) {
- skb->len = len;
- skb->tail = skb->data + len;
- } else
- ___pskb_trim(skb, len, 0);
+ if (unlikely(skb->data_len)) {
+ WARN_ON(1);
+ return;
+ }
+ skb->len = len;
+ skb->tail = skb->data + len;
}
/**
@@ -992,6 +993,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
*
* Cut the length of a buffer down by removing data from the tail. If
* the buffer is already under the length specified it is not modified.
+ * The skb must be linear.
*/
static inline void skb_trim(struct sk_buff *skb, unsigned int len)
{
@@ -1002,12 +1004,10 @@ static inline void skb_trim(struct sk_buff *skb, unsigned int len)
static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
{
- if (!skb->data_len) {
- skb->len = len;
- skb->tail = skb->data+len;
- return 0;
- }
- return ___pskb_trim(skb, len, 1);
+ if (skb->data_len)
+ return ___pskb_trim(skb, len);
+ __skb_trim(skb, len);
+ return 0;
}
static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
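With the split above, skb_trim()/__skb_trim() are only valid for linear skbs, while
anything that may carry paged data or a frag list must go through pskb_trim() and
check its return value. A minimal caller, given purely as an illustration:

/* Sketch: trim an skb of unknown shape down to 'len' bytes. */
static int example_trim(struct sk_buff *skb, unsigned int len)
{
	if (skb_is_nonlinear(skb))
		return pskb_trim(skb, len);	/* may reallocate, can fail */
	skb_trim(skb, len);			/* linear: cannot fail */
	return 0;
}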
diff --git a/linux-2.6-xen-sparse/include/xen/balloon.h b/linux-2.6-xen-sparse/include/xen/balloon.h
index 60d7099aa1..d26c62bef4 100644
--- a/linux-2.6-xen-sparse/include/xen/balloon.h
+++ b/linux-2.6-xen-sparse/include/xen/balloon.h
@@ -38,23 +38,13 @@
* Inform the balloon driver that it should allow some slop for device-driver
* memory activities.
*/
-void
-balloon_update_driver_allowance(
- long delta);
+void balloon_update_driver_allowance(long delta);
-/* Allocate an empty low-memory page range. */
-struct page *
-balloon_alloc_empty_page_range(
- unsigned long nr_pages);
+/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
+struct page **alloc_empty_pages_and_pagevec(int nr_pages);
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
-/* Deallocate an empty page range, adding to the balloon. */
-void
-balloon_dealloc_empty_page_range(
- struct page *page, unsigned long nr_pages);
-
-void
-balloon_release_driver_page(
- struct page *page);
+void balloon_release_driver_page(struct page *page);
/*
* Prevent the balloon driver from changing the memory reservation during
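A hedged sketch of how a backend driver would use the renamed interface; the batch
size and the mapping step are illustrative, not taken from the patch:

static int example_use_empty_pages(void)
{
	int nr = 16;
	struct page **pages = alloc_empty_pages_and_pagevec(nr);

	if (!pages)
		return -ENOMEM;
	/* ... map foreign frames into these empty low-memory pages ... */
	free_empty_pages_and_pagevec(pages, nr);
	return 0;
}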
diff --git a/linux-2.6-xen-sparse/include/xen/gnttab.h b/linux-2.6-xen-sparse/include/xen/gnttab.h
index 676fca5054..f1543c01d6 100644
--- a/linux-2.6-xen-sparse/include/xen/gnttab.h
+++ b/linux-2.6-xen-sparse/include/xen/gnttab.h
@@ -39,6 +39,7 @@
#include <linux/config.h>
#include <asm/hypervisor.h>
+#include <asm/maddr.h> /* maddr_t */
#include <xen/interface/grant_table.h>
#include <xen/features.h>
@@ -118,7 +119,7 @@ int gnttab_suspend(void);
int gnttab_resume(void);
static inline void
-gnttab_set_map_op(struct gnttab_map_grant_ref *map, unsigned long addr,
+gnttab_set_map_op(struct gnttab_map_grant_ref *map, maddr_t addr,
uint32_t flags, grant_ref_t ref, domid_t domid)
{
if (flags & GNTMAP_contains_pte)
@@ -134,7 +135,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, unsigned long addr,
}
static inline void
-gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, unsigned long addr,
+gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, maddr_t addr,
uint32_t flags, grant_handle_t handle)
{
if (flags & GNTMAP_contains_pte)
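The address parameter is widened to maddr_t so that the GNTMAP_contains_pte case,
where a (possibly 64-bit) pte machine address is passed on PAE, is not truncated.
The helper below is an illustrative caller for the ordinary case, assuming a grant
mapped at a kernel virtual address:

/* Sketch: fill a map op for a host (kernel-virtual) mapping of grant 'ref'. */
static void example_fill_map_op(struct gnttab_map_grant_ref *op,
				unsigned long kvaddr, grant_ref_t ref,
				domid_t domid)
{
	gnttab_set_map_op(op, (maddr_t)kvaddr, GNTMAP_host_map, ref, domid);
}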
diff --git a/linux-2.6-xen-sparse/include/xen/public/evtchn.h b/linux-2.6-xen-sparse/include/xen/public/evtchn.h
index 99e2948240..938d4da2bd 100644
--- a/linux-2.6-xen-sparse/include/xen/public/evtchn.h
+++ b/linux-2.6-xen-sparse/include/xen/public/evtchn.h
@@ -33,9 +33,6 @@
#ifndef __LINUX_PUBLIC_EVTCHN_H__
#define __LINUX_PUBLIC_EVTCHN_H__
-/* /dev/xen/evtchn resides at device number major=10, minor=201 */
-#define EVTCHN_MINOR 201
-
/*
* Bind a fresh port to VIRQ @virq.
* Return allocated port.
diff --git a/linux-2.6-xen-sparse/include/xen/xenbus.h b/linux-2.6-xen-sparse/include/xen/xenbus.h
index 8e259ce777..c7cb7eaa3a 100644
--- a/linux-2.6-xen-sparse/include/xen/xenbus.h
+++ b/linux-2.6-xen-sparse/include/xen/xenbus.h
@@ -38,6 +38,7 @@
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/completion.h>
+#include <linux/init.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/xenbus.h>
diff --git a/linux-2.6-xen-sparse/include/xen/xencons.h b/linux-2.6-xen-sparse/include/xen/xencons.h
index fa2160d89d..ae873746aa 100644
--- a/linux-2.6-xen-sparse/include/xen/xencons.h
+++ b/linux-2.6-xen-sparse/include/xen/xencons.h
@@ -1,6 +1,9 @@
#ifndef __ASM_XENCONS_H__
#define __ASM_XENCONS_H__
+struct dom0_vga_console_info;
+void dom0_init_screen_info(const struct dom0_vga_console_info *info);
+
void xencons_force_flush(void);
void xencons_resume(void);
diff --git a/linux-2.6-xen-sparse/include/xen/xenoprof.h b/linux-2.6-xen-sparse/include/xen/xenoprof.h
new file mode 100644
index 0000000000..4c3ab0fb21
--- /dev/null
+++ b/linux-2.6-xen-sparse/include/xen/xenoprof.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * xen/xenoprof.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __XEN_XENOPROF_H__
+#define __XEN_XENOPROF_H__
+#ifdef CONFIG_XEN
+
+#include <asm/xenoprof.h>
+
+struct oprofile_operations;
+int xenoprofile_init(struct oprofile_operations * ops);
+void xenoprofile_exit(void);
+
+struct xenoprof_shared_buffer {
+ char *buffer;
+ struct xenoprof_arch_shared_buffer arch;
+};
+#else
+#define xenoprofile_init(ops) (-ENOSYS)
+#define xenoprofile_exit() do { } while (0)
+
+#endif /* CONFIG_XEN */
+#endif /* __XEN_XENOPROF_H__ */
diff --git a/linux-2.6-xen-sparse/lib/Makefile b/linux-2.6-xen-sparse/lib/Makefile
index 2657bb5d10..1f96de0517 100644
--- a/linux-2.6-xen-sparse/lib/Makefile
+++ b/linux-2.6-xen-sparse/lib/Makefile
@@ -45,9 +45,7 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
-ifneq ($(CONFIG_XEN_IA64_DOM0_NON_VP),y)
swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o
-endif
hostprogs-y := gen_crc32table
clean-files := crc32table.h
diff --git a/linux-2.6-xen-sparse/mm/Kconfig b/linux-2.6-xen-sparse/mm/Kconfig
index c9e8c6ddb8..f54f49fbb6 100644
--- a/linux-2.6-xen-sparse/mm/Kconfig
+++ b/linux-2.6-xen-sparse/mm/Kconfig
@@ -115,7 +115,7 @@ config SPARSEMEM_EXTREME
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
- depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+ depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
diff --git a/linux-2.6-xen-sparse/mm/memory.c b/linux-2.6-xen-sparse/mm/memory.c
index 1a63339203..d7319d32f9 100644
--- a/linux-2.6-xen-sparse/mm/memory.c
+++ b/linux-2.6-xen-sparse/mm/memory.c
@@ -390,7 +390,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
if (vma->vm_flags & VM_PFNMAP) {
unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
- if ((pfn == vma->vm_pgoff + off) || !pfn_valid(pfn))
+ if (pfn == vma->vm_pgoff + off)
return NULL;
if (!is_cow_mapping(vma->vm_flags))
return NULL;
@@ -405,7 +405,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
* Remove this test eventually!
*/
if (unlikely(!pfn_valid(pfn))) {
- print_bad_pte(vma, pte, addr);
+ if (!(vma->vm_flags & VM_RESERVED))
+ print_bad_pte(vma, pte, addr);
return NULL;
}
@@ -1534,6 +1535,7 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
memset(kaddr, 0, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(dst);
return;
}
diff --git a/linux-2.6-xen-sparse/mm/mmap.c b/linux-2.6-xen-sparse/mm/mmap.c
index a01e3ffb9d..f1b2f0f0ed 100644
--- a/linux-2.6-xen-sparse/mm/mmap.c
+++ b/linux-2.6-xen-sparse/mm/mmap.c
@@ -30,6 +30,10 @@
#include <asm/cacheflush.h>
#include <asm/tlb.h>
+#ifndef arch_mmap_check
+#define arch_mmap_check(addr, len, flags) (0)
+#endif
+
static void unmap_region(struct mm_struct *mm,
struct vm_area_struct *vma, struct vm_area_struct *prev,
unsigned long start, unsigned long end);
@@ -906,6 +910,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
if (!len)
return -EINVAL;
+ error = arch_mmap_check(addr, len, flags);
+ if (error)
+ return error;
+
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE)
@@ -1846,6 +1854,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
unsigned long flags;
struct rb_node ** rb_link, * rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
+ int error;
len = PAGE_ALIGN(len);
if (!len)
@@ -1854,6 +1863,12 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
if ((addr + len) > TASK_SIZE || (addr + len) < addr)
return -EINVAL;
+ flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
+ error = arch_mmap_check(addr, len, flags);
+ if (error)
+ return error;
+
/*
* mlock MCL_FUTURE?
*/
@@ -1894,8 +1909,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
if (security_vm_enough_memory(len >> PAGE_SHIFT))
return -ENOMEM;
- flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
/* Can we just expand an old private anonymous mapping? */
if (vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL))
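mm/mmap.c now consults an optional arch_mmap_check() hook in both do_mmap_pgoff() and
do_brk(), falling back to the no-op default defined above. The definition below is
purely a hypothetical override showing where an architecture would supply its own
policy; the rejection rule itself is invented for illustration:

/* In an architecture header (hypothetical policy): refuse mappings that
 * start below the lowest allowed user address. */
#define arch_mmap_check(addr, len, flags)	\
	((addr) != 0 && (addr) < FIRST_USER_ADDRESS ? -EINVAL : 0)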
diff --git a/linux-2.6-xen-sparse/mm/page_alloc.c b/linux-2.6-xen-sparse/mm/page_alloc.c
index c0f3c60537..f12323955a 100644
--- a/linux-2.6-xen-sparse/mm/page_alloc.c
+++ b/linux-2.6-xen-sparse/mm/page_alloc.c
@@ -951,7 +951,8 @@ restart:
alloc_flags |= ALLOC_HARDER;
if (gfp_mask & __GFP_HIGH)
alloc_flags |= ALLOC_HIGH;
- alloc_flags |= ALLOC_CPUSET;
+ if (wait)
+ alloc_flags |= ALLOC_CPUSET;
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
diff --git a/linux-2.6-xen-sparse/net/core/skbuff.c b/linux-2.6-xen-sparse/net/core/skbuff.c
index 236946bd7e..064e6277b1 100644
--- a/linux-2.6-xen-sparse/net/core/skbuff.c
+++ b/linux-2.6-xen-sparse/net/core/skbuff.c
@@ -261,11 +261,11 @@ nodata:
}
-static void skb_drop_fraglist(struct sk_buff *skb)
+static void skb_drop_list(struct sk_buff **listp)
{
- struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ struct sk_buff *list = *listp;
- skb_shinfo(skb)->frag_list = NULL;
+ *listp = NULL;
do {
struct sk_buff *this = list;
@@ -274,6 +274,11 @@ static void skb_drop_fraglist(struct sk_buff *skb)
} while (list);
}
+static inline void skb_drop_fraglist(struct sk_buff *skb)
+{
+ skb_drop_list(&skb_shinfo(skb)->frag_list);
+}
+
static void skb_clone_fraglist(struct sk_buff *skb)
{
struct sk_buff *list;
@@ -604,6 +609,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
n->csum = skb->csum;
n->ip_summed = skb->ip_summed;
+ n->truesize += skb->data_len;
n->data_len = skb->data_len;
n->len = skb->len;
@@ -798,49 +804,86 @@ struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
return nskb;
}
-/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
- * If realloc==0 and trimming is impossible without change of data,
- * it is BUG().
+/* Trims skb to length len. It can change skb pointers.
*/
-int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
+ struct sk_buff **fragp;
+ struct sk_buff *frag;
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
int i;
+ int err;
- for (i = 0; i < nfrags; i++) {
+ if (skb_cloned(skb) &&
+ unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
+ return err;
+
+ i = 0;
+ if (offset >= len)
+ goto drop_pages;
+
+ for (; i < nfrags; i++) {
int end = offset + skb_shinfo(skb)->frags[i].size;
- if (end > len) {
- if (skb_cloned(skb)) {
- BUG_ON(!realloc);
- if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
- return -ENOMEM;
- }
- if (len <= offset) {
- put_page(skb_shinfo(skb)->frags[i].page);
- skb_shinfo(skb)->nr_frags--;
- } else {
- skb_shinfo(skb)->frags[i].size = len - offset;
- }
+
+ if (end < len) {
+ offset = end;
+ continue;
+ }
+
+ skb_shinfo(skb)->frags[i++].size = len - offset;
+
+drop_pages:
+ skb_shinfo(skb)->nr_frags = i;
+
+ for (; i < nfrags; i++)
+ put_page(skb_shinfo(skb)->frags[i].page);
+
+ if (skb_shinfo(skb)->frag_list)
+ skb_drop_fraglist(skb);
+ goto done;
+ }
+
+ for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
+ fragp = &frag->next) {
+ int end = offset + frag->len;
+
+ if (skb_shared(frag)) {
+ struct sk_buff *nfrag;
+
+ nfrag = skb_clone(frag, GFP_ATOMIC);
+ if (unlikely(!nfrag))
+ return -ENOMEM;
+
+ nfrag->next = frag->next;
+ kfree_skb(frag);
+ frag = nfrag;
+ *fragp = frag;
}
- offset = end;
+
+ if (end < len) {
+ offset = end;
+ continue;
+ }
+
+ if (end > len &&
+ unlikely((err = pskb_trim(frag, len - offset))))
+ return err;
+
+ if (frag->next)
+ skb_drop_list(&frag->next);
+ break;
}
- if (offset < len) {
+done:
+ if (len > skb_headlen(skb)) {
skb->data_len -= skb->len - len;
skb->len = len;
} else {
- if (len <= skb_headlen(skb)) {
- skb->len = len;
- skb->data_len = 0;
- skb->tail = skb->data + len;
- if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
- skb_drop_fraglist(skb);
- } else {
- skb->data_len -= skb->len - len;
- skb->len = len;
- }
+ skb->len = len;
+ skb->data_len = 0;
+ skb->tail = skb->data + len;
}
return 0;