aboutsummaryrefslogtreecommitdiffstats
path: root/linux-2.6-xen-sparse/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'linux-2.6-xen-sparse/drivers')
-rw-r--r--linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c12
-rw-r--r--linux-2.6-xen-sparse/drivers/char/tty_io.c14
-rw-r--r--linux-2.6-xen-sparse/drivers/serial/Kconfig1
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/balloon/Makefile2
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c201
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/balloon/common.h58
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c165
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c119
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkback/common.h7
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c5
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c23
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c74
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkfront/block.h2
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c85
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c799
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c7
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/char/mem.c51
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/console/console.c36
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/Makefile2
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/features.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/gnttab.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c185
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/reboot.c210
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/core/skbuff.c7
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c3
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netback/common.h7
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netback/interface.c80
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netback/loopback.c62
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netback/netback.c216
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c123
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c249
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c195
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/tpmback/common.h8
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c19
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c14
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c4
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile1
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c9
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c11
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c5
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c297
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h74
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c271
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c11
-rw-r--r--linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c500
45 files changed, 2816 insertions, 1416 deletions
diff --git a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
index 71c7dd3a00..adf016ba90 100644
--- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
+++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c
@@ -41,6 +41,7 @@
#include <xen/evtchn.h>
#include <xen/interface/grant_table.h>
#include <xen/interface/io/tpmif.h>
+#include <xen/gnttab.h>
#include <xen/xenbus.h>
#include "tpm.h"
#include "tpm_vtpm.h"
@@ -343,6 +344,7 @@ static void backend_changed(struct xenbus_device *dev,
case XenbusStateInitialising:
case XenbusStateInitWait:
case XenbusStateInitialised:
+ case XenbusStateUnknown:
break;
case XenbusStateConnected:
@@ -351,13 +353,14 @@ static void backend_changed(struct xenbus_device *dev,
case XenbusStateClosing:
tpmif_set_connected_state(tp, 0);
+ xenbus_frontend_closed(dev);
break;
- case XenbusStateUnknown:
case XenbusStateClosed:
+ tpmif_set_connected_state(tp, 0);
if (tp->is_suspended == 0)
device_unregister(&dev->dev);
- xenbus_switch_state(dev, XenbusStateClosed);
+ xenbus_frontend_closed(dev);
break;
}
}
@@ -419,9 +422,10 @@ static int tpmfront_suspend(struct xenbus_device *dev)
mutex_lock(&suspend_lock);
tp->is_suspended = 1;
- for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 25; ctr++) {
+ for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) {
if ((ctr % 10) == 0)
- printk("TPM-FE [INFO]: Waiting for outstanding request.\n");
+ printk("TPM-FE [INFO]: Waiting for outstanding "
+ "request.\n");
/*
* Wait for a request to be responded to.
*/
diff --git a/linux-2.6-xen-sparse/drivers/char/tty_io.c b/linux-2.6-xen-sparse/drivers/char/tty_io.c
index f6f0689771..0372d93bca 100644
--- a/linux-2.6-xen-sparse/drivers/char/tty_io.c
+++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c
@@ -2761,7 +2761,7 @@ static void flush_to_ldisc(void *private_)
struct tty_struct *tty = (struct tty_struct *) private_;
unsigned long flags;
struct tty_ldisc *disc;
- struct tty_buffer *tbuf;
+ struct tty_buffer *tbuf, *head;
int count;
char *char_buf;
unsigned char *flag_buf;
@@ -2778,7 +2778,9 @@ static void flush_to_ldisc(void *private_)
goto out;
}
spin_lock_irqsave(&tty->buf.lock, flags);
- while((tbuf = tty->buf.head) != NULL) {
+ head = tty->buf.head;
+ tty->buf.head = NULL;
+ while((tbuf = head) != NULL) {
while ((count = tbuf->commit - tbuf->read) != 0) {
char_buf = tbuf->char_buf_ptr + tbuf->read;
flag_buf = tbuf->flag_buf_ptr + tbuf->read;
@@ -2787,10 +2789,12 @@ static void flush_to_ldisc(void *private_)
disc->receive_buf(tty, char_buf, flag_buf, count);
spin_lock_irqsave(&tty->buf.lock, flags);
}
- if (tbuf->active)
+ if (tbuf->active) {
+ tty->buf.head = head;
break;
- tty->buf.head = tbuf->next;
- if (tty->buf.head == NULL)
+ }
+ head = tbuf->next;
+ if (head == NULL)
tty->buf.tail = NULL;
tty_buffer_free(tty, tbuf);
}
diff --git a/linux-2.6-xen-sparse/drivers/serial/Kconfig b/linux-2.6-xen-sparse/drivers/serial/Kconfig
index fa1fdb0b37..c6be86d83e 100644
--- a/linux-2.6-xen-sparse/drivers/serial/Kconfig
+++ b/linux-2.6-xen-sparse/drivers/serial/Kconfig
@@ -821,6 +821,7 @@ config SERIAL_ICOM
tristate "IBM Multiport Serial Adapter"
depends on PCI && (PPC_ISERIES || PPC_PSERIES)
select SERIAL_CORE
+ select FW_LOADER
help
This driver is for a family of multiport serial adapters
including 2 port RVX, 2 port internal modem, 4 port internal
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile b/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile
index 0e3a3485c4..3fc3d0bae5 100644
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/Makefile
@@ -1,2 +1,2 @@
-obj-y += balloon.o
+obj-y := balloon.o sysfs.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
index a6a8396c05..b621d76383 100644
--- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
@@ -53,10 +53,8 @@
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <linux/list.h>
-
#include <xen/xenbus.h>
-
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+#include "common.h"
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *balloon_pde;
@@ -71,9 +69,7 @@ static DECLARE_MUTEX(balloon_mutex);
*/
DEFINE_SPINLOCK(balloon_lock);
-/* We aim for 'current allocation' == 'target allocation'. */
-static unsigned long current_pages;
-static unsigned long target_pages;
+struct balloon_stats balloon_stats;
/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
@@ -81,18 +77,8 @@ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
/* VM /proc information for memory */
extern unsigned long totalram_pages;
-/* We may hit the hard limit in Xen. If we do then we remember it. */
-static unsigned long hard_limit;
-
-/*
- * Drivers may alter the memory reservation independently, but they must
- * inform the balloon driver so that we can avoid hitting the hard limit.
- */
-static unsigned long driver_pages;
-
/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
-static unsigned long balloon_low, balloon_high;
/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
@@ -124,10 +110,10 @@ static void balloon_append(struct page *page)
/* Lowmem is re-populated first, so highmem pages go at list tail. */
if (PageHighMem(page)) {
list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
- balloon_high++;
+ bs.balloon_high++;
} else {
list_add(PAGE_TO_LIST(page), &ballooned_pages);
- balloon_low++;
+ bs.balloon_low++;
}
}
@@ -143,9 +129,9 @@ static struct page *balloon_retrieve(void)
UNLIST_PAGE(page);
if (PageHighMem(page))
- balloon_high--;
+ bs.balloon_high--;
else
- balloon_low--;
+ bs.balloon_low--;
return page;
}
@@ -172,9 +158,9 @@ static void balloon_alarm(unsigned long unused)
static unsigned long current_target(void)
{
- unsigned long target = min(target_pages, hard_limit);
- if (target > (current_pages + balloon_low + balloon_high))
- target = current_pages + balloon_low + balloon_high;
+ unsigned long target = min(bs.target_pages, bs.hard_limit);
+ if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
+ target = bs.current_pages + bs.balloon_low + bs.balloon_high;
return target;
}
@@ -216,7 +202,8 @@ static int increase_reservation(unsigned long nr_pages)
BUG_ON(ret != rc);
}
if (rc >= 0)
- hard_limit = current_pages + rc - driver_pages;
+ bs.hard_limit = (bs.current_pages + rc -
+ bs.driver_pages);
goto out;
}
@@ -228,9 +215,7 @@ static int increase_reservation(unsigned long nr_pages)
BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
phys_to_machine_mapping_valid(pfn));
- /* Update P->M and M->P tables. */
set_phys_to_machine(pfn, frame_list[i]);
- xen_machphys_update(frame_list[i], pfn);
/* Link back into the page tables if not highmem. */
if (pfn < max_low_pfn) {
@@ -248,8 +233,8 @@ static int increase_reservation(unsigned long nr_pages)
__free_page(page);
}
- current_pages += nr_pages;
- totalram_pages = current_pages;
+ bs.current_pages += nr_pages;
+ totalram_pages = bs.current_pages;
out:
balloon_unlock(flags);
@@ -317,8 +302,8 @@ static int decrease_reservation(unsigned long nr_pages)
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
BUG_ON(ret != nr_pages);
- current_pages -= nr_pages;
- totalram_pages = current_pages;
+ bs.current_pages -= nr_pages;
+ totalram_pages = bs.current_pages;
balloon_unlock(flags);
@@ -339,7 +324,7 @@ static void balloon_process(void *unused)
down(&balloon_mutex);
do {
- credit = current_target() - current_pages;
+ credit = current_target() - bs.current_pages;
if (credit > 0)
need_sleep = (increase_reservation(credit) != 0);
if (credit < 0)
@@ -352,18 +337,18 @@ static void balloon_process(void *unused)
} while ((credit != 0) && !need_sleep);
/* Schedule more work if there is some still to be done. */
- if (current_target() != current_pages)
+ if (current_target() != bs.current_pages)
mod_timer(&balloon_timer, jiffies + HZ);
up(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
-static void set_new_target(unsigned long target)
+void balloon_set_new_target(unsigned long target)
{
/* No need for lock. Not read-modify-write updates. */
- hard_limit = ~0UL;
- target_pages = target;
+ bs.hard_limit = ~0UL;
+ bs.target_pages = target;
schedule_work(&balloon_worker);
}
@@ -388,7 +373,7 @@ static void watch_target(struct xenbus_watch *watch,
/* The given memory/target value is in KiB, so it needs converting to
* pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
*/
- set_new_target(new_target >> (PAGE_SHIFT - 10));
+ balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
static int balloon_init_watcher(struct notifier_block *notifier,
@@ -424,7 +409,7 @@ static int balloon_write(struct file *file, const char __user *buffer,
memstring[sizeof(memstring)-1] = '\0';
target_bytes = memparse(memstring, &endchar);
- set_new_target(target_bytes >> PAGE_SHIFT);
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
return count;
}
@@ -442,12 +427,13 @@ static int balloon_read(char *page, char **start, off_t off,
"High-mem balloon: %8lu kB\n"
"Driver pages: %8lu kB\n"
"Xen hard limit: ",
- PAGES2KB(current_pages), PAGES2KB(target_pages),
- PAGES2KB(balloon_low), PAGES2KB(balloon_high),
- PAGES2KB(driver_pages));
+ PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
+ PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
+ PAGES2KB(bs.driver_pages));
- if (hard_limit != ~0UL)
- len += sprintf(page + len, "%8lu kB\n", PAGES2KB(hard_limit));
+ if (bs.hard_limit != ~0UL)
+ len += sprintf(page + len, "%8lu kB\n",
+ PAGES2KB(bs.hard_limit));
else
len += sprintf(page + len, " ??? kB\n");
@@ -468,13 +454,13 @@ static int __init balloon_init(void)
IPRINTK("Initialising balloon driver.\n");
- current_pages = min(xen_start_info->nr_pages, max_pfn);
- totalram_pages = current_pages;
- target_pages = current_pages;
- balloon_low = 0;
- balloon_high = 0;
- driver_pages = 0UL;
- hard_limit = ~0UL;
+ bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
+ totalram_pages = bs.current_pages;
+ bs.target_pages = bs.current_pages;
+ bs.balloon_low = 0;
+ bs.balloon_high = 0;
+ bs.driver_pages = 0UL;
+ bs.hard_limit = ~0UL;
init_timer(&balloon_timer);
balloon_timer.data = 0;
@@ -489,6 +475,7 @@ static int __init balloon_init(void)
balloon_pde->read_proc = balloon_read;
balloon_pde->write_proc = balloon_write;
#endif
+ balloon_sysfs_init();
/* Initialise the balloon with excess memory space. */
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
@@ -512,7 +499,7 @@ void balloon_update_driver_allowance(long delta)
unsigned long flags;
balloon_lock(flags);
- driver_pages += delta;
+ bs.driver_pages += delta;
balloon_unlock(flags);
}
@@ -534,75 +521,87 @@ static int dealloc_pte_fn(
return 0;
}
-struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
- unsigned long vstart, flags;
- unsigned int order = get_order(nr_pages * PAGE_SIZE);
- int ret;
- unsigned long i;
- struct page *page;
+ unsigned long vaddr, flags;
+ struct page *page, **pagevec;
+ int i, ret;
- vstart = __get_free_pages(GFP_KERNEL, order);
- if (vstart == 0)
+ pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+ if (pagevec == NULL)
return NULL;
- scrub_pages(vstart, 1 << order);
-
- balloon_lock(flags);
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT;
- struct xen_memory_reservation reservation = {
- .nr_extents = 1,
- .extent_order = order,
- .domid = DOMID_SELF
- };
- set_xen_guest_handle(reservation.extent_start, &gmfn);
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
- &reservation);
- if (ret == -ENOSYS)
- goto err;
- BUG_ON(ret != 1);
- } else {
- ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order,
- dealloc_pte_fn, NULL);
- if (ret == -ENOSYS)
+ for (i = 0; i < nr_pages; i++) {
+ page = pagevec[i] = alloc_page(GFP_KERNEL);
+ if (page == NULL)
goto err;
- BUG_ON(ret);
- }
- current_pages -= 1UL << order;
- totalram_pages = current_pages;
- balloon_unlock(flags);
- schedule_work(&balloon_worker);
+ vaddr = (unsigned long)page_address(page);
- flush_tlb_all();
+ scrub_pages(vaddr, 1);
- page = virt_to_page(vstart);
+ balloon_lock(flags);
- for (i = 0; i < (1UL << order); i++)
- set_page_count(page + i, 1);
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ unsigned long gmfn = page_to_pfn(page);
+ struct xen_memory_reservation reservation = {
+ .nr_extents = 1,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+ set_xen_guest_handle(reservation.extent_start, &gmfn);
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ if (ret == 1)
+ ret = 0; /* success */
+ } else {
+ ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
+ dealloc_pte_fn, NULL);
+ }
- return page;
+ if (ret != 0) {
+ balloon_unlock(flags);
+ __free_page(page);
+ goto err;
+ }
+
+ totalram_pages = --bs.current_pages;
+
+ balloon_unlock(flags);
+ }
+
+ out:
+ schedule_work(&balloon_worker);
+ flush_tlb_all();
+ return pagevec;
err:
- free_pages(vstart, order);
+ balloon_lock(flags);
+ while (--i >= 0)
+ balloon_append(pagevec[i]);
balloon_unlock(flags);
- return NULL;
+ kfree(pagevec);
+ pagevec = NULL;
+ goto out;
}
-void balloon_dealloc_empty_page_range(
- struct page *page, unsigned long nr_pages)
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
{
- unsigned long i, flags;
- unsigned int order = get_order(nr_pages * PAGE_SIZE);
+ unsigned long flags;
+ int i;
+
+ if (pagevec == NULL)
+ return;
balloon_lock(flags);
- for (i = 0; i < (1UL << order); i++) {
- BUG_ON(page_count(page + i) != 1);
- balloon_append(page + i);
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page_count(pagevec[i]) != 1);
+ balloon_append(pagevec[i]);
}
balloon_unlock(flags);
+ kfree(pagevec);
+
schedule_work(&balloon_worker);
}
@@ -612,15 +611,15 @@ void balloon_release_driver_page(struct page *page)
balloon_lock(flags);
balloon_append(page);
- driver_pages--;
+ bs.driver_pages--;
balloon_unlock(flags);
schedule_work(&balloon_worker);
}
EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
-EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
-EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(balloon_release_driver_page);
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/common.h b/linux-2.6-xen-sparse/drivers/xen/balloon/common.h
new file mode 100644
index 0000000000..4496d215e2
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/common.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+ * balloon/common.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_BALLOON_COMMON_H__
+#define __XEN_BALLOON_COMMON_H__
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* We may hit the hard limit in Xen. If we do then we remember it. */
+ unsigned long hard_limit;
+ /*
+ * Drivers may alter the memory reservation independently, but they
+ * must inform the balloon driver so we avoid hitting the hard limit.
+ */
+ unsigned long driver_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+};
+
+extern struct balloon_stats balloon_stats;
+#define bs balloon_stats
+
+int balloon_sysfs_init(void);
+void balloon_sysfs_exit(void);
+
+void balloon_set_new_target(unsigned long target);
+
+#endif /* __XEN_BALLOON_COMMON_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c b/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c
new file mode 100644
index 0000000000..a4ed8a6f1e
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/balloon/sysfs.c
@@ -0,0 +1,165 @@
+/******************************************************************************
+ * balloon/sysfs.c
+ *
+ * Xen balloon driver - sysfs interfaces.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/capability.h>
+#include <linux/stat.h>
+#include <linux/sysdev.h>
+#include "common.h"
+
+#define BALLOON_CLASS_NAME "memory"
+
+#define BALLOON_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct sys_device *dev, \
+ char *buf) \
+ { \
+ return sprintf(buf, format, ##args); \
+ } \
+ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
+
+BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
+BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
+BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
+BALLOON_SHOW(hard_limit_kb,
+ (bs.hard_limit!=~0UL) ? "%lu\n" : "???\n",
+ (bs.hard_limit!=~0UL) ? PAGES2KB(bs.hard_limit) : 0);
+BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
+
+static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
+}
+
+static ssize_t store_target_kb(struct sys_device *dev,
+ const char *buf,
+ size_t count)
+{
+ char memstring[64], *endchar;
+ unsigned long long target_bytes;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (count <= 1)
+ return -EBADMSG; /* runt */
+ if (count > sizeof(memstring))
+ return -EFBIG; /* too long */
+ strcpy(memstring, buf);
+
+ target_bytes = memparse(memstring, &endchar);
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+ return count;
+}
+
+static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
+ show_target_kb, store_target_kb);
+
+static struct sysdev_attribute *balloon_attrs[] = {
+ &attr_target_kb,
+};
+
+static struct attribute *balloon_info_attrs[] = {
+ &attr_current_kb.attr,
+ &attr_low_kb.attr,
+ &attr_high_kb.attr,
+ &attr_hard_limit_kb.attr,
+ &attr_driver_kb.attr,
+ NULL
+};
+
+static struct attribute_group balloon_info_group = {
+ .name = "info",
+ .attrs = balloon_info_attrs,
+};
+
+static struct sysdev_class balloon_sysdev_class = {
+ set_kset_name(BALLOON_CLASS_NAME),
+};
+
+static struct sys_device balloon_sysdev;
+
+static int register_balloon(struct sys_device *sysdev)
+{
+ int i, error;
+
+ error = sysdev_class_register(&balloon_sysdev_class);
+ if (error)
+ return error;
+
+ sysdev->id = 0;
+ sysdev->cls = &balloon_sysdev_class;
+
+ error = sysdev_register(sysdev);
+ if (error) {
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
+ error = sysdev_create_file(sysdev, balloon_attrs[i]);
+ if (error)
+ goto fail;
+ }
+
+ error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+ if (error)
+ goto fail;
+
+ return 0;
+
+ fail:
+ while (--i >= 0)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+ return error;
+}
+
+static void unregister_balloon(struct sys_device *sysdev)
+{
+ int i;
+
+ sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
+ for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
+ sysdev_remove_file(sysdev, balloon_attrs[i]);
+ sysdev_unregister(sysdev);
+ sysdev_class_unregister(&balloon_sysdev_class);
+}
+
+int balloon_sysfs_init(void)
+{
+ return register_balloon(&balloon_sysdev);
+}
+
+void balloon_sysfs_exit(void)
+{
+ unregister_balloon(&balloon_sysdev);
+}
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
index 416f7bc18c..e8df9e0346 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
@@ -56,8 +56,6 @@ static int blkif_reqs = 64;
module_param_named(reqs, blkif_reqs, int, 0);
MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
-static int mmap_pages;
-
/* Run-time switchable: /sys/module/blkback/parameters/ */
static unsigned int log_stats = 0;
static unsigned int debug_lvl = 0;
@@ -87,8 +85,7 @@ static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
#define BLKBACK_INVALID_HANDLE (~0)
-static unsigned long mmap_vstart;
-static unsigned long *pending_vaddrs;
+static struct page **pending_pages;
static grant_handle_t *pending_grant_handles;
static inline int vaddr_pagenr(pending_req_t *req, int seg)
@@ -98,7 +95,8 @@ static inline int vaddr_pagenr(pending_req_t *req, int seg)
static inline unsigned long vaddr(pending_req_t *req, int seg)
{
- return pending_vaddrs[vaddr_pagenr(req, seg)];
+ unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]);
+ return (unsigned long)pfn_to_kaddr(pfn);
}
#define pending_handle(_req, _seg) \
@@ -191,9 +189,9 @@ static void fast_flush_area(pending_req_t *req)
static void print_stats(blkif_t *blkif)
{
- printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d\n",
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req);
+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
blkif->st_rd_req = 0;
blkif->st_wr_req = 0;
@@ -243,11 +241,17 @@ int blkif_schedule(void *arg)
* COMPLETION CALLBACK -- Called as bh->b_end_io()
*/
-static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+static void __end_block_io_op(pending_req_t *pending_req, int error)
{
/* An error fails the entire request. */
- if (!uptodate) {
- DPRINTK("Buffer not up-to-date at end of operation\n");
+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
+ (error == -EOPNOTSUPP)) {
+ DPRINTK("blkback: write barrier op failed, not supported\n");
+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
+ } else if (error) {
+ DPRINTK("Buffer not up-to-date at end of operation, "
+ "error=%d\n", error);
pending_req->status = BLKIF_RSP_ERROR;
}
@@ -264,7 +268,7 @@ static int end_block_io_op(struct bio *bio, unsigned int done, int error)
{
if (bio->bi_size != 0)
return 1;
- __end_block_io_op(bio->bi_private, !error);
+ __end_block_io_op(bio->bi_private, error);
bio_put(bio);
return error;
}
@@ -295,7 +299,7 @@ irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
static int do_block_io_op(blkif_t *blkif)
{
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
- blkif_request_t *req;
+ blkif_request_t req;
pending_req_t *pending_req;
RING_IDX rc, rp;
int more_to_do = 0;
@@ -313,22 +317,25 @@ static int do_block_io_op(blkif_t *blkif)
break;
}
- req = RING_GET_REQUEST(blk_ring, rc);
+ memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req));
blk_ring->req_cons = ++rc; /* before make_response() */
- switch (req->operation) {
+ switch (req.operation) {
case BLKIF_OP_READ:
blkif->st_rd_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
+ case BLKIF_OP_WRITE_BARRIER:
+ blkif->st_br_req++;
+ /* fall through */
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
default:
DPRINTK("error: unknown block io operation [%d]\n",
- req->operation);
- make_response(blkif, req->id, req->operation,
+ req.operation);
+ make_response(blkif, req.id, req.operation,
BLKIF_RSP_ERROR);
free_req(pending_req);
break;
@@ -342,7 +349,6 @@ static void dispatch_rw_block_io(blkif_t *blkif,
pending_req_t *pending_req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
- int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct phys_req preq;
struct {
@@ -351,6 +357,22 @@ static void dispatch_rw_block_io(blkif_t *blkif,
unsigned int nseg;
struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int ret, i, nbio = 0;
+ int operation;
+
+ switch (req->operation) {
+ case BLKIF_OP_READ:
+ operation = READ;
+ break;
+ case BLKIF_OP_WRITE:
+ operation = WRITE;
+ break;
+ case BLKIF_OP_WRITE_BARRIER:
+ operation = WRITE_BARRIER;
+ break;
+ default:
+ operation = 0; /* make gcc happy */
+ BUG();
+ }
/* Check that number of segments is sane. */
nseg = req->nr_segments;
@@ -366,7 +388,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
pending_req->blkif = blkif;
pending_req->id = req->id;
- pending_req->operation = operation;
+ pending_req->operation = req->operation;
pending_req->status = BLKIF_RSP_OKAY;
pending_req->nr_pages = nseg;
@@ -377,12 +399,12 @@ static void dispatch_rw_block_io(blkif_t *blkif,
req->seg[i].first_sect + 1;
if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
- (seg[i].nsec <= 0))
+ (req->seg[i].last_sect < req->seg[i].first_sect))
goto fail_response;
preq.nr_sects += seg[i].nsec;
flags = GNTMAP_host_map;
- if ( operation == WRITE )
+ if (operation != READ)
flags |= GNTMAP_readonly;
gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
req->seg[i].gref, blkif->domid);
@@ -394,10 +416,15 @@ static void dispatch_rw_block_io(blkif_t *blkif,
for (i = 0; i < nseg; i++) {
if (unlikely(map[i].status != 0)) {
DPRINTK("invalid buffer -- could not remap it\n");
- goto fail_flush;
+ map[i].handle = BLKBACK_INVALID_HANDLE;
+ ret |= 1;
}
pending_handle(pending_req, i) = map[i].handle;
+
+ if (ret)
+ continue;
+
set_phys_to_machine(__pa(vaddr(
pending_req, i)) >> PAGE_SHIFT,
FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
@@ -405,6 +432,9 @@ static void dispatch_rw_block_io(blkif_t *blkif,
(req->seg[i].first_sect << 9);
}
+ if (ret)
+ goto fail_flush;
+
if (vbd_translate(&preq, blkif, operation) != 0) {
DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
operation == READ ? "read" : "write",
@@ -506,52 +536,43 @@ static void make_response(blkif_t *blkif, unsigned long id,
static int __init blkif_init(void)
{
- struct page *page;
- int i;
+ int i, mmap_pages;
if (!is_running_on_xen())
return -ENODEV;
- mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
- page = balloon_alloc_empty_page_range(mmap_pages);
- if (page == NULL)
- return -ENOMEM;
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
blkif_reqs, GFP_KERNEL);
pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
mmap_pages, GFP_KERNEL);
- pending_vaddrs = kmalloc(sizeof(pending_vaddrs[0]) *
- mmap_pages, GFP_KERNEL);
- if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
- kfree(pending_reqs);
- kfree(pending_grant_handles);
- kfree(pending_vaddrs);
- printk("%s: out of memory\n", __FUNCTION__);
- return -ENOMEM;
- }
+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
- blkif_interface_init();
-
- printk("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
- __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
- BUG_ON(mmap_vstart == 0);
- for (i = 0; i < mmap_pages; i++) {
- pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
+ goto out_of_memory;
+
+ for (i = 0; i < mmap_pages; i++)
pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
- }
+
+ blkif_interface_init();
memset(pending_reqs, 0, sizeof(pending_reqs));
INIT_LIST_HEAD(&pending_free);
for (i = 0; i < blkif_reqs; i++)
list_add_tail(&pending_reqs[i].free_list, &pending_free);
-
+
blkif_xenbus_init();
return 0;
+
+ out_of_memory:
+ kfree(pending_reqs);
+ kfree(pending_grant_handles);
+ free_empty_pages_and_pagevec(pending_pages, mmap_pages);
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
}
module_init(blkif_init);
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
index 38cb756964..1b5b6a427e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/common.h
@@ -44,6 +44,7 @@
#include <xen/interface/io/ring.h>
#include <xen/gnttab.h>
#include <xen/driver_util.h>
+#include <xen/xenbus.h>
#define DPRINTK(_f, _a...) \
pr_debug("(file=%s, line=%d) " _f, \
@@ -87,6 +88,7 @@ typedef struct blkif_st {
int st_rd_req;
int st_wr_req;
int st_oo_req;
+ int st_br_req;
wait_queue_head_t waiting_to_free;
@@ -111,7 +113,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
unsigned minor, int readonly);
void vbd_free(struct vbd *vbd);
-unsigned long vbd_size(struct vbd *vbd);
+unsigned long long vbd_size(struct vbd *vbd);
unsigned int vbd_info(struct vbd *vbd);
unsigned long vbd_secsize(struct vbd *vbd);
@@ -131,4 +133,7 @@ void blkif_xenbus_init(void);
irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
int blkif_schedule(void *arg);
+int blkback_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
+
#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
index a809b04cd1..34048b32c4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
@@ -31,12 +31,11 @@
*/
#include "common.h"
-#include <xen/xenbus.h>
#define vbd_sz(_v) ((_v)->bdev->bd_part ? \
(_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
-unsigned long vbd_size(struct vbd *vbd)
+unsigned long long vbd_size(struct vbd *vbd)
{
return vbd_sz(vbd);
}
@@ -104,7 +103,7 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
struct vbd *vbd = &blkif->vbd;
int rc = -EACCES;
- if ((operation == WRITE) && vbd->readonly)
+ if ((operation != READ) && vbd->readonly)
goto out;
if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
index 02f90a6803..349ae64d0f 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
@@ -20,7 +20,6 @@
#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
-#include <xen/xenbus.h>
#include "common.h"
#undef DPRINTK
@@ -91,11 +90,13 @@ static void update_blkif_status(blkif_t *blkif)
VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req);
VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
static struct attribute *vbdstat_attrs[] = {
&dev_attr_oo_req.attr,
&dev_attr_rd_req.attr,
&dev_attr_wr_req.attr,
+ &dev_attr_br_req.attr,
NULL
};
@@ -165,6 +166,19 @@ static int blkback_remove(struct xenbus_device *dev)
return 0;
}
+int blkback_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state)
+{
+ struct xenbus_device *dev = be->dev;
+ int err;
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
+ "%d", state);
+ if (err)
+ xenbus_dev_fatal(dev, err, "writing feature-barrier");
+
+ return err;
+}
/**
* Entry point to this code when a new device is created. Allocate the basic
@@ -366,13 +380,16 @@ static void connect(struct backend_info *be)
/* Supply the information about the device the frontend needs */
again:
err = xenbus_transaction_start(&xbt);
-
if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
return;
}
- err = xenbus_printf(xbt, dev->nodename, "sectors", "%lu",
+ err = blkback_barrier(xbt, be, 1);
+ if (err)
+ goto abort;
+
+ err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
vbd_size(&be->blkif->vbd));
if (err) {
xenbus_dev_fatal(dev, err, "writing %s/sectors",
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
index 4c44d7608d..95cff46ff9 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -48,6 +48,10 @@
#include <asm/hypervisor.h>
#include <asm/maddr.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED 1
#define BLKIF_STATE_SUSPENDED 2
@@ -134,10 +138,10 @@ static int blkfront_resume(struct xenbus_device *dev)
DPRINTK("blkfront_resume: %s\n", dev->nodename);
- blkif_free(info, 1);
+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
err = talk_to_backend(dev, info);
- if (!err)
+ if (info->connected == BLKIF_STATE_SUSPENDED && !err)
blkif_recover(info);
return err;
@@ -273,7 +277,7 @@ static void backend_changed(struct xenbus_device *dev,
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
down(&bd->bd_sem);
- if (info->users > 0 && system_state == SYSTEM_RUNNING)
+ if (info->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
@@ -294,7 +298,8 @@ static void backend_changed(struct xenbus_device *dev,
*/
static void connect(struct blkfront_info *info)
{
- unsigned long sectors, sector_size;
+ unsigned long long sectors;
+ unsigned long sector_size;
unsigned int binfo;
int err;
@@ -305,7 +310,7 @@ static void connect(struct blkfront_info *info)
DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- "sectors", "%lu", &sectors,
+ "sectors", "%llu", &sectors,
"info", "%u", &binfo,
"sector-size", "%lu", &sector_size,
NULL);
@@ -316,6 +321,12 @@ static void connect(struct blkfront_info *info)
return;
}
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-barrier", "%lu", &info->feature_barrier,
+ NULL);
+ if (err)
+ info->feature_barrier = 0;
+
err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
@@ -355,9 +366,11 @@ static void blkfront_closing(struct xenbus_device *dev)
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- flush_scheduled_work();
spin_unlock_irqrestore(&blkif_io_lock, flags);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
xlvbd_del(info);
xenbus_frontend_closed(dev);
@@ -466,6 +479,27 @@ int blkif_ioctl(struct inode *inode, struct file *filep,
command, (long)argument, inode->i_rdev);
switch (command) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+ case HDIO_GETGEO: {
+ struct block_device *bd = inode->i_bdev;
+ struct hd_geometry geo;
+ int ret;
+
+ if (!argument)
+ return -EINVAL;
+
+ geo.start = get_start_sect(bd);
+ ret = blkif_getgeo(bd, &geo);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
+ sizeof(geo)))
+ return -EFAULT;
+
+ return 0;
+ }
+#endif
case CDROMMULTISESSION:
DPRINTK("FIXME: support multisession CDs later\n");
for (i = 0; i < sizeof(struct cdrom_multisession); i++)
@@ -542,11 +576,14 @@ static int blkif_queue_request(struct request *req)
info->shadow[id].request = (unsigned long)req;
ring_req->id = id;
- ring_req->operation = rq_data_dir(req) ?
- BLKIF_OP_WRITE : BLKIF_OP_READ;
ring_req->sector_number = (blkif_sector_t)req->sector;
ring_req->handle = info->handle;
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+ if (blk_barrier_rq(req))
+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+
ring_req->nr_segments = 0;
rq_for_each_bio (bio, req) {
bio_for_each_segment (bvec, bio, idx) {
@@ -643,6 +680,7 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
RING_IDX i, rp;
unsigned long flags;
struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ int uptodate;
spin_lock_irqsave(&blkif_io_lock, flags);
@@ -667,19 +705,27 @@ static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
ADD_ID_TO_FREELIST(info, id);
+ uptodate = (bret->status == BLKIF_RSP_OKAY);
switch (bret->operation) {
+ case BLKIF_OP_WRITE_BARRIER:
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ printk("blkfront: %s: write barrier op failed\n",
+ info->gd->disk_name);
+ uptodate = -EOPNOTSUPP;
+ info->feature_barrier = 0;
+ xlvbd_barrier(info);
+ }
+ /* fall through */
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
if (unlikely(bret->status != BLKIF_RSP_OKAY))
DPRINTK("Bad return from blkdev data "
"request: %x\n", bret->status);
- ret = end_that_request_first(
- req, (bret->status == BLKIF_RSP_OKAY),
+ ret = end_that_request_first(req, uptodate,
req->hard_nr_sectors);
BUG_ON(ret);
- end_that_request_last(
- req, (bret->status == BLKIF_RSP_OKAY));
+ end_that_request_last(req, uptodate);
break;
default:
BUG();
@@ -714,9 +760,11 @@ static void blkif_free(struct blkfront_info *info, int suspend)
blk_stop_queue(info->rq);
/* No more gnttab callback work. */
gnttab_cancel_free_callback(&info->callback);
- flush_scheduled_work();
spin_unlock_irq(&blkif_io_lock);
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
/* Free resources associated with old device channel. */
if (info->ring_ref != GRANT_INVALID_REF) {
gnttab_end_foreign_access(info->ring_ref, 0,
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
index 5ba3d1ebc3..b86360f405 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
@@ -126,6 +126,7 @@ struct blkfront_info
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
+ int feature_barrier;
/**
* The number of people holding this device open. We won't allow a
@@ -152,5 +153,6 @@ extern void do_blkif_request (request_queue_t *rq);
int xlvbd_add(blkif_sector_t capacity, int device,
u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
void xlvbd_del(struct blkfront_info *info);
+int xlvbd_barrier(struct blkfront_info *info);
#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
index 8aa453d3a0..f040a2b7e3 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -36,6 +36,10 @@
#include <linux/blkdev.h>
#include <linux/list.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)
@@ -46,7 +50,7 @@
*/
#define NUM_IDE_MAJORS 10
-#define NUM_SCSI_MAJORS 9
+#define NUM_SCSI_MAJORS 17
#define NUM_VBD_MAJORS 1
static struct xlbd_type_info xlbd_ide_type = {
@@ -91,7 +95,9 @@ static struct block_device_operations xlvbd_block_fops =
.open = blkif_open,
.release = blkif_release,
.ioctl = blkif_ioctl,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.getgeo = blkif_getgeo
+#endif
};
DEFINE_SPINLOCK(blkif_io_lock);
@@ -159,8 +165,11 @@ xlbd_get_major_info(int vdevice)
case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
index = 11 + major - SCSI_DISK1_MAJOR;
break;
- case SCSI_CDROM_MAJOR: index = 18; break;
- default: index = 19; break;
+ case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
+ index = 18 + major - SCSI_DISK8_MAJOR;
+ break;
+ case SCSI_CDROM_MAJOR: index = 26; break;
+ default: index = 27; break;
}
mi = ((major_info[index] != NULL) ? major_info[index] :
@@ -186,7 +195,11 @@ xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
if (rq == NULL)
return -1;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
elevator_init(rq, "noop");
+#else
+ elevator_init(rq, &elevator_noop);
+#endif
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_hardsect_size(rq, sector_size);
@@ -217,6 +230,7 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
struct xlbd_major_info *mi;
int nr_minors = 1;
int err = -ENODEV;
+ unsigned int offset;
BUG_ON(info->gd != NULL);
BUG_ON(info->mi != NULL);
@@ -234,15 +248,33 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
if (gd == NULL)
goto out;
- if (nr_minors > 1)
- sprintf(gd->disk_name, "%s%c", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift));
- else
- sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
- 'a' + mi->index * mi->type->disks_per_major +
- (minor >> mi->type->partn_shift),
- minor & ((1 << mi->type->partn_shift) - 1));
+ offset = mi->index * mi->type->disks_per_major +
+ (minor >> mi->type->partn_shift);
+ if (nr_minors > 1) {
+ if (offset < 26) {
+ sprintf(gd->disk_name, "%s%c",
+ mi->type->diskname, 'a' + offset );
+ }
+ else {
+ sprintf(gd->disk_name, "%s%c%c",
+ mi->type->diskname,
+ 'a' + ((offset/26)-1), 'a' + (offset%26) );
+ }
+ }
+ else {
+ if (offset < 26) {
+ sprintf(gd->disk_name, "%s%c%d",
+ mi->type->diskname,
+ 'a' + offset,
+ minor & ((1 << mi->type->partn_shift) - 1));
+ }
+ else {
+ sprintf(gd->disk_name, "%s%c%c%d",
+ mi->type->diskname,
+ 'a' + ((offset/26)-1), 'a' + (offset%26),
+ minor & ((1 << mi->type->partn_shift) - 1));
+ }
+ }
gd->major = mi->major;
gd->first_minor = minor;
@@ -257,6 +289,10 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
}
info->rq = gd->queue;
+ info->gd = gd;
+
+ if (info->feature_barrier)
+ xlvbd_barrier(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -267,8 +303,6 @@ xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
if (vdisk_info & VDISK_CDROM)
gd->flags |= GENHD_FL_CD;
- info->gd = gd;
-
return 0;
out:
@@ -316,3 +350,26 @@ xlvbd_del(struct blkfront_info *info)
blk_cleanup_queue(info->rq);
info->rq = NULL;
}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+int
+xlvbd_barrier(struct blkfront_info *info)
+{
+ int err;
+
+ err = blk_queue_ordered(info->rq,
+ info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
+ if (err)
+ return err;
+ printk("blkfront: %s: barriers %s\n",
+ info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
+ return 0;
+}
+#else
+int
+xlvbd_barrier(struct blkfront_info *info)
+{
+ printk("blkfront: %s: barriers disabled\n", info->gd->disk_name);
+ return -ENOSYS;
+}
+#endif
diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
index a6f1379c27..e0d898ab98 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
@@ -10,6 +10,9 @@
*
* Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
*
+ * Clean ups and fix ups:
+ * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
+ *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
@@ -44,7 +47,6 @@
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
-#include <linux/miscdevice.h>
#include <linux/errno.h>
#include <linux/major.h>
#include <linux/gfp.h>
@@ -52,9 +54,33 @@
#include <asm/tlbflush.h>
#include <linux/devfs_fs_kernel.h>
-#define MAX_TAP_DEV 100 /*the maximum number of tapdisk ring devices */
+#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
+
+struct class *xen_class;
+EXPORT_SYMBOL_GPL(xen_class);
+
+/*
+ * Setup the xen class. This should probably go in another file, but
+ * since blktap is the only user of it so far, it gets to keep it.
+ */
+int setup_xen_class(void)
+{
+ int ret;
+
+ if (xen_class)
+ return 0;
+
+ xen_class = class_create(THIS_MODULE, "xen");
+ if ((ret = IS_ERR(xen_class))) {
+ xen_class = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
/*
* The maximum number of requests that can be outstanding at any time
* is determined by
@@ -67,8 +93,9 @@
* mmap_alloc is initialised to 2 and should be adjustable on the fly via
* sysfs.
*/
-#define MAX_DYNAMIC_MEM 64
-#define MAX_PENDING_REQS 64
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+#define MAX_DYNAMIC_MEM BLK_RING_SIZE
+#define MAX_PENDING_REQS BLK_RING_SIZE
#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define MMAP_VADDR(_start, _req,_seg) \
(_start + \
@@ -82,6 +109,12 @@ static int mmap_pages = MMAP_PAGES;
* memory rings.
*/
+/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
+typedef struct domid_translate {
+ unsigned short domid;
+ unsigned short busid;
+} domid_translate_t ;
+
/*Data struct associated with each of the tapdisk devices*/
typedef struct tap_blkif {
struct vm_area_struct *vma; /*Shared memory area */
@@ -100,22 +133,11 @@ typedef struct tap_blkif {
unsigned long *idx_map; /*Record the user ring id to kern
[req id, idx] tuple */
blkif_t *blkif; /*Associate blkif with tapdev */
+ struct domid_translate trans; /*Translation from domid to bus. */
} tap_blkif_t;
-/*Private data struct associated with the inode*/
-typedef struct private_info {
- int idx;
-} private_info_t;
-
-/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
-typedef struct domid_translate {
- unsigned short domid;
- unsigned short busid;
-} domid_translate_t ;
-
-
-static domid_translate_t translate_domid[MAX_TAP_DEV];
-static tap_blkif_t *tapfds[MAX_TAP_DEV];
+static struct tap_blkif *tapfds[MAX_TAP_DEV];
+static int blktap_next_minor;
static int __init set_blkif_reqs(char *str)
{
@@ -168,16 +190,18 @@ static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
#define BLKBACK_INVALID_HANDLE (~0)
-typedef struct mmap_page {
- unsigned long start;
- struct page *mpage;
-} mmap_page_t;
+static struct page **foreign_pages[MAX_DYNAMIC_MEM];
+static inline unsigned long idx_to_kaddr(
+ unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
+{
+ unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx;
+ unsigned long pfn = page_to_pfn(foreign_pages[mmap_idx][arr_idx]);
+ return (unsigned long)pfn_to_kaddr(pfn);
+}
-static mmap_page_t mmap_start[MAX_DYNAMIC_MEM];
static unsigned short mmap_alloc = 0;
static unsigned short mmap_lock = 0;
static unsigned short mmap_inuse = 0;
-static unsigned long *pending_addrs[MAX_DYNAMIC_MEM];
/******************************************************************
* GRANT HANDLES
@@ -192,6 +216,7 @@ struct grant_handle_pair
grant_handle_t kernel;
grant_handle_t user;
};
+#define INVALID_GRANT_HANDLE 0xFFFF
static struct grant_handle_pair
pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
@@ -200,15 +225,13 @@ static struct grant_handle_pair
+ (_i)])
-static int blktap_read_ufe_ring(int idx); /*local prototypes*/
+static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
-#define BLKTAP_MINOR 0 /*/dev/xen/blktap resides at device number
- major=254, minor numbers begin at 0 */
-#define BLKTAP_DEV_MAJOR 254 /* TODO: Make major number dynamic *
- * and create devices in the kernel *
- */
+#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */
#define BLKTAP_DEV_DIR "/dev/xen"
+static int blktap_major;
+
/* blktap IOCTLs: */
#define BLKTAP_IOCTL_KICK_FE 1
#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */
@@ -264,17 +287,19 @@ static inline int GET_NEXT_REQ(unsigned long *idx_map)
{
int i;
for (i = 0; i < MAX_PENDING_REQS; i++)
- if (idx_map[i] == INVALID_REQ) return i;
+ if (idx_map[i] == INVALID_REQ)
+ return i;
return INVALID_REQ;
}
#define BLKTAP_INVALID_HANDLE(_g) \
- (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
+ (((_g->kernel) == INVALID_GRANT_HANDLE) && \
+ ((_g->user) == INVALID_GRANT_HANDLE))
#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
- (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+ (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \
} while(0)
@@ -303,7 +328,7 @@ struct vm_operations_struct blktap_vm_ops = {
*/
/*Function Declarations*/
-static int get_next_free_dev(void);
+static tap_blkif_t *get_next_free_dev(void);
static int blktap_open(struct inode *inode, struct file *filp);
static int blktap_release(struct inode *inode, struct file *filp);
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -311,8 +336,6 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg);
static unsigned int blktap_poll(struct file *file, poll_table *wait);
-struct miscdevice *set_misc(int minor, char *name, int dev);
-
static struct file_operations blktap_fops = {
.owner = THIS_MODULE,
.poll = blktap_poll,
@@ -323,41 +346,96 @@ static struct file_operations blktap_fops = {
};
-static int get_next_free_dev(void)
+static tap_blkif_t *get_next_free_dev(void)
{
tap_blkif_t *info;
- int i = 0, ret = -1;
- unsigned long flags;
+ int minor;
- spin_lock_irqsave(&pending_free_lock, flags);
-
- while (i < MAX_TAP_DEV) {
- info = tapfds[i];
- if ( (tapfds[i] != NULL) && (info->dev_inuse == 0)
- && (info->dev_pending == 0) ) {
+ /*
+ * This is called only from the ioctl, which
+ * means we should always have interrupts enabled.
+ */
+ BUG_ON(irqs_disabled());
+
+ spin_lock_irq(&pending_free_lock);
+
+ /* tapfds[0] is always NULL */
+
+ for (minor = 1; minor < blktap_next_minor; minor++) {
+ info = tapfds[minor];
+ /* we could have failed a previous attempt. */
+ if (!info ||
+ ((info->dev_inuse == 0) &&
+ (info->dev_pending == 0)) ) {
info->dev_pending = 1;
- ret = i;
- goto done;
+ goto found;
}
- i++;
}
-
-done:
- spin_unlock_irqrestore(&pending_free_lock, flags);
- return ret;
+ info = NULL;
+ minor = -1;
+
+ /*
+ * We didn't find free device. If we can still allocate
+ * more, then we grab the next device minor that is
+ * available. This is done while we are still under
+ * the protection of the pending_free_lock.
+ */
+ if (blktap_next_minor < MAX_TAP_DEV)
+ minor = blktap_next_minor++;
+found:
+ spin_unlock_irq(&pending_free_lock);
+
+ if (!info && minor > 0) {
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (unlikely(!info)) {
+ /*
+ * If we failed here, try to put back
+ * the next minor number. But if one
+ * was just taken, then we just lose this
+ * minor. We can try to allocate this
+ * minor again later.
+ */
+ spin_lock_irq(&pending_free_lock);
+ if (blktap_next_minor == minor+1)
+ blktap_next_minor--;
+ spin_unlock_irq(&pending_free_lock);
+ goto out;
+ }
+
+ info->minor = minor;
+ /*
+ * Make sure that we have a minor before others can
+ * see us.
+ */
+ wmb();
+ tapfds[minor] = info;
+
+ class_device_create(xen_class, NULL,
+ MKDEV(blktap_major, minor), NULL,
+ "blktap%d", minor);
+ devfs_mk_cdev(MKDEV(blktap_major, minor),
+ S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", minor);
+ }
+
+out:
+ return info;
}
int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
{
+ tap_blkif_t *info;
int i;
-
- for (i = 0; i < MAX_TAP_DEV; i++)
- if ( (translate_domid[i].domid == domid)
- && (translate_domid[i].busid == xenbus_id) ) {
- tapfds[i]->blkif = blkif;
- tapfds[i]->status = RUNNING;
+
+ for (i = 1; i < blktap_next_minor; i++) {
+ info = tapfds[i];
+ if ( info &&
+ (info->trans.domid == domid) &&
+ (info->trans.busid == xenbus_id) ) {
+ info->blkif = blkif;
+ info->status = RUNNING;
return i;
}
+ }
return -1;
}
@@ -367,13 +445,16 @@ void signal_tapdisk(int idx)
struct task_struct *ptask;
info = tapfds[idx];
- if ( (idx > 0) && (idx < MAX_TAP_DEV) && (info->pid > 0) ) {
+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
+ return;
+
+ if (info->pid > 0) {
ptask = find_task_by_pid(info->pid);
- if (ptask) {
+ if (ptask)
info->status = CLEANSHUTDOWN;
- }
}
info->blkif = NULL;
+
return;
}
@@ -382,18 +463,22 @@ static int blktap_open(struct inode *inode, struct file *filp)
blkif_sring_t *sring;
int idx = iminor(inode) - BLKTAP_MINOR;
tap_blkif_t *info;
- private_info_t *prv;
int i;
- if (tapfds[idx] == NULL) {
+ /* ctrl device, treat differently */
+ if (!idx)
+ return 0;
+
+ info = tapfds[idx];
+
+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info) {
WPRINTK("Unable to open device /dev/xen/blktap%d\n",
- idx);
- return -ENOMEM;
+ idx);
+ return -ENODEV;
}
+
DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
- info = tapfds[idx];
-
/*Only one process can access device at a time*/
if (test_and_set_bit(0, &info->dev_inuse))
return -EBUSY;
@@ -410,9 +495,7 @@ static int blktap_open(struct inode *inode, struct file *filp)
SHARED_RING_INIT(sring);
FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
- prv = kzalloc(sizeof(private_info_t),GFP_KERNEL);
- prv->idx = idx;
- filp->private_data = prv;
+ filp->private_data = info;
info->vma = NULL;
info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS,
@@ -433,17 +516,14 @@ static int blktap_open(struct inode *inode, struct file *filp)
static int blktap_release(struct inode *inode, struct file *filp)
{
- int idx = iminor(inode) - BLKTAP_MINOR;
- tap_blkif_t *info;
+ tap_blkif_t *info = filp->private_data;
- if (tapfds[idx] == NULL) {
- WPRINTK("Trying to free device that doesn't exist "
- "[/dev/xen/blktap%d]\n",idx);
- return -1;
- }
- info = tapfds[idx];
+ /* check for control device */
+ if (!info)
+ return 0;
+
info->dev_inuse = 0;
- DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx);
+ DPRINTK("Freeing device [/dev/xen/blktap%d]\n",info->minor);
/* Free the ring page. */
ClearPageReserved(virt_to_page(info->ufe_ring.sring));
@@ -457,11 +537,11 @@ static int blktap_release(struct inode *inode, struct file *filp)
info->vma = NULL;
}
- if (filp->private_data) kfree(filp->private_data);
-
if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
- kthread_stop(info->blkif->xenblkd);
- info->blkif->xenblkd = NULL;
+ if (info->blkif->xenblkd != NULL) {
+ kthread_stop(info->blkif->xenblkd);
+ info->blkif->xenblkd = NULL;
+ }
info->status = CLEANSHUTDOWN;
}
return 0;
@@ -491,16 +571,12 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
int size;
struct page **map;
int i;
- private_info_t *prv;
- tap_blkif_t *info;
+ tap_blkif_t *info = filp->private_data;
- /*Retrieve the dev info*/
- prv = (private_info_t *)filp->private_data;
- if (prv == NULL) {
+ if (info == NULL) {
WPRINTK("blktap: mmap, retrieving idx failed\n");
return -ENOMEM;
}
- info = tapfds[prv->idx];
vma->vm_flags |= VM_RESERVED;
vma->vm_ops = &blktap_vm_ops;
@@ -517,8 +593,6 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
info->user_vstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
/* Map the ring pages to the start of the region and reserve it. */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
if (remap_pfn_range(vma, vma->vm_start,
__pa(info->ufe_ring.sring) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot)) {
@@ -556,20 +630,17 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
static int blktap_ioctl(struct inode *inode, struct file *filp,
unsigned int cmd, unsigned long arg)
{
- int idx = iminor(inode) - BLKTAP_MINOR;
+ tap_blkif_t *info = filp->private_data;
+
switch(cmd) {
case BLKTAP_IOCTL_KICK_FE:
{
/* There are fe messages to process. */
- return blktap_read_ufe_ring(idx);
+ return blktap_read_ufe_ring(info);
}
case BLKTAP_IOCTL_SETMODE:
{
- tap_blkif_t *info = tapfds[idx];
-
- if ( (idx > 0) && (idx < MAX_TAP_DEV)
- && (tapfds[idx] != NULL) )
- {
+ if (info) {
if (BLKTAP_MODE_VALID(arg)) {
info->mode = arg;
/* XXX: may need to flush rings here. */
@@ -582,11 +653,7 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
}
case BLKTAP_IOCTL_PRINT_IDXS:
{
- tap_blkif_t *info = tapfds[idx];
-
- if ( (idx > 0) && (idx < MAX_TAP_DEV)
- && (tapfds[idx] != NULL) )
- {
+ if (info) {
printk("User Rings: \n-----------\n");
printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
"| req_prod: %2d, rsp_prod: %2d\n",
@@ -599,11 +666,7 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
}
case BLKTAP_IOCTL_SENDPID:
{
- tap_blkif_t *info = tapfds[idx];
-
- if ( (idx > 0) && (idx < MAX_TAP_DEV)
- && (tapfds[idx] != NULL) )
- {
+ if (info) {
info->pid = (pid_t)arg;
DPRINTK("blktap: pid received %d\n",
info->pid);
@@ -614,43 +677,49 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
{
uint64_t val = (uint64_t)arg;
domid_translate_t *tr = (domid_translate_t *)&val;
- int newdev;
DPRINTK("NEWINTF Req for domid %d and bus id %d\n",
tr->domid, tr->busid);
- newdev = get_next_free_dev();
- if (newdev < 1) {
+ info = get_next_free_dev();
+ if (!info) {
WPRINTK("Error initialising /dev/xen/blktap - "
"No more devices\n");
return -1;
}
- translate_domid[newdev].domid = tr->domid;
- translate_domid[newdev].busid = tr->busid;
- return newdev;
+ info->trans.domid = tr->domid;
+ info->trans.busid = tr->busid;
+ return info->minor;
}
case BLKTAP_IOCTL_FREEINTF:
{
unsigned long dev = arg;
- tap_blkif_t *info = NULL;
+ unsigned long flags;
+
+ info = tapfds[dev];
- if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
+ if ((dev > MAX_TAP_DEV) || !info)
+ return 0; /* should this be an error? */
- if ( (info != NULL) && (info->dev_pending) )
+ spin_lock_irqsave(&pending_free_lock, flags);
+ if (info->dev_pending)
info->dev_pending = 0;
+ spin_unlock_irqrestore(&pending_free_lock, flags);
+
return 0;
}
case BLKTAP_IOCTL_MINOR:
{
unsigned long dev = arg;
- tap_blkif_t *info = NULL;
-
- if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
-
- if (info != NULL) return info->minor;
- else return -1;
+
+ info = tapfds[dev];
+
+ if ((dev > MAX_TAP_DEV) || !info)
+ return -EINVAL;
+
+ return info->minor;
}
case BLKTAP_IOCTL_MAJOR:
- return BLKTAP_DEV_MAJOR;
+ return blktap_major;
case BLKTAP_QUERY_ALLOC_REQS:
{
@@ -662,25 +731,16 @@ static int blktap_ioctl(struct inode *inode, struct file *filp,
return -ENOIOCTLCMD;
}
-static unsigned int blktap_poll(struct file *file, poll_table *wait)
+static unsigned int blktap_poll(struct file *filp, poll_table *wait)
{
- private_info_t *prv;
- tap_blkif_t *info;
+ tap_blkif_t *info = filp->private_data;
- /*Retrieve the dev info*/
- prv = (private_info_t *)file->private_data;
- if (prv == NULL) {
- WPRINTK(" poll, retrieving idx failed\n");
+ /* do not work on the control device */
+ if (!info)
return 0;
- }
-
- if (prv->idx == 0) return 0;
-
- info = tapfds[prv->idx];
-
- poll_wait(file, &info->wait, wait);
+
+ poll_wait(filp, &info->wait, wait);
if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
- flush_tlb_all();
RING_PUSH_REQUESTS(&info->ufe_ring);
return POLLIN | POLLRDNORM;
}
@@ -691,11 +751,13 @@ void blktap_kick_user(int idx)
{
tap_blkif_t *info;
- if (idx == 0) return;
-
info = tapfds[idx];
-
- if (info != NULL) wake_up_interruptible(&info->wait);
+
+ if ((idx < 0) || (idx > MAX_TAP_DEV) || !info)
+ return;
+
+ wake_up_interruptible(&info->wait);
+
return;
}
@@ -712,66 +774,21 @@ static void make_response(blkif_t *blkif, unsigned long id,
static int req_increase(void)
{
int i, j;
- struct page *page;
- unsigned long flags;
- int ret;
- spin_lock_irqsave(&pending_free_lock, flags);
-
- ret = -EINVAL;
if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock)
- goto done;
-
-#ifdef __ia64__
- extern unsigned long alloc_empty_foreign_map_page_range(
- unsigned long pages);
- mmap_start[mmap_alloc].start = (unsigned long)
- alloc_empty_foreign_map_page_range(mmap_pages);
-#else /* ! ia64 */
- page = balloon_alloc_empty_page_range(mmap_pages);
- ret = -ENOMEM;
- if (page == NULL) {
- printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__);
- goto done;
- }
-
- /* Pin all of the pages. */
- for (i=0; i<mmap_pages; i++)
- get_page(&page[i]);
-
- mmap_start[mmap_alloc].start =
- (unsigned long)pfn_to_kaddr(page_to_pfn(page));
- mmap_start[mmap_alloc].mpage = page;
-
-#endif
-
- pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t) *
- blkif_reqs, GFP_KERNEL);
- pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) *
- mmap_pages, GFP_KERNEL);
-
- ret = -ENOMEM;
- if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
- kfree(pending_reqs[mmap_alloc]);
- kfree(pending_addrs[mmap_alloc]);
- WPRINTK("%s: out of memory\n", __FUNCTION__);
- ret = -ENOMEM;
- goto done;
- }
-
- ret = 0;
+ return -EINVAL;
- DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
- __FUNCTION__, blkif_reqs, mmap_pages,
- mmap_start[mmap_alloc].start);
+ pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t)
+ * blkif_reqs, GFP_KERNEL);
+ foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);
- BUG_ON(mmap_start[mmap_alloc].start == 0);
+ if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
+ goto out_of_memory;
- for (i = 0; i < mmap_pages; i++)
- pending_addrs[mmap_alloc][i] =
- mmap_start[mmap_alloc].start + (i << PAGE_SHIFT);
+ DPRINTK("%s: reqs=%d, pages=%d\n",
+ __FUNCTION__, blkif_reqs, mmap_pages);
- for (i = 0; i < MAX_PENDING_REQS ; i++) {
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
list_add_tail(&pending_reqs[mmap_alloc][i].free_list,
&pending_free);
pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
@@ -782,67 +799,30 @@ static int req_increase(void)
mmap_alloc++;
DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
- done:
- spin_unlock_irqrestore(&pending_free_lock, flags);
- return ret;
+ return 0;
+
+ out_of_memory:
+ free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
+ kfree(pending_reqs[mmap_alloc]);
+ WPRINTK("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
}
static void mmap_req_del(int mmap)
{
- int i;
- struct page *page;
+ BUG_ON(!spin_is_locked(&pending_free_lock));
- /*Spinlock already acquired*/
kfree(pending_reqs[mmap]);
- kfree(pending_addrs[mmap]);
-
-#ifdef __ia64__
- /*Not sure what goes here yet!*/
-#else
-
- /* Unpin all of the pages. */
- page = mmap_start[mmap].mpage;
- for (i=0; i<mmap_pages; i++)
- put_page(&page[i]);
+ pending_reqs[mmap] = NULL;
- balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages);
-#endif
+ free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
+ foreign_pages[mmap] = NULL;
mmap_lock = 0;
DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
mmap_alloc--;
}
-/*N.B. Currently unused - will be accessed via sysfs*/
-static void req_decrease(void)
-{
- pending_req_t *req;
- int i;
- unsigned long flags;
-
- spin_lock_irqsave(&pending_free_lock, flags);
-
- DPRINTK("Req decrease called.\n");
- if (mmap_lock || mmap_alloc == 1)
- goto done;
-
- mmap_lock = 1;
- mmap_inuse = MAX_PENDING_REQS;
-
- /*Go through reqs and remove any that aren't in use*/
- for (i = 0; i < MAX_PENDING_REQS ; i++) {
- req = &pending_reqs[mmap_alloc-1][i];
- if (req->inuse == 0) {
- list_del(&req->free_list);
- mmap_inuse--;
- }
- }
- if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
- done:
- spin_unlock_irqrestore(&pending_free_lock, flags);
- return;
-}
-
static pending_req_t* alloc_req(void)
{
pending_req_t *req = NULL;
@@ -888,8 +868,8 @@ static void free_req(pending_req_t *req)
wake_up(&pending_free_wq);
}
-static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int
- tapidx)
+static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx,
+ int tapidx)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
unsigned int i, invcount = 0;
@@ -897,49 +877,65 @@ static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int
uint64_t ptep;
int ret, mmap_idx;
unsigned long kvaddr, uvaddr;
-
- tap_blkif_t *info = tapfds[tapidx];
+ tap_blkif_t *info;
- if (info == NULL) {
+
+ info = tapfds[tapidx];
+
+ if ((tapidx < 0) || (tapidx > MAX_TAP_DEV) || !info) {
WPRINTK("fast_flush: Couldn't get info!\n");
return;
}
+
+ if (info->vma != NULL &&
+ xen_feature(XENFEAT_auto_translated_physmap)) {
+ down_write(&info->vma->vm_mm->mmap_sem);
+ zap_page_range(info->vma,
+ MMAP_VADDR(info->user_vstart, u_idx, 0),
+ req->nr_pages << PAGE_SHIFT, NULL);
+ up_write(&info->vma->vm_mm->mmap_sem);
+ }
+
mmap_idx = req->mem_idx;
for (i = 0; i < req->nr_pages; i++) {
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, k_idx, i);
uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
khandle = &pending_handle(mmap_idx, k_idx, i);
- if (BLKTAP_INVALID_HANDLE(khandle)) {
- WPRINTK("BLKTAP_INVALID_HANDLE\n");
- continue;
+
+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
+ gnttab_set_unmap_op(&unmap[invcount],
+ idx_to_kaddr(mmap_idx, k_idx, i),
+ GNTMAP_host_map, khandle->kernel);
+ invcount++;
}
- gnttab_set_unmap_op(&unmap[invcount],
- MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i),
- GNTMAP_host_map, khandle->kernel);
- invcount++;
-
- if (create_lookup_pte_addr(
- info->vma->vm_mm,
- MMAP_VADDR(info->user_vstart, u_idx, i),
- &ptep) !=0) {
- WPRINTK("Couldn't get a pte addr!\n");
- return;
+
+ if (khandle->user != INVALID_GRANT_HANDLE) {
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
+ if (create_lookup_pte_addr(
+ info->vma->vm_mm,
+ MMAP_VADDR(info->user_vstart, u_idx, i),
+ &ptep) !=0) {
+ WPRINTK("Couldn't get a pte addr!\n");
+ return;
+ }
+
+ gnttab_set_unmap_op(&unmap[invcount], ptep,
+ GNTMAP_host_map
+ | GNTMAP_application_map
+ | GNTMAP_contains_pte,
+ khandle->user);
+ invcount++;
}
- gnttab_set_unmap_op(&unmap[invcount],
- ptep, GNTMAP_host_map,
- khandle->user);
- invcount++;
-
BLKTAP_INVALIDATE_HANDLE(khandle);
}
ret = HYPERVISOR_grant_table_op(
GNTTABOP_unmap_grant_ref, unmap, invcount);
BUG_ON(ret);
- if (info->vma != NULL)
+ if (info->vma != NULL && !xen_feature(XENFEAT_auto_translated_physmap))
zap_page_range(info->vma,
MMAP_VADDR(info->user_vstart, u_idx, 0),
req->nr_pages << PAGE_SHIFT, NULL);
@@ -1002,7 +998,7 @@ int tap_blkif_schedule(void *arg)
* COMPLETION CALLBACK -- Called by user level ioctl()
*/
-static int blktap_read_ufe_ring(int idx)
+static int blktap_read_ufe_ring(tap_blkif_t *info)
{
/* This is called to read responses from the UFE ring. */
RING_IDX i, j, rp;
@@ -1010,12 +1006,9 @@ static int blktap_read_ufe_ring(int idx)
blkif_t *blkif=NULL;
int pending_idx, usr_idx, mmap_idx;
pending_req_t *pending_req;
- tap_blkif_t *info;
- info = tapfds[idx];
- if (info == NULL) {
+ if (!info)
return 0;
- }
/* We currently only forward packets in INTERCEPT_FE mode. */
if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
@@ -1026,11 +1019,14 @@ static int blktap_read_ufe_ring(int idx)
rmb();
for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
+ blkif_response_t res;
resp = RING_GET_RESPONSE(&info->ufe_ring, i);
+ memcpy(&res, resp, sizeof(res));
+ mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
++info->ufe_ring.rsp_cons;
/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
- usr_idx = (int)resp->id;
+ usr_idx = (int)res.id;
pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
@@ -1053,9 +1049,8 @@ static int blktap_read_ufe_ring(int idx)
struct page *pg;
int offset;
- uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, j);
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, j);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, j);
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
ClearPageReserved(pg);
@@ -1063,10 +1058,10 @@ static int blktap_read_ufe_ring(int idx)
>> PAGE_SHIFT;
map[offset] = NULL;
}
- fast_flush_area(pending_req, pending_idx, usr_idx, idx);
- make_response(blkif, pending_req->id, resp->operation,
- resp->status);
+ fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
info->idx_map[usr_idx] = INVALID_REQ;
+ make_response(blkif, pending_req->id, res.operation,
+ res.status);
blkif_put(pending_req->blkif);
free_req(pending_req);
}
@@ -1100,7 +1095,7 @@ static int print_dbug = 1;
static int do_block_io_op(blkif_t *blkif)
{
blkif_back_ring_t *blk_ring = &blkif->blk_ring;
- blkif_request_t *req;
+ blkif_request_t req;
pending_req_t *pending_req;
RING_IDX rc, rp;
int more_to_do = 0;
@@ -1111,7 +1106,7 @@ static int do_block_io_op(blkif_t *blkif)
rmb(); /* Ensure we see queued requests up to 'rp'. */
/*Check blkif has corresponding UE ring*/
- if (blkif->dev_num == -1) {
+ if (blkif->dev_num < 0) {
/*oops*/
if (print_dbug) {
WPRINTK("Corresponding UE "
@@ -1122,7 +1117,8 @@ static int do_block_io_op(blkif_t *blkif)
}
info = tapfds[blkif->dev_num];
- if (info == NULL || !info->dev_inuse) {
+
+ if (blkif->dev_num > MAX_TAP_DEV || !info || !info->dev_inuse) {
if (print_dbug) {
WPRINTK("Can't get UE info!\n");
print_dbug = 0;
@@ -1152,24 +1148,24 @@ static int do_block_io_op(blkif_t *blkif)
break;
}
- req = RING_GET_REQUEST(blk_ring, rc);
+ memcpy(&req, RING_GET_REQUEST(blk_ring, rc), sizeof(req));
blk_ring->req_cons = ++rc; /* before make_response() */
- switch (req->operation) {
+ switch (req.operation) {
case BLKIF_OP_READ:
blkif->st_rd_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
case BLKIF_OP_WRITE:
blkif->st_wr_req++;
- dispatch_rw_block_io(blkif, req, pending_req);
+ dispatch_rw_block_io(blkif, &req, pending_req);
break;
default:
WPRINTK("unknown operation [%d]\n",
- req->operation);
- make_response(blkif, req->id, req->operation,
+ req.operation);
+ make_response(blkif, req.id, req.operation,
BLKIF_RSP_ERROR);
free_req(pending_req);
break;
@@ -1190,17 +1186,27 @@ static void dispatch_rw_block_io(blkif_t *blkif,
struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
unsigned int nseg;
int ret, i;
- tap_blkif_t *info = tapfds[blkif->dev_num];
+ tap_blkif_t *info;
uint64_t sector;
-
blkif_request_t *target;
int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
- int usr_idx = GET_NEXT_REQ(info->idx_map);
+ int usr_idx;
uint16_t mmap_idx = pending_req->mem_idx;
- /*Check we have space on user ring - should never fail*/
- if(usr_idx == INVALID_REQ) goto fail_flush;
-
+ if (blkif->dev_num < 0 || blkif->dev_num > MAX_TAP_DEV)
+ goto fail_response;
+
+ info = tapfds[blkif->dev_num];
+ if (info == NULL)
+ goto fail_response;
+
+ /* Check we have space on user ring - should never fail. */
+ usr_idx = GET_NEXT_REQ(info->idx_map);
+ if (usr_idx == INVALID_REQ) {
+ BUG();
+ goto fail_response;
+ }
+
/* Check that number of segments is sane. */
nseg = req->nr_segments;
if ( unlikely(nseg == 0) ||
@@ -1233,15 +1239,12 @@ static void dispatch_rw_block_io(blkif_t *blkif,
unsigned long uvaddr;
unsigned long kvaddr;
uint64_t ptep;
- struct page *page;
uint32_t flags;
uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, i);
- page = virt_to_page(kvaddr);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
- sector = req->sector_number + (8*i);
+ sector = req->sector_number + ((PAGE_SIZE / 512) * i);
if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
WPRINTK("BLKTAP: Sector request greater"
"than size\n");
@@ -1251,7 +1254,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
BLKIF_OP_WRITE ? "WRITE" : "READ"),
(long long unsigned) sector,
(long long unsigned) sector>>9,
- blkif->sectors);
+ (long long unsigned) blkif->sectors);
}
flags = GNTMAP_host_map;
@@ -1261,71 +1264,123 @@ static void dispatch_rw_block_io(blkif_t *blkif,
req->seg[i].gref, blkif->domid);
op++;
- /* Now map it to user. */
- ret = create_lookup_pte_addr(info->vma->vm_mm,
- uvaddr, &ptep);
- if (ret) {
- WPRINTK("Couldn't get a pte addr!\n");
- fast_flush_area(pending_req, pending_idx, usr_idx,
- blkif->dev_num);
- goto fail_flush;
- }
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Now map it to user. */
+ ret = create_lookup_pte_addr(info->vma->vm_mm,
+ uvaddr, &ptep);
+ if (ret) {
+ WPRINTK("Couldn't get a pte addr!\n");
+ goto fail_flush;
+ }
- flags = GNTMAP_host_map | GNTMAP_application_map
- | GNTMAP_contains_pte;
- if (operation == WRITE)
- flags |= GNTMAP_readonly;
- gnttab_set_map_op(&map[op], ptep, flags,
- req->seg[i].gref, blkif->domid);
- op++;
+ flags = GNTMAP_host_map | GNTMAP_application_map
+ | GNTMAP_contains_pte;
+ if (operation == WRITE)
+ flags |= GNTMAP_readonly;
+ gnttab_set_map_op(&map[op], ptep, flags,
+ req->seg[i].gref, blkif->domid);
+ op++;
+ }
}
ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
BUG_ON(ret);
- for (i = 0; i < (nseg*2); i+=2) {
- unsigned long uvaddr;
- unsigned long kvaddr;
- unsigned long offset;
- struct page *pg;
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ for (i = 0; i < (nseg*2); i+=2) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ struct page *pg;
- uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, i/2);
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i/2);
- if (unlikely(map[i].status != 0)) {
- WPRINTK("invalid kernel buffer -- "
- "could not remap it\n");
- goto fail_flush;
- }
+ if (unlikely(map[i].status != 0)) {
+ WPRINTK("invalid kernel buffer -- "
+ "could not remap it\n");
+ ret |= 1;
+ map[i].handle = INVALID_GRANT_HANDLE;
+ }
- if (unlikely(map[i+1].status != 0)) {
- WPRINTK("invalid user buffer -- "
- "could not remap it\n");
- goto fail_flush;
+ if (unlikely(map[i+1].status != 0)) {
+ WPRINTK("invalid user buffer -- "
+ "could not remap it\n");
+ ret |= 1;
+ map[i+1].handle = INVALID_GRANT_HANDLE;
+ }
+
+ pending_handle(mmap_idx, pending_idx, i/2).kernel
+ = map[i].handle;
+ pending_handle(mmap_idx, pending_idx, i/2).user
+ = map[i+1].handle;
+
+ if (ret)
+ continue;
+
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
+ FOREIGN_FRAME(map[i].dev_bus_addr
+ >> PAGE_SHIFT));
+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+ ((struct page **)info->vma->vm_private_data)[offset] =
+ pg;
}
+ } else {
+ for (i = 0; i < nseg; i++) {
+ unsigned long uvaddr;
+ unsigned long kvaddr;
+ unsigned long offset;
+ struct page *pg;
- pending_handle(mmap_idx, pending_idx, i/2).kernel
- = map[i].handle;
- pending_handle(mmap_idx, pending_idx, i/2).user
- = map[i+1].handle;
- set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
- FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
- offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
- pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
- ((struct page **)info->vma->vm_private_data)[offset] =
- pg;
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
+
+ if (unlikely(map[i].status != 0)) {
+ WPRINTK("invalid kernel buffer -- "
+ "could not remap it\n");
+ ret |= 1;
+ map[i].handle = INVALID_GRANT_HANDLE;
+ }
+
+ pending_handle(mmap_idx, pending_idx, i).kernel
+ = map[i].handle;
+
+ if (ret)
+ continue;
+
+ offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+ pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+ ((struct page **)info->vma->vm_private_data)[offset] =
+ pg;
+ }
}
+
+ if (ret)
+ goto fail_flush;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ down_write(&info->vma->vm_mm->mmap_sem);
/* Mark mapped pages as reserved: */
for (i = 0; i < req->nr_segments; i++) {
unsigned long kvaddr;
struct page *pg;
- kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start,
- pending_idx, i);
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
SetPageReserved(pg);
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = vm_insert_page(info->vma,
+ MMAP_VADDR(info->user_vstart,
+ usr_idx, i), pg);
+ if (ret) {
+ up_write(&info->vma->vm_mm->mmap_sem);
+ goto fail_flush;
+ }
+ }
}
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ up_write(&info->vma->vm_mm->mmap_sem);
/*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
@@ -1336,6 +1391,7 @@ static void dispatch_rw_block_io(blkif_t *blkif,
info->ufe_ring.req_prod_pvt);
memcpy(target, req, sizeof(*req));
target->id = usr_idx;
+ wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
info->ufe_ring.req_prod_pvt++;
return;
@@ -1393,7 +1449,6 @@ static void make_response(blkif_t *blkif, unsigned long id,
static int __init blkif_init(void)
{
int i,ret,blktap_dir;
- tap_blkif_t *info;
if (!is_running_on_xen())
return -ENODEV;
@@ -1413,10 +1468,8 @@ static int __init blkif_init(void)
tap_blkif_xenbus_init();
- /*Create the blktap devices, but do not map memory or waitqueue*/
- for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
-
- ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops);
+ /* Dynamically allocate a major for this device */
+ ret = register_chrdev(0, "blktap", &blktap_fops);
blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
if ( (ret < 0)||(blktap_dir < 0) ) {
@@ -1424,22 +1477,36 @@ static int __init blkif_init(void)
return -ENOMEM;
}
- for(i = 0; i < MAX_TAP_DEV; i++ ) {
- info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
- if(tapfds[i] == NULL) return -ENOMEM;
- info->minor = i;
- info->pid = 0;
- info->blkif = NULL;
+ blktap_major = ret;
- ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i),
- S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
+ /* tapfds[0] is always NULL */
+ blktap_next_minor++;
- if(ret != 0) return -ENOMEM;
- info->dev_pending = info->dev_inuse = 0;
+ ret = devfs_mk_cdev(MKDEV(blktap_major, i),
+ S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
- DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+ if(ret != 0)
+ return -ENOMEM;
+
+ DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+
+ /* Make sure the xen class exists */
+ if (!setup_xen_class()) {
+ /*
+ * This will allow udev to create the blktap ctrl device.
+ * We only want to create blktap0 first. We don't want
+ * to flood the sysfs system with needless blktap devices.
+ * We only create the device when a request of a new device is
+ * made.
+ */
+ class_device_create(xen_class, NULL,
+ MKDEV(blktap_major, 0), NULL,
+ "blktap0");
+ } else {
+ /* this is bad, but not fatal */
+ WPRINTK("blktap: sysfs xen_class not created\n");
}
-
+
DPRINTK("Blktap device successfully created\n");
return 0;
diff --git a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
index 6c16a2e60b..553ad45c48 100644
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c
@@ -189,7 +189,7 @@ static int blktap_probe(struct xenbus_device *dev,
return 0;
fail:
- DPRINTK("blktap probe failed");
+ DPRINTK("blktap probe failed\n");
blktap_remove(dev);
return err;
}
@@ -243,7 +243,7 @@ static void tap_frontend_changed(struct xenbus_device *dev,
struct backend_info *be = dev->dev.driver_data;
int err;
- DPRINTK("");
+ DPRINTK("\n");
switch (frontend_state) {
case XenbusStateInitialising:
@@ -273,7 +273,6 @@ static void tap_frontend_changed(struct xenbus_device *dev,
kthread_stop(be->blkif->xenblkd);
be->blkif->xenblkd = NULL;
}
- tap_blkif_unmap(be->blkif);
xenbus_switch_state(dev, XenbusStateClosing);
break;
@@ -319,7 +318,7 @@ static int connect_ring(struct backend_info *be)
unsigned int evtchn;
int err;
- DPRINTK("%s", dev->otherend);
+ DPRINTK("%s\n", dev->otherend);
err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
&ring_ref, "event-channel", "%u", &evtchn, NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/char/mem.c b/linux-2.6-xen-sparse/drivers/xen/char/mem.c
index 6576135c99..ac85c8dbb2 100644
--- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c
+++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c
@@ -28,13 +28,12 @@
#include <asm/io.h>
#include <asm/hypervisor.h>
-static inline int uncached_access(struct file *file)
+#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
+static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
{
- if (file->f_flags & O_SYNC)
- return 1;
- /* Xen sets correct MTRR type on non-RAM for us. */
- return 0;
+ return 1;
}
+#endif
/*
* This funcion reads the *physical* memory. The f_pos points directly to the
@@ -47,6 +46,9 @@ static ssize_t read_mem(struct file * file, char __user * buf,
ssize_t read = 0, sz;
void __iomem *v;
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+
while (count > 0) {
/*
* Handle first page in case it's not aligned
@@ -58,13 +60,15 @@ static ssize_t read_mem(struct file * file, char __user * buf,
sz = min_t(unsigned long, sz, count);
- if ((v = ioremap(p, sz)) == NULL) {
+ v = xlate_dev_mem_ptr(p, sz);
+ if (IS_ERR(v) || v == NULL) {
/*
- * Some programs (e.g., dmidecode) groove off into weird RAM
- * areas where no tables can possibly exist (because Xen will
- * have stomped on them!). These programs get rather upset if
- * we let them know that Xen failed their access, so we fake
- * out a read of all zeroes. :-)
+ * Some programs (e.g., dmidecode) groove off into
+ * weird RAM areas where no tables can possibly exist
+ * (because Xen will have stomped on them!). These
+ * programs get rather upset if we let them know that
+ * Xen failed their access, so we fake out a read of
+ * all zeroes.
*/
if (clear_user(buf, count))
return -EFAULT;
@@ -73,7 +77,7 @@ static ssize_t read_mem(struct file * file, char __user * buf,
}
ignored = copy_to_user(buf, v, sz);
- iounmap(v);
+ xlate_dev_mem_ptr_unmap(v);
if (ignored)
return -EFAULT;
buf += sz;
@@ -93,6 +97,9 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
ssize_t written = 0, sz;
void __iomem *v;
+ if (!valid_phys_addr_range(p, &count))
+ return -EFAULT;
+
while (count > 0) {
/*
* Handle first page in case it's not aligned
@@ -104,11 +111,17 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
sz = min_t(unsigned long, sz, count);
- if ((v = ioremap(p, sz)) == NULL)
+ v = xlate_dev_mem_ptr(p, sz);
+ if (v == NULL)
break;
+ if (IS_ERR(v)) {
+ if (written == 0)
+ return PTR_ERR(v);
+ break;
+ }
ignored = copy_from_user(v, buf, sz);
- iounmap(v);
+ xlate_dev_mem_ptr_unmap(v);
if (ignored) {
written += sz - ignored;
if (written)
@@ -125,6 +138,15 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
return written;
}
+#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
+static inline int uncached_access(struct file *file)
+{
+ if (file->f_flags & O_SYNC)
+ return 1;
+ /* Xen sets correct MTRR type on non-RAM for us. */
+ return 0;
+}
+
static int mmap_mem(struct file * file, struct vm_area_struct * vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -136,6 +158,7 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma)
return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
size, vma->vm_page_prot, DOMID_IO);
}
+#endif
/*
* The memory devices use the full 32/64 bits of the offset, and so we cannot
diff --git a/linux-2.6-xen-sparse/drivers/xen/console/console.c b/linux-2.6-xen-sparse/drivers/xen/console/console.c
index a45d21a69c..ff3e2a411e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/console/console.c
+++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c
@@ -49,6 +49,7 @@
#include <linux/console.h>
#include <linux/bootmem.h>
#include <linux/sysrq.h>
+#include <linux/screen_info.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/uaccess.h>
@@ -266,6 +267,41 @@ void xencons_force_flush(void)
}
+void dom0_init_screen_info(const struct dom0_vga_console_info *info)
+{
+ switch (info->video_type) {
+ case XEN_VGATYPE_TEXT_MODE_3:
+ screen_info.orig_video_mode = 3;
+ screen_info.orig_video_ega_bx = 3;
+ screen_info.orig_video_isVGA = 1;
+ screen_info.orig_video_lines = info->u.text_mode_3.rows;
+ screen_info.orig_video_cols = info->u.text_mode_3.columns;
+ screen_info.orig_x = info->u.text_mode_3.cursor_x;
+ screen_info.orig_y = info->u.text_mode_3.cursor_y;
+ screen_info.orig_video_points =
+ info->u.text_mode_3.font_height;
+ break;
+ case XEN_VGATYPE_VESA_LFB:
+ screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
+ screen_info.lfb_width = info->u.vesa_lfb.width;
+ screen_info.lfb_height = info->u.vesa_lfb.height;
+ screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel;
+ screen_info.lfb_base = info->u.vesa_lfb.lfb_base;
+ screen_info.lfb_size = info->u.vesa_lfb.lfb_size;
+ screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line;
+ screen_info.red_size = info->u.vesa_lfb.red_size;
+ screen_info.red_pos = info->u.vesa_lfb.red_pos;
+ screen_info.green_size = info->u.vesa_lfb.green_size;
+ screen_info.green_pos = info->u.vesa_lfb.green_pos;
+ screen_info.blue_size = info->u.vesa_lfb.blue_size;
+ screen_info.blue_pos = info->u.vesa_lfb.blue_pos;
+ screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size;
+ screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+ break;
+ }
+}
+
+
/******************** User-space console driver (/dev/console) ************/
#define DRV(_d) (_d)
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/Makefile b/linux-2.6-xen-sparse/drivers/xen/core/Makefile
index c1b0c1bd51..6154454339 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/core/Makefile
@@ -9,5 +9,5 @@ obj-$(CONFIG_SYSFS) += hypervisor_sysfs.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
obj-$(CONFIG_XEN_SKBUFF) += skbuff.o
-obj-$(CONFIG_XEN_REBOOT) += reboot.o
+obj-$(CONFIG_XEN_REBOOT) += reboot.o machine_reboot.o
obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/features.c b/linux-2.6-xen-sparse/drivers/xen/core/features.c
index 4d50caf50b..a76f58c04d 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/features.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/features.c
@@ -11,6 +11,10 @@
#include <asm/hypervisor.h>
#include <xen/features.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
EXPORT_SYMBOL(xen_features);
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
index 3195279a87..c5132c13bb 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/gnttab.c
@@ -44,6 +44,10 @@
#include <asm/io.h>
#include <xen/interface/memory.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
new file mode 100644
index 0000000000..02ee7f4728
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c
@@ -0,0 +1,185 @@
+#define __KERNEL_SYSCALLS__
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/module.h>
+#include <linux/reboot.h>
+#include <linux/sysrq.h>
+#include <linux/stringify.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+#include <xen/interface/dom0_ops.h>
+#include <xen/xenbus.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+#include <xen/gnttab.h>
+#include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void machine_emergency_restart(void)
+{
+ /* We really want to get pending console data out before we die. */
+ xencons_force_flush();
+ HYPERVISOR_shutdown(SHUTDOWN_reboot);
+}
+
+void machine_restart(char * __unused)
+{
+ machine_emergency_restart();
+}
+
+void machine_halt(void)
+{
+ machine_power_off();
+}
+
+void machine_power_off(void)
+{
+ /* We really want to get pending console data out before we die. */
+ xencons_force_flush();
+ if (pm_power_off)
+ pm_power_off();
+ HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
+
+int reboot_thru_bios = 0; /* for dmi_scan.c */
+EXPORT_SYMBOL(machine_restart);
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+
+/* Ensure we run on the idle task page tables so that we will
+ switch page tables before running user space. This is needed
+ on architectures with separate kernel and user page tables
+ because the user page table pointer is not saved/restored. */
+static void switch_idle_mm(void)
+{
+ struct mm_struct *mm = current->active_mm;
+
+ if (mm == &init_mm)
+ return;
+
+ atomic_inc(&init_mm.mm_count);
+ switch_mm(mm, &init_mm, current);
+ current->active_mm = &init_mm;
+ mmdrop(mm);
+}
+
+static void pre_suspend(void)
+{
+ HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+ clear_fixmap(FIX_SHARED_INFO);
+
+ xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+ xen_start_info->console.domU.mfn =
+ mfn_to_pfn(xen_start_info->console.domU.mfn);
+}
+
+static void post_suspend(void)
+{
+ int i, j, k, fpp;
+ extern unsigned long max_pfn;
+ extern unsigned long *pfn_to_mfn_frame_list_list;
+ extern unsigned long *pfn_to_mfn_frame_list[];
+
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+ virt_to_mfn(pfn_to_mfn_frame_list_list);
+
+ fpp = PAGE_SIZE/sizeof(unsigned long);
+ for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
+ if ((j % fpp) == 0) {
+ k++;
+ pfn_to_mfn_frame_list_list[k] =
+ virt_to_mfn(pfn_to_mfn_frame_list[k]);
+ j = 0;
+ }
+ pfn_to_mfn_frame_list[k][j] =
+ virt_to_mfn(&phys_to_machine_mapping[i]);
+ }
+ HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+}
+
+#else /* !(defined(__i386__) || defined(__x86_64__)) */
+
+#define switch_idle_mm() ((void)0)
+#define mm_pin_all() ((void)0)
+#define pre_suspend() ((void)0)
+#define post_suspend() ((void)0)
+
+#endif
+
+int __xen_suspend(void)
+{
+ int err;
+
+ extern void time_resume(void);
+
+ BUG_ON(smp_processor_id() != 0);
+ BUG_ON(in_interrupt());
+
+#if defined(__i386__) || defined(__x86_64__)
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ printk(KERN_WARNING "Cannot suspend in "
+ "auto_translated_physmap mode.\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
+ err = smp_suspend();
+ if (err)
+ return err;
+
+ xenbus_suspend();
+
+ preempt_disable();
+
+ mm_pin_all();
+ local_irq_disable();
+ preempt_enable();
+
+ gnttab_suspend();
+
+ pre_suspend();
+
+ /*
+ * We'll stop somewhere inside this hypercall. When it returns,
+ * we'll start resuming after the restore.
+ */
+ HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+ post_suspend();
+
+ gnttab_resume();
+
+ irq_resume();
+
+ time_resume();
+
+ switch_idle_mm();
+
+ local_irq_enable();
+
+ xencons_resume();
+
+ xenbus_resume();
+
+ smp_resume();
+
+ return err;
+}
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
index 34c3930961..af3fe3a15c 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c
@@ -1,25 +1,15 @@
#define __KERNEL_SYSCALLS__
#include <linux/version.h>
#include <linux/kernel.h>
-#include <linux/mm.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/sysrq.h>
-#include <linux/stringify.h>
-#include <asm/irq.h>
-#include <asm/mmu_context.h>
-#include <xen/evtchn.h>
#include <asm/hypervisor.h>
-#include <xen/interface/dom0_ops.h>
#include <xen/xenbus.h>
-#include <linux/cpu.h>
#include <linux/kthread.h>
-#include <xen/gnttab.h>
-#include <xen/xencons.h>
-#include <xen/cpu_hotplug.h>
-extern void ctrl_alt_del(void);
+MODULE_LICENSE("Dual BSD/GPL");
#define SHUTDOWN_INVALID -1
#define SHUTDOWN_POWEROFF 0
@@ -31,186 +21,18 @@ extern void ctrl_alt_del(void);
*/
#define SHUTDOWN_HALT 4
-#if defined(__i386__) || defined(__x86_64__)
-
-/*
- * Power off function, if any
- */
-void (*pm_power_off)(void);
-EXPORT_SYMBOL(pm_power_off);
-
-void machine_emergency_restart(void)
-{
- /* We really want to get pending console data out before we die. */
- xencons_force_flush();
- HYPERVISOR_shutdown(SHUTDOWN_reboot);
-}
-
-void machine_restart(char * __unused)
-{
- machine_emergency_restart();
-}
-
-void machine_halt(void)
-{
- machine_power_off();
-}
-
-void machine_power_off(void)
-{
- /* We really want to get pending console data out before we die. */
- xencons_force_flush();
- if (pm_power_off)
- pm_power_off();
- HYPERVISOR_shutdown(SHUTDOWN_poweroff);
-}
-
-int reboot_thru_bios = 0; /* for dmi_scan.c */
-EXPORT_SYMBOL(machine_restart);
-EXPORT_SYMBOL(machine_halt);
-EXPORT_SYMBOL(machine_power_off);
-
-#endif /* defined(__i386__) || defined(__x86_64__) */
-
-/******************************************************************************
- * Stop/pickle callback handling.
- */
-
/* Ignore multiple shutdown requests. */
static int shutting_down = SHUTDOWN_INVALID;
+
static void __shutdown_handler(void *unused);
static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
-#if defined(__i386__) || defined(__x86_64__)
-
-/* Ensure we run on the idle task page tables so that we will
- switch page tables before running user space. This is needed
- on architectures with separate kernel and user page tables
- because the user page table pointer is not saved/restored. */
-static void switch_idle_mm(void)
-{
- struct mm_struct *mm = current->active_mm;
-
- if (mm == &init_mm)
- return;
-
- atomic_inc(&init_mm.mm_count);
- switch_mm(mm, &init_mm, current);
- current->active_mm = &init_mm;
- mmdrop(mm);
-}
-
-static void pre_suspend(void)
-{
- HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
- clear_fixmap(FIX_SHARED_INFO);
-
- xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
- xen_start_info->console.domU.mfn =
- mfn_to_pfn(xen_start_info->console.domU.mfn);
-}
-
-static void post_suspend(void)
-{
- int i, j, k, fpp;
- extern unsigned long max_pfn;
- extern unsigned long *pfn_to_mfn_frame_list_list;
- extern unsigned long *pfn_to_mfn_frame_list[];
-
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
-
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
-
- memset(empty_zero_page, 0, PAGE_SIZE);
-
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
- virt_to_mfn(pfn_to_mfn_frame_list_list);
-
- fpp = PAGE_SIZE/sizeof(unsigned long);
- for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
- if ((j % fpp) == 0) {
- k++;
- pfn_to_mfn_frame_list_list[k] =
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
- j = 0;
- }
- pfn_to_mfn_frame_list[k][j] =
- virt_to_mfn(&phys_to_machine_mapping[i]);
- }
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
-}
-
-#else /* !(defined(__i386__) || defined(__x86_64__)) */
-
-#define switch_idle_mm() ((void)0)
-#define mm_pin_all() ((void)0)
-#define pre_suspend() ((void)0)
-#define post_suspend() ((void)0)
-
-#endif
-
-static int __do_suspend(void *ignore)
-{
- int err;
-
- extern void time_resume(void);
-
- BUG_ON(smp_processor_id() != 0);
- BUG_ON(in_interrupt());
-
-#if defined(__i386__) || defined(__x86_64__)
- if (xen_feature(XENFEAT_auto_translated_physmap)) {
- printk(KERN_WARNING "Cannot suspend in "
- "auto_translated_physmap mode.\n");
- return -EOPNOTSUPP;
- }
+#ifdef CONFIG_XEN
+int __xen_suspend(void);
+#else
+#define __xen_suspend() (void)0
#endif
- err = smp_suspend();
- if (err)
- return err;
-
- xenbus_suspend();
-
- preempt_disable();
-
- mm_pin_all();
- local_irq_disable();
- preempt_enable();
-
- gnttab_suspend();
-
- pre_suspend();
-
- /*
- * We'll stop somewhere inside this hypercall. When it returns,
- * we'll start resuming after the restore.
- */
- HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
-
- shutting_down = SHUTDOWN_INVALID;
-
- post_suspend();
-
- gnttab_resume();
-
- irq_resume();
-
- time_resume();
-
- switch_idle_mm();
-
- local_irq_enable();
-
- xencons_resume();
-
- xenbus_resume();
-
- smp_resume();
-
- return err;
-}
-
static int shutdown_process(void *__unused)
{
static char *envp[] = { "HOME=/", "TERM=linux",
@@ -222,11 +44,13 @@ static int shutdown_process(void *__unused)
if ((shutting_down == SHUTDOWN_POWEROFF) ||
(shutting_down == SHUTDOWN_HALT)) {
- if (execve("/sbin/poweroff", poweroff_argv, envp) < 0) {
+ if (call_usermodehelper("/sbin/poweroff", poweroff_argv, envp, 0) < 0) {
+#ifdef CONFIG_XEN
sys_reboot(LINUX_REBOOT_MAGIC1,
LINUX_REBOOT_MAGIC2,
LINUX_REBOOT_CMD_POWER_OFF,
NULL);
+#endif /* CONFIG_XEN */
}
}
@@ -235,6 +59,13 @@ static int shutdown_process(void *__unused)
return 0;
}
+static int xen_suspend(void *__unused)
+{
+ __xen_suspend();
+ shutting_down = SHUTDOWN_INVALID;
+ return 0;
+}
+
static int kthread_create_on_cpu(int (*f)(void *arg),
void *arg,
const char *name,
@@ -257,7 +88,7 @@ static void __shutdown_handler(void *unused)
err = kernel_thread(shutdown_process, NULL,
CLONE_FS | CLONE_FILES);
else
- err = kthread_create_on_cpu(__do_suspend, NULL, "suspend", 0);
+ err = kthread_create_on_cpu(xen_suspend, NULL, "suspend", 0);
if (err < 0) {
printk(KERN_WARNING "Error creating shutdown process (%d): "
@@ -298,7 +129,7 @@ static void shutdown_handler(struct xenbus_watch *watch,
if (strcmp(str, "poweroff") == 0)
shutting_down = SHUTDOWN_POWEROFF;
else if (strcmp(str, "reboot") == 0)
- ctrl_alt_del();
+ kill_proc(1, SIGINT, 1); /* interrupt init */
else if (strcmp(str, "suspend") == 0)
shutting_down = SHUTDOWN_SUSPEND;
else if (strcmp(str, "halt") == 0)
@@ -364,10 +195,14 @@ static int setup_shutdown_watcher(struct notifier_block *notifier,
err = register_xenbus_watch(&shutdown_watch);
if (err)
printk(KERN_ERR "Failed to set shutdown watcher\n");
+ else
+ xenbus_write(XBT_NIL, "control", "feature-reboot", "1");
err = register_xenbus_watch(&sysrq_watch);
if (err)
printk(KERN_ERR "Failed to set sysrq watcher\n");
+ else
+ xenbus_write(XBT_NIL, "control", "feature-sysrq", "1");
return NOTIFY_DONE;
}
@@ -378,6 +213,7 @@ static int __init setup_shutdown_event(void)
.notifier_call = setup_shutdown_watcher
};
register_xenstore_notifier(&xenstore_notifier);
+
return 0;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
index a4a2e4edce..2fa88069c4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
+++ b/linux-2.6-xen-sparse/drivers/xen/core/skbuff.c
@@ -18,7 +18,12 @@
/*static*/ kmem_cache_t *skbuff_cachep;
EXPORT_SYMBOL(skbuff_cachep);
-#define MAX_SKBUFF_ORDER 4
+/* Allow up to 64kB or page-sized packets (whichever is greater). */
+#if PAGE_SHIFT < 16
+#define MAX_SKBUFF_ORDER (16 - PAGE_SHIFT)
+#else
+#define MAX_SKBUFF_ORDER 0
+#endif
static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
static struct {
diff --git a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
index 76bfab82e6..32f8de5bff 100644
--- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
+++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c
@@ -419,10 +419,9 @@ static struct file_operations evtchn_fops = {
};
static struct miscdevice evtchn_miscdev = {
- .minor = EVTCHN_MINOR,
+ .minor = MISC_DYNAMIC_MINOR,
.name = "evtchn",
.fops = &evtchn_fops,
- .devfs_name = "misc/evtchn",
};
static int __init evtchn_init(void)
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/common.h b/linux-2.6-xen-sparse/drivers/xen/netback/common.h
index 434ff6bcf2..367c008d3b 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h
@@ -92,6 +92,9 @@ typedef struct netif_st {
unsigned long remaining_credit;
struct timer_list credit_timeout;
+ /* Enforce draining of the transmit queue. */
+ struct timer_list tx_queue_timeout;
+
/* Miscellaneous private stuff. */
struct list_head list; /* scheduling list */
atomic_t refcnt;
@@ -106,7 +109,7 @@ typedef struct netif_st {
void netif_disconnect(netif_t *netif);
-netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
+netif_t *netif_alloc(domid_t domid, unsigned int handle);
int netif_map(netif_t *netif, unsigned long tx_ring_ref,
unsigned long rx_ring_ref, unsigned int evtchn);
@@ -119,6 +122,8 @@ int netif_map(netif_t *netif, unsigned long tx_ring_ref,
void netif_xenbus_init(void);
+#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev))
+
void netif_schedule_work(netif_t *netif);
void netif_deschedule_work(netif_t *netif);
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
index d60b23b0f2..9fae954bd2 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c
@@ -34,6 +34,23 @@
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
+/*
+ * Module parameter 'queue_length':
+ *
+ * Enables queuing in the network stack when a client has run out of receive
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
+ * unbounded time. This is bad if those packets hold onto foreign resources.
+ * For example, consider a packet that holds onto resources belonging to the
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
+ * vif1.1 which is not activated in the guest): in this situation the guest
+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
+ * blocked.
+ */
+static unsigned long netbk_queue_length = 32;
+module_param_named(queue_length, netbk_queue_length, ulong, 0);
+
static void __netif_up(netif_t *netif)
{
enable_irq(netif->irq);
@@ -107,9 +124,9 @@ static struct ethtool_ops network_ethtool_ops =
.get_link = ethtool_op_get_link,
};
-netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
+netif_t *netif_alloc(domid_t domid, unsigned int handle)
{
- int err = 0, i;
+ int err = 0;
struct net_device *dev;
netif_t *netif;
char name[IFNAMSIZ] = {};
@@ -134,6 +151,10 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
netif->credit_bytes = netif->remaining_credit = ~0UL;
netif->credit_usec = 0UL;
init_timer(&netif->credit_timeout);
+ /* Initialize 'expires' now: it's used to track the credit window. */
+ netif->credit_timeout.expires = jiffies;
+
+ init_timer(&netif->tx_queue_timeout);
dev->hard_start_xmit = netif_be_start_xmit;
dev->get_stats = netif_be_get_stats;
@@ -144,26 +165,16 @@ netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+ dev->tx_queue_len = netbk_queue_length;
+
/*
- * Reduce default TX queuelen so that each guest interface only
- * allows it to eat around 6.4MB of host memory.
- */
- dev->tx_queue_len = 100;
-
- for (i = 0; i < ETH_ALEN; i++)
- if (be_mac[i] != 0)
- break;
- if (i == ETH_ALEN) {
- /*
- * Initialise a dummy MAC address. We choose the numerically
- * largest non-broadcast address to prevent the address getting
- * stolen by an Ethernet bridge for STP purposes.
- * (FE:FF:FF:FF:FF:FF)
- */
- memset(dev->dev_addr, 0xFF, ETH_ALEN);
- dev->dev_addr[0] &= ~0x01;
- } else
- memcpy(dev->dev_addr, be_mac, ETH_ALEN);
+ * Initialise a dummy MAC address. We choose the numerically
+ * largest non-broadcast address to prevent the address getting
+ * stolen by an Ethernet bridge for STP purposes.
+ * (FE:FF:FF:FF:FF:FF)
+ */
+ memset(dev->dev_addr, 0xFF, ETH_ALEN);
+ dev->dev_addr[0] &= ~0x01;
rtnl_lock();
err = register_netdevice(dev);
@@ -306,11 +317,23 @@ err_rx:
return err;
}
-static void netif_free(netif_t *netif)
+void netif_disconnect(netif_t *netif)
{
+ if (netif_carrier_ok(netif->dev)) {
+ rtnl_lock();
+ netif_carrier_off(netif->dev);
+ if (netif_running(netif->dev))
+ __netif_down(netif);
+ rtnl_unlock();
+ netif_put(netif);
+ }
+
atomic_dec(&netif->refcnt);
wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+ del_timer_sync(&netif->credit_timeout);
+ del_timer_sync(&netif->tx_queue_timeout);
+
if (netif->irq)
unbind_from_irqhandler(netif->irq, netif);
@@ -324,16 +347,3 @@ static void netif_free(netif_t *netif)
free_netdev(netif->dev);
}
-
-void netif_disconnect(netif_t *netif)
-{
- if (netif_carrier_ok(netif->dev)) {
- rtnl_lock();
- netif_carrier_off(netif->dev);
- if (netif_running(netif->dev))
- __netif_down(netif);
- rtnl_unlock();
- netif_put(netif);
- }
- netif_free(netif);
-}
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
index 391ace8a02..d021c9689a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/loopback.c
@@ -53,8 +53,10 @@
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <net/dst.h>
+#include <net/xfrm.h> /* secpath_reset() */
+#include <asm/hypervisor.h> /* is_initial_xendomain() */
-static int nloopbacks = 8;
+static int nloopbacks = -1;
module_param(nloopbacks, int, 0);
MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
@@ -77,10 +79,60 @@ static int loopback_close(struct net_device *dev)
return 0;
}
+#ifdef CONFIG_X86
+static int is_foreign(unsigned long pfn)
+{
+ /* NB. Play it safe for auto-translation mode. */
+ return (xen_feature(XENFEAT_auto_translated_physmap) ||
+ (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT));
+}
+#else
+/* How to detect a foreign mapping? Play it safe. */
+#define is_foreign(pfn) (1)
+#endif
+
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+ struct page *page;
+ unsigned long pfn;
+ int i, off;
+ char *vaddr;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[i].page);
+ if (!is_foreign(pfn))
+ continue;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!page))
+ return 0;
+
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ off = skb_shinfo(skb)->frags[i].page_offset;
+ memcpy(page_address(page) + off,
+ vaddr + off,
+ skb_shinfo(skb)->frags[i].size);
+ kunmap_skb_frag(vaddr);
+
+ put_page(skb_shinfo(skb)->frags[i].page);
+ skb_shinfo(skb)->frags[i].page = page;
+ }
+
+ return 1;
+}
+
static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct net_private *np = netdev_priv(dev);
+ if (!skb_remove_foreign_references(skb)) {
+ np->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+ }
+
dst_release(skb->dst);
skb->dst = NULL;
@@ -110,6 +162,11 @@ static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb->protocol = eth_type_trans(skb, dev);
skb->dev = dev;
dev->last_rx = jiffies;
+
+ /* Flush netfilter context: rx'ed skbuffs not expected to have any. */
+ nf_reset(skb);
+ secpath_reset(skb);
+
netif_rx(skb);
return 0;
@@ -239,6 +296,9 @@ static int __init loopback_init(void)
{
int i, err = 0;
+ if (nloopbacks == -1)
+ nloopbacks = is_initial_xendomain() ? 4 : 0;
+
for (i = 0; i < nloopbacks; i++)
if ((err = make_loopback(i)) != 0)
break;
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
index ad8236c82f..1d24fc9b88 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c
@@ -70,14 +70,15 @@ static struct timer_list net_timer;
static struct sk_buff_head rx_queue;
-static unsigned long mmap_vstart;
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-
-static void *rx_mmap_area;
+static struct page **mmap_pages;
+static inline unsigned long idx_to_kaddr(unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
+}
#define PKT_PROT_LEN 64
-static struct {
+static struct pending_tx_info {
netif_tx_request_t req;
netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
@@ -186,7 +187,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
if (unlikely(!nskb))
goto err;
- skb_reserve(nskb, 16);
+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
headlen = nskb->end - nskb->data;
if (headlen > skb_headlen(skb))
headlen = skb_headlen(skb);
@@ -217,7 +218,7 @@ static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
- page = alloc_page(GFP_ATOMIC | zero);
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
if (unlikely(!page))
goto err_free;
@@ -263,6 +264,13 @@ static inline int netbk_queue_full(netif_t *netif)
((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
}
+static void tx_queue_callback(unsigned long data)
+{
+ netif_t *netif = (netif_t *)data;
+ if (netif_schedulable(netif->dev))
+ netif_wake_queue(netif->dev);
+}
+
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
netif_t *netif = netdev_priv(dev);
@@ -270,20 +278,13 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(skb->dev != dev);
/* Drop the packet if the target domain has no receive buffers. */
- if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
+ if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif)))
goto drop;
- if (unlikely(netbk_queue_full(netif))) {
- /* Not a BUG_ON() -- misbehaving netfront can trigger this. */
- if (netbk_can_queue(dev))
- DPRINTK("Queue full but not stopped!\n");
- goto drop;
- }
-
- /* Copy the packet here if it's destined for a flipping
- interface but isn't flippable (e.g. extra references to
- data)
- */
+ /*
+ * Copy the packet here if it's destined for a flipping interface
+ * but isn't flippable (e.g. extra references to data).
+ */
if (!netif->copying_receiver && !is_flippable_skb(skb)) {
struct sk_buff *nskb = netbk_copy_skb(skb);
if ( unlikely(nskb == NULL) )
@@ -304,8 +305,19 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
netif->rx.sring->req_event = netif->rx_req_cons_peek +
netbk_max_required_rx_slots(netif);
mb(); /* request notification /then/ check & stop the queue */
- if (netbk_queue_full(netif))
+ if (netbk_queue_full(netif)) {
netif_stop_queue(dev);
+ /*
+ * Schedule 500ms timeout to restart the queue, thus
+ * ensuring that an inactive queue will be drained.
+ * Packets will immediately be dropped until more
+ * receive buffers become available (see
+ * netbk_queue_full() check above).
+ */
+ netif->tx_queue_timeout.data = (unsigned long)netif;
+ netif->tx_queue_timeout.function = tx_queue_callback;
+ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ }
}
skb_queue_tail(&rx_queue, skb);
@@ -373,14 +385,22 @@ static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
flipped. */
meta->copy = 1;
copy_gop = npo->copy + npo->copy_prod++;
- copy_gop->source.domid = DOMID_SELF;
+ copy_gop->flags = GNTCOPY_dest_gref;
+ if (PageForeign(page)) {
+ struct pending_tx_info *src_pend =
+ &pending_tx_info[page->index];
+ copy_gop->source.domid = src_pend->netif->domid;
+ copy_gop->source.u.ref = src_pend->req.gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn = old_mfn;
+ }
copy_gop->source.offset = offset;
- copy_gop->source.u.gmfn = old_mfn;
copy_gop->dest.domid = netif->domid;
copy_gop->dest.offset = 0;
copy_gop->dest.u.ref = req->gref;
copy_gop->len = size;
- copy_gop->flags = GNTCOPY_dest_gref;
} else {
meta->copy = 0;
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -474,7 +494,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
copy_op = npo->copy + npo->copy_cons++;
if (copy_op->status != GNTST_okay) {
DPRINTK("Bad status %d from copy to DOM%d.\n",
- gop->status, domid);
+ copy_op->status, domid);
status = NETIF_RSP_ERROR;
}
} else {
@@ -697,6 +717,7 @@ static void net_rx_action(unsigned long unused)
}
if (netif_queue_stopped(netif->dev) &&
+ netif_schedulable(netif->dev) &&
!netbk_queue_full(netif))
netif_wake_queue(netif->dev);
@@ -754,8 +775,7 @@ static void add_to_net_schedule_list_tail(netif_t *netif)
spin_lock_irq(&net_schedule_list_lock);
if (!__on_net_schedule_list(netif) &&
- likely(netif_running(netif->dev) &&
- netif_carrier_ok(netif->dev))) {
+ likely(netif_schedulable(netif->dev))) {
list_add_tail(&netif->list, &net_schedule_list);
netif_get(netif);
}
@@ -792,10 +812,30 @@ void netif_deschedule_work(netif_t *netif)
}
+static void tx_add_credit(netif_t *netif)
+{
+ unsigned long max_burst, max_credit;
+
+ /*
+ * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
+ * Otherwise the interface can seize up due to insufficient credit.
+ */
+ max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
+ max_burst = min(max_burst, 131072UL);
+ max_burst = max(max_burst, netif->credit_bytes);
+
+ /* Take care that adding a new chunk of credit doesn't wrap to zero. */
+ max_credit = netif->remaining_credit + netif->credit_bytes;
+ if (max_credit < netif->remaining_credit)
+ max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
+
+ netif->remaining_credit = min(max_credit, max_burst);
+}
+
static void tx_credit_callback(unsigned long data)
{
netif_t *netif = (netif_t *)data;
- netif->remaining_credit = netif->credit_bytes;
+ tx_add_credit(netif);
netif_schedule_work(netif);
}
@@ -819,7 +859,7 @@ inline static void net_tx_action_dealloc(void)
gop = tx_unmap_ops;
while (dc != dp) {
pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
- gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
GNTMAP_host_map,
grant_tx_handle[pending_idx]);
gop++;
@@ -857,20 +897,28 @@ static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
netif_put(netif);
}
-static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
- int work_to_do)
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+ netif_tx_request_t *txp, int work_to_do)
{
- netif_tx_request_t *first = txp;
RING_IDX cons = netif->tx.req_cons;
int frags = 0;
- while (txp->flags & NETTXF_more_data) {
+ if (!(first->flags & NETTXF_more_data))
+ return 0;
+
+ do {
if (frags >= work_to_do) {
DPRINTK("Need more frags\n");
return -frags;
}
- txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+ if (unlikely(frags >= MAX_SKB_FRAGS)) {
+ DPRINTK("Too many frags\n");
+ return -frags;
+ }
+
+ memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+ sizeof(*txp));
if (txp->size > first->size) {
DPRINTK("Frags galore\n");
return -frags;
@@ -884,30 +932,28 @@ static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
txp->offset, txp->size);
return -frags;
}
- }
+ } while ((txp++)->flags & NETTXF_more_data);
return frags;
}
static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
struct sk_buff *skb,
+ netif_tx_request_t *txp,
gnttab_map_grant_ref_t *mop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
- netif_tx_request_t *txp;
unsigned long pending_idx = *((u16 *)skb->data);
- RING_IDX cons = netif->tx.req_cons;
int i, start;
/* Skip first skb fragment if it is on same page as header fragment. */
start = ((unsigned long)shinfo->frags[0].page == pending_idx);
- for (i = start; i < shinfo->nr_frags; i++) {
- txp = RING_GET_REQUEST(&netif->tx, cons++);
+ for (i = start; i < shinfo->nr_frags; i++, txp++) {
pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
- gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+ gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txp->gref, netif->domid);
@@ -940,7 +986,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
netif_put(netif);
} else {
set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+ __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
grant_tx_handle[pending_idx] = mop->handle;
}
@@ -957,7 +1003,7 @@ static int netbk_tx_check_mop(struct sk_buff *skb,
newerr = (++mop)->status;
if (likely(!newerr)) {
set_phys_to_machine(
- __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+ __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
grant_tx_handle[pending_idx] = mop->handle;
/* Had a previous error? Invalidate this fragment. */
@@ -1005,7 +1051,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
pending_idx = (unsigned long)frag->page;
txp = &pending_tx_info[pending_idx].req;
- frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+ frag->page = virt_to_page(idx_to_kaddr(pending_idx));
frag->size = txp->size;
frag->page_offset = txp->offset;
@@ -1018,7 +1064,7 @@ static void netbk_fill_frags(struct sk_buff *skb)
int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
int work_to_do)
{
- struct netif_extra_info *extra;
+ struct netif_extra_info extra;
RING_IDX cons = netif->tx.req_cons;
do {
@@ -1027,18 +1073,18 @@ int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
return -EBADR;
}
- extra = (struct netif_extra_info *)
- RING_GET_REQUEST(&netif->tx, cons);
- if (unlikely(!extra->type ||
- extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+ sizeof(extra));
+ if (unlikely(!extra.type ||
+ extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
netif->tx.req_cons = ++cons;
- DPRINTK("Invalid extra type: %d\n", extra->type);
+ DPRINTK("Invalid extra type: %d\n", extra.type);
return -EINVAL;
}
- memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+ memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
netif->tx.req_cons = ++cons;
- } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+ } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
return work_to_do;
}
@@ -1073,6 +1119,7 @@ static void net_tx_action(unsigned long unused)
struct sk_buff *skb;
netif_t *netif;
netif_tx_request_t txreq;
+ netif_tx_request_t txfrags[MAX_SKB_FRAGS];
struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
u16 pending_idx;
RING_IDX i;
@@ -1101,6 +1148,7 @@ static void net_tx_action(unsigned long unused)
i = netif->tx.req_cons;
rmb(); /* Ensure that we see the request before we copy it. */
memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
/* Credit-based scheduling. */
if (txreq.size > netif->remaining_credit) {
unsigned long now = jiffies;
@@ -1109,25 +1157,27 @@ static void net_tx_action(unsigned long unused)
msecs_to_jiffies(netif->credit_usec / 1000);
/* Timer could already be pending in rare cases. */
- if (timer_pending(&netif->credit_timeout))
- break;
+ if (timer_pending(&netif->credit_timeout)) {
+ netif_put(netif);
+ continue;
+ }
/* Passed the point where we can replenish credit? */
if (time_after_eq(now, next_credit)) {
netif->credit_timeout.expires = now;
- netif->remaining_credit = netif->credit_bytes;
+ tx_add_credit(netif);
}
/* Still too big to send right now? Set a callback. */
if (txreq.size > netif->remaining_credit) {
- netif->remaining_credit = 0;
netif->credit_timeout.data =
(unsigned long)netif;
netif->credit_timeout.function =
tx_credit_callback;
__mod_timer(&netif->credit_timeout,
next_credit);
- break;
+ netif_put(netif);
+ continue;
}
}
netif->remaining_credit -= txreq.size;
@@ -1146,19 +1196,13 @@ static void net_tx_action(unsigned long unused)
}
}
- ret = netbk_count_requests(netif, &txreq, work_to_do);
+ ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
if (unlikely(ret < 0)) {
netbk_tx_err(netif, &txreq, i - ret);
continue;
}
i += ret;
- if (unlikely(ret > MAX_SKB_FRAGS)) {
- DPRINTK("Too many frags\n");
- netbk_tx_err(netif, &txreq, i);
- continue;
- }
-
if (unlikely(txreq.size < ETH_HLEN)) {
DPRINTK("Bad packet size: %d\n", txreq.size);
netbk_tx_err(netif, &txreq, i);
@@ -1180,7 +1224,7 @@ static void net_tx_action(unsigned long unused)
ret < MAX_SKB_FRAGS) ?
PKT_PROT_LEN : txreq.size;
- skb = alloc_skb(data_len+16, GFP_ATOMIC);
+ skb = alloc_skb(data_len + 16 + NET_IP_ALIGN, GFP_ATOMIC);
if (unlikely(skb == NULL)) {
DPRINTK("Can't allocate a skb in start_xmit.\n");
netbk_tx_err(netif, &txreq, i);
@@ -1188,7 +1232,7 @@ static void net_tx_action(unsigned long unused)
}
/* Packets passed to netif_rx() must have some headroom. */
- skb_reserve(skb, 16);
+ skb_reserve(skb, 16 + NET_IP_ALIGN);
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
struct netif_extra_info *gso;
@@ -1201,7 +1245,7 @@ static void net_tx_action(unsigned long unused)
}
}
- gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
+ gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
GNTMAP_host_map | GNTMAP_readonly,
txreq.gref, netif->domid);
mop++;
@@ -1227,7 +1271,7 @@ static void net_tx_action(unsigned long unused)
pending_cons++;
- mop = netbk_get_requests(netif, skb, mop);
+ mop = netbk_get_requests(netif, skb, txfrags, mop);
netif->tx.req_cons = i;
netif_schedule_work(netif);
@@ -1260,8 +1304,8 @@ static void net_tx_action(unsigned long unused)
}
data_len = skb->len;
- memcpy(skb->data,
- (void *)(MMAP_VADDR(pending_idx)|txp->offset),
+ memcpy(skb->data,
+ (void *)(idx_to_kaddr(pending_idx)|txp->offset),
data_len);
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
@@ -1315,18 +1359,10 @@ static void netif_idx_release(u16 pending_idx)
static void netif_page_release(struct page *page)
{
- u16 pending_idx = page - virt_to_page(mmap_vstart);
-
/* Ready for next use. */
set_page_count(page, 1);
- netif_idx_release(pending_idx);
-}
-
-static void netif_rx_page_release(struct page *page)
-{
- /* Ready for next use. */
- set_page_count(page, 1);
+ netif_idx_release(page->index);
}
irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
@@ -1336,7 +1372,7 @@ irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
add_to_net_schedule_list_tail(netif);
maybe_schedule_tx_action();
- if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
+ if (netif_schedulable(netif->dev) && !netbk_queue_full(netif))
netif_wake_queue(netif->dev);
return IRQ_HANDLED;
@@ -1446,27 +1482,17 @@ static int __init netback_init(void)
init_timer(&net_timer);
net_timer.data = 0;
net_timer.function = net_alarm;
-
- page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
- if (page == NULL)
- return -ENOMEM;
- mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+ if (mmap_pages == NULL) {
+ printk("%s: out of memory\n", __FUNCTION__);
+ return -ENOMEM;
+ }
for (i = 0; i < MAX_PENDING_REQS; i++) {
- page = virt_to_page(MMAP_VADDR(i));
- set_page_count(page, 1);
+ page = mmap_pages[i];
SetPageForeign(page, netif_page_release);
- }
-
- page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE);
- BUG_ON(page == NULL);
- rx_mmap_area = pfn_to_kaddr(page_to_pfn(page));
-
- for (i = 0; i < NET_RX_RING_SIZE; i++) {
- page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE));
- set_page_count(page, 1);
- SetPageForeign(page, netif_rx_page_release);
+ page->index = i;
}
pending_cons = 0;
diff --git a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
index 6da614fc0c..7d301965f4 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
@@ -28,29 +28,20 @@
printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
#endif
-struct backend_info
-{
+struct backend_info {
struct xenbus_device *dev;
netif_t *netif;
- struct xenbus_watch backend_watch;
enum xenbus_state frontend_state;
};
static int connect_rings(struct backend_info *);
static void connect(struct backend_info *);
-static void maybe_connect(struct backend_info *);
-static void backend_changed(struct xenbus_watch *, const char **,
- unsigned int);
+static void backend_create_netif(struct backend_info *be);
static int netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev->dev.driver_data;
- if (be->backend_watch.node) {
- unregister_xenbus_watch(&be->backend_watch);
- kfree(be->backend_watch.node);
- be->backend_watch.node = NULL;
- }
if (be->netif) {
netif_disconnect(be->netif);
be->netif = NULL;
@@ -63,8 +54,7 @@ static int netback_remove(struct xenbus_device *dev)
/**
* Entry point to this code when a new device is created. Allocate the basic
- * structures, and watch the store waiting for the hotplug scripts to tell us
- * the device's handle. Switch to InitWait.
+ * structures and switch to InitWait.
*/
static int netback_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
@@ -83,11 +73,6 @@ static int netback_probe(struct xenbus_device *dev,
be->dev = dev;
dev->dev.driver_data = be;
- err = xenbus_watch_path2(dev, dev->nodename, "handle",
- &be->backend_watch, backend_changed);
- if (err)
- goto fail;
-
do {
err = xenbus_transaction_start(&xbt);
if (err) {
@@ -108,9 +93,22 @@ static int netback_probe(struct xenbus_device *dev,
goto abort_transaction;
}
- err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1);
+ /* We support rx-copy path. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-copy", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-copy";
+ goto abort_transaction;
+ }
+
+ /*
+ * We don't support rx-flip path (except old guests who don't
+ * grok this feature flag).
+ */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-flip", "%d", 0);
if (err) {
- message = "writing feature-copying";
+ message = "writing feature-rx-flip";
goto abort_transaction;
}
@@ -123,9 +121,11 @@ static int netback_probe(struct xenbus_device *dev,
}
err = xenbus_switch_state(dev, XenbusStateInitWait);
- if (err) {
+ if (err)
goto fail;
- }
+
+ /* This kicks hotplug scripts, so do it immediately. */
+ backend_create_netif(be);
return 0;
@@ -175,48 +175,30 @@ static int netback_uevent(struct xenbus_device *xdev, char **envp,
}
-/**
- * Callback received when the hotplug scripts have placed the handle node.
- * Read it, and create a netif structure. If the frontend is ready, connect.
- */
-static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
+static void backend_create_netif(struct backend_info *be)
{
int err;
long handle;
- struct backend_info *be
- = container_of(watch, struct backend_info, backend_watch);
struct xenbus_device *dev = be->dev;
- DPRINTK("");
+ if (be->netif != NULL)
+ return;
err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
- if (XENBUS_EXIST_ERR(err)) {
- /* Since this watch will fire once immediately after it is
- registered, we expect this. Ignore it, and wait for the
- hotplug scripts. */
- return;
- }
if (err != 1) {
xenbus_dev_fatal(dev, err, "reading handle");
return;
}
- if (be->netif == NULL) {
- u8 be_mac[ETH_ALEN] = { 0, 0, 0, 0, 0, 0 };
-
- be->netif = netif_alloc(dev->otherend_id, handle, be_mac);
- if (IS_ERR(be->netif)) {
- err = PTR_ERR(be->netif);
- be->netif = NULL;
- xenbus_dev_fatal(dev, err, "creating interface");
- return;
- }
-
- kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
-
- maybe_connect(be);
+ be->netif = netif_alloc(dev->otherend_id, handle);
+ if (IS_ERR(be->netif)) {
+ err = PTR_ERR(be->netif);
+ be->netif = NULL;
+ xenbus_dev_fatal(dev, err, "creating interface");
+ return;
}
+
+ kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
}
@@ -249,11 +231,9 @@ static void frontend_changed(struct xenbus_device *dev,
break;
case XenbusStateConnected:
- if (!be->netif) {
- /* reconnect: setup be->netif */
- backend_changed(&be->backend_watch, NULL, 0);
- }
- maybe_connect(be);
+ backend_create_netif(be);
+ if (be->netif)
+ connect(be);
break;
case XenbusStateClosing:
@@ -279,15 +259,6 @@ static void frontend_changed(struct xenbus_device *dev,
}
-/* ** Connection ** */
-
-
-static void maybe_connect(struct backend_info *be)
-{
- if (be->netif && (be->frontend_state == XenbusStateConnected))
- connect(be);
-}
-
static void xen_net_read_rate(struct xenbus_device *dev,
unsigned long *bytes, unsigned long *usec)
{
@@ -366,6 +337,10 @@ static void connect(struct backend_info *be)
be->netif->remaining_credit = be->netif->credit_bytes;
xenbus_switch_state(dev, XenbusStateConnected);
+
+ /* May not get a kick from the frontend, so start the tx_queue now. */
+ if (!netbk_can_queue(be->netif->dev))
+ netif_wake_queue(be->netif->dev);
}
@@ -403,14 +378,16 @@ static int connect_rings(struct backend_info *be)
}
be->netif->copying_receiver = !!rx_copy;
- if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d",
- &val) < 0)
- val = 0;
- if (val)
- be->netif->can_queue = 1;
- else
- /* Must be non-zero for pfifo_fast to work. */
- be->netif->dev->tx_queue_len = 1;
+ if (be->netif->dev->tx_queue_len != 0) {
+ if (xenbus_scanf(XBT_NIL, dev->otherend,
+ "feature-rx-notify", "%d", &val) < 0)
+ val = 0;
+ if (val)
+ be->netif->can_queue = 1;
+ else
+ /* Must be non-zero for pfifo_fast to work. */
+ be->netif->dev->tx_queue_len = 1;
+ }
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
val = 0;
diff --git a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
index a257cb6064..da22d45bf6 100644
--- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
+++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
@@ -47,6 +47,7 @@
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/io.h>
+#include <linux/moduleparam.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/arp.h>
@@ -63,20 +64,76 @@
#include <xen/interface/grant_table.h>
#include <xen/gnttab.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+/*
+ * Mutually-exclusive module options to select receive data path:
+ * rx_copy : Packets are copied by network backend into local memory
+ * rx_flip : Page containing packet data is transferred to our ownership
+ * For fully-virtualised guests there is no option - copying must be used.
+ * For paravirtualised guests, flipping is the default.
+ */
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
#define RX_COPY_THRESHOLD 256
/* If we don't have GSO, fake things up so that we never try to use it. */
-#ifndef NETIF_F_GSO
-#define netif_needs_gso(dev, skb) 0
-#define dev_disable_gso_features(dev) ((void)0)
-#else
+#if defined(NETIF_F_GSO)
#define HAVE_GSO 1
+#define HAVE_TSO 1 /* TSO is a subset of GSO */
static inline void dev_disable_gso_features(struct net_device *dev)
{
/* Turn off all GSO bits except ROBUST. */
dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
dev->features |= NETIF_F_GSO_ROBUST;
}
+#elif defined(NETIF_F_TSO)
+#define HAVE_TSO 1
+
+/* Some older kernels cannot cope with incorrect checksums,
+ * particularly in netfilter. I'm not sure there is 100% correlation
+ * with the presence of NETIF_F_TSO but it appears to be a good first
+ * approximation.
+ */
+#define HAVE_NO_CSUM_OFFLOAD 1
+
+#define gso_size tso_size
+#define gso_segs tso_segs
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+ /* Turn off all TSO bits. */
+ dev->features &= ~NETIF_F_TSO;
+}
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->tso_size;
+}
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+ return (features & NETIF_F_TSO);
+}
+
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+ return skb_is_gso(skb) &&
+ (!skb_gso_ok(skb, dev->features) ||
+ unlikely(skb->ip_summed != CHECKSUM_HW));
+}
+#else
+#define netif_needs_gso(dev, skb) 0
+#define dev_disable_gso_features(dev) ((void)0)
#endif
#define GRANT_INVALID_REF 0
@@ -96,7 +153,6 @@ struct netfront_info {
spinlock_t tx_lock;
spinlock_t rx_lock;
- unsigned int handle;
unsigned int evtchn, irq;
unsigned int copying_receiver;
@@ -120,7 +176,7 @@ struct netfront_info {
grant_ref_t gref_tx_head;
grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
grant_ref_t gref_rx_head;
- grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
struct xenbus_device *xbdev;
int tx_ring_ref;
@@ -185,9 +241,8 @@ static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
#define WPRINTK(fmt, args...) \
printk(KERN_WARNING "netfront: " fmt, ##args)
-static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
static int setup_device(struct xenbus_device *, struct netfront_info *);
-static struct net_device *create_netdev(int, int, struct xenbus_device *);
+static struct net_device *create_netdev(struct xenbus_device *);
static void netfront_closing(struct xenbus_device *);
@@ -195,9 +250,8 @@ static void end_access(int, void *);
static void netif_disconnect_backend(struct netfront_info *);
static int open_netdev(struct netfront_info *);
static void close_netdev(struct netfront_info *);
-static void netif_free(struct netfront_info *);
-static void network_connect(struct net_device *);
+static int network_connect(struct net_device *);
static void network_tx_buf_gc(struct net_device *);
static void network_alloc_rx_buffers(struct net_device *);
static int send_fake_arp(struct net_device *);
@@ -220,8 +274,7 @@ static inline int xennet_can_sg(struct net_device *dev)
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffers for communication with the backend, and
- * inform the backend of the appropriate details for those. Switch to
- * Connected state.
+ * inform the backend of the appropriate details for those.
*/
static int __devinit netfront_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
@@ -229,31 +282,8 @@ static int __devinit netfront_probe(struct xenbus_device *dev,
int err;
struct net_device *netdev;
struct netfront_info *info;
- unsigned int handle;
- unsigned feature_rx_copy;
-
- err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
- if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading handle");
- return err;
- }
-
-#ifndef CONFIG_XEN
- err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
- &feature_rx_copy);
- if (err != 1) {
- xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
- return err;
- }
- if (!feature_rx_copy) {
- xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
- return -EINVAL;
- }
-#else
- feature_rx_copy = 0;
-#endif
- netdev = create_netdev(handle, feature_rx_copy, dev);
+ netdev = create_netdev(dev);
if (IS_ERR(netdev)) {
err = PTR_ERR(netdev);
xenbus_dev_fatal(dev, err, "creating netdev");
@@ -263,20 +293,13 @@ static int __devinit netfront_probe(struct xenbus_device *dev,
info = netdev_priv(netdev);
dev->dev.driver_data = info;
- err = talk_to_backend(dev, info);
- if (err)
- goto fail_backend;
-
err = open_netdev(info);
if (err)
- goto fail_open;
+ goto fail;
return 0;
- fail_open:
- xennet_sysfs_delif(info->netdev);
- unregister_netdev(netdev);
- fail_backend:
+ fail:
free_netdev(netdev);
dev->dev.driver_data = NULL;
return err;
@@ -296,7 +319,7 @@ static int netfront_resume(struct xenbus_device *dev)
DPRINTK("%s\n", dev->nodename);
netif_disconnect_backend(info);
- return talk_to_backend(dev, info);
+ return 0;
}
static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
@@ -379,13 +402,21 @@ again:
goto abort_transaction;
}
+#ifdef HAVE_NO_CSUM_OFFLOAD
+ err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload", "%d", 1);
+ if (err) {
+ message = "writing feature-no-csum-offload";
+ goto abort_transaction;
+ }
+#endif
+
err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
if (err) {
message = "writing feature-sg";
goto abort_transaction;
}
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
if (err) {
message = "writing feature-gso-tcpv4";
@@ -407,12 +438,11 @@ again:
xenbus_transaction_end(xbt, 1);
xenbus_dev_fatal(dev, err, "%s", message);
destroy_ring:
- netif_free(info);
+ netif_disconnect_backend(info);
out:
return err;
}
-
static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
{
struct netif_tx_sring *txs;
@@ -472,11 +502,9 @@ static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
return 0;
fail:
- netif_free(info);
return err;
}
-
/**
* Callback received when the backend's state changes.
*/
@@ -497,7 +525,8 @@ static void backend_changed(struct xenbus_device *dev,
break;
case XenbusStateInitWait:
- network_connect(netdev);
+ if (network_connect(netdev) != 0)
+ break;
xenbus_switch_state(dev, XenbusStateConnected);
(void)send_fake_arp(netdev);
break;
@@ -508,7 +537,6 @@ static void backend_changed(struct xenbus_device *dev,
}
}
-
/** Send a packet on a net device to encourage switches to learn the
* MAC. We send a fake ARP request.
*
@@ -537,7 +565,6 @@ static int send_fake_arp(struct net_device *dev)
return dev_queue_xmit(skb);
}
-
static int network_open(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
@@ -629,14 +656,12 @@ static void network_tx_buf_gc(struct net_device *dev)
network_maybe_wake_tx(dev);
}
-
static void rx_refill_timeout(unsigned long data)
{
struct net_device *dev = (struct net_device *)data;
netif_rx_schedule(dev);
}
-
static void network_alloc_rx_buffers(struct net_device *dev)
{
unsigned short id;
@@ -669,7 +694,7 @@ static void network_alloc_rx_buffers(struct net_device *dev)
* necessary here.
* 16 bytes added as necessary headroom for netif_receive_skb.
*/
- skb = alloc_skb(RX_COPY_THRESHOLD + 16,
+ skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
goto no_skb;
@@ -687,7 +712,7 @@ no_skb:
break;
}
- skb_reserve(skb, 16); /* mimic dev_alloc_skb() */
+ skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
skb_shinfo(skb)->frags[0].page = page;
skb_shinfo(skb)->nr_frags = 1;
__skb_queue_tail(&np->rx_batch, skb);
@@ -742,7 +767,7 @@ no_skb:
} else {
gnttab_grant_foreign_access_ref(ref,
np->xbdev->otherend_id,
- pfn,
+ pfn_to_mfn(pfn),
0);
}
@@ -917,7 +942,7 @@ static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx->flags |= NETTXF_data_validated;
#endif
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
if (skb_shinfo(skb)->gso_size) {
struct netif_extra_info *gso = (struct netif_extra_info *)
RING_GET_REQUEST(&np->tx, ++i);
@@ -1071,6 +1096,7 @@ static int xennet_get_responses(struct netfront_info *np,
if (net_ratelimit())
WPRINTK("rx->offset: %x, size: %u\n",
rx->offset, rx->status);
+ xennet_move_rx_slot(np, skb, ref);
err = -EINVAL;
goto next;
}
@@ -1081,7 +1107,8 @@ static int xennet_get_responses(struct netfront_info *np,
* situation to the system controller to reboot the backed.
*/
if (ref == GRANT_INVALID_REF) {
- WPRINTK("Bad rx response id %d.\n", rx->id);
+ if (net_ratelimit())
+ WPRINTK("Bad rx response id %d.\n", rx->id);
err = -EINVAL;
goto next;
}
@@ -1153,6 +1180,9 @@ next:
err = -E2BIG;
}
+ if (unlikely(err))
+ np->rx.rsp_cons = cons + frags;
+
*pages_flipped_p = pages_flipped;
return err;
@@ -1205,12 +1235,14 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
return -EINVAL;
}
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
skb_shinfo(skb)->gso_size = gso->u.gso.size;
+#ifdef HAVE_GSO
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
/* Header must be checked, and gso_segs computed. */
skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+#endif
skb_shinfo(skb)->gso_segs = 0;
return 0;
@@ -1255,9 +1287,9 @@ static int netif_poll(struct net_device *dev, int *pbudget)
rp = np->rx.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for (i = np->rx.rsp_cons, work_done = 0;
- (i != rp) && (work_done < budget);
- np->rx.rsp_cons = ++i, work_done++) {
+ i = np->rx.rsp_cons;
+ work_done = 0;
+ while ((i != rp) && (work_done < budget)) {
memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
memset(extras, 0, sizeof(extras));
@@ -1265,12 +1297,11 @@ static int netif_poll(struct net_device *dev, int *pbudget)
&pages_flipped);
if (unlikely(err)) {
-err:
- i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
- work_done--;
+err:
while ((skb = __skb_dequeue(&tmpq)))
__skb_queue_tail(&errq, skb);
np->stats.rx_errors++;
+ i = np->rx.rsp_cons;
continue;
}
@@ -1282,6 +1313,7 @@ err:
if (unlikely(xennet_set_skb_gso(skb, gso))) {
__skb_queue_head(&tmpq, skb);
+ np->rx.rsp_cons += skb_queue_len(&tmpq);
goto err;
}
}
@@ -1345,6 +1377,9 @@ err:
np->stats.rx_bytes += skb->len;
__skb_queue_tail(&rxq, skb);
+
+ np->rx.rsp_cons = ++i;
+ work_done++;
}
if (pages_flipped) {
@@ -1561,7 +1596,7 @@ static int xennet_set_sg(struct net_device *dev, u32 data)
static int xennet_set_tso(struct net_device *dev, u32 data)
{
-#ifdef HAVE_GSO
+#ifdef HAVE_TSO
if (data) {
struct netfront_info *np = netdev_priv(dev);
int val;
@@ -1588,20 +1623,53 @@ static void xennet_set_features(struct net_device *dev)
if (!(dev->features & NETIF_F_IP_CSUM))
return;
- if (!xennet_set_sg(dev, 1))
- xennet_set_tso(dev, 1);
+ if (xennet_set_sg(dev, 1))
+ return;
+
+ /* Before 2.6.9 TSO seems to be unreliable so do not enable it
+ * on older kernels.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+ xennet_set_tso(dev, 1);
+#endif
+
}
-static void network_connect(struct net_device *dev)
+static int network_connect(struct net_device *dev)
{
struct netfront_info *np = netdev_priv(dev);
- int i, requeue_idx;
+ int i, requeue_idx, err;
struct sk_buff *skb;
grant_ref_t ref;
netif_rx_request_t *req;
+ unsigned int feature_rx_copy, feature_rx_flip;
+
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-copy", "%u", &feature_rx_copy);
+ if (err != 1)
+ feature_rx_copy = 0;
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-flip", "%u", &feature_rx_flip);
+ if (err != 1)
+ feature_rx_flip = 1;
+
+ /*
+ * Copy packets on receive path if:
+ * (a) This was requested by user, and the backend supports it; or
+ * (b) Flipping was requested, but this is unsupported by the backend.
+ */
+ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
+ (MODPARM_rx_flip && !feature_rx_flip));
+
+ err = talk_to_backend(np->xbdev, np);
+ if (err)
+ return err;
xennet_set_features(dev);
+ IPRINTK("device %s has %sing receive path.\n",
+ dev->name, np->copying_receiver ? "copy" : "flipp");
+
spin_lock_irq(&np->tx_lock);
spin_lock(&np->rx_lock);
@@ -1632,7 +1700,8 @@ static void network_connect(struct net_device *dev)
} else {
gnttab_grant_foreign_access_ref(
ref, np->xbdev->otherend_id,
- page_to_pfn(skb_shinfo(skb)->frags->page),
+ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
+ frags->page)),
0);
}
req->gref = ref;
@@ -1656,6 +1725,8 @@ static void network_connect(struct net_device *dev)
spin_unlock(&np->rx_lock);
spin_unlock_irq(&np->tx_lock);
+
+ return 0;
}
static void netif_uninit(struct net_device *dev)
@@ -1821,8 +1892,7 @@ static void network_set_multicast_list(struct net_device *dev)
{
}
-static struct net_device * __devinit
-create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
+static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
{
int i, err = 0;
struct net_device *netdev = NULL;
@@ -1836,9 +1906,7 @@ create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
}
np = netdev_priv(netdev);
- np->handle = handle;
np->xbdev = dev;
- np->copying_receiver = copying_receiver;
netif_carrier_off(netdev);
@@ -1969,10 +2037,12 @@ static int open_netdev(struct netfront_info *info)
err = xennet_sysfs_addif(info->netdev);
if (err) {
- /* This can be non-fatal: it only means no tuning parameters */
+ unregister_netdev(info->netdev);
printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
__FUNCTION__, err);
+ return err;
}
+
return 0;
}
@@ -2007,14 +2077,6 @@ static void netif_disconnect_backend(struct netfront_info *info)
}
-static void netif_free(struct netfront_info *info)
-{
- close_netdev(info);
- netif_disconnect_backend(info);
- free_netdev(info->netdev);
-}
-
-
static void end_access(int ref, void *page)
{
if (ref != GRANT_INVALID_REF)
@@ -2053,6 +2115,16 @@ static int __init netif_init(void)
if (!is_running_on_xen())
return -ENODEV;
+#ifdef CONFIG_XEN
+ if (MODPARM_rx_flip && MODPARM_rx_copy) {
+ WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
+ return -EINVAL;
+ }
+
+ if (!MODPARM_rx_flip && !MODPARM_rx_copy)
+ MODPARM_rx_flip = 1; /* Default is to flip. */
+#endif
+
if (is_initial_xendomain())
return 0;
@@ -2067,6 +2139,9 @@ module_init(netif_init);
static void __exit netif_exit(void)
{
+ if (is_initial_xendomain())
+ return;
+
unregister_inetaddr_notifier(&notifier_inetdev);
return xenbus_unregister_driver(&netfront);
diff --git a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
index a1c4b6f68e..d159e4ac74 100644
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
@@ -35,6 +35,10 @@
static struct proc_dir_entry *privcmd_intf;
static struct proc_dir_entry *capabilities_intf;
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
+#endif
+
static int privcmd_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long data)
{
@@ -49,6 +53,8 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
return -EFAULT;
#if defined(__i386__)
+ if (hypercall.op >= (PAGE_SIZE >> 5))
+ break;
__asm__ __volatile__ (
"pushl %%ebx; pushl %%ecx; pushl %%edx; "
"pushl %%esi; pushl %%edi; "
@@ -65,45 +71,36 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
"popl %%ecx; popl %%ebx"
: "=a" (ret) : "0" (&hypercall) : "memory" );
#elif defined (__x86_64__)
- {
+ if (hypercall.op < (PAGE_SIZE >> 5)) {
long ign1, ign2, ign3;
__asm__ __volatile__ (
"movq %8,%%r10; movq %9,%%r8;"
- "shlq $5,%%rax ;"
+ "shll $5,%%eax ;"
"addq $hypercall_page,%%rax ;"
"call *%%rax"
: "=a" (ret), "=D" (ign1),
"=S" (ign2), "=d" (ign3)
- : "0" ((unsigned long)hypercall.op),
- "1" ((unsigned long)hypercall.arg[0]),
- "2" ((unsigned long)hypercall.arg[1]),
- "3" ((unsigned long)hypercall.arg[2]),
- "g" ((unsigned long)hypercall.arg[3]),
- "g" ((unsigned long)hypercall.arg[4])
+ : "0" ((unsigned int)hypercall.op),
+ "1" (hypercall.arg[0]),
+ "2" (hypercall.arg[1]),
+ "3" (hypercall.arg[2]),
+ "g" (hypercall.arg[3]),
+ "g" (hypercall.arg[4])
: "r8", "r10", "memory" );
}
#elif defined (__ia64__)
- __asm__ __volatile__ (
- ";; mov r14=%2; mov r15=%3; "
- "mov r16=%4; mov r17=%5; mov r18=%6;"
- "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
- : "=r" (ret)
- : "r" (hypercall.op),
- "r" (hypercall.arg[0]),
- "r" (hypercall.arg[1]),
- "r" (hypercall.arg[2]),
- "r" (hypercall.arg[3]),
- "r" (hypercall.arg[4])
- : "r14","r15","r16","r17","r18","r2","r8","memory");
+ ret = privcmd_hypercall(&hypercall);
#endif
}
break;
case IOCTL_PRIVCMD_MMAP: {
-#define PRIVCMD_MMAP_SZ 32
privcmd_mmap_t mmapcmd;
- privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
+ privcmd_mmap_entry_t msg;
privcmd_mmap_entry_t __user *p;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long va;
int i, rc;
if (!is_initial_xendomain())
@@ -113,85 +110,92 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
return -EFAULT;
p = mmapcmd.entry;
+ if (copy_from_user(&msg, p, sizeof(msg)))
+ return -EFAULT;
- for (i = 0; i < mmapcmd.num;
- i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
- int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
- PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
-
- if (copy_from_user(&msg, p,
- n*sizeof(privcmd_mmap_entry_t)))
- return -EFAULT;
-
- for (j = 0; j < n; j++) {
- struct vm_area_struct *vma =
- find_vma( current->mm, msg[j].va );
-
- if (!vma)
- return -EINVAL;
-
- if (msg[j].va > PAGE_OFFSET)
- return -EINVAL;
-
- if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
- > vma->vm_end )
- return -EINVAL;
-
- if ((rc = direct_remap_pfn_range(
- vma,
- msg[j].va&PAGE_MASK,
- msg[j].mfn,
- msg[j].npages<<PAGE_SHIFT,
- vma->vm_page_prot,
- mmapcmd.dom)) < 0)
- return rc;
- }
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, msg.va);
+ rc = -EINVAL;
+ if (!vma || (msg.va != vma->vm_start) ||
+ !privcmd_enforce_singleshot_mapping(vma))
+ goto mmap_out;
+
+ va = vma->vm_start;
+
+ for (i = 0; i < mmapcmd.num; i++) {
+ rc = -EFAULT;
+ if (copy_from_user(&msg, p, sizeof(msg)))
+ goto mmap_out;
+
+ /* Do not allow range to wrap the address space. */
+ rc = -EINVAL;
+ if ((msg.npages > (LONG_MAX >> PAGE_SHIFT)) ||
+ ((unsigned long)(msg.npages << PAGE_SHIFT) >= -va))
+ goto mmap_out;
+
+ /* Range chunks must be contiguous in va space. */
+ if ((msg.va != va) ||
+ ((msg.va+(msg.npages<<PAGE_SHIFT)) > vma->vm_end))
+ goto mmap_out;
+
+ if ((rc = direct_remap_pfn_range(
+ vma,
+ msg.va & PAGE_MASK,
+ msg.mfn,
+ msg.npages << PAGE_SHIFT,
+ vma->vm_page_prot,
+ mmapcmd.dom)) < 0)
+ goto mmap_out;
+
+ p++;
+ va += msg.npages << PAGE_SHIFT;
}
- ret = 0;
+
+ rc = 0;
+
+ mmap_out:
+ up_read(&mm->mmap_sem);
+ ret = rc;
}
break;
case IOCTL_PRIVCMD_MMAPBATCH: {
privcmd_mmapbatch_t m;
- struct vm_area_struct *vma = NULL;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
xen_pfn_t __user *p;
- unsigned long addr, mfn;
+ unsigned long addr, mfn, nr_pages;
int i;
if (!is_initial_xendomain())
return -EPERM;
- if (copy_from_user(&m, udata, sizeof(m))) {
- ret = -EFAULT;
- goto batch_err;
- }
-
- if (m.dom == DOMID_SELF) {
- ret = -EINVAL;
- goto batch_err;
- }
+ if (copy_from_user(&m, udata, sizeof(m)))
+ return -EFAULT;
- vma = find_vma(current->mm, m.addr);
- if (!vma) {
- ret = -EINVAL;
- goto batch_err;
- }
+ nr_pages = m.num;
+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
+ return -EINVAL;
- if (m.addr > PAGE_OFFSET) {
- ret = -EFAULT;
- goto batch_err;
- }
+ down_read(&mm->mmap_sem);
- if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
- ret = -EFAULT;
- goto batch_err;
+ vma = find_vma(mm, m.addr);
+ if (!vma ||
+ (m.addr != vma->vm_start) ||
+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
+ !privcmd_enforce_singleshot_mapping(vma)) {
+ up_read(&mm->mmap_sem);
+ return -EINVAL;
}
p = m.arr;
addr = m.addr;
- for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
- if (get_user(mfn, p))
+ for (i = 0; i < nr_pages; i++, addr += PAGE_SIZE, p++) {
+ if (get_user(mfn, p)) {
+ up_read(&mm->mmap_sem);
return -EFAULT;
+ }
ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
mfn, PAGE_SIZE,
@@ -200,15 +204,8 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
put_user(0xF0000000 | mfn, p);
}
+ up_read(&mm->mmap_sem);
ret = 0;
- break;
-
- batch_err:
- printk("batch_err ret=%d vma=%p addr=%lx "
- "num=%d arr=%p %lx-%lx\n",
- ret, vma, (unsigned long)m.addr, m.num, m.arr,
- vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
- break;
}
break;
@@ -221,13 +218,35 @@ static int privcmd_ioctl(struct inode *inode, struct file *file,
}
#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static struct page *privcmd_nopage(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ return NOPAGE_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+ .nopage = privcmd_nopage
+};
+
static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
{
+ /* Unsupported for auto-translate guests. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
/* DONTCOPY is essential for Xen as copy_page_range is broken. */
vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+ vma->vm_ops = &privcmd_vm_ops;
+ vma->vm_private_data = NULL;
return 0;
}
+
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+{
+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+}
#endif
static struct file_operations privcmd_file_ops = {
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
index 27b8fd283a..b209b4f583 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/common.h
@@ -46,11 +46,10 @@ typedef struct tpmif_st {
atomic_t refcnt;
struct backend_info *bi;
- unsigned long mmap_vstart;
grant_handle_t shmem_handle;
grant_ref_t shmem_ref;
- struct page *pagerange;
+ struct page **mmap_pages;
char devname[20];
} tpmif_t;
@@ -80,6 +79,9 @@ int vtpm_release_packets(tpmif_t * tpmif, int send_msgs);
extern int num_frontends;
-#define MMAP_VADDR(t,_req) ((t)->mmap_vstart + ((_req) * PAGE_SIZE))
+static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx)
+{
+ return (unsigned long)pfn_to_kaddr(page_to_pfn(t->mmap_pages[idx]));
+}
#endif /* __TPMIF__BACKEND__COMMON_H__ */
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
index 0105bd93bf..2614aa5126 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c
@@ -25,8 +25,8 @@ static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
tpmif_t *tpmif;
tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
- if (!tpmif)
- return ERR_PTR(-ENOMEM);
+ if (tpmif == NULL)
+ goto out_of_memory;
memset(tpmif, 0, sizeof (*tpmif));
tpmif->domid = domid;
@@ -35,22 +35,27 @@ static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid);
atomic_set(&tpmif->refcnt, 1);
- tpmif->pagerange = balloon_alloc_empty_page_range(TPMIF_TX_RING_SIZE);
- BUG_ON(tpmif->pagerange == NULL);
- tpmif->mmap_vstart = (unsigned long)pfn_to_kaddr(
- page_to_pfn(tpmif->pagerange));
+ tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE);
+ if (tpmif->mmap_pages == NULL)
+ goto out_of_memory;
list_add(&tpmif->tpmif_list, &tpmif_list);
num_frontends++;
return tpmif;
+
+ out_of_memory:
+ if (tpmif != NULL)
+ kmem_cache_free(tpmif_cachep, tpmif);
+ printk("%s: out of memory\n", __FUNCTION__);
+ return ERR_PTR(-ENOMEM);
}
static void free_tpmif(tpmif_t * tpmif)
{
num_frontends--;
list_del(&tpmif->tpmif_list);
- balloon_dealloc_empty_page_range(tpmif->pagerange, TPMIF_TX_RING_SIZE);
+ free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE);
kmem_cache_free(tpmif_cachep, tpmif);
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
index 466c3ee581..701a5ad03e 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c
@@ -253,7 +253,7 @@ int _packet_write(struct packet *pak,
return 0;
}
- gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, tx->ref, tpmif->domid);
if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
@@ -270,7 +270,7 @@ int _packet_write(struct packet *pak,
tocopy = min_t(size_t, size - offset, PAGE_SIZE);
- if (copy_from_buffer((void *)(MMAP_VADDR(tpmif, i) |
+ if (copy_from_buffer((void *)(idx_to_kaddr(tpmif, i) |
(tx->addr & ~PAGE_MASK)),
&data[offset], tocopy, isuserbuffer)) {
tpmif_put(tpmif);
@@ -278,7 +278,7 @@ int _packet_write(struct packet *pak,
}
tx->size = tocopy;
- gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, handle);
if (unlikely
@@ -391,7 +391,7 @@ static int packet_read_shmem(struct packet *pak,
tx = &tpmif->tx->ring[i].req;
- gnttab_set_map_op(&map_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, tx->ref, tpmif->domid);
if (unlikely(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
@@ -414,10 +414,10 @@ static int packet_read_shmem(struct packet *pak,
}
DPRINTK("Copying from mapped memory at %08lx\n",
- (unsigned long)(MMAP_VADDR(tpmif, i) |
+ (unsigned long)(idx_to_kaddr(tpmif, i) |
(tx->addr & ~PAGE_MASK)));
- src = (void *)(MMAP_VADDR(tpmif, i) |
+ src = (void *)(idx_to_kaddr(tpmif, i) |
((tx->addr & ~PAGE_MASK) + pg_offset));
if (copy_to_buffer(&buffer[offset],
src, to_copy, isuserbuffer)) {
@@ -428,7 +428,7 @@ static int packet_read_shmem(struct packet *pak,
tpmif->domid, buffer[offset], buffer[offset + 1],
buffer[offset + 2], buffer[offset + 3]);
- gnttab_set_unmap_op(&unmap_op, MMAP_VADDR(tpmif, i),
+ gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
GNTMAP_host_map, handle);
if (unlikely
diff --git a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
index 4ee5c5bbfe..f48b0e3726 100644
--- a/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
+++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/xenbus.c
@@ -157,10 +157,12 @@ static void frontend_changed(struct xenbus_device *dev,
case XenbusStateClosing:
be->instance = -1;
+ xenbus_switch_state(dev, XenbusStateClosing);
break;
- case XenbusStateUnknown:
+ case XenbusStateUnknown: /* keep it here */
case XenbusStateClosed:
+ xenbus_switch_state(dev, XenbusStateClosed);
device_unregister(&be->dev->dev);
tpmback_remove(dev);
break;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
index d7c7d05172..ce5acc2457 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/Makefile
@@ -9,4 +9,5 @@ xenbus-objs += xenbus_client.o
xenbus-objs += xenbus_comms.o
xenbus-objs += xenbus_xs.o
xenbus-objs += xenbus_probe.o
+obj-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
index 9b389ec06b..0111e8e3a2 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_client.c
@@ -35,8 +35,9 @@
#include <xen/xenbus.h>
#include <xen/driver_util.h>
-/* xenbus_probe.c */
-extern char *kasprintf(const char *fmt, ...);
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
#define DPRINTK(fmt, args...) \
pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
@@ -84,7 +85,7 @@ int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
const char **, unsigned int))
{
int err;
- char *state = kasprintf("%s/%s", path, path2);
+ char *state = kasprintf(GFP_KERNEL, "%s/%s", path, path2);
if (!state) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
return -ENOMEM;
@@ -152,7 +153,7 @@ EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
*/
static char *error_path(struct xenbus_device *dev)
{
- return kasprintf("error/%s", dev->nodename);
+ return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
index 38da320b67..f0e42ba715 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.c
@@ -30,15 +30,22 @@
* IN THE SOFTWARE.
*/
-#include <asm/hypervisor.h>
-#include <xen/evtchn.h>
#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/err.h>
+#include <linux/ptrace.h>
+#include <xen/evtchn.h>
#include <xen/xenbus.h>
+
+#include <asm/hypervisor.h>
+
#include "xenbus_comms.h"
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
static int xenbus_irq;
extern void xenbus_probe(void *);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
index bbe4a8c5a8..ba37e61856 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c
@@ -40,6 +40,7 @@
#include <linux/wait.h>
#include <linux/fs.h>
#include <linux/poll.h>
+#include <linux/mutex.h>
#include "xenbus_comms.h"
@@ -49,6 +50,10 @@
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
struct xenbus_dev_transaction {
struct list_head list;
struct xenbus_transaction handle;
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
index bcd1f6df06..5320368443 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
@@ -42,6 +42,7 @@
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
+#include <linux/mutex.h>
#include <asm/io.h>
#include <asm/page.h>
@@ -55,6 +56,11 @@
#include <xen/hvm.h>
#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
int xen_store_evtchn;
struct xenstore_domain_interface *xen_store_interface;
@@ -67,12 +73,7 @@ static struct notifier_block *xenstore_chain;
static void wait_for_devices(struct xenbus_driver *xendrv);
static int xenbus_probe_frontend(const char *type, const char *name);
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size);
-static int xenbus_probe_backend(const char *type, const char *domid);
-static int xenbus_dev_probe(struct device *_dev);
-static int xenbus_dev_remove(struct device *_dev);
static void xenbus_dev_shutdown(struct device *_dev);
/* If something in array of ids matches this device, return it. */
@@ -86,7 +87,7 @@ match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
return NULL;
}
-static int xenbus_match(struct device *_dev, struct device_driver *_drv)
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
{
struct xenbus_driver *drv = to_xenbus_driver(_drv);
@@ -96,17 +97,6 @@ static int xenbus_match(struct device *_dev, struct device_driver *_drv)
return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
}
-struct xen_bus_type
-{
- char *root;
- unsigned int levels;
- int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
- int (*probe)(const char *type, const char *dir);
- struct bus_type bus;
- struct device dev;
-};
-
-
/* device/<type>/<id> => <type>-<id> */
static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
{
@@ -143,7 +133,7 @@ static void free_otherend_watch(struct xenbus_device *dev)
}
-static int read_otherend_details(struct xenbus_device *xendev,
+int read_otherend_details(struct xenbus_device *xendev,
char *id_node, char *path_node)
{
int err = xenbus_gather(XBT_NIL, xendev->nodename,
@@ -176,12 +166,6 @@ static int read_backend_details(struct xenbus_device *xendev)
}
-static int read_frontend_details(struct xenbus_device *xendev)
-{
- return read_otherend_details(xendev, "frontend-id", "frontend");
-}
-
-
/* Bus type for frontend drivers. */
static struct xen_bus_type xenbus_frontend = {
.root = "device",
@@ -191,115 +175,17 @@ static struct xen_bus_type xenbus_frontend = {
.bus = {
.name = "xen",
.match = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
.shutdown = xenbus_dev_shutdown,
+#endif
},
.dev = {
.bus_id = "xen",
},
};
-/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
-static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
-{
- int domid, err;
- const char *devid, *type, *frontend;
- unsigned int typelen;
-
- type = strchr(nodename, '/');
- if (!type)
- return -EINVAL;
- type++;
- typelen = strcspn(type, "/");
- if (!typelen || type[typelen] != '/')
- return -EINVAL;
-
- devid = strrchr(nodename, '/') + 1;
-
- err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
- "frontend", NULL, &frontend,
- NULL);
- if (err)
- return err;
- if (strlen(frontend) == 0)
- err = -ERANGE;
- if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
- err = -ENOENT;
-
- kfree(frontend);
-
- if (err)
- return err;
-
- if (snprintf(bus_id, BUS_ID_SIZE,
- "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
- return -ENOSPC;
- return 0;
-}
-
-static struct xen_bus_type xenbus_backend = {
- .root = "backend",
- .levels = 3, /* backend/type/<frontend>/<id> */
- .get_bus_id = backend_bus_id,
- .probe = xenbus_probe_backend,
- .bus = {
- .name = "xen-backend",
- .match = xenbus_match,
- .probe = xenbus_dev_probe,
- .remove = xenbus_dev_remove,
-// .shutdown = xenbus_dev_shutdown,
- .uevent = xenbus_uevent_backend,
- },
- .dev = {
- .bus_id = "xen-backend",
- },
-};
-
-static int xenbus_uevent_backend(struct device *dev, char **envp,
- int num_envp, char *buffer, int buffer_size)
-{
- struct xenbus_device *xdev;
- struct xenbus_driver *drv;
- int i = 0;
- int length = 0;
-
- DPRINTK("");
-
- if (dev == NULL)
- return -ENODEV;
-
- xdev = to_xenbus_device(dev);
- if (xdev == NULL)
- return -ENODEV;
-
- /* stuff we want to pass to /sbin/hotplug */
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_TYPE=%s", xdev->devicetype);
-
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_PATH=%s", xdev->nodename);
-
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
- "XENBUS_BASE_PATH=%s", xenbus_backend.root);
-
- /* terminate, set to next free slot, shrink available space */
- envp[i] = NULL;
- envp = &envp[i];
- num_envp -= i;
- buffer = &buffer[length];
- buffer_size -= length;
-
- if (dev->driver) {
- drv = to_xenbus_driver(dev->driver);
- if (drv && drv->uevent)
- return drv->uevent(xdev, envp, num_envp, buffer,
- buffer_size);
- }
-
- return 0;
-}
-
static void otherend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
@@ -322,6 +208,20 @@ static void otherend_changed(struct xenbus_watch *watch,
DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
dev->otherend_watch.node, vec[XS_WATCH_PATH]);
+ /*
+ * Ignore xenbus transitions during shutdown. This prevents us doing
+ * work that can fail e.g., when the rootfs is gone.
+ */
+ if (system_state > SYSTEM_RUNNING) {
+ struct xen_bus_type *bus = bus;
+ bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+ /* If we're frontend, drive the state machine to Closed. */
+ /* This should cause the backend to release our resources. */
+ if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+ xenbus_frontend_closed(dev);
+ return;
+ }
+
if (drv->otherend_changed)
drv->otherend_changed(dev, state);
}
@@ -345,7 +245,7 @@ static int watch_otherend(struct xenbus_device *dev)
}
-static int xenbus_dev_probe(struct device *_dev)
+int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -392,7 +292,7 @@ fail:
return -ENODEV;
}
-static int xenbus_dev_remove(struct device *_dev)
+int xenbus_dev_remove(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
@@ -430,14 +330,21 @@ static void xenbus_dev_shutdown(struct device *_dev)
put_device(&dev->dev);
}
-static int xenbus_register_driver_common(struct xenbus_driver *drv,
- struct xen_bus_type *bus)
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus)
{
int ret;
drv->driver.name = drv->name;
drv->driver.bus = &bus->bus;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
drv->driver.owner = drv->owner;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+ drv->driver.probe = xenbus_dev_probe;
+ drv->driver.remove = xenbus_dev_remove;
+ drv->driver.shutdown = xenbus_dev_shutdown;
+#endif
mutex_lock(&xenwatch_mutex);
ret = driver_register(&drv->driver);
@@ -462,14 +369,6 @@ int xenbus_register_frontend(struct xenbus_driver *drv)
}
EXPORT_SYMBOL_GPL(xenbus_register_frontend);
-int xenbus_register_backend(struct xenbus_driver *drv)
-{
- drv->read_otherend_details = read_frontend_details;
-
- return xenbus_register_driver_common(drv, &xenbus_backend);
-}
-EXPORT_SYMBOL_GPL(xenbus_register_backend);
-
void xenbus_unregister_driver(struct xenbus_driver *drv)
{
driver_unregister(&drv->driver);
@@ -545,45 +444,30 @@ static void xenbus_dev_release(struct device *dev)
kfree(to_xenbus_device(dev));
}
-/* Simplified asprintf. */
-char *kasprintf(const char *fmt, ...)
-{
- va_list ap;
- unsigned int len;
- char *p, dummy[1];
-
- va_start(ap, fmt);
- /* FIXME: vsnprintf has a bug, NULL should work */
- len = vsnprintf(dummy, 0, fmt, ap);
- va_end(ap);
-
- p = kmalloc(len + 1, GFP_KERNEL);
- if (!p)
- return NULL;
- va_start(ap, fmt);
- vsprintf(p, fmt, ap);
- va_end(ap);
- return p;
-}
-
static ssize_t xendev_show_nodename(struct device *dev,
- struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+ struct device_attribute *attr,
+#endif
+ char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
}
DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
static ssize_t xendev_show_devtype(struct device *dev,
- struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+ struct device_attribute *attr,
+#endif
+ char *buf)
{
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
}
DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
-static int xenbus_probe_node(struct xen_bus_type *bus,
- const char *type,
- const char *nodename)
+int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename)
{
int err;
struct xenbus_device *xendev;
@@ -642,7 +526,7 @@ static int xenbus_probe_frontend(const char *type, const char *name)
char *nodename;
int err;
- nodename = kasprintf("%s/%s/%s", xenbus_frontend.root, type, name);
+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
if (!nodename)
return -ENOMEM;
@@ -653,55 +537,6 @@ static int xenbus_probe_frontend(const char *type, const char *name)
return err;
}
-/* backend/<typename>/<frontend-uuid>/<name> */
-static int xenbus_probe_backend_unit(const char *dir,
- const char *type,
- const char *name)
-{
- char *nodename;
- int err;
-
- nodename = kasprintf("%s/%s", dir, name);
- if (!nodename)
- return -ENOMEM;
-
- DPRINTK("%s\n", nodename);
-
- err = xenbus_probe_node(&xenbus_backend, type, nodename);
- kfree(nodename);
- return err;
-}
-
-/* backend/<typename>/<frontend-domid> */
-static int xenbus_probe_backend(const char *type, const char *domid)
-{
- char *nodename;
- int err = 0;
- char **dir;
- unsigned int i, dir_n = 0;
-
- DPRINTK("");
-
- nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
- if (!nodename)
- return -ENOMEM;
-
- dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
- if (IS_ERR(dir)) {
- kfree(nodename);
- return PTR_ERR(dir);
- }
-
- for (i = 0; i < dir_n; i++) {
- err = xenbus_probe_backend_unit(nodename, type, dir[i]);
- if (err)
- break;
- }
- kfree(dir);
- kfree(nodename);
- return err;
-}
-
static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
{
int err = 0;
@@ -722,7 +557,7 @@ static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
return err;
}
-static int xenbus_probe_devices(struct xen_bus_type *bus)
+int xenbus_probe_devices(struct xen_bus_type *bus)
{
int err = 0;
char **dir;
@@ -764,7 +599,7 @@ static int strsep_len(const char *str, char c, unsigned int len)
return (len == 0) ? i : -ERANGE;
}
-static void dev_changed(const char *node, struct xen_bus_type *bus)
+void dev_changed(const char *node, struct xen_bus_type *bus)
{
int exists, rootlen;
struct xenbus_device *dev;
@@ -788,7 +623,7 @@ static void dev_changed(const char *node, struct xen_bus_type *bus)
rootlen = strsep_len(node, '/', bus->levels);
if (rootlen < 0)
return;
- root = kasprintf("%.*s", rootlen, node);
+ root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
if (!root)
return;
@@ -809,25 +644,12 @@ static void frontend_changed(struct xenbus_watch *watch,
dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
}
-static void backend_changed(struct xenbus_watch *watch,
- const char **vec, unsigned int len)
-{
- DPRINTK("");
-
- dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
-}
-
/* We watch for devices appearing and vanishing. */
static struct xenbus_watch fe_watch = {
.node = "device",
.callback = frontend_changed,
};
-static struct xenbus_watch be_watch = {
- .node = "backend",
- .callback = backend_changed,
-};
-
static int suspend_dev(struct device *dev, void *data)
{
int err = 0;
@@ -898,7 +720,7 @@ void xenbus_suspend(void)
DPRINTK("");
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
- bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, suspend_dev);
+ xenbus_backend_suspend(suspend_dev);
xs_suspend();
}
EXPORT_SYMBOL_GPL(xenbus_suspend);
@@ -908,7 +730,7 @@ void xenbus_resume(void)
xb_init_comms();
xs_resume();
bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
- bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, resume_dev);
+ xenbus_backend_resume(resume_dev);
}
EXPORT_SYMBOL_GPL(xenbus_resume);
@@ -941,20 +763,17 @@ void xenbus_probe(void *unused)
{
BUG_ON((xenstored_ready <= 0));
- /* Enumerate devices in xenstore. */
+ /* Enumerate devices in xenstore and watch for changes. */
xenbus_probe_devices(&xenbus_frontend);
- xenbus_probe_devices(&xenbus_backend);
-
- /* Watch for changes. */
register_xenbus_watch(&fe_watch);
- register_xenbus_watch(&be_watch);
+ xenbus_backend_probe_and_watch();
/* Notify others that xenstore is up */
notifier_call_chain(&xenstore_chain, 0, NULL);
}
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
static struct file_operations xsd_kva_fops;
static struct proc_dir_entry *xsd_kva_intf;
static struct proc_dir_entry *xsd_port_intf;
@@ -1006,7 +825,7 @@ static int __init xenbus_probe_init(void)
/* Register ourselves with the kernel bus subsystem */
bus_register(&xenbus_frontend.bus);
- bus_register(&xenbus_backend.bus);
+ xenbus_backend_bus_register();
/*
* Domain0 doesn't have a store_evtchn or store_mfn yet.
@@ -1035,7 +854,7 @@ static int __init xenbus_probe_init(void)
xen_store_evtchn = xen_start_info->store_evtchn =
alloc_unbound.port;
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
/* And finally publish the above info in /proc/xen */
xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
if (xsd_kva_intf) {
@@ -1077,7 +896,7 @@ static int __init xenbus_probe_init(void)
/* Register ourselves with the kernel device subsystem */
device_register(&xenbus_frontend.dev);
- device_register(&xenbus_backend.dev);
+ xenbus_backend_device_register();
if (!is_initial_xendomain())
xenbus_probe(NULL);
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 0000000000..2d2e567826
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,74 @@
+/******************************************************************************
+ * xenbus_probe.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_PROBE_H
+#define _XENBUS_PROBE_H
+
+#ifdef CONFIG_XEN_BACKEND
+extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
+extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
+extern void xenbus_backend_probe_and_watch(void);
+extern void xenbus_backend_bus_register(void);
+extern void xenbus_backend_device_register(void);
+#else
+static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_probe_and_watch(void) {}
+static inline void xenbus_backend_bus_register(void) {}
+static inline void xenbus_backend_device_register(void) {}
+#endif
+
+struct xen_bus_type
+{
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+ struct device dev;
+};
+
+extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
+extern int xenbus_dev_probe(struct device *_dev);
+extern int xenbus_dev_remove(struct device *_dev);
+extern int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus);
+extern int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename);
+extern int xenbus_probe_devices(struct xen_bus_type *bus);
+
+extern void dev_changed(const char *node, struct xen_bus_type *bus);
+
+#endif
+
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
new file mode 100644
index 0000000000..934e79732d
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -0,0 +1,271 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have (backend half).
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define DPRINTK(fmt, args...) \
+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
+ __FUNCTION__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/maddr.h>
+#include <asm/pgtable.h>
+#include <asm/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/xen_proc.h>
+#include <xen/evtchn.h>
+#include <xen/features.h>
+#include <xen/hvm.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size);
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+extern int read_otherend_details(struct xenbus_device *xendev,
+ char *id_node, char *path_node);
+
+static int read_frontend_details(struct xenbus_device *xendev)
+{
+ return read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ int domid, err;
+ const char *devid, *type, *frontend;
+ unsigned int typelen;
+
+ type = strchr(nodename, '/');
+ if (!type)
+ return -EINVAL;
+ type++;
+ typelen = strcspn(type, "/");
+ if (!typelen || type[typelen] != '/')
+ return -EINVAL;
+
+ devid = strrchr(nodename, '/') + 1;
+
+ err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
+ "frontend", NULL, &frontend,
+ NULL);
+ if (err)
+ return err;
+ if (strlen(frontend) == 0)
+ err = -ERANGE;
+ if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
+ err = -ENOENT;
+ kfree(frontend);
+
+ if (err)
+ return err;
+
+ if (snprintf(bus_id, BUS_ID_SIZE,
+ "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+ return -ENOSPC;
+ return 0;
+}
+
+static struct xen_bus_type xenbus_backend = {
+ .root = "backend",
+ .levels = 3, /* backend/type/<frontend>/<id> */
+ .get_bus_id = backend_bus_id,
+ .probe = xenbus_probe_backend,
+ .bus = {
+ .name = "xen-backend",
+ .match = xenbus_match,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+// .shutdown = xenbus_dev_shutdown,
+ .uevent = xenbus_uevent_backend,
+ },
+ .dev = {
+ .bus_id = "xen-backend",
+ },
+};
+
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size)
+{
+ struct xenbus_device *xdev;
+ struct xenbus_driver *drv;
+ int i = 0;
+ int length = 0;
+
+ DPRINTK("");
+
+ if (dev == NULL)
+ return -ENODEV;
+
+ xdev = to_xenbus_device(dev);
+ if (xdev == NULL)
+ return -ENODEV;
+
+ /* stuff we want to pass to /sbin/hotplug */
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_TYPE=%s", xdev->devicetype);
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_PATH=%s", xdev->nodename);
+
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+ "XENBUS_BASE_PATH=%s", xenbus_backend.root);
+
+ /* terminate, set to next free slot, shrink available space */
+ envp[i] = NULL;
+ envp = &envp[i];
+ num_envp -= i;
+ buffer = &buffer[length];
+ buffer_size -= length;
+
+ if (dev->driver) {
+ drv = to_xenbus_driver(dev->driver);
+ if (drv && drv->uevent)
+ return drv->uevent(xdev, envp, num_envp, buffer,
+ buffer_size);
+ }
+
+ return 0;
+}
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+ drv->read_otherend_details = read_frontend_details;
+
+ return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
+
+/* backend/<typename>/<frontend-uuid>/<name> */
+static int xenbus_probe_backend_unit(const char *dir,
+ const char *type,
+ const char *name)
+{
+ char *nodename;
+ int err;
+
+ nodename = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
+ if (!nodename)
+ return -ENOMEM;
+
+ DPRINTK("%s\n", nodename);
+
+ err = xenbus_probe_node(&xenbus_backend, type, nodename);
+ kfree(nodename);
+ return err;
+}
+
+/* backend/<typename>/<frontend-domid> */
+static int xenbus_probe_backend(const char *type, const char *domid)
+{
+ char *nodename;
+ int err = 0;
+ char **dir;
+ unsigned int i, dir_n = 0;
+
+ DPRINTK("");
+
+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid);
+ if (!nodename)
+ return -ENOMEM;
+
+ dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
+ if (IS_ERR(dir)) {
+ kfree(nodename);
+ return PTR_ERR(dir);
+ }
+
+ for (i = 0; i < dir_n; i++) {
+ err = xenbus_probe_backend_unit(nodename, type, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ kfree(nodename);
+ return err;
+}
+
+static void backend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ DPRINTK("");
+
+ dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
+}
+
+static struct xenbus_watch be_watch = {
+ .node = "backend",
+ .callback = backend_changed,
+};
+
+void xenbus_backend_suspend(int (*fn)(struct device *, void *))
+{
+ DPRINTK("");
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_resume(int (*fn)(struct device *, void *))
+{
+ DPRINTK("");
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_probe_and_watch(void)
+{
+ xenbus_probe_devices(&xenbus_backend);
+ register_xenbus_watch(&be_watch);
+}
+
+void xenbus_backend_bus_register(void)
+{
+ bus_register(&xenbus_backend.bus);
+}
+
+void xenbus_backend_device_register(void)
+{
+ device_register(&xenbus_backend.dev);
+}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
index 190fa1e794..4c5052d13a 100644
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
@@ -42,11 +42,14 @@
#include <linux/fcntl.h>
#include <linux/kthread.h>
#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
#include <xen/xenbus.h>
#include "xenbus_comms.h"
-/* xenbus_probe.c */
-extern char *kasprintf(const char *fmt, ...);
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
struct xs_stored_msg {
struct list_head list;
@@ -289,9 +292,9 @@ static char *join(const char *dir, const char *name)
char *buffer;
if (strlen(name) == 0)
- buffer = kasprintf("%s", dir);
+ buffer = kasprintf(GFP_KERNEL, "%s", dir);
else
- buffer = kasprintf("%s/%s", dir, name);
+ buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
}
diff --git a/linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c b/linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c
new file mode 100644
index 0000000000..382a50f647
--- /dev/null
+++ b/linux-2.6-xen-sparse/drivers/xen/xenoprof/xenoprofile.c
@@ -0,0 +1,500 @@
+/**
+ * @file xenoprofile.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon and Jose Renato Santos for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
+ * Separated out arch-generic part
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <xen/evtchn.h>
+#include <xen/xenoprof.h>
+#include <xen/driver_util.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/xenoprof.h>
+#include "../../../drivers/oprofile/cpu_buffer.h"
+#include "../../../drivers/oprofile/event_buffer.h"
+
+#define MAX_XENOPROF_SAMPLES 16
+
+/* sample buffers shared with Xen */
+xenoprof_buf_t * xenoprof_buf[MAX_VIRT_CPUS];
+/* Shared buffer area */
+struct xenoprof_shared_buffer shared_buffer;
+
+/* Passive sample buffers shared with Xen */
+xenoprof_buf_t *p_xenoprof_buf[MAX_OPROF_DOMAINS][MAX_VIRT_CPUS];
+/* Passive shared buffer area */
+struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS];
+
+static int xenoprof_start(void);
+static void xenoprof_stop(void);
+
+static int xenoprof_enabled = 0;
+static int xenoprof_is_primary = 0;
+static int active_defined;
+
+/* Number of buffers in shared area (one per VCPU) */
+int nbuf;
+/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
+int ovf_irq[NR_CPUS];
+/* cpu model type string - copied from Xen memory space on XENOPROF_init command */
+char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+
+#ifdef CONFIG_PM
+
+static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+{
+ if (xenoprof_enabled == 1)
+ xenoprof_stop();
+ return 0;
+}
+
+
+static int xenoprof_resume(struct sys_device * dev)
+{
+ if (xenoprof_enabled == 1)
+ xenoprof_start();
+ return 0;
+}
+
+
/* sysdev class providing the suspend/resume callbacks above; appears as
 * /sys/devices/system/oprofile. */
static struct sysdev_class oprofile_sysclass = {
	set_kset_name("oprofile"),
	.resume = xenoprof_resume,
	.suspend = xenoprof_suspend
};
+
+
/* Single dummy sysdev instance of the class above, registered only so the
 * PM core invokes our suspend/resume hooks. */
static struct sys_device device_oprofile = {
	.id	= 0,
	.cls	= &oprofile_sysclass,
};
+
+
+static int __init init_driverfs(void)
+{
+ int error;
+ if (!(error = sysdev_class_register(&oprofile_sysclass)))
+ error = sysdev_register(&device_oprofile);
+ return error;
+}
+
+
/* Undo init_driverfs(): the device must be unregistered before its class. */
static void exit_driverfs(void)
{
	sysdev_unregister(&device_oprofile);
	sysdev_class_unregister(&oprofile_sysclass);
}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+unsigned long long p_oprofile_samples = 0;
+
+unsigned int pdomains;
+struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
+
+static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
+{
+ int head, tail, size;
+
+ head = buf->event_head;
+ tail = buf->event_tail;
+ size = buf->event_size;
+
+ if (tail > head) {
+ while (tail < size) {
+ oprofile_add_pc(buf->event_log[tail].eip,
+ buf->event_log[tail].mode,
+ buf->event_log[tail].event);
+ if (!is_passive)
+ oprofile_samples++;
+ else
+ p_oprofile_samples++;
+ tail++;
+ }
+ tail = 0;
+ }
+ while (tail < head) {
+ oprofile_add_pc(buf->event_log[tail].eip,
+ buf->event_log[tail].mode,
+ buf->event_log[tail].event);
+ if (!is_passive)
+ oprofile_samples++;
+ else
+ p_oprofile_samples++;
+ tail++;
+ }
+
+ buf->event_tail = tail;
+}
+
/* Drain the sample buffers of all passively-profiled domains.  Emits an
 * oprofile domain-switch record before the first sample of each domain,
 * and a final switch back to COORDINATOR_DOMAIN if anything was emitted. */
static void xenoprof_handle_passive(void)
{
	int i, j;
	int flag_domain, flag_switch = 0;

	for (i = 0; i < pdomains; i++) {
		flag_domain = 0;
		for (j = 0; j < passive_domains[i].nbuf; j++) {
			xenoprof_buf_t *buf = p_xenoprof_buf[i][j];
			/* Skip empty per-VCPU buffers. */
			if (buf->event_head == buf->event_tail)
				continue;
			if (!flag_domain) {
				/* Announce the domain once, before its first
				 * sample; bail out entirely if the event
				 * buffer refuses the switch record. */
				if (!oprofile_add_domain_switch(passive_domains[i].
								domain_id))
					goto done;
				flag_domain = 1;
			}
			xenoprof_add_pc(buf, 1);
			flag_switch = 1;
		}
	}
done:
	if (flag_switch)
		oprofile_add_domain_switch(COORDINATOR_DOMAIN);
}
+
/* VIRQ_XENOPROF overflow handler: drain this CPU's sample buffer.  On the
 * primary (dom0) instance, additionally drain the passive domains' buffers,
 * serialised by a test_and_set_bit spin-flag so only one CPU does it at a
 * time (others simply skip the passive work). */
static irqreturn_t
xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
{
	struct xenoprof_buf * buf;
	int cpu;
	static unsigned long flag;

	cpu = smp_processor_id();
	buf = xenoprof_buf[cpu];

	xenoprof_add_pc(buf, 0);

	if (xenoprof_is_primary && !test_and_set_bit(0, &flag)) {
		xenoprof_handle_passive();
		smp_mb__before_clear_bit();
		clear_bit(0, &flag);
	}

	return IRQ_HANDLED;
}
+
+
+static void unbind_virq(void)
+{
+ int i;
+
+ for_each_online_cpu(i) {
+ if (ovf_irq[i] >= 0) {
+ unbind_from_irqhandler(ovf_irq[i], NULL);
+ ovf_irq[i] = -1;
+ }
+ }
+}
+
+
+static int bind_virq(void)
+{
+ int i, result;
+
+ for_each_online_cpu(i) {
+ result = bind_virq_to_irqhandler(VIRQ_XENOPROF,
+ i,
+ xenoprof_ovf_interrupt,
+ SA_INTERRUPT,
+ "xenoprof",
+ NULL);
+
+ if (result < 0) {
+ unbind_virq();
+ return result;
+ }
+
+ ovf_irq[i] = result;
+ }
+
+ return 0;
+}
+
+
+static void unmap_passive_list(void)
+{
+ int i;
+ for (i = 0; i < pdomains; i++)
+ xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
+ pdomains = 0;
+}
+
+
+static int map_xenoprof_buffer(int max_samples)
+{
+ struct xenoprof_get_buffer get_buffer;
+ struct xenoprof_buf *buf;
+ int ret, i;
+
+ if ( shared_buffer.buffer )
+ return 0;
+
+ get_buffer.max_samples = max_samples;
+ ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer);
+ if (ret)
+ return ret;
+ nbuf = get_buffer.nbuf;
+
+ for (i=0; i< nbuf; i++) {
+ buf = (struct xenoprof_buf*)
+ &shared_buffer.buffer[i * get_buffer.bufsize];
+ BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
+ xenoprof_buf[buf->vcpu_id] = buf;
+ }
+
+ return 0;
+}
+
+
+static int xenoprof_setup(void)
+{
+ int ret;
+
+ if ( (ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES)) )
+ return ret;
+
+ if ( (ret = bind_virq()) )
+ return ret;
+
+ if (xenoprof_is_primary) {
+ /* Define dom0 as an active domain if not done yet */
+ if (!active_defined) {
+ domid_t domid;
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
+ if (ret)
+ goto err;
+ domid = 0;
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
+ if (ret)
+ goto err;
+ active_defined = 1;
+ }
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
+ if (ret)
+ goto err;
+ xenoprof_arch_counter();
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
+
+ if (ret)
+ goto err;
+ }
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
+ if (ret)
+ goto err;
+
+ xenoprof_enabled = 1;
+ return 0;
+ err:
+ unbind_virq();
+ return ret;
+}
+
+
/* oprofile "shutdown" hook: the inverse of xenoprof_setup().  Order
 * matters: disable the VIRQ source in Xen first, release the hardware
 * counters, then unbind the irq handlers before unmapping the buffers
 * they reference. */
static void xenoprof_shutdown(void)
{
	xenoprof_enabled = 0;

	HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL);

	if (xenoprof_is_primary) {
		HYPERVISOR_xenoprof_op(XENOPROF_release_counters, NULL);
		/* Force the active list to be redefined on the next setup. */
		active_defined = 0;
	}

	unbind_virq();

	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
	if (xenoprof_is_primary)
		unmap_passive_list();
}
+
+
+static int xenoprof_start(void)
+{
+ int ret = 0;
+
+ if (xenoprof_is_primary)
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
+ if (!ret)
+ xenoprof_arch_start();
+ return ret;
+}
+
+
/* oprofile "stop" hook: tell Xen to stop sampling (primary domain only),
 * then stop the local arch machinery unconditionally. */
static void xenoprof_stop(void)
{
	if (xenoprof_is_primary)
		HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL);
	xenoprof_arch_stop();
}
+
+
/* oprofile hook: replace the set of actively-profiled domains with the
 * given list.  Only meaningful on the primary (dom0) instance; silently
 * succeeds elsewhere.  Dom0 is always (re)added even if absent from the
 * caller's list.  On any failure the active list in Xen is reset.
 * Returns 0, -E2BIG (too many domains), -EINVAL (id out of domid_t
 * range), or a hypercall error. */
static int xenoprof_set_active(int * active_domains,
			  unsigned int adomains)
{
	int ret = 0;
	int i;
	int set_dom0 = 0;
	domid_t domid;

	if (!xenoprof_is_primary)
		return 0;

	if (adomains > MAX_OPROF_DOMAINS)
		return -E2BIG;

	ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
	if (ret)
		return ret;

	for (i=0; i<adomains; i++) {
		domid = active_domains[i];
		/* domid_t is narrower than int: the assign-and-compare
		 * detects values that don't survive the truncation. */
		if (domid != active_domains[i]) {
			ret = -EINVAL;
			goto out;
		}
		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
		if (ret)
			goto out;
		if (active_domains[i] == 0)
			set_dom0 = 1;
	}
	/* dom0 must always be active but may not be in the list */
	if (!set_dom0) {
		domid = 0;
		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
	}

out:
	if (ret)
		HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
	active_defined = !ret;
	return ret;
}
+
+static int xenoprof_set_passive(int * p_domains,
+ unsigned int pdoms)
+{
+ int ret;
+ int i, j;
+ struct xenoprof_buf *buf;
+
+ if (!xenoprof_is_primary)
+ return 0;
+
+ if (pdoms > MAX_OPROF_DOMAINS)
+ return -E2BIG;
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
+ if (ret)
+ return ret;
+ unmap_passive_list();
+
+ for (i = 0; i < pdoms; i++) {
+ passive_domains[i].domain_id = p_domains[i];
+ passive_domains[i].max_samples = 2048;
+ ret = xenoprof_arch_set_passive(&passive_domains[i],
+ &p_shared_buffer[i]);
+ if (ret)
+ goto out;
+ for (j = 0; j < passive_domains[i].nbuf; j++) {
+ buf = (struct xenoprof_buf *)
+ &p_shared_buffer[i].buffer[j * passive_domains[i].bufsize];
+ BUG_ON(buf->vcpu_id >= MAX_VIRT_CPUS);
+ p_xenoprof_buf[i][buf->vcpu_id] = buf;
+ }
+ }
+
+ pdomains = pdoms;
+ return 0;
+
+out:
+ for (j = 0; j < i; j++)
+ xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[i]);
+
+ return ret;
+}
+
/* The oprofile_operations vtable exported to the generic oprofile core;
 * copied into the caller's ops by xenoprofile_init(). */
struct oprofile_operations xenoprof_ops = {
#ifdef HAVE_XENOPROF_CREATE_FILES
	.create_files 	= xenoprof_create_files,
#endif
	.set_active	= xenoprof_set_active,
	.set_passive    = xenoprof_set_passive,
	.setup 		= xenoprof_setup,
	.shutdown	= xenoprof_shutdown,
	.start		= xenoprof_start,
	.stop		= xenoprof_stop
};
+
+
+/* in order to get driverfs right */
+static int using_xenoprof;
+
+int __init xenoprofile_init(struct oprofile_operations * ops)
+{
+ struct xenoprof_init init;
+ int ret, i;
+
+ ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
+ if (!ret) {
+ xenoprof_arch_init_counter(&init);
+ xenoprof_is_primary = init.is_primary;
+
+ /* cpu_type is detected by Xen */
+ cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
+ strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
+ xenoprof_ops.cpu_type = cpu_type;
+
+ init_driverfs();
+ using_xenoprof = 1;
+ *ops = xenoprof_ops;
+
+ for (i=0; i<NR_CPUS; i++)
+ ovf_irq[i] = -1;
+
+ active_defined = 0;
+ }
+ printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n",
+ __func__, ret, init.num_events, xenoprof_is_primary);
+ return ret;
+}
+
+
/* Module exit: tear down the PM sysdev hooks (only if init registered
 * them), unmap the shared buffers and, on the primary domain, release the
 * passive mappings and tell Xen to shut xenoprof down. */
void xenoprofile_exit(void)
{
	if (using_xenoprof)
		exit_driverfs();

	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
	if (xenoprof_is_primary) {
		unmap_passive_list();
		HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL);
	}
}