aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorcwc22@centipede.cl.cam.ac.uk <cwc22@centipede.cl.cam.ac.uk>2005-04-04 20:22:17 +0000
committercwc22@centipede.cl.cam.ac.uk <cwc22@centipede.cl.cam.ac.uk>2005-04-04 20:22:17 +0000
commitd2781af07b4d42e5e02620c067c1da627baee567 (patch)
tree8f84b80c468ce8cf1ed6ce5f71d2242e7dff569f
parent12af2ea309ea8f116483672d9900b863e75f0c7e (diff)
downloadxen-d2781af07b4d42e5e02620c067c1da627baee567.tar.gz
xen-d2781af07b4d42e5e02620c067c1da627baee567.tar.bz2
xen-d2781af07b4d42e5e02620c067c1da627baee567.zip
bitkeeper revision 1.1236.56.1 (4251a1f9OIyZY2I2LqBlxl0mi64FkA)
Grant tables: substantially more robust. Block front and back drivers: support for using grant tables for interdomain communication.
-rw-r--r--.rootkeys1
-rw-r--r--docs/misc/grant-tables.txt325
-rw-r--r--linux-2.4.29-xen-sparse/arch/xen/config.in1
-rw-r--r--linux-2.4.29-xen-sparse/arch/xen/defconfig-xen01
-rw-r--r--linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU1
-rw-r--r--linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c5
-rw-r--r--linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h4
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/Kconfig10
-rw-r--r--linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c123
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c119
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c124
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h4
-rw-r--r--linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c5
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h4
-rw-r--r--linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h35
-rw-r--r--xen/arch/x86/mm.c8
-rw-r--r--xen/common/grant_table.c579
-rw-r--r--xen/include/public/grant_table.h8
-rw-r--r--xen/include/public/io/blkif.h12
-rw-r--r--xen/include/xen/grant_table.h7
20 files changed, 1118 insertions, 258 deletions
diff --git a/.rootkeys b/.rootkeys
index 1db0f788e9..ec0a0af336 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -20,6 +20,7 @@
4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc/XenDebugger-HOWTO
412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt
420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt
+4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt
40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html
410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex
3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex
diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt
new file mode 100644
index 0000000000..604be3bb08
--- /dev/null
+++ b/docs/misc/grant-tables.txt
@@ -0,0 +1,325 @@
+********************************************************************************
+ A Rough Introduction to Using Grant Tables
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Christopher Clark, March, 2005.
+
+Grant tables are a mechanism for sharing and transferring frames between
+domains, without requiring the participating domains to be privileged.
+
+The first mode of use allows domA to grant domB access to a specific frame,
+whilst retaining ownership. The block front driver uses this to grant memory
+access to the block back driver, so that it may read or write as requested.
+
+ 1. domA creates a grant access reference, and transmits the ref id to domB.
+ 2. domB uses the reference to map the granted frame.
+ 3. domB performs the memory access.
+ 4. domB unmaps the granted frame.
+ 5. domA removes its grant.
+
+
+The second mode allows domA to accept a transfer of ownership of a frame from
+domB. The net front and back drivers will use this for packet tx/rx. This
+mechanism is still being implemented, though the xen<->guest interface design
+is complete.
+
+ 1. domA creates an accept transfer grant reference, and transmits it to domB.
+ 2. domB uses the ref to hand over a frame it owns.
+ 3. domA accepts the transfer.
+ 4. domA clears the used reference.
+
+
+********************************************************************************
+ Data structures
+ ~~~~~~~~~~~~~~~
+
+ The following data structures are used by Xen and the guests to implement
+ grant tables:
+
+ 1. Shared grant entries
+ 2. Active grant entries
+ 3. Map tracking
+
+ These are not the user's primary interface to grant tables, but are discussed
+ because an understanding of how they work may be useful. Each of these is a
+ finite resource.
+
+ Shared grant entries
+ ~~~~~~~~~~~~~~~~~~~~
+
+ A set of pages is shared between Xen and a guest, holding the shared grant
+ entries. The guest writes into these entries to create grant references. The
+ index of the entry is transmitted to the remote domain: this is the
+ reference used to activate an entry. Xen will write into a shared entry to
+ indicate to a guest that its grant is in use.
+ sha->domid : remote domain being granted rights
+ sha->frame : machine frame being granted
+ sha->flags : allow access, allow transfer, remote is reading/writing, etc.
+
+ Active grant entries
+ ~~~~~~~~~~~~~~~~~~~~
+
+ Xen maintains a set of private frames per domain, holding the active grant
+ entries for safety, and to reference count mappings.
+ act->domid : remote domain being granted rights
+ act->frame : machine frame being granted
+ act->pin : used to hold reference counts
+
+ Map tracking
+ ~~~~~~~~~~~~
+
+ Every time a frame is mapped, a map track entry is stored in the metadata of
+ the mapping domain. The index of this entry is returned from the map call,
+ and is used to unmap the frame. Map track entries are also searched whenever a
+ page table entry containing a foreign frame number is overwritten: the first
+ matching map track entry is then removed, as if unmap had been invoked.
+ These are not used by the transfer mechanism.
+ map->domid : owner of the mapped frame
+ map->ref_and_flags : grant reference, ro/rw, mapped for host or device access
+
+********************************************************************************
+
+ Granting a foreign domain access to frames
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domA [frame]--> domB
+
+
+ domA: #include <asm-xen/gnttab.h>
+ grant_ref_t gref[BATCH_SIZE];
+
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ gref[i] = gnttab_grant_foreign_access( domBid, mfn, (readonly ? 1 : 0) );
+
+
+ .. gref is then somehow transmitted to domB for use.
+
+
+ Mapping foreign frames
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+ domB: #include <asm-xen/hypervisor.h>
+ unsigned long mmap_vstart;
+ gnttab_op_t aop[BATCH_SIZE];
+ grant_ref_t mapped_handle[BATCH_SIZE];
+
+ if ( (mmap_vstart = allocate_empty_lowmem_region(BATCH_SIZE)) == 0 )
+ BUG();
+
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ {
+ aop[i].u.map_grant_ref.host_virt_addr =
+ mmap_vstart + (i * PAGE_SIZE);
+ aop[i].u.map_grant_ref.dom = domAid;
+ aop[i].u.map_grant_ref.ref = gref[i];
+ aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map | GNTMAP_readonly );
+ }
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, aop, BATCH_SIZE)))
+ BUG();
+
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ {
+ if ( unlikely(aop[i].u.map_grant_ref.dev_bus_addr == 0) )
+ {
+ tidyup_all(aop, i);
+ goto panic;
+ }
+
+ phys_to_machine_mapping[__pa(mmap_vstart + (i * PAGE_SIZE))>>PAGE_SHIFT] =
+ FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);
+
+ mapped_handle[i] = aop[i].u.map_grant_ref.handle;
+ }
+
+
+
+ Unmapping foreign frames
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domB:
+ for ( i = 0; i < BATCH_SIZE; i++ )
+ {
+ aop[i].u.unmap_grant_ref.host_virt_addr = mmap_vstart + (i * PAGE_SIZE);
+ aop[i].u.unmap_grant_ref.dev_bus_addr = 0;
+ aop[i].u.unmap_grant_ref.handle = mapped_handle[i];
+ }
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, aop, BATCH_SIZE)))
+ BUG();
+
+
+ Ending foreign access
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ Note that this only prevents further mappings; it does _not_ revoke access.
+ It should _only_ be used once the remote domain has unmapped the frame.
+ gnttab_query_foreign_access( gref ) will indicate the state of any mapping.
+
+ domA:
+ if ( gnttab_query_foreign_access( gref[i] ) == 0 )
+ gnttab_end_foreign_access( gref[i], readonly );
+
+ TODO: readonly yet to be implemented.
+
+
+********************************************************************************
+
+ Transferring ownership of a frame to another domain
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ [ XXX: Transfer mechanism is alpha-calibre code, untested, use at own risk XXX ]
+ [ XXX: show use of batch operations below, rather than single frame XXX ]
+ [ XXX: linux internal interface could/should be wrapped to be tidier XXX ]
+
+
+ Prepare to accept a frame from a foreign domain
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domA:
+ if ( (p = alloc_page(GFP_HIGHUSER)) == NULL )
+ {
+ printk("Cannot alloc a frame to surrender\n");
+ break;
+ }
+ pfn = p - mem_map;
+ mfn = phys_to_machine_mapping[pfn];
+
+ if ( !PageHighMem(p) )
+ {
+ v = phys_to_virt(pfn << PAGE_SHIFT);
+ scrub_pages(v, 1);
+ queue_l1_entry_update(get_ptep((unsigned long)v), 0);
+ }
+
+ /* Ensure that ballooned highmem pages don't have cached mappings. */
+ kmap_flush_unused();
+
+ /* Flush updates through and flush the TLB. */
+ xen_tlb_flush();
+
+ phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+
+ if ( HYPERVISOR_dom_mem_op(
+ MEMOP_decrease_reservation, &mfn, 1, 0) != 1 )
+ {
+ printk("MEMOP_decrease_reservation failed\n");
+ /* er... ok. free the page then */
+ __free_page(p);
+ break;
+ }
+
+ accepting_pfn = pfn;
+ ref = gnttab_grant_foreign_transfer( (domid_t) args.arg[0], pfn );
+ printk("Accepting dom %lu frame at ref (%d)\n", args.arg[0], ref);
+
+
+ Transfer a frame to a foreign domain
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ domB:
+ mmu_update_t update;
+ domid_t domid;
+ grant_ref_t gref;
+ unsigned long pfn, mfn, *v;
+ struct page *transfer_page = 0;
+
+ /* alloc a page and grant access.
+ * alloc page returns a page struct. */
+ if ( (transfer_page = alloc_page(GFP_HIGHUSER)) == NULL )
+ return -ENOMEM;
+
+ pfn = transfer_page - mem_map;
+ mfn = phys_to_machine_mapping[pfn];
+
+ /* need to remove all references to this page */
+ if ( !PageHighMem(transfer_page) )
+ {
+ v = phys_to_virt(pfn << PAGE_SHIFT);
+ scrub_pages(v, 1);
+ sprintf((char *)v, "This page (%lx) was transferred.\n", mfn);
+ queue_l1_entry_update(get_ptep((unsigned long)v), 0);
+ }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+ else
+ {
+ v = kmap(transfer_page);
+ scrub_pages(v, 1);
+ sprintf((char *)v, "This page (%lx) was transferred.\n", mfn);
+ kunmap(transfer_page);
+ }
+#endif
+ /* Delete any cached kmappings */
+ kmap_flush_unused();
+
+ /* Flush updates through and flush the TLB */
+ xen_tlb_flush();
+
+ /* invalidate in P2M */
+ phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
+
+ domid = (domid_t)args.arg[0];
+ gref = (grant_ref_t)args.arg[1];
+
+ update.ptr = MMU_EXTENDED_COMMAND;
+ update.ptr |= ((gref & 0x00FF) << 2);
+ update.ptr |= mfn << PAGE_SHIFT;
+
+ update.val = MMUEXT_TRANSFER_PAGE;
+ update.val |= (domid << 16);
+ update.val |= (gref & 0xFF00);
+
+ ret = HYPERVISOR_mmu_update(&update, 1, NULL);
+
+
+ Map a transferred frame
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ TODO:
+
+
+ Clear the used transfer reference
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ TODO:
+
+
+********************************************************************************
+
+ Using a private reserve of grant references
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Where it is known in advance how many grant references are required, and
+failure to allocate them on demand would cause difficulty, a batch can be
+allocated and held in a private reserve.
+
+To reserve a private batch:
+
+ /* housekeeping data - treat as opaque: */
+ grant_ref_t gref_head, gref_terminal;
+
+ if ( 0 > gnttab_alloc_grant_references( number_to_reserve,
+ &gref_head, &gref_terminal ))
+ return -ENOSPC;
+
+
+To release a batch back to the shared pool:
+
+ gnttab_free_grant_references( number_reserved, gref_head );
+
+
+To claim a reserved reference:
+
+ ref = gnttab_claim_grant_reference( &gref_head, gref_terminal );
+
+
+To release a claimed reference back to the reserve pool:
+
+ gnttab_release_grant_reference( &gref_head, gref );
+
+
+To use a claimed reference to grant access, use these alternative functions
+that take an additional parameter of the grant reference to use:
+
+ gnttab_grant_foreign_access_ref
+ gnttab_grant_foreign_transfer_ref
diff --git a/linux-2.4.29-xen-sparse/arch/xen/config.in b/linux-2.4.29-xen-sparse/arch/xen/config.in
index d1913f089e..23492fb5c8 100644
--- a/linux-2.4.29-xen-sparse/arch/xen/config.in
+++ b/linux-2.4.29-xen-sparse/arch/xen/config.in
@@ -22,6 +22,7 @@ fi
bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES
bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND
bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND
+bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT
bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND
endmenu
# The IBM S/390 patch needs this.
diff --git a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0 b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0
index 2864c85cea..b67e52aff6 100644
--- a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0
+++ b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0
@@ -16,6 +16,7 @@ CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_SCRUB_PAGES=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_GRANT is not set
# CONFIG_XEN_USB_FRONTEND is not set
CONFIG_NO_IDLE_HZ=y
CONFIG_FOREIGN_PAGES=y
diff --git a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU
index 886199d7d3..a83c1b8392 100644
--- a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU
+++ b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU
@@ -15,6 +15,7 @@ CONFIG_UID16=y
CONFIG_XEN_SCRUB_PAGES=y
CONFIG_XEN_NETDEV_FRONTEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_GRANT is not set
# CONFIG_XEN_USB_FRONTEND is not set
CONFIG_NO_IDLE_HZ=y
# CONFIG_FOREIGN_PAGES is not set
diff --git a/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
index 51b600d26d..530383dbf0 100644
--- a/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
+++ b/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c
@@ -67,9 +67,14 @@ static int xlvbd_get_vbd_info(vdisk_t *disk_info)
memset(&req, 0, sizeof(req));
req.operation = BLKIF_OP_PROBE;
req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ blkif_control_probe_send(&req, &rsp,
+ (unsigned long)(virt_to_machine(buf)));
+#else
req.frame_and_sects[0] = virt_to_machine(buf) | 7;
blkif_control_send(&req, &rsp);
+#endif
if ( rsp.status <= 0 )
{
diff --git a/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h b/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h
index bc6e2c2004..255ac4a468 100644
--- a/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h
+++ b/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm-xen/gnttab.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -52,7 +53,8 @@ enum fixed_addresses {
FIX_NETRING2_BASE,
FIX_NETRING3_BASE,
FIX_SHARED_INFO,
- FIX_GNTTAB,
+ FIX_GNTTAB_BEGIN,
+ FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
#ifdef CONFIG_VGA_CONSOLE
#define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */
#else
diff --git a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
index 1c2ba9b4a2..a5a2f8eb93 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig
+++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig
@@ -61,6 +61,16 @@ config XEN_BLKDEV_TAP_BE
with the blktap. This option will be removed as the block drivers are
modified to use grant tables.
+config XEN_BLKDEV_GRANT
+ bool "Grant table substrate for block drivers (DANGEROUS)"
+ depends on !XEN_BLKDEV_TAP_BE
+ default n
+ help
+ This introduces the use of grant tables as a data exchange mechanism
+ between the frontend and backend block drivers. This currently
+ conflicts with the block tap, and should be considered untested
+ and likely to render your system unstable.
+
config XEN_NETDEV_BACKEND
bool "Network-device backend driver"
depends on XEN_PHYSDEV_ACCESS
diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
index 4b25423e72..715f5dc951 100644
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
@@ -41,9 +41,14 @@ EXPORT_SYMBOL(gnttab_end_foreign_access);
EXPORT_SYMBOL(gnttab_query_foreign_access);
EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
EXPORT_SYMBOL(gnttab_end_foreign_transfer);
-
-#define NR_GRANT_REFS 512
-static grant_ref_t gnttab_free_list[NR_GRANT_REFS];
+EXPORT_SYMBOL(gnttab_alloc_grant_references);
+EXPORT_SYMBOL(gnttab_free_grant_references);
+EXPORT_SYMBOL(gnttab_claim_grant_reference);
+EXPORT_SYMBOL(gnttab_release_grant_reference);
+EXPORT_SYMBOL(gnttab_grant_foreign_access_ref);
+EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref);
+
+static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES];
static grant_ref_t gnttab_free_head;
static grant_entry_t *shared;
@@ -61,7 +66,7 @@ get_free_entry(
void)
{
grant_ref_t fh, nfh = gnttab_free_head;
- do { if ( unlikely((fh = nfh) == NR_GRANT_REFS) ) return -1; }
+ do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; }
while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh,
gnttab_free_list[fh])) != fh) );
return fh;
@@ -97,6 +102,17 @@ gnttab_grant_foreign_access(
return ref;
}
+/*
+ * Install a foreign-access grant in a grant entry the caller already holds.
+ *
+ *   ref      - index of the entry in the shared grant table
+ *   domid    - value stored in the entry's domid field
+ *   frame    - value stored in the entry's frame field
+ *   readonly - non-zero adds GTF_readonly to GTF_permit_access
+ *
+ * frame and domid are stored first, wmb() is issued, and flags is written
+ * last.
+ */
+void
+gnttab_grant_foreign_access_ref(
+    grant_ref_t ref, domid_t domid, unsigned long frame, int readonly)
+{
+    grant_entry_t *entry = &shared[ref];
+
+    entry->frame = frame;
+    entry->domid = domid;
+    wmb();
+    if ( readonly )
+        entry->flags = GTF_permit_access | GTF_readonly;
+    else
+        entry->flags = GTF_permit_access;
+}
+
+
int
gnttab_query_foreign_access( grant_ref_t ref )
{
@@ -124,14 +140,14 @@ gnttab_end_foreign_access( grant_ref_t ref, int readonly )
int
gnttab_grant_foreign_transfer(
- domid_t domid)
+ domid_t domid, unsigned long pfn )
{
int ref;
if ( unlikely((ref = get_free_entry()) == -1) )
return -ENOSPC;
- shared[ref].frame = 0;
+ shared[ref].frame = pfn;
shared[ref].domid = domid;
wmb();
shared[ref].flags = GTF_accept_transfer;
@@ -139,6 +155,16 @@ gnttab_grant_foreign_transfer(
return ref;
}
+/*
+ * Mark a caller-chosen grant entry as willing to accept a frame transfer.
+ *
+ *   ref   - index of the entry in the shared grant table
+ *   domid - value stored in the entry's domid field
+ *   pfn   - value stored in the entry's frame field
+ *
+ * Unlike gnttab_grant_foreign_transfer(), no entry is taken from the free
+ * list here. frame and domid are stored first, wmb() is issued, and
+ * GTF_accept_transfer is written to flags last.
+ */
+void
+gnttab_grant_foreign_transfer_ref(
+    grant_ref_t ref, domid_t domid, unsigned long pfn )
+{
+    grant_entry_t *entry = &shared[ref];
+
+    entry->frame = pfn;
+    entry->domid = domid;
+    wmb();
+    entry->flags = GTF_accept_transfer;
+}
+
unsigned long
gnttab_end_foreign_transfer(
grant_ref_t ref)
@@ -163,6 +189,60 @@ gnttab_end_foreign_transfer(
return frame;
}
+/*
+ * Return a privately held batch of grant references to the shared pool.
+ *
+ *   count - number of references in the batch
+ *   head  - first reference of the batch; the remaining ones are reached by
+ *           following gnttab_free_list[] links
+ *
+ * Each entry's successor is read before the entry is passed to
+ * put_free_entry(), which presumably rewrites that link (its body is not
+ * shown here).
+ */
+void
+gnttab_free_grant_references( u16 count, grant_ref_t head )
+{
+    /* TODO: O(N)...? */
+    grant_ref_t to_die = 0, next = head;
+    int i;
+
+    /*
+     * The braces are required: without them only the first statement is
+     * repeated, and a single entry is freed regardless of count (including
+     * when count is zero).
+     */
+    for ( i = 0; i < count; i++ )
+    {
+        to_die = next;
+        next = gnttab_free_list[next];
+        put_free_entry( to_die );
+    }
+}
+
+/*
+ * Reserve a batch of grant references for private use.
+ *
+ *   count    - number of references to reserve
+ *   head     - out: free-list head as sampled on entry, i.e. the first
+ *              reserved reference
+ *   terminal - out: free-list head after the reservation; marks the end of
+ *              the private list
+ *
+ * Returns 0 on success. If get_free_entry() reports exhaustion part-way
+ * through, the free-list head is reset to the value sampled on entry and
+ * -ENOSPC is returned; *head and *terminal are then left untouched.
+ *
+ * NOTE(review): the initial sample and the failure-path reset are plain
+ * loads/stores, whereas get_free_entry() updates the head with cmpxchg --
+ * confirm that callers cannot race with other users of the free list.
+ */
+int
+gnttab_alloc_grant_references( u16 count,
+                               grant_ref_t *head,
+                               grant_ref_t *terminal )
+{
+    grant_ref_t first = gnttab_free_head;
+    int taken = 0;
+
+    while ( taken < count )
+    {
+        if ( unlikely(get_free_entry() == -1) )
+        {
+            gnttab_free_head = first;
+            return -ENOSPC;
+        }
+        taken++;
+    }
+
+    *head = first;
+    *terminal = gnttab_free_head;
+
+    return 0;
+}
+
+/*
+ * Take one reference from the front of a private reserve.
+ *
+ *   private_head - in/out: head of the private list; advanced to the next
+ *                  gnttab_free_list[] link on success
+ *   terminal     - value marking the end of the private list
+ *
+ * Returns the claimed reference, or -ENOSPC when the head already equals
+ * terminal (the head is then left unchanged).
+ */
+int
+gnttab_claim_grant_reference( grant_ref_t *private_head,
+                              grant_ref_t terminal )
+{
+    grant_ref_t claimed = *private_head;
+
+    if ( unlikely(claimed == terminal) )
+        return -ENOSPC;
+
+    *private_head = gnttab_free_list[claimed];
+    return claimed;
+}
+
+/*
+ * Put a claimed reference back at the front of a private reserve.
+ *
+ *   private_head - in/out: head of the private list; becomes `release`
+ *   release      - reference being returned; its gnttab_free_list[] link is
+ *                  set to the previous head
+ */
+void
+gnttab_release_grant_reference( grant_ref_t *private_head,
+                                grant_ref_t release )
+{
+    grant_ref_t previous_head = *private_head;
+
+    gnttab_free_list[release] = previous_head;
+    *private_head = release;
+}
+
static int grant_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long data)
{
@@ -194,7 +274,7 @@ static int grant_ioctl(struct inode *inode, struct file *file,
TRAP_INSTR "; "
"popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
: "=a" (ret) : "0" (&hypercall) : "memory" );
-
+
return ret;
}
@@ -212,7 +292,14 @@ static int grant_read(char *page, char **start, off_t off,
gt = (grant_entry_t *)shared;
len = 0;
- for ( i = 0; i < NR_GRANT_REFS; i++ )
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ /* TODO: safety catch here until this can handle >PAGE_SIZE output */
+ if (len > (PAGE_SIZE - 200))
+ {
+ len += sprintf( page + len, "Truncated.\n");
+ break;
+ }
+
if ( gt[i].flags )
len += sprintf( page + len,
"Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n",
@@ -235,22 +322,25 @@ static int grant_write(struct file *file, const char __user *buffer,
static int __init gnttab_init(void)
{
gnttab_setup_table_t setup;
- unsigned long frame;
+ unsigned long frames[NR_GRANT_FRAMES];
int i;
- for ( i = 0; i < NR_GRANT_REFS; i++ )
- gnttab_free_list[i] = i + 1;
-
setup.dom = DOMID_SELF;
- setup.nr_frames = 1;
- setup.frame_list = &frame;
+ setup.nr_frames = NR_GRANT_FRAMES;
+ setup.frame_list = frames;
+
if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 )
BUG();
if ( setup.status != 0 )
BUG();
- set_fixmap_ma(FIX_GNTTAB, frame << PAGE_SHIFT);
- shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB);
+ for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+ set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
+
+ shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ gnttab_free_list[i] = i + 1;
/*
* /proc/xen/grant : used by libxc to access grant tables
@@ -269,6 +359,7 @@ static int __init gnttab_init(void)
grant_pde->read_proc = &grant_read;
grant_pde->write_proc = &grant_write;
+ printk("Grant table initialized\n");
return 0;
}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
index a827ab4eda..5933725bf7 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
@@ -8,10 +8,14 @@
* arch/xen/drivers/blkif/frontend
*
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
*/
#include "common.h"
#include <asm-xen/evtchn.h>
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#endif
/*
* These are rather arbitrary. They are fairly large because adjacent requests
@@ -80,6 +84,17 @@ static inline void flush_plugged_queue(void)
}
#endif
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+/* When using grant tables to map a frame for device access, the handle
+ * returned by the map operation must be used to unmap the frame. This is
+ * needed to drop the ref count on the frame.
+ *
+ * pending_grant_handles[] is addressed through pending_handle(_idx, _i):
+ * _idx is scaled by BLKIF_MAX_SEGMENTS_PER_REQUEST and _i is added as the
+ * offset within that group. fast_flush_area() skips slots that hold
+ * BLKBACK_INVALID_HANDLE and resets each slot it unmaps to that value.
+ *
+ * NOTE(review): blkif_init() initialises this array with
+ * memset(pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES), which
+ * writes MMAP_PAGES bytes of 0xFF. If u16 is two bytes that covers only
+ * the first half of the array -- confirm, and consider sizeof().
+ */
+static u16 pending_grant_handles[MMAP_PAGES];
+#define pending_handle(_idx, _i) \
+ (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)])
+#define BLKBACK_INVALID_HANDLE (0xFFFF)
+#endif
+
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/*
* If the tap driver is used, we may get pages belonging to either the tap
@@ -100,6 +115,27 @@ static void make_response(blkif_t *blkif, unsigned long id,
static void fast_flush_area(int idx, int nr_pages)
{
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned int i, invcount = 0;
+ u16 handle;
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) )
+ {
+ aop[i].u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(idx, i);
+ aop[i].u.unmap_grant_ref.dev_bus_addr = 0;
+ aop[i].u.unmap_grant_ref.handle = handle;
+ pending_handle(idx, i) = BLKBACK_INVALID_HANDLE;
+ invcount++;
+ }
+ }
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_unmap_grant_ref, aop, invcount)))
+ BUG();
+#else
+
multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
int i;
@@ -114,6 +150,7 @@ static void fast_flush_area(int idx, int nr_pages)
mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
BUG();
+#endif
}
@@ -347,6 +384,26 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
(blkif_last_sect(req->frame_and_sects[0]) != 7) )
goto out;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ {
+ gnttab_op_t op;
+
+ op.u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, 0);
+ op.u.map_grant_ref.flags = GNTMAP_host_map;
+ op.u.map_grant_ref.ref = blkif_gref_from_fas(req->frame_and_sects[0]);
+ op.u.map_grant_ref.dom = blkif->domid;
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, &op, 1)))
+ BUG();
+
+ if ( op.u.map_grant_ref.dev_bus_addr == 0 )
+ goto out;
+
+ pending_handle(pending_idx, 0) = op.u.map_grant_ref.handle;
+ }
+#else /* else CONFIG_XEN_BLKDEV_GRANT */
+
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
/* Grab the real frontend out of the probe message. */
if (req->frame_and_sects[1] == BLKTAP_COOKIE)
@@ -369,7 +426,8 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
goto out;
#endif
-
+#endif /* endif CONFIG_XEN_BLKDEV_GRANT */
+
rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
PAGE_SIZE / sizeof(vdisk_t));
@@ -382,10 +440,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
{
extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
- unsigned long fas, remap_prot;
+ unsigned long fas = 0;
int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
pending_req_t *pending_req;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#else
+ unsigned long remap_prot;
multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+#endif
struct phys_req preq;
struct {
unsigned long buf; unsigned int nsec;
@@ -412,14 +475,58 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
preq.sector_number = req->sector_number;
preq.nr_sects = 0;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
for ( i = 0; i < nseg; i++ )
{
+ fas = req->frame_and_sects[i];
+ seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
+
+ if ( seg[i].nsec <= 0 )
+ goto bad_descriptor;
+ preq.nr_sects += seg[i].nsec;
+
+ aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i);
+
+ aop[i].u.map_grant_ref.dom = blkif->domid;
+ aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas);
+ aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map |
+ ( ( operation == READ ) ?
+ 0 : GNTMAP_readonly ) );
+ }
+
+ if ( unlikely(HYPERVISOR_grant_table_op(
+ GNTTABOP_map_grant_ref, aop, nseg)))
+ BUG();
+
+ for ( i = 0; i < nseg; i++ )
+ {
+ if ( unlikely(aop[i].u.map_grant_ref.dev_bus_addr == 0) )
+ {
+ DPRINTK("invalid buffer -- could not remap it\n");
+ fast_flush_area(pending_idx, nseg);
+ goto bad_descriptor;
+ }
+
+ phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
+ FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr);
+
+ pending_handle(pending_idx, i) = aop[i].u.map_grant_ref.handle;
+ }
+#endif
+
+ for ( i = 0; i < nseg; i++ )
+ {
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ seg[i].buf = (aop[i].u.map_grant_ref.dev_bus_addr << PAGE_SHIFT) |
+ (blkif_first_sect(fas) << 9);
+#else
fas = req->frame_and_sects[i];
seg[i].buf = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9);
seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1;
if ( seg[i].nsec <= 0 )
goto bad_descriptor;
preq.nr_sects += seg[i].nsec;
+#endif
}
if ( vbd_translate(&preq, blkif, operation) != 0 )
@@ -430,6 +537,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
goto bad_descriptor;
}
+#ifndef CONFIG_XEN_BLKDEV_GRANT
if ( operation == READ )
remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW;
else
@@ -461,6 +569,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
goto bad_descriptor;
}
}
+#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */
pending_req = &pending_reqs[pending_idx];
pending_req->blkif = blkif;
@@ -628,9 +737,15 @@ static int __init blkif_init(void)
blkif_ctrlif_init();
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES );
+ printk(KERN_ALERT "Blkif backend is using grant tables.\n");
+#endif
+
#ifdef CONFIG_XEN_BLKDEV_TAP_BE
printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
#endif
+
return 0;
}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
index 54bd2c749b..abfa6b5e18 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
@@ -7,6 +7,7 @@
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
* Copyright (c) 2004, Christian Limpach
* Copyright (c) 2004, Andrew Warfield
+ * Copyright (c) 2005, Christopher Clark
*
* This file may be distributed separately from the Linux kernel, or
* incorporated into other software packages, subject to the following license:
@@ -30,6 +31,14 @@
* IN THE SOFTWARE.
*/
+#if 1
+/*
+ * ASSERT(_p): when _p evaluates false, print the failed expression together
+ * with the line and file, then write through a null pointer (a deliberate
+ * crash).
+ *
+ * The body is wrapped in do { } while (0) so that "ASSERT(x);" is exactly
+ * one statement: it cannot capture a following "else", and the trailing
+ * semicolon does not leave a stray empty statement.
+ */
+#define ASSERT(_p)                                                        \
+    do {                                                                  \
+        if ( !(_p) ) {                                                    \
+            printk("Assertion '%s' failed, line %d, file %s", #_p ,       \
+                   __LINE__, __FILE__);                                   \
+            *(int*)0=0;                                                   \
+        }                                                                 \
+    } while (0)
+#else
+/* Assertions compiled out; still expands to a single expression. */
+#define ASSERT(_p) ((void)0)
+#endif
+
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
@@ -46,6 +55,10 @@
#include <scsi/scsi.h>
#include <asm-xen/ctrl_if.h>
#include <asm-xen/evtchn.h>
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#include <asm-xen/xen-public/grant_table.h>
+#include <asm-xen/gnttab.h>
+#endif
typedef unsigned char byte; /* from linux/ide.h */
@@ -76,6 +89,13 @@ static blkif_front_ring_t blk_ring;
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+static domid_t rdomid = 0; /* passed as the domid argument of every
+                            * gnttab_grant_foreign_access_ref() call in this
+                            * file; where it is assigned is not visible in
+                            * this chunk */
+static grant_ref_t gref_head, gref_terminal; /* private grant-reference list
+                            * state handed to gnttab_claim_grant_reference() */
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) /* segments per request
+                            * times ring size; presumably the number of grant
+                            * references to reserve -- TODO confirm at the
+                            * point of use */
+#endif
+
unsigned long rec_ring_free;
blkif_request_t rec_ring[BLK_RING_SIZE];
@@ -130,7 +150,11 @@ static inline void translate_req_to_pfn(blkif_request_t *xreq,
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ )
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ xreq->frame_and_sects[i] = req->frame_and_sects[i];
+#else
xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
+#endif
}
static inline void translate_req_to_mfn(blkif_request_t *xreq,
@@ -145,7 +169,11 @@ static inline void translate_req_to_mfn(blkif_request_t *xreq,
xreq->sector_number = req->sector_number;
for ( i = 0; i < req->nr_segments; i++ )
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ xreq->frame_and_sects[i] = req->frame_and_sects[i];
+#else
xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
+#endif
}
@@ -274,6 +302,9 @@ static int blkif_queue_request(struct request *req)
int idx;
unsigned long id;
unsigned int fsect, lsect;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref;
+#endif
if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
return 1;
@@ -299,8 +330,23 @@ static int blkif_queue_request(struct request *req)
buffer_ma = page_to_phys(bvec->bv_page);
fsect = bvec->bv_offset >> 9;
lsect = fsect + (bvec->bv_len >> 9) - 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ rq_data_dir(req) );
+
+ ring_req->frame_and_sects[ring_req->nr_segments++] =
+ (((u32) ref) << 16) | (fsect << 3) | lsect;
+#else
ring_req->frame_and_sects[ring_req->nr_segments++] =
buffer_ma | (fsect << 3) | lsect;
+#endif
}
}
@@ -719,6 +765,9 @@ static int blkif_queue_request(unsigned long id,
blkif_request_t *req;
struct buffer_head *bh;
unsigned int fsect, lsect;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ int ref;
+#endif
fsect = (buffer_ma & ~PAGE_MASK) >> 9;
lsect = fsect + nr_sectors - 1;
@@ -766,11 +815,25 @@ static int blkif_queue_request(unsigned long id,
bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
-
rec_ring[req->id].id = id;
-
- req->frame_and_sects[req->nr_segments] =
- buffer_ma | (fsect<<3) | lsect;
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
+
+ req->frame_and_sects[req->nr_segments] =
+ (((u32) ref ) << 16) | (fsect << 3) | lsect;
+#else
+ req->frame_and_sects[req->nr_segments] =
+ buffer_ma | (fsect << 3) | lsect;
+#endif
if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
sg_next_sect += nr_sectors;
else
@@ -808,7 +871,21 @@ static int blkif_queue_request(unsigned long id,
req->sector_number = (blkif_sector_t)sector_number;
req->device = device;
req->nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ rdomid,
+ buffer_ma >> PAGE_SHIFT,
+ ( operation == BLKIF_OP_WRITE ? 1 : 0 ) );
+
+ req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect;
+#else
req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
+#endif
/* Keep a private copy so we can reissue requests when recovering. */
translate_req_to_pfn(&rec_ring[xid], req );
@@ -966,6 +1043,20 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
/***************************** COMMON CODE *******************************/
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp,
+ unsigned long address)
+{
+ int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
+ ASSERT( ref != -ENOSPC );
+
+ gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 );
+
+ req->frame_and_sects[0] = (((u32) ref) << 16) | 7;
+
+ blkif_control_send(req, rsp);
+}
+#endif
void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
@@ -1146,6 +1237,9 @@ static void blkif_connect(blkif_fe_interface_status_t *status)
blkif_evtchn = status->evtchn;
blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ rdomid = status->domid;
+#endif
err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
if ( err )
@@ -1301,7 +1395,14 @@ int wait_for_blkif(void)
int __init xlblk_init(void)
{
int i;
-
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS,
+ &gref_head, &gref_terminal ))
+ return 1;
+ printk(KERN_ALERT "Blkif frontend is using grant tables.\n");
+#endif
+
if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
(xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
return 0;
@@ -1330,12 +1431,19 @@ void blkdev_resume(void)
send_driver_status(1);
}
-/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
-
void blkif_completion(blkif_request_t *req)
{
int i;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ grant_ref_t gref;
+ for ( i = 0; i < req->nr_segments; i++ )
+ {
+ gref = blkif_gref_from_fas(req->frame_and_sects[i]);
+ gnttab_release_grant_reference(&gref_head, gref);
+ }
+#else
+ /* This is a hack to get the dirty logging bits set */
switch ( req->operation )
{
case BLKIF_OP_READ:
@@ -1347,5 +1455,5 @@ void blkif_completion(blkif_request_t *req)
}
break;
}
-
+#endif
}
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
index 6045fcc601..ede57abfa5 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h
@@ -102,6 +102,10 @@ extern int blkif_ioctl(struct inode *inode, struct file *filep,
extern int blkif_check(dev_t dev);
extern int blkif_revalidate(dev_t dev);
extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+extern void blkif_control_probe_send(
+ blkif_request_t *req, blkif_response_t *rsp, unsigned long address);
+#endif
extern void do_blkif_request (request_queue_t *rq);
extern void xlvbd_update_vbds(void);
diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
index d2a1f1f65e..f8aec64938 100644
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c
@@ -135,9 +135,14 @@ static vdisk_t * xlvbd_probe(int *ret)
memset(&req, 0, sizeof(req));
req.operation = BLKIF_OP_PROBE;
req.nr_segments = 1;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ blkif_control_probe_send(&req, &rsp,
+ (unsigned long)(virt_to_machine(buf)));
+#else
req.frame_and_sects[0] = virt_to_machine(buf) | 7;
blkif_control_send(&req, &rsp);
+#endif
if ( rsp.status <= 0 ) {
printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
goto out;
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
index f86762fd1e..cebd63514d 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h
@@ -27,6 +27,7 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm-xen/gnttab.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -84,7 +85,8 @@ enum fixed_addresses {
FIX_PCIE_MCFG,
#endif
FIX_SHARED_INFO,
- FIX_GNTTAB,
+ FIX_GNTTAB_BEGIN,
+ FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1,
#ifdef CONFIG_XEN_PHYSDEV_ACCESS
#define NR_FIX_ISAMAPS 256
FIX_ISAMAP_END,
diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h
index 8c28c889f7..642a74dbf9 100644
--- a/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h
+++ b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h
@@ -7,6 +7,7 @@
* (i.e., mechanisms for both sender and recipient of grant references)
*
* Copyright (c) 2004, K A Fraser
+ * Copyright (c) 2005, Christopher Clark
*/
#ifndef __ASM_GNTTAB_H__
@@ -16,6 +17,10 @@
#include <asm-xen/hypervisor.h>
#include <asm-xen/xen-public/grant_table.h>
+/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
+#define NR_GRANT_FRAMES 4
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+
int
gnttab_grant_foreign_access(
domid_t domid, unsigned long frame, int readonly);
@@ -26,7 +31,7 @@ gnttab_end_foreign_access(
int
gnttab_grant_foreign_transfer(
- domid_t domid);
+ domid_t domid, unsigned long pfn);
unsigned long
gnttab_end_foreign_transfer(
@@ -36,4 +41,32 @@ int
gnttab_query_foreign_access(
grant_ref_t ref );
+/*
+ * operations on reserved batches of grant references
+ */
+int
+gnttab_alloc_grant_references(
+ u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal );
+
+void
+gnttab_free_grant_references(
+ u16 count, grant_ref_t private_head );
+
+int
+gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal
+);
+
+void
+gnttab_release_grant_reference(
+ grant_ref_t *private_head, grant_ref_t release );
+
+void
+gnttab_grant_foreign_access_ref(
+ grant_ref_t ref, domid_t domid, unsigned long frame, int readonly);
+
+void
+gnttab_grant_foreign_transfer_ref(
+ grant_ref_t, domid_t domid, unsigned long pfn);
+
+
#endif /* __ASM_GNTTAB_H__ */
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index aca041e0c6..9d28d42a6f 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -1997,7 +1997,13 @@ int update_grant_va_mapping(unsigned long va,
l1_pgentry_t ol1e = mk_l1_pgentry(_ol1e);
if ( update_l1e(pl1e, ol1e, mk_l1_pgentry(_nl1e)) )
+ {
put_page_from_l1e(ol1e, d);
+ if ( _ol1e & _PAGE_PRESENT )
+ rc = 0; /* Caller needs to invalidate TLB entry */
+ else
+ rc = 1; /* Caller need not invalidate TLB entry */
+ }
else
rc = -EINVAL;
}
@@ -3278,7 +3284,7 @@ void audit_domains_key(unsigned char key)
spin_unlock(&e->page_alloc_lock);
/* Transfer is all done: tell the guest about its new page frame. */
- gnttab_notify_transfer(e, gntref, pfn);
+ gnttab_notify_transfer(e, d, gntref, pfn);
put_domain(e);
break;
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 7cd543b834..446783c48e 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -22,18 +22,19 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#define GRANT_DEBUG 1
+#define GRANT_DEBUG 0
+#define GRANT_DEBUG_VERBOSE 0
#include <xen/config.h>
#include <xen/sched.h>
-#include <asm-x86/mm.h>
-#include <asm-x86/shadow.h>
+#include <asm/mm.h>
+#include <asm/shadow.h>
-#define PIN_FAIL(_rc, _f, _a...) \
+#define PIN_FAIL(_lbl, _rc, _f, _a...) \
do { \
DPRINTK( _f, ## _a ); \
rc = (_rc); \
- goto fail; \
+ goto _lbl; \
} while ( 0 )
static inline int
@@ -58,23 +59,38 @@ put_maptrack_handle(
}
static int
-__gnttab_map_grant_ref(
- gnttab_map_grant_ref_t *uop,
- unsigned long *va)
+__gnttab_activate_grant_ref(
+ struct domain *mapping_d, /* IN */
+ struct exec_domain *mapping_ed,
+ struct domain *granting_d,
+ grant_ref_t ref,
+ u16 dev_hst_ro_flags,
+ unsigned long host_virt_addr,
+ unsigned long *pframe ) /* OUT */
{
- domid_t dom, sdom;
- grant_ref_t ref;
- struct domain *ld, *rd;
- struct exec_domain *led;
- u16 flags, sflags;
- int handle;
+ domid_t sdom;
+ u16 sflags;
active_grant_entry_t *act;
grant_entry_t *sha;
- s16 rc = 0;
- unsigned long frame = 0, host_virt_addr;
+ s16 rc = 1;
+ unsigned long frame = 0;
+ int retries = 0;
- /* Returns 0 if TLB flush / invalidate required by caller.
- * va will indicate the address to be invalidated. */
+ /*
+ * Objectives of this function:
+ * . Make the record ( granting_d, ref ) active, if not already.
+ * . Update shared grant entry of owner, indicating frame is mapped.
+ * . Increment the owner act->pin reference counts.
+ * . get_page on shared frame if new mapping.
+ * . get_page_type if this is first RW mapping of frame.
+ * . Add PTE to virtual address space of mapping_d, if necessary.
+ * Returns:
+ * . -ve: error
+ * . 1: ok
+ * . 0: ok and TLB invalidate of host_virt_addr needed.
+ *
+ * On success, *pframe contains mfn.
+ */
/*
* We bound the number of times we retry CMPXCHG on memory locations that
@@ -84,62 +100,11 @@ __gnttab_map_grant_ref(
* the guest to race our updates (e.g., to change the GTF_readonly flag),
* so we allow a few retries before failing.
*/
- int retries = 0;
-
- led = current;
- ld = led->domain;
-
- /* Bitwise-OR avoids short-circuiting which screws control flow. */
- if ( unlikely(__get_user(dom, &uop->dom) |
- __get_user(ref, &uop->ref) |
- __get_user(host_virt_addr, &uop->host_virt_addr) |
- __get_user(flags, &uop->flags)) )
- {
- DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
- return -EFAULT; /* don't set status */
- }
-
- if ( ((host_virt_addr != 0) || (flags & GNTMAP_host_map) ) &&
- unlikely(!__addr_ok(host_virt_addr)))
- {
- DPRINTK("Bad virtual address (%x) or flags (%x).\n",
- host_virt_addr, flags);
- (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
- return GNTST_bad_gntref;
- }
- if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
- unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
- {
- DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags);
- (void)__put_user(GNTST_bad_gntref, &uop->handle);
- return GNTST_bad_gntref;
- }
+ act = &granting_d->grant_table->active[ref];
+ sha = &granting_d->grant_table->shared[ref];
- if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
- unlikely(ld == rd) )
- {
- if ( rd != NULL )
- put_domain(rd);
- DPRINTK("Could not find domain %d\n", dom);
- (void)__put_user(GNTST_bad_domain, &uop->handle);
- return GNTST_bad_domain;
- }
-
- if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
- {
- put_domain(rd);
- DPRINTK("No more map handles available\n");
- (void)__put_user(GNTST_no_device_space, &uop->handle);
- return GNTST_no_device_space;
- }
- DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
- ref, dom, flags);
-
- act = &rd->grant_table->active[ref];
- sha = &rd->grant_table->shared[ref];
-
- spin_lock(&rd->grant_table->lock);
+ spin_lock(&granting_d->grant_table->lock);
if ( act->pin == 0 )
{
@@ -153,21 +118,21 @@ __gnttab_map_grant_ref(
u32 scombo, prev_scombo, new_scombo;
if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) ||
- unlikely(sdom != ld->id) )
- PIN_FAIL(GNTST_general_error,
+ unlikely(sdom != mapping_d->id) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Bad flags (%x) or dom (%d). (NB. expected dom %d)\n",
- sflags, sdom, ld->id);
+ sflags, sdom, mapping_d->id);
/* Merge two 16-bit values into a 32-bit combined update. */
/* NB. Endianness! */
prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
new_scombo = scombo | GTF_reading;
- if ( !(flags & GNTMAP_readonly) )
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) )
{
new_scombo |= GTF_writing;
if ( unlikely(sflags & GTF_readonly) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Attempt to write-pin a r/o grant entry.\n");
}
@@ -175,7 +140,7 @@ __gnttab_map_grant_ref(
if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
prev_scombo,
new_scombo)) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Fault while modifying shared flags and domid.\n");
/* Did the combined update work (did we see what we expected?). */
@@ -183,7 +148,7 @@ __gnttab_map_grant_ref(
break;
if ( retries++ == 4 )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Shared grant entry is unstable.\n");
/* Didn't see what we expected. Split out the seen flags & dom. */
@@ -194,25 +159,25 @@ __gnttab_map_grant_ref(
/* rmb(); */ /* not on x86 */
- frame = __translate_gpfn_to_mfn(rd, sha->frame);
+ frame = __translate_gpfn_to_mfn(granting_d, sha->frame);
if ( unlikely(!pfn_is_ram(frame)) ||
- unlikely(!((flags & GNTMAP_readonly) ?
- get_page(&frame_table[frame], rd) :
- get_page_and_type(&frame_table[frame], rd,
+ unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ?
+ get_page(&frame_table[frame], granting_d) :
+ get_page_and_type(&frame_table[frame], granting_d,
PGT_writable_page))) )
{
clear_bit(_GTF_writing, &sha->flags);
clear_bit(_GTF_reading, &sha->flags);
- PIN_FAIL(GNTST_general_error,
- "Could not pin the granted frame!\n");
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Could not pin the granted frame (%lx)!\n", frame);
}
- if ( flags & GNTMAP_device_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_device_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( flags & GNTMAP_host_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
act->domid = sdom;
act->frame = frame;
@@ -226,11 +191,11 @@ __gnttab_map_grant_ref(
* A more accurate check cannot be done with a single comparison.
*/
if ( (act->pin & 0x80808080U) != 0 )
- PIN_FAIL(ENOSPC, "Risk of counter overflow %08x\n", act->pin);
+ PIN_FAIL(unlock_out, ENOSPC, "Risk of counter overflow %08x\n", act->pin);
frame = act->frame;
- if ( !(flags & GNTMAP_readonly) &&
+ if ( !(dev_hst_ro_flags & GNTMAP_readonly) &&
!((sflags = sha->flags) & GTF_writing) )
{
for ( ; ; )
@@ -238,7 +203,7 @@ __gnttab_map_grant_ref(
u16 prev_sflags;
if ( unlikely(sflags & GTF_readonly) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Attempt to write-pin a r/o grant entry.\n");
prev_sflags = sflags;
@@ -246,14 +211,14 @@ __gnttab_map_grant_ref(
/* NB. prev_sflags is updated in place to seen value. */
if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags,
prev_sflags | GTF_writing)) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Fault while modifying shared flags.\n");
if ( likely(prev_sflags == sflags) )
break;
if ( retries++ == 4 )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Shared grant entry is unstable.\n");
sflags = prev_sflags;
@@ -263,99 +228,179 @@ __gnttab_map_grant_ref(
PGT_writable_page)) )
{
clear_bit(_GTF_writing, &sha->flags);
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unlock_out, GNTST_general_error,
"Attempt to write-pin a unwritable page.\n");
}
}
- if ( flags & GNTMAP_device_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_device_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
- if ( flags & GNTMAP_host_map )
- act->pin += (flags & GNTMAP_readonly) ?
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ?
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
}
/* At this point:
- * act->pin updated to reflect mapping
- * sha->flags updated to indicate to granting domain mapping done
- * frame contains the mfn
+ * act->pin updated to reflect mapping.
+ * sha->flags updated to indicate to granting domain mapping done.
+ * frame contains the mfn.
*/
- if ( (host_virt_addr != 0) && (flags & GNTMAP_host_map) )
+ spin_unlock(&granting_d->grant_table->lock);
+
+ if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) )
{
/* Write update into the pagetable
*/
- /* cwc22: TODO: check locking... */
-
- spin_unlock(&rd->grant_table->lock);
-
rc = update_grant_va_mapping( host_virt_addr,
(frame << PAGE_SHIFT) | _PAGE_PRESENT |
_PAGE_ACCESSED |
_PAGE_DIRTY |
- ((flags & GNTMAP_readonly) ? 0 : _PAGE_RW),
- ld, led );
+ ((dev_hst_ro_flags & GNTMAP_readonly) ? 0 : _PAGE_RW),
+ mapping_d, mapping_ed );
- spin_lock(&rd->grant_table->lock);
+ /* IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB.
+ * This is done in the outer gnttab_map_grant_ref.
+ */
if ( 0 > rc )
{
/* Abort. */
- act->pin -= (flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
- if ( flags & GNTMAP_readonly )
+ spin_lock(&granting_d->grant_table->lock);
+
+ if ( dev_hst_ro_flags & GNTMAP_readonly )
act->pin -= GNTPIN_hstr_inc;
else
{
act->pin -= GNTPIN_hstw_inc;
if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
{
- put_page_type(&frame_table[frame]);
clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
}
}
if ( act->pin == 0 )
{
- put_page(&frame_table[frame]);
clear_bit(_GTF_reading, &sha->flags);
+ put_page(&frame_table[frame]);
}
- goto fail;
+
+ spin_unlock(&granting_d->grant_table->lock);
}
- rc = 0;
- *va = host_virt_addr;
+ }
+ *pframe = frame;
+ return rc;
- /* IMPORTANT: must flush / invalidate entry in TLB.
- * This is done in the outer gnttab_map_grant_ref when return 0.
- */
+ unlock_out:
+ spin_unlock(&granting_d->grant_table->lock);
+ return rc;
+}
+
+static int
+__gnttab_map_grant_ref(
+ gnttab_map_grant_ref_t *uop,
+ unsigned long *va)
+{
+ domid_t dom;
+ grant_ref_t ref;
+ struct domain *ld, *rd;
+ struct exec_domain *led;
+ u16 dev_hst_ro_flags;
+ int handle;
+ unsigned long frame, host_virt_addr;
+ int rc;
+
+ /* Returns 0 if TLB flush / invalidate required by caller.
+ * va will indicate the address to be invalidated. */
+
+ led = current;
+ ld = led->domain;
+
+ /* Bitwise-OR avoids short-circuiting which screws control flow. */
+ if ( unlikely(__get_user(dom, &uop->dom) |
+ __get_user(ref, &uop->ref) |
+ __get_user(host_virt_addr, &uop->host_virt_addr) |
+ __get_user(dev_hst_ro_flags, &uop->flags)) )
+ {
+ DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
+ return -EFAULT; /* don't set status */
}
- /*
- * Only make the maptrack live _after_ writing the pte, in case we
- * overwrite the same frame number, causing a maptrack walk to find it.
- */
- ld->grant_table->maptrack[handle].domid = dom;
- ld->grant_table->maptrack[handle].ref_and_flags =
- (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK);
- /* Unchecked and unconditional writes to user uop. */
- if ( flags & GNTMAP_device_map )
- (void)__put_user(frame, &uop->dev_bus_addr);
+ if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map) ) &&
+ unlikely(!__addr_ok(host_virt_addr)))
+ {
+ DPRINTK("Bad virtual address (%x) or flags (%x).\n",
+ host_virt_addr, dev_hst_ro_flags);
+ (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
+ return GNTST_bad_gntref;
+ }
- (void)__put_user(handle, &uop->handle);
+ if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
+ unlikely((dev_hst_ro_flags & (GNTMAP_device_map|GNTMAP_host_map)) ==
+0) )
+ {
+ DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags);
+ (void)__put_user(GNTST_bad_gntref, &uop->handle);
+ return GNTST_bad_gntref;
+ }
- spin_unlock(&rd->grant_table->lock);
- put_domain(rd);
- return 0;
+ if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
+ unlikely(ld == rd) )
+ {
+ if ( rd != NULL )
+ put_domain(rd);
+ DPRINTK("Could not find domain %d\n", dom);
+ (void)__put_user(GNTST_bad_domain, &uop->handle);
+ return GNTST_bad_domain;
+ }
+
+ /* get a maptrack handle */
+ if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
+ {
+ put_domain(rd);
+ DPRINTK("No more map handles available\n");
+ (void)__put_user(GNTST_no_device_space, &uop->handle);
+ return GNTST_no_device_space;
+ }
+
+#ifdef GRANT_DEBUG_VERBOSE
+ DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
+ ref, dom, dev_hst_ro_flags);
+#endif
+
+ if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref,
+ dev_hst_ro_flags,
+ host_virt_addr, &frame)))
+ {
+ /* Only make the maptrack live _after_ writing the pte,
+ * in case we overwrite the same frame number, causing a
+ * maptrack walk to find it
+ */
+ ld->grant_table->maptrack[handle].domid = dom;
+
+ ld->grant_table->maptrack[handle].ref_and_flags
+ = (ref << MAPTRACK_REF_SHIFT) |
+ (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK);
+
+ (void)__put_user(frame, &uop->dev_bus_addr);
+
+ if ( dev_hst_ro_flags & GNTMAP_host_map )
+ *va = host_virt_addr;
+
+ (void)__put_user(handle, &uop->handle);
+ }
+ else
+ {
+ (void)__put_user(rc, &uop->handle);
+ put_maptrack_handle(ld->grant_table, handle);
+ }
- fail:
- (void)__put_user(rc, &uop->handle);
- spin_unlock(&rd->grant_table->lock);
put_domain(rd);
- put_maptrack_handle(ld->grant_table, handle);
return rc;
}
@@ -364,17 +409,21 @@ gnttab_map_grant_ref(
gnttab_map_grant_ref_t *uop, unsigned int count)
{
int i, flush = 0;
- unsigned long va = 0;
+ unsigned long va[8];
for ( i = 0; i < count; i++ )
- if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 )
+ if ( __gnttab_map_grant_ref(&uop[i],
+ &va[ (flush < 8 ? flush : 0) ] ) == 0)
flush++;
- /* XXX KAF: I think we are probably flushing too much here. */
- if ( flush == 1 )
- flush_tlb_one_mask(current->domain->cpuset, va);
- else if ( flush != 0 )
- flush_tlb_mask(current->domain->cpuset);
+ if ( flush != 0 )
+ {
+ if ( flush <= 8 )
+ for ( i = 0; i < flush; i++ )
+ flush_tlb_one_mask(current->domain->cpuset, va[i]);
+ else
+ local_flush_tlb();
+ }
return 0;
}
@@ -392,6 +441,7 @@ __gnttab_unmap_grant_ref(
active_grant_entry_t *act;
grant_entry_t *sha;
grant_mapping_t *map;
+ u16 flags;
s16 rc = 1;
unsigned long frame, virt;
@@ -416,8 +466,9 @@ __gnttab_unmap_grant_ref(
return GNTST_bad_handle;
}
- dom = map->domid;
- ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+ dom = map->domid;
+ ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
+ flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK;
if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
unlikely(ld == rd) )
@@ -428,45 +479,56 @@ __gnttab_unmap_grant_ref(
(void)__put_user(GNTST_bad_domain, &uop->status);
return GNTST_bad_domain;
}
+#ifdef GRANT_DEBUG_VERBOSE
DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
ref, dom, handle);
+#endif
act = &rd->grant_table->active[ref];
sha = &rd->grant_table->shared[ref];
spin_lock(&rd->grant_table->lock);
- if ( frame != 0 )
+ if ( frame == 0 )
+ frame = act->frame;
+ else if ( frame == GNTUNMAP_DEV_FROM_VIRT )
+ {
+ if ( !( flags & GNTMAP_device_map ) )
+ PIN_FAIL(unmap_out, GNTST_bad_dev_addr,
+ "Bad frame number: frame not mapped for device access.\n");
+ frame = act->frame;
+
+ /* frame will be unmapped for device access below if virt addr ok */
+ }
+ else
{
if ( unlikely(frame != act->frame) )
- PIN_FAIL(GNTST_general_error,
+ PIN_FAIL(unmap_out, GNTST_general_error,
"Bad frame number doesn't match gntref.\n");
- if ( map->ref_and_flags & GNTMAP_device_map )
- act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
- GNTPIN_devr_inc : GNTPIN_devw_inc;
+ if ( flags & GNTMAP_device_map )
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+ : GNTPIN_devw_inc;
map->ref_and_flags &= ~GNTMAP_device_map;
(void)__put_user(0, &uop->dev_bus_addr);
- }
- else
- frame = act->frame;
- /* frame is now unmapped for device access */
+ /* frame is now unmapped for device access */
+ }
if ( (virt != 0) &&
- (map->ref_and_flags & GNTMAP_host_map) &&
+ (flags & GNTMAP_host_map) &&
((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
{
l1_pgentry_t *pl1e;
unsigned long _ol1e;
pl1e = &linear_pg_table[l1_linear_offset(virt)];
-
+
if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
{
DPRINTK("Could not find PTE entry for address %x\n", virt);
rc = -EINVAL;
- goto fail;
+ goto unmap_out;
}
/* check that the virtual address supplied is actually
@@ -477,7 +539,7 @@ __gnttab_unmap_grant_ref(
DPRINTK("PTE entry %x for address %x doesn't match frame %x\n",
_ol1e, virt, frame);
rc = -EINVAL;
- goto fail;
+ goto unmap_out;
}
/* Delete pagetable entry
@@ -487,35 +549,53 @@ __gnttab_unmap_grant_ref(
DPRINTK("Cannot delete PTE entry at %x for virtual address %x\n",
pl1e, virt);
rc = -EINVAL;
- goto fail;
+ goto unmap_out;
}
map->ref_and_flags &= ~GNTMAP_host_map;
- act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
- GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc
+ : GNTPIN_hstw_inc;
+
+ if ( frame == GNTUNMAP_DEV_FROM_VIRT )
+ {
+ act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc
+ : GNTPIN_devw_inc;
+
+ map->ref_and_flags &= ~GNTMAP_device_map;
+ (void)__put_user(0, &uop->dev_bus_addr);
+ }
+
rc = 0;
*va = virt;
}
if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0)
+ {
+ map->ref_and_flags = 0;
put_maptrack_handle(ld->grant_table, handle);
+ }
+
+ /* If we just unmapped a writable mapping, mark the frame as dirty */
+ if ( unlikely(shadow_mode_log_dirty(rd)) &&
+ !( flags & GNTMAP_readonly ) )
+ mark_dirty(rd, frame);
/* If the last writable mapping has been removed, put_page_type */
- if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
- !(map->ref_and_flags & GNTMAP_readonly) )
+ if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) &&
+ ( !( flags & GNTMAP_readonly ) ) )
{
- put_page_type(&frame_table[frame]);
clear_bit(_GTF_writing, &sha->flags);
+ put_page_type(&frame_table[frame]);
}
if ( act->pin == 0 )
{
- put_page(&frame_table[frame]);
clear_bit(_GTF_reading, &sha->flags);
+ put_page(&frame_table[frame]);
}
- fail:
+ unmap_out:
(void)__put_user(rc, &uop->status);
spin_unlock(&rd->grant_table->lock);
put_domain(rd);
@@ -527,16 +607,21 @@ gnttab_unmap_grant_ref(
gnttab_unmap_grant_ref_t *uop, unsigned int count)
{
int i, flush = 0;
- unsigned long va = 0;
+ unsigned long va[8];
for ( i = 0; i < count; i++ )
- if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 )
+ if ( __gnttab_unmap_grant_ref(&uop[i],
+ &va[ (flush < 8 ? flush : 0) ] ) == 0)
flush++;
- if ( flush == 1 )
- flush_tlb_one_mask(current->domain->cpuset, va);
- else if ( flush != 0 )
- flush_tlb_mask(current->domain->cpuset);
+ if ( flush != 0 )
+ {
+ if ( flush <= 8 )
+ for ( i = 0; i < flush; i++ )
+ flush_tlb_one_mask(current->domain->cpuset, va[i]);
+ else
+ local_flush_tlb();
+ }
return 0;
}
@@ -547,6 +632,7 @@ gnttab_setup_table(
{
gnttab_setup_table_t op;
struct domain *d;
+ int i;
if ( count != 1 )
return -EINVAL;
@@ -557,9 +643,10 @@ gnttab_setup_table(
return -EFAULT;
}
- if ( unlikely(op.nr_frames > 1) )
+ if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) )
{
- DPRINTK("Xen only supports one grant-table frame per domain.\n");
+ DPRINTK("Xen only supports at most %d grant-table frames per domain.\n",
+ NR_GRANT_FRAMES);
(void)put_user(GNTST_general_error, &uop->status);
return 0;
}
@@ -581,12 +668,15 @@ gnttab_setup_table(
return 0;
}
- if ( op.nr_frames == 1 )
+ if ( op.nr_frames <= NR_GRANT_FRAMES )
{
ASSERT(d->grant_table != NULL);
(void)put_user(GNTST_okay, &uop->status);
- (void)put_user(virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT,
- &uop->frame_list[0]);
+
+ for ( i = 0; i < op.nr_frames; i++ )
+ (void)put_user( (
+ virt_to_phys( (char*)(d->grant_table->shared)+(i*PAGE_SIZE) )
+ >> PAGE_SHIFT ), &uop->frame_list[i]);
}
put_domain(d);
@@ -634,29 +724,33 @@ gnttab_dump_table(gnttab_dump_table_t *uop)
DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
op.dom, shared_mfn);
- spin_lock(&gt->lock);
-
ASSERT(d->grant_table->active != NULL);
ASSERT(d->grant_table->shared != NULL);
+ ASSERT(d->grant_table->maptrack != NULL);
for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
{
- act = &gt->active[i];
sha_copy = gt->shared[i];
- if ( act->pin || act->domid || act->frame ||
- sha_copy.flags || sha_copy.domid || sha_copy.frame )
+ if ( sha_copy.flags )
{
- DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) dom:(%hu) frame:(%lx)\n",
- op.dom, i, act->pin, act->domid, act->frame);
DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n",
op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
-
}
-
}
- ASSERT(d->grant_table->maptrack != NULL);
+ spin_lock(&gt->lock);
+
+ for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
+ {
+ act = &gt->active[i];
+
+ if ( act->pin )
+ {
+ DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) dom:(%hu) frame:(%lx)\n",
+ op.dom, i, act->pin, act->domid, act->frame);
+ }
+ }
for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ )
{
@@ -746,17 +840,18 @@ gnttab_check_unmap(
lgt = ld->grant_table;
- /* Fast exit if we're not mapping anything using grant tables */
- if ( lgt->map_count == 0 )
- return 0;
-
-#ifdef GRANT_DEBUG
- if ( ld->id != 0 ) {
+#ifdef GRANT_DEBUG_VERBOSE
+ if ( ld->id != 0 )
+ {
DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n",
rd->id, ld->id, frame, readonly);
}
#endif
+ /* Fast exit if we're not mapping anything using grant tables */
+ if ( lgt->map_count == 0 )
+ return 0;
+
if ( get_domain(rd) == 0 )
{
DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", rd->id);
@@ -809,15 +904,15 @@ gnttab_check_unmap(
/* any more granted writable mappings? */
if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 )
{
- put_page_type(&frame_table[frame]);
clear_bit(_GTF_writing, &rgt->shared[ref].flags);
+ put_page_type(&frame_table[frame]);
}
}
if ( act->pin == 0 )
{
- put_page(&frame_table[frame]);
clear_bit(_GTF_reading, &rgt->shared[ref].flags);
+ put_page(&frame_table[frame]);
}
spin_unlock(&rgt->lock);
@@ -839,29 +934,41 @@ int
gnttab_prepare_for_transfer(
struct domain *rd, struct domain *ld, grant_ref_t ref)
{
- grant_table_t *t;
- grant_entry_t *e;
+ grant_table_t *rgt;
+ grant_entry_t *sha;
domid_t sdom;
u16 sflags;
u32 scombo, prev_scombo;
int retries = 0;
+ unsigned long target_pfn;
+
+ DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+ rd->id, ld->id, ref);
- if ( unlikely((t = rd->grant_table) == NULL) ||
+ if ( unlikely((rgt = rd->grant_table) == NULL) ||
unlikely(ref >= NR_GRANT_ENTRIES) )
{
DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->id, ref);
return 0;
}
- spin_lock(&t->lock);
+ spin_lock(&rgt->lock);
- e = &t->shared[ref];
+ sha = &rgt->shared[ref];
- sflags = e->flags;
- sdom = e->domid;
+ sflags = sha->flags;
+ sdom = sha->domid;
for ( ; ; )
{
+ target_pfn = sha->frame;
+
+ if ( unlikely(target_pfn >= max_page ) )
+ {
+ DPRINTK("Bad pfn (%x)\n", target_pfn);
+ goto fail;
+ }
+
if ( unlikely(sflags != GTF_accept_transfer) ||
unlikely(sdom != ld->id) )
{
@@ -875,7 +982,7 @@ gnttab_prepare_for_transfer(
prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags;
/* NB. prev_scombo is updated in place to seen value. */
- if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo,
+ if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo,
prev_scombo | GTF_transfer_committed)) )
{
DPRINTK("Fault while modifying shared flags and domid.\n");
@@ -898,29 +1005,50 @@ gnttab_prepare_for_transfer(
sdom = (u16)(prev_scombo >> 16);
}
- spin_unlock(&t->lock);
+ spin_unlock(&rgt->lock);
return 1;
fail:
- spin_unlock(&t->lock);
+ spin_unlock(&rgt->lock);
return 0;
}
void
gnttab_notify_transfer(
- struct domain *rd, grant_ref_t ref, unsigned long sframe)
+ struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame)
{
- unsigned long frame;
+ grant_entry_t *sha;
+ unsigned long pfn;
- /* cwc22
- * TODO: this requires that the machine_to_phys_mapping
- * has already been updated, so the accept_transfer hypercall
- * must do this.
- */
- frame = __mfn_to_gpfn(rd, sframe);
+ DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n",
+ rd->id, ld->id, ref);
+
+ sha = &rd->grant_table->shared[ref];
+
+ spin_lock(&rd->grant_table->lock);
+
+ pfn = sha->frame;
+
+ if ( unlikely(pfn >= max_page ) )
+ DPRINTK("Bad pfn (%x)\n", pfn);
+ else
+ {
+ machine_to_phys_mapping[frame] = pfn;
+
+ if ( unlikely(shadow_mode_log_dirty(ld)))
+ mark_dirty(ld, frame);
- wmb(); /* Ensure that the reassignment is globally visible. */
- rd->grant_table->shared[ref].frame = frame;
+ if (shadow_mode_translate(ld))
+ __phys_to_machine_mapping[pfn] = frame;
+ }
+ sha->frame = __mfn_to_gpfn(rd, frame);
+ sha->domid = rd->id;
+ wmb();
+ sha->flags = ( GTF_accept_transfer | GTF_transfer_completed );
+
+ spin_unlock(&rd->grant_table->lock);
+
+ return;
}
int
@@ -943,6 +1071,7 @@ grant_table_create(
goto no_mem;
memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES);
+ /* Table tracking mapped foreign frames. */
if ( (t->maptrack = (void *)alloc_xenheap_page()) == NULL )
goto no_mem;
memset(t->maptrack, 0, PAGE_SIZE);
@@ -950,10 +1079,16 @@ grant_table_create(
t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT;
/* Shared grant table. */
- if ( (t->shared = (void *)alloc_xenheap_page()) == NULL )
+ if ( (t->shared = (void *)alloc_xenheap_pages(ORDER_GRANT_FRAMES)) == NULL )
goto no_mem;
- memset(t->shared, 0, PAGE_SIZE);
- SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d);
+ memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE);
+
+ for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+ {
+ SHARE_PFN_WITH_DOMAIN(virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d);
+ machine_to_phys_mapping[ (virt_to_phys((char*)(t->shared)+(i*PAGE_SIZE))
+ >> PAGE_SHIFT) ] = INVALID_M2P_ENTRY;
+ }
/* Okay, install the structure. */
wmb(); /* avoid races with lock-free access to d->grant_table */
@@ -1055,7 +1190,7 @@ grant_table_destroy(
{
/* Free memory relating to this grant table. */
d->grant_table = NULL;
- free_xenheap_page((unsigned long)t->shared);
+ free_xenheap_pages((unsigned long)t->shared, ORDER_GRANT_FRAMES);
free_xenheap_page((unsigned long)t->maptrack);
xfree(t->active);
xfree(t);
diff --git a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
index b00642db7c..79c87a7cff 100644
--- a/xen/include/public/grant_table.h
+++ b/xen/include/public/grant_table.h
@@ -185,6 +185,8 @@ typedef struct {
u32 __pad;
} PACKED gnttab_unmap_grant_ref_t; /* 24 bytes */
+#define GNTUNMAP_DEV_FROM_VIRT (~0U)
+
/*
* GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
* <nr_frames> pages. The frame addresses are written to the <frame_list>.
@@ -248,8 +250,9 @@ typedef struct {
#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
-#define GNTST_no_device_space (-6) /* Out of space in I/O MMU. */
-#define GNTST_permission_denied (-7) /* Not enough privilege for operation. */
+#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
#define GNTTABOP_error_msgs { \
"okay", \
@@ -258,6 +261,7 @@ typedef struct {
"invalid grant reference", \
"invalid mapping handle", \
"invalid virtual address", \
+ "invalid device address", \
"no spare translation slot in the I/O MMU", \
"permission denied" \
}
diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h
index 836b8936e7..0f91912bbc 100644
--- a/xen/include/public/io/blkif.h
+++ b/xen/include/public/io/blkif.h
@@ -34,16 +34,24 @@ typedef struct {
blkif_vdev_t device; /* 2: only for read/write requests */
unsigned long id; /* 4: private guest value, echoed in resp */
blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */
- /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect ; @f_a_s[:12]=frame. */
+ /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect */
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+ /* @f_a_s[:16]=grant reference (16 bits). */
+#else
+ /* @f_a_s[:12]=@frame: machine page frame number. */
+#endif
/* @first_sect: first sector in frame to transfer (inclusive). */
/* @last_sect: last sector in frame to transfer (inclusive). */
- /* @frame: machine page frame number. */
unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} PACKED blkif_request_t;
#define blkif_first_sect(_fas) (((_fas)>>3)&7)
#define blkif_last_sect(_fas) ((_fas)&7)
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+#define blkif_gref_from_fas(_fas) ((_fas)>>16)
+#endif
+
typedef struct {
unsigned long id; /* copied from request */
u8 operation; /* copied from request */
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index b05ca702ce..f37d6d4e56 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -51,7 +51,10 @@ typedef struct {
#define GNTPIN_devr_inc (1 << GNTPIN_devr_shift)
#define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift)
-#define NR_GRANT_ENTRIES (PAGE_SIZE / sizeof(grant_entry_t))
+#define ORDER_GRANT_FRAMES 2
+#define NR_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES)
+#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t))
+
/*
* Tracks a mapping of another domain's grant reference. Each domain has a
@@ -104,7 +107,7 @@ gnttab_prepare_for_transfer(
/* Notify 'rd' of a completed transfer via an already-locked grant entry. */
void
gnttab_notify_transfer(
- struct domain *rd, grant_ref_t ref, unsigned long frame);
+ struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame);
/* Pre-domain destruction release of granted device mappings of other domains.*/
void