diff options
author | cwc22@centipede.cl.cam.ac.uk <cwc22@centipede.cl.cam.ac.uk> | 2005-04-04 20:22:17 +0000 |
---|---|---|
committer | cwc22@centipede.cl.cam.ac.uk <cwc22@centipede.cl.cam.ac.uk> | 2005-04-04 20:22:17 +0000 |
commit | d2781af07b4d42e5e02620c067c1da627baee567 (patch) | |
tree | 8f84b80c468ce8cf1ed6ce5f71d2242e7dff569f | |
parent | 12af2ea309ea8f116483672d9900b863e75f0c7e (diff) | |
download | xen-d2781af07b4d42e5e02620c067c1da627baee567.tar.gz xen-d2781af07b4d42e5e02620c067c1da627baee567.tar.bz2 xen-d2781af07b4d42e5e02620c067c1da627baee567.zip |
bitkeeper revision 1.1236.56.1 (4251a1f9OIyZY2I2LqBlxl0mi64FkA)
Grant tables: substantially more robust.
Block front and back drivers: support for using grant tables for interdomain communication.
20 files changed, 1118 insertions, 258 deletions
@@ -20,6 +20,7 @@ 4022a73cgxX1ryj1HgS-IwwB6NUi2A docs/misc/XenDebugger-HOWTO 412f4bd9sm5mCQ8BkrgKcAKZGadq7Q docs/misc/blkif-drivers-explained.txt 420b949cy9ZGzED74Fz_DaWlK7tT4g docs/misc/crashdb.txt +4251a1f82AexscYEiF4Iku8Gc_kWfQ docs/misc/grant-tables.txt 40d6ccbfKKBq8jE0ula4eHEzBiQuDA docs/misc/xen_config.html 410a4c2bAO_m_l4RsiiPHnZ4ixHWbQ docs/misc/xend.tex 3f9e7d564bWFB-Czjv1qdmE6o0GqNg docs/src/interface.tex diff --git a/docs/misc/grant-tables.txt b/docs/misc/grant-tables.txt new file mode 100644 index 0000000000..604be3bb08 --- /dev/null +++ b/docs/misc/grant-tables.txt @@ -0,0 +1,325 @@ +******************************************************************************** + A Rough Introduction to Using Grant Tables + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Christopher Clark, March, 2005. + +Grant tables are a mechanism for sharing and transferring frames between +domains, without requiring the participating domains to be privileged. + +The first mode of use allows domA to grant domB access to a specific frame, +whilst retaining ownership. The block front driver uses this to grant memory +access to the block back driver, so that it may read or write as requested. + + 1. domA creates a grant access reference, and transmits the ref id to domB. + 2. domB uses the reference to map the granted frame. + 3. domB performs the memory access. + 4. domB unmaps the granted frame. + 5. domA removes its grant. + + +The second mode allows domA to accept a transfer of ownership of a frame from +domB. The net front and back driver will use this for packet tx/rx. This +mechanism is still being implemented, though the xen<->guest interface design +is complete. + + 1. domA creates an accept transfer grant reference, and transmits it to domB. + 2. domB uses the ref to hand over a frame it owns. + 3. domA accepts the transfer + 4. domA clears the used reference. 
+ + +******************************************************************************** + Data structures + ~~~~~~~~~~~~~~~ + + The following data structures are used by Xen and the guests to implement + grant tables: + + 1. Shared grant entries + 2. Active grant entries + 3. Map tracking + + These are not the user's primary interface to grant tables, but are discussed + because an understanding of how they work may be useful. Each of these is a + finite resource. + + Shared grant entries + ~~~~~~~~~~~~~~~~~~~~ + + A set of pages is shared between Xen and a guest, holding the shared grant + entries. The guest writes into these entries to create grant references. The + index of the entry is transmitted to the remote domain: this is the + reference used to activate an entry. Xen will write into a shared entry to + indicate to a guest that its grant is in use. + sha->domid : remote domain being granted rights + sha->frame : machine frame being granted + sha->flags : allow access, allow transfer, remote is reading/writing, etc. + + Active grant entries + ~~~~~~~~~~~~~~~~~~~~ + + Xen maintains a set of private frames per domain, holding the active grant + entries for safety, and to reference count mappings. + act->domid : remote domain being granted rights + act->frame : machine frame being granted + act->pin : used to hold reference counts + + Map tracking + ~~~~~~~~~~~~ + + Every time a frame is mapped, a map track entry is stored in the metadata of + the mapping domain. The index of this entry is returned from the map call, + and is used to unmap the frame. Map track entries are also searched whenever a + page table entry containing a foreign frame number is overwritten: the first + matching map track entry is then removed, as if unmap had been invoked. + These are not used by the transfer mechanism. 
+ map->domid : owner of the mapped frame + map->ref_and_flags : grant reference, ro/rw, mapped for host or device access + +******************************************************************************** + + Granting a foreign domain access to frames + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + domA [frame]--> domB + + + domA: #include <asm-xen/gnttab.h> + grant_ref_t gref[BATCH_SIZE]; + + for ( i = 0; i < BATCH_SIZE; i++ ) + gref[i] = gnttab_grant_foreign_access( domBid, mfn, (readonly ? 1 : 0) ); + + + .. gref is then somehow transmitted to domB for use. + + + Mapping foreign frames + ~~~~~~~~~~~~~~~~~~~~~~ + + domB: #include <asm-xen/hypervisor.h> + unsigned long mmap_vstart; + gnttab_op_t aop[BATCH_SIZE]; + grant_ref_t mapped_handle[BATCH_SIZE]; + + if ( (mmap_vstart = allocate_empty_lowmem_region(BATCH_SIZE)) == 0 ) + BUG(); + + for ( i = 0; i < BATCH_SIZE; i++ ) + { + aop[i].u.map_grant_ref.host_virt_addr = + mmap_vstart + (i * PAGE_SIZE); + aop[i].u.map_grant_ref.dom = domAid; + aop[i].u.map_grant_ref.ref = gref[i]; + aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map | GNTMAP_readonly ); + } + + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, aop, BATCH_SIZE))) + BUG(); + + for ( i = 0; i < BATCH_SIZE; i++ ) + { + if ( unlikely(aop[i].u.map_grant_ref.dev_bus_addr == 0) ) + { + tidyup_all(aop, i); + goto panic; + } + + phys_to_machine_mapping[__pa(mmap_vstart + (i * PAGE_SIZE))>>PAGE_SHIFT] = + FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr); + + mapped_handle[i] = aop[i].u.map_grant_ref.handle; + } + + + + Unmapping foreign frames + ~~~~~~~~~~~~~~~~~~~~~~~~ + + domB: + for ( i = 0; i < BATCH_SIZE; i++ ) + { + aop[i].u.unmap_grant_ref.host_virt_addr = mmap_vstart + (i * PAGE_SIZE); + aop[i].u.unmap_grant_ref.dev_bus_addr = 0; + aop[i].u.unmap_grant_ref.handle = mapped_handle[i]; + } + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, aop, BATCH_SIZE))) + BUG(); + + + Ending foreign access + ~~~~~~~~~~~~~~~~~~~~~ + 
+ Note that this only prevents further mappings; it does _not_ revoke access. + Should _only_ be used when the remote domain has unmapped the frame. + gnttab_query_foreign_access( gref ) will indicate the state of any mapping. + + domA: + if ( gnttab_query_foreign_access( gref[i] ) == 0 ) + gnttab_end_foreign_access( gref[i], readonly ); + + TODO: readonly yet to be implemented. + + +******************************************************************************** + + Transferring ownership of a frame to another domain + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + [ XXX: Transfer mechanism is alpha-calibre code, untested, use at own risk XXX ] + [ XXX: show use of batch operations below, rather than single frame XXX ] + [ XXX: linux internal interface could/should be wrapped to be tidier XXX ] + + + Prepare to accept a frame from a foreign domain + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + domA: + if ( (p = alloc_page(GFP_HIGHUSER)) == NULL ) + { + printk("Cannot alloc a frame to surrender\n"); + break; + } + pfn = p - mem_map; + mfn = phys_to_machine_mapping[pfn]; + + if ( !PageHighMem(p) ) + { + v = phys_to_virt(pfn << PAGE_SHIFT); + scrub_pages(v, 1); + queue_l1_entry_update(get_ptep((unsigned long)v), 0); + } + + /* Ensure that ballooned highmem pages don't have cached mappings. */ + kmap_flush_unused(); + + /* Flush updates through and flush the TLB. */ + xen_tlb_flush(); + + phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; + + if ( HYPERVISOR_dom_mem_op( + MEMOP_decrease_reservation, &mfn, 1, 0) != 1 ) + { + printk("MEMOP_decrease_reservation failed\n"); + /* er... ok. 
free the page then */ + __free_page(p); + break; + } + + accepting_pfn = pfn; + ref = gnttab_grant_foreign_transfer( (domid_t) args.arg[0], pfn ); + printk("Accepting dom %lu frame at ref (%d)\n", args.arg[0], ref); + + + Transfer a frame to a foreign domain + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + domB: + mmu_update_t update; + domid_t domid; + grant_ref_t gref; + unsigned long pfn, mfn, *v; + struct page *transfer_page = 0; + + /* alloc a page and grant access. + * alloc page returns a page struct. */ + if ( (transfer_page = alloc_page(GFP_HIGHUSER)) == NULL ) + return -ENOMEM; + + pfn = transfer_page - mem_map; + mfn = phys_to_machine_mapping[pfn]; + + /* need to remove all references to this page */ + if ( !PageHighMem(transfer_page) ) + { + v = phys_to_virt(pfn << PAGE_SHIFT); + scrub_pages(v, 1); + sprintf((char *)v, "This page (%lx) was transferred.\n", mfn); + queue_l1_entry_update(get_ptep((unsigned long)v), 0); + } +#ifdef CONFIG_XEN_SCRUB_PAGES + else + { + v = kmap(transfer_page); + scrub_pages(v, 1); + sprintf((char *)v, "This page (%lx) was transferred.\n", mfn); + kunmap(transfer_page); + } +#endif + /* Delete any cached kmappings */ + kmap_flush_unused(); + + /* Flush updates through and flush the TLB */ + xen_tlb_flush(); + + /* invalidate in P2M */ + phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; + + domid = (domid_t)args.arg[0]; + gref = (grant_ref_t)args.arg[1]; + + update.ptr = MMU_EXTENDED_COMMAND; + update.ptr |= ((gref & 0x00FF) << 2); + update.ptr |= mfn << PAGE_SHIFT; + + update.val = MMUEXT_TRANSFER_PAGE; + update.val |= (domid << 16); + update.val |= (gref & 0xFF00); + + ret = HYPERVISOR_mmu_update(&update, 1, NULL); + + + Map a transferred frame + ~~~~~~~~~~~~~~~~~~~~~~~ + + TODO: + + + Clear the used transfer reference + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + TODO: + + +******************************************************************************** + + Using a private reserve of grant references + 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Where it is known in advance how many grant references are required, and +failure to allocate them on demand would cause difficulty, a batch can be +allocated and held in a private reserve. + +To reserve a private batch: + + /* housekeeping data - treat as opaque: */ + grant_ref_t gref_head, gref_terminal; + + if ( 0 > gnttab_alloc_grant_references( number_to_reserve, + &gref_head, &gref_terminal )) + return -ENOSPC; + + +To release a batch back to the shared pool: + + gnttab_free_grant_references( number_reserved, gref_head ); + + +To claim a reserved reference: + + ref = gnttab_claim_grant_reference( &gref_head, gref_terminal ); + + +To release a claimed reference back to the reserve pool: + + gnttab_release_grant_reference( &gref_head, gref ); + + +To use a claimed reference to grant access, use these alternative functions +that take an additional parameter of the grant reference to use: + + gnttab_grant_foreign_access_ref + gnttab_grant_foreign_transfer_ref diff --git a/linux-2.4.29-xen-sparse/arch/xen/config.in b/linux-2.4.29-xen-sparse/arch/xen/config.in index d1913f089e..23492fb5c8 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/config.in +++ b/linux-2.4.29-xen-sparse/arch/xen/config.in @@ -22,6 +22,7 @@ fi bool 'Scrub memory before freeing it to Xen' CONFIG_XEN_SCRUB_PAGES bool 'Network-device frontend driver' CONFIG_XEN_NETDEV_FRONTEND bool 'Block-device frontend driver' CONFIG_XEN_BLKDEV_FRONTEND +bool 'Block-device uses grant tables' CONFIG_XEN_BLKDEV_GRANT bool 'USB-device frontend driver' CONFIG_XEN_USB_FRONTEND endmenu # The IBM S/390 patch needs this. 
diff --git a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0 b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0 index 2864c85cea..b67e52aff6 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0 +++ b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xen0 @@ -16,6 +16,7 @@ CONFIG_XEN_PHYSDEV_ACCESS=y CONFIG_XEN_SCRUB_PAGES=y CONFIG_XEN_NETDEV_FRONTEND=y CONFIG_XEN_BLKDEV_FRONTEND=y +# CONFIG_XEN_BLKDEV_GRANT is not set # CONFIG_XEN_USB_FRONTEND is not set CONFIG_NO_IDLE_HZ=y CONFIG_FOREIGN_PAGES=y diff --git a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU index 886199d7d3..a83c1b8392 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU +++ b/linux-2.4.29-xen-sparse/arch/xen/defconfig-xenU @@ -15,6 +15,7 @@ CONFIG_UID16=y CONFIG_XEN_SCRUB_PAGES=y CONFIG_XEN_NETDEV_FRONTEND=y CONFIG_XEN_BLKDEV_FRONTEND=y +# CONFIG_XEN_BLKDEV_GRANT is not set # CONFIG_XEN_USB_FRONTEND is not set CONFIG_NO_IDLE_HZ=y # CONFIG_FOREIGN_PAGES is not set diff --git a/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c b/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c index 51b600d26d..530383dbf0 100644 --- a/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c +++ b/linux-2.4.29-xen-sparse/arch/xen/drivers/blkif/frontend/vbd.c @@ -67,9 +67,14 @@ static int xlvbd_get_vbd_info(vdisk_t *disk_info) memset(&req, 0, sizeof(req)); req.operation = BLKIF_OP_PROBE; req.nr_segments = 1; +#ifdef CONFIG_XEN_BLKDEV_GRANT + blkif_control_probe_send(&req, &rsp, + (unsigned long)(virt_to_machine(buf))); +#else req.frame_and_sects[0] = virt_to_machine(buf) | 7; blkif_control_send(&req, &rsp); +#endif if ( rsp.status <= 0 ) { diff --git a/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h b/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h index bc6e2c2004..255ac4a468 100644 --- a/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h +++ b/linux-2.4.29-xen-sparse/include/asm-xen/fixmap.h @@ -17,6 +17,7 @@ #include 
<linux/kernel.h> #include <asm/apicdef.h> #include <asm/page.h> +#include <asm-xen/gnttab.h> #ifdef CONFIG_HIGHMEM #include <linux/threads.h> #include <asm/kmap_types.h> @@ -52,7 +53,8 @@ enum fixed_addresses { FIX_NETRING2_BASE, FIX_NETRING3_BASE, FIX_SHARED_INFO, - FIX_GNTTAB, + FIX_GNTTAB_BEGIN, + FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1, #ifdef CONFIG_VGA_CONSOLE #define NR_FIX_BTMAPS 32 /* 128KB For the Dom0 VGA Console A0000-C0000 */ #else diff --git a/linux-2.6.11-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig index 1c2ba9b4a2..a5a2f8eb93 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/Kconfig +++ b/linux-2.6.11-xen-sparse/arch/xen/Kconfig @@ -61,6 +61,16 @@ config XEN_BLKDEV_TAP_BE with the blktap. This option will be removed as the block drivers are modified to use grant tables. +config XEN_BLKDEV_GRANT + bool "Grant table substrate for block drivers (DANGEROUS)" + depends on !XEN_BLKDEV_TAP_BE + default n + help + This introduces the use of grant tables as a data exchange mechanism + between the frontend and backend block drivers. This currently + conflicts with the block tap, and should be considered untested + and likely to render your system unstable. 
+ config XEN_NETDEV_BACKEND bool "Network-device backend driver" depends on XEN_PHYSDEV_ACCESS diff --git a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c index 4b25423e72..715f5dc951 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c +++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c @@ -41,9 +41,14 @@ EXPORT_SYMBOL(gnttab_end_foreign_access); EXPORT_SYMBOL(gnttab_query_foreign_access); EXPORT_SYMBOL(gnttab_grant_foreign_transfer); EXPORT_SYMBOL(gnttab_end_foreign_transfer); - -#define NR_GRANT_REFS 512 -static grant_ref_t gnttab_free_list[NR_GRANT_REFS]; +EXPORT_SYMBOL(gnttab_alloc_grant_references); +EXPORT_SYMBOL(gnttab_free_grant_references); +EXPORT_SYMBOL(gnttab_claim_grant_reference); +EXPORT_SYMBOL(gnttab_release_grant_reference); +EXPORT_SYMBOL(gnttab_grant_foreign_access_ref); +EXPORT_SYMBOL(gnttab_grant_foreign_transfer_ref); + +static grant_ref_t gnttab_free_list[NR_GRANT_ENTRIES]; static grant_ref_t gnttab_free_head; static grant_entry_t *shared; @@ -61,7 +66,7 @@ get_free_entry( void) { grant_ref_t fh, nfh = gnttab_free_head; - do { if ( unlikely((fh = nfh) == NR_GRANT_REFS) ) return -1; } + do { if ( unlikely((fh = nfh) == NR_GRANT_ENTRIES) ) return -1; } while ( unlikely((nfh = cmpxchg(&gnttab_free_head, fh, gnttab_free_list[fh])) != fh) ); return fh; @@ -97,6 +102,17 @@ gnttab_grant_foreign_access( return ref; } +void +gnttab_grant_foreign_access_ref( + grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) +{ + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_permit_access | (readonly ? 
GTF_readonly : 0); +} + + int gnttab_query_foreign_access( grant_ref_t ref ) { @@ -124,14 +140,14 @@ gnttab_end_foreign_access( grant_ref_t ref, int readonly ) int gnttab_grant_foreign_transfer( - domid_t domid) + domid_t domid, unsigned long pfn ) { int ref; if ( unlikely((ref = get_free_entry()) == -1) ) return -ENOSPC; - shared[ref].frame = 0; + shared[ref].frame = pfn; shared[ref].domid = domid; wmb(); shared[ref].flags = GTF_accept_transfer; @@ -139,6 +155,16 @@ gnttab_grant_foreign_transfer( return ref; } +void +gnttab_grant_foreign_transfer_ref( + grant_ref_t ref, domid_t domid, unsigned long pfn ) +{ + shared[ref].frame = pfn; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_accept_transfer; +} + unsigned long gnttab_end_foreign_transfer( grant_ref_t ref) @@ -163,6 +189,60 @@ gnttab_end_foreign_transfer( return frame; } +void +gnttab_free_grant_references( u16 count, grant_ref_t head ) +{ + /* TODO: O(N)...? */ + grant_ref_t to_die = 0, next = head; + int i; + + for ( i = 0; i < count; i++ ) + to_die = next; + next = gnttab_free_list[next]; + put_free_entry( to_die ); +} + +int +gnttab_alloc_grant_references( u16 count, + grant_ref_t *head, + grant_ref_t *terminal ) +{ + int i; + grant_ref_t h = gnttab_free_head; + + for ( i = 0; i < count; i++ ) + if ( unlikely(get_free_entry() == -1) ) + goto not_enough_refs; + + *head = h; + *terminal = gnttab_free_head; + + return 0; + +not_enough_refs: + gnttab_free_head = h; + return -ENOSPC; +} + +int +gnttab_claim_grant_reference( grant_ref_t *private_head, + grant_ref_t terminal ) +{ + grant_ref_t g; + if ( unlikely((g = *private_head) == terminal) ) + return -ENOSPC; + *private_head = gnttab_free_list[g]; + return g; +} + +void +gnttab_release_grant_reference( grant_ref_t *private_head, + grant_ref_t release ) +{ + gnttab_free_list[release] = *private_head; + *private_head = release; +} + static int grant_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long data) { @@ -194,7 
+274,7 @@ static int grant_ioctl(struct inode *inode, struct file *file, TRAP_INSTR "; " "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx" : "=a" (ret) : "0" (&hypercall) : "memory" ); - + return ret; } @@ -212,7 +292,14 @@ static int grant_read(char *page, char **start, off_t off, gt = (grant_entry_t *)shared; len = 0; - for ( i = 0; i < NR_GRANT_REFS; i++ ) + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + /* TODO: safety catch here until this can handle >PAGE_SIZE output */ + if (len > (PAGE_SIZE - 200)) + { + len += sprintf( page + len, "Truncated.\n"); + break; + } + if ( gt[i].flags ) len += sprintf( page + len, "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n", @@ -235,22 +322,25 @@ static int grant_write(struct file *file, const char __user *buffer, static int __init gnttab_init(void) { gnttab_setup_table_t setup; - unsigned long frame; + unsigned long frames[NR_GRANT_FRAMES]; int i; - for ( i = 0; i < NR_GRANT_REFS; i++ ) - gnttab_free_list[i] = i + 1; - setup.dom = DOMID_SELF; - setup.nr_frames = 1; - setup.frame_list = &frame; + setup.nr_frames = NR_GRANT_FRAMES; + setup.frame_list = frames; + if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 ) BUG(); if ( setup.status != 0 ) BUG(); - set_fixmap_ma(FIX_GNTTAB, frame << PAGE_SHIFT); - shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB); + for ( i = 0; i < NR_GRANT_FRAMES; i++ ) + set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT); + + shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END); + + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + gnttab_free_list[i] = i + 1; /* * /proc/xen/grant : used by libxc to access grant tables @@ -269,6 +359,7 @@ static int __init gnttab_init(void) grant_pde->read_proc = &grant_read; grant_pde->write_proc = &grant_write; + printk("Grant table initialized\n"); return 0; } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c index a827ab4eda..5933725bf7 100644 --- 
a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c @@ -8,10 +8,14 @@ * arch/xen/drivers/blkif/frontend * * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark */ #include "common.h" #include <asm-xen/evtchn.h> +#ifdef CONFIG_XEN_BLKDEV_GRANT +#include <asm-xen/xen-public/grant_table.h> +#endif /* * These are rather arbitrary. They are fairly large because adjacent requests @@ -80,6 +84,17 @@ static inline void flush_plugged_queue(void) } #endif +#ifdef CONFIG_XEN_BLKDEV_GRANT +/* When using grant tables to map a frame for device access then the + * handle returned must be used to unmap the frame. This is needed to + * drop the ref count on the frame. + */ +static u16 pending_grant_handles[MMAP_PAGES]; +#define pending_handle(_idx, _i) \ + (pending_grant_handles[((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) + (_i)]) +#define BLKBACK_INVALID_HANDLE (0xFFFF) +#endif + #ifdef CONFIG_XEN_BLKDEV_TAP_BE /* * If the tap driver is used, we may get pages belonging to either the tap @@ -100,6 +115,27 @@ static void make_response(blkif_t *blkif, unsigned long id, static void fast_flush_area(int idx, int nr_pages) { +#ifdef CONFIG_XEN_BLKDEV_GRANT + gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + u16 handle; + + for ( i = 0; i < nr_pages; i++ ) + { + if ( BLKBACK_INVALID_HANDLE != ( handle = pending_handle(idx, i) ) ) + { + aop[i].u.unmap_grant_ref.host_virt_addr = MMAP_VADDR(idx, i); + aop[i].u.unmap_grant_ref.dev_bus_addr = 0; + aop[i].u.unmap_grant_ref.handle = handle; + pending_handle(idx, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + } + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, aop, invcount))) + BUG(); +#else + multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; @@ -114,6 +150,7 @@ static void fast_flush_area(int idx, int nr_pages) mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL; if ( 
unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) ) BUG(); +#endif } @@ -347,6 +384,26 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) (blkif_last_sect(req->frame_and_sects[0]) != 7) ) goto out; +#ifdef CONFIG_XEN_BLKDEV_GRANT + { + gnttab_op_t op; + + op.u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, 0); + op.u.map_grant_ref.flags = GNTMAP_host_map; + op.u.map_grant_ref.ref = blkif_gref_from_fas(req->frame_and_sects[0]); + op.u.map_grant_ref.dom = blkif->domid; + + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, &op, 1))) + BUG(); + + if ( op.u.map_grant_ref.dev_bus_addr == 0 ) + goto out; + + pending_handle(pending_idx, 0) = op.u.map_grant_ref.handle; + } +#else /* else CONFIG_XEN_BLKDEV_GRANT */ + #ifdef CONFIG_XEN_BLKDEV_TAP_BE /* Grab the real frontend out of the probe message. */ if (req->frame_and_sects[1] == BLKTAP_COOKIE) @@ -369,7 +426,8 @@ static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) goto out; #endif - +#endif /* endif CONFIG_XEN_BLKDEV_GRANT */ + rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), PAGE_SIZE / sizeof(vdisk_t)); @@ -382,10 +440,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); int operation = (req->operation == BLKIF_OP_WRITE) ? 
WRITE : READ; - unsigned long fas, remap_prot; + unsigned long fas = 0; int i, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; pending_req_t *pending_req; +#ifdef CONFIG_XEN_BLKDEV_GRANT + gnttab_op_t aop[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +#else + unsigned long remap_prot; multicall_entry_t mcl[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +#endif struct phys_req preq; struct { unsigned long buf; unsigned int nsec; @@ -412,14 +475,58 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) preq.sector_number = req->sector_number; preq.nr_sects = 0; +#ifdef CONFIG_XEN_BLKDEV_GRANT for ( i = 0; i < nseg; i++ ) { + fas = req->frame_and_sects[i]; + seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1; + + if ( seg[i].nsec <= 0 ) + goto bad_descriptor; + preq.nr_sects += seg[i].nsec; + + aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i); + + aop[i].u.map_grant_ref.dom = blkif->domid; + aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas); + aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map | + ( ( operation == READ ) ? 
+ 0 : GNTMAP_readonly ) ); + } + + if ( unlikely(HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, aop, nseg))) + BUG(); + + for ( i = 0; i < nseg; i++ ) + { + if ( unlikely(aop[i].u.map_grant_ref.dev_bus_addr == 0) ) + { + DPRINTK("invalid buffer -- could not remap it\n"); + fast_flush_area(pending_idx, nseg); + goto bad_descriptor; + } + + phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] = + FOREIGN_FRAME(aop[i].u.map_grant_ref.dev_bus_addr); + + pending_handle(pending_idx, i) = aop[i].u.map_grant_ref.handle; + } +#endif + + for ( i = 0; i < nseg; i++ ) + { +#ifdef CONFIG_XEN_BLKDEV_GRANT + seg[i].buf = (aop[i].u.map_grant_ref.dev_bus_addr << PAGE_SHIFT) | + (blkif_first_sect(fas) << 9); +#else fas = req->frame_and_sects[i]; seg[i].buf = (fas & PAGE_MASK) | (blkif_first_sect(fas) << 9); seg[i].nsec = blkif_last_sect(fas) - blkif_first_sect(fas) + 1; if ( seg[i].nsec <= 0 ) goto bad_descriptor; preq.nr_sects += seg[i].nsec; +#endif } if ( vbd_translate(&preq, blkif, operation) != 0 ) @@ -430,6 +537,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) goto bad_descriptor; } +#ifndef CONFIG_XEN_BLKDEV_GRANT if ( operation == READ ) remap_prot = _PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW; else @@ -461,6 +569,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) goto bad_descriptor; } } +#endif /* end ifndef CONFIG_XEN_BLKDEV_GRANT */ pending_req = &pending_reqs[pending_idx]; pending_req->blkif = blkif; @@ -628,9 +737,15 @@ static int __init blkif_init(void) blkif_ctrlif_init(); +#ifdef CONFIG_XEN_BLKDEV_GRANT + memset( pending_grant_handles, BLKBACK_INVALID_HANDLE, MMAP_PAGES ); + printk(KERN_ALERT "Blkif backend is using grant tables.\n"); +#endif + #ifdef CONFIG_XEN_BLKDEV_TAP_BE printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n"); #endif + return 0; } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c 
b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c index 54bd2c749b..abfa6b5e18 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c @@ -7,6 +7,7 @@ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge * Copyright (c) 2004, Christian Limpach * Copyright (c) 2004, Andrew Warfield + * Copyright (c) 2005, Christopher Clark * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -30,6 +31,14 @@ * IN THE SOFTWARE. */ +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) +#endif + #include <linux/version.h> #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) @@ -46,6 +55,10 @@ #include <scsi/scsi.h> #include <asm-xen/ctrl_if.h> #include <asm-xen/evtchn.h> +#ifdef CONFIG_XEN_BLKDEV_GRANT +#include <asm-xen/xen-public/grant_table.h> +#include <asm-xen/gnttab.h> +#endif typedef unsigned char byte; /* from linux/ide.h */ @@ -76,6 +89,13 @@ static blkif_front_ring_t blk_ring; #define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) +#ifdef CONFIG_XEN_BLKDEV_GRANT +static domid_t rdomid = 0; +static grant_ref_t gref_head, gref_terminal; +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE) +#endif + unsigned long rec_ring_free; blkif_request_t rec_ring[BLK_RING_SIZE]; @@ -130,7 +150,11 @@ static inline void translate_req_to_pfn(blkif_request_t *xreq, xreq->sector_number = req->sector_number; for ( i = 0; i < req->nr_segments; i++ ) +#ifdef CONFIG_XEN_BLKDEV_GRANT + xreq->frame_and_sects[i] = req->frame_and_sects[i]; +#else xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]); +#endif } static inline void translate_req_to_mfn(blkif_request_t *xreq, @@ -145,7 +169,11 @@ static inline void 
translate_req_to_mfn(blkif_request_t *xreq, xreq->sector_number = req->sector_number; for ( i = 0; i < req->nr_segments; i++ ) +#ifdef CONFIG_XEN_BLKDEV_GRANT + xreq->frame_and_sects[i] = req->frame_and_sects[i]; +#else xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]); +#endif } @@ -274,6 +302,9 @@ static int blkif_queue_request(struct request *req) int idx; unsigned long id; unsigned int fsect, lsect; +#ifdef CONFIG_XEN_BLKDEV_GRANT + int ref; +#endif if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) return 1; @@ -299,8 +330,23 @@ static int blkif_queue_request(struct request *req) buffer_ma = page_to_phys(bvec->bv_page); fsect = bvec->bv_offset >> 9; lsect = fsect + (bvec->bv_len >> 9) - 1; +#ifdef CONFIG_XEN_BLKDEV_GRANT + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ASSERT( ref != -ENOSPC ); + + gnttab_grant_foreign_access_ref( + ref, + rdomid, + buffer_ma >> PAGE_SHIFT, + rq_data_dir(req) ); + + ring_req->frame_and_sects[ring_req->nr_segments++] = + (((u32) ref) << 16) | (fsect << 3) | lsect; +#else ring_req->frame_and_sects[ring_req->nr_segments++] = buffer_ma | (fsect << 3) | lsect; +#endif } } @@ -719,6 +765,9 @@ static int blkif_queue_request(unsigned long id, blkif_request_t *req; struct buffer_head *bh; unsigned int fsect, lsect; +#ifdef CONFIG_XEN_BLKDEV_GRANT + int ref; +#endif fsect = (buffer_ma & ~PAGE_MASK) >> 9; lsect = fsect + nr_sectors - 1; @@ -766,11 +815,25 @@ static int blkif_queue_request(unsigned long id, bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id; - rec_ring[req->id].id = id; - - req->frame_and_sects[req->nr_segments] = - buffer_ma | (fsect<<3) | lsect; + +#ifdef CONFIG_XEN_BLKDEV_GRANT + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ASSERT( ref != -ENOSPC ); + + gnttab_grant_foreign_access_ref( + ref, + rdomid, + buffer_ma >> PAGE_SHIFT, + ( operation == BLKIF_OP_WRITE ? 
1 : 0 ) ); + + req->frame_and_sects[req->nr_segments] = + (((u32) ref ) << 16) | (fsect << 3) | lsect; +#else + req->frame_and_sects[req->nr_segments] = + buffer_ma | (fsect << 3) | lsect; +#endif if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST ) sg_next_sect += nr_sectors; else @@ -808,7 +871,21 @@ static int blkif_queue_request(unsigned long id, req->sector_number = (blkif_sector_t)sector_number; req->device = device; req->nr_segments = 1; +#ifdef CONFIG_XEN_BLKDEV_GRANT + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ASSERT( ref != -ENOSPC ); + + gnttab_grant_foreign_access_ref( + ref, + rdomid, + buffer_ma >> PAGE_SHIFT, + ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); + + req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect; +#else req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect; +#endif /* Keep a private copy so we can reissue requests when recovering. */ translate_req_to_pfn(&rec_ring[xid], req ); @@ -966,6 +1043,20 @@ static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) /***************************** COMMON CODE *******************************/ +#ifdef CONFIG_XEN_BLKDEV_GRANT +void blkif_control_probe_send(blkif_request_t *req, blkif_response_t *rsp, + unsigned long address) +{ + int ref = gnttab_claim_grant_reference(&gref_head, gref_terminal); + ASSERT( ref != -ENOSPC ); + + gnttab_grant_foreign_access_ref( ref, rdomid, address >> PAGE_SHIFT, 0 ); + + req->frame_and_sects[0] = (((u32) ref) << 16) | 7; + + blkif_control_send(req, rsp); +} +#endif void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) { @@ -1146,6 +1237,9 @@ static void blkif_connect(blkif_fe_interface_status_t *status) blkif_evtchn = status->evtchn; blkif_irq = bind_evtchn_to_irq(blkif_evtchn); +#ifdef CONFIG_XEN_BLKDEV_GRANT + rdomid = status->domid; +#endif err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL); if ( err ) @@ -1301,7 +1395,14 @@ int 
wait_for_blkif(void) int __init xlblk_init(void) { int i; - + +#ifdef CONFIG_XEN_BLKDEV_GRANT + if ( 0 > gnttab_alloc_grant_references( MAXIMUM_OUTSTANDING_BLOCK_REQS, + &gref_head, &gref_terminal )) + return 1; + printk(KERN_ALERT "Blkif frontend is using grant tables.\n"); +#endif + if ( (xen_start_info.flags & SIF_INITDOMAIN) || (xen_start_info.flags & SIF_BLK_BE_DOMAIN) ) return 0; @@ -1330,12 +1431,19 @@ void blkdev_resume(void) send_driver_status(1); } -/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */ - void blkif_completion(blkif_request_t *req) { int i; +#ifdef CONFIG_XEN_BLKDEV_GRANT + grant_ref_t gref; + for ( i = 0; i < req->nr_segments; i++ ) + { + gref = blkif_gref_from_fas(req->frame_and_sects[i]); + gnttab_release_grant_reference(&gref_head, gref); + } +#else + /* This is a hack to get the dirty logging bits set */ switch ( req->operation ) { case BLKIF_OP_READ: @@ -1347,5 +1455,5 @@ void blkif_completion(blkif_request_t *req) } break; } - +#endif } diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h index 6045fcc601..ede57abfa5 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h @@ -102,6 +102,10 @@ extern int blkif_ioctl(struct inode *inode, struct file *filep, extern int blkif_check(dev_t dev); extern int blkif_revalidate(dev_t dev); extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); +#ifdef CONFIG_XEN_BLKDEV_GRANT +extern void blkif_control_probe_send( + blkif_request_t *req, blkif_response_t *rsp, unsigned long address); +#endif extern void do_blkif_request (request_queue_t *rq); extern void xlvbd_update_vbds(void); diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c index d2a1f1f65e..f8aec64938 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c +++ 
b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c @@ -135,9 +135,14 @@ static vdisk_t * xlvbd_probe(int *ret) memset(&req, 0, sizeof(req)); req.operation = BLKIF_OP_PROBE; req.nr_segments = 1; +#ifdef CONFIG_XEN_BLKDEV_GRANT + blkif_control_probe_send(&req, &rsp, + (unsigned long)(virt_to_machine(buf))); +#else req.frame_and_sects[0] = virt_to_machine(buf) | 7; blkif_control_send(&req, &rsp); +#endif if ( rsp.status <= 0 ) { printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status); goto out; diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h index f86762fd1e..cebd63514d 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h @@ -27,6 +27,7 @@ #include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> +#include <asm-xen/gnttab.h> #ifdef CONFIG_HIGHMEM #include <linux/threads.h> #include <asm/kmap_types.h> @@ -84,7 +85,8 @@ enum fixed_addresses { FIX_PCIE_MCFG, #endif FIX_SHARED_INFO, - FIX_GNTTAB, + FIX_GNTTAB_BEGIN, + FIX_GNTTAB_END = FIX_GNTTAB_BEGIN + NR_GRANT_FRAMES - 1, #ifdef CONFIG_XEN_PHYSDEV_ACCESS #define NR_FIX_ISAMAPS 256 FIX_ISAMAP_END, diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h index 8c28c889f7..642a74dbf9 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h @@ -7,6 +7,7 @@ * (i.e., mechanisms for both sender and recipient of grant references) * * Copyright (c) 2004, K A Fraser + * Copyright (c) 2005, Christopher Clark */ #ifndef __ASM_GNTTAB_H__ @@ -16,6 +17,10 @@ #include <asm-xen/hypervisor.h> #include <asm-xen/xen-public/grant_table.h> +/* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */ +#define NR_GRANT_FRAMES 4 +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) + int 
gnttab_grant_foreign_access( domid_t domid, unsigned long frame, int readonly); @@ -26,7 +31,7 @@ gnttab_end_foreign_access( int gnttab_grant_foreign_transfer( - domid_t domid); + domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer( @@ -36,4 +41,32 @@ int gnttab_query_foreign_access( grant_ref_t ref ); +/* + * operations on reserved batches of grant references + */ +int +gnttab_alloc_grant_references( + u16 count, grant_ref_t *pprivate_head, grant_ref_t *private_terminal ); + +void +gnttab_free_grant_references( + u16 count, grant_ref_t private_head ); + +int +gnttab_claim_grant_reference( grant_ref_t *pprivate_head, grant_ref_t terminal +); + +void +gnttab_release_grant_reference( + grant_ref_t *private_head, grant_ref_t release ); + +void +gnttab_grant_foreign_access_ref( + grant_ref_t ref, domid_t domid, unsigned long frame, int readonly); + +void +gnttab_grant_foreign_transfer_ref( + grant_ref_t, domid_t domid, unsigned long pfn); + + #endif /* __ASM_GNTTAB_H__ */ diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index aca041e0c6..9d28d42a6f 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -1997,7 +1997,13 @@ int update_grant_va_mapping(unsigned long va, l1_pgentry_t ol1e = mk_l1_pgentry(_ol1e); if ( update_l1e(pl1e, ol1e, mk_l1_pgentry(_nl1e)) ) + { put_page_from_l1e(ol1e, d); + if ( _ol1e & _PAGE_PRESENT ) + rc = 0; /* Caller needs to invalidate TLB entry */ + else + rc = 1; /* Caller need not invalidate TLB entry */ + } else rc = -EINVAL; } @@ -3278,7 +3284,7 @@ void audit_domains_key(unsigned char key) spin_unlock(&e->page_alloc_lock); /* Transfer is all done: tell the guest about its new page frame. 
*/ - gnttab_notify_transfer(e, gntref, pfn); + gnttab_notify_transfer(e, d, gntref, pfn); put_domain(e); break; diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c index 7cd543b834..446783c48e 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -22,18 +22,19 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define GRANT_DEBUG 1 +#define GRANT_DEBUG 0 +#define GRANT_DEBUG_VERBOSE 0 #include <xen/config.h> #include <xen/sched.h> -#include <asm-x86/mm.h> -#include <asm-x86/shadow.h> +#include <asm/mm.h> +#include <asm/shadow.h> -#define PIN_FAIL(_rc, _f, _a...) \ +#define PIN_FAIL(_lbl, _rc, _f, _a...) \ do { \ DPRINTK( _f, ## _a ); \ rc = (_rc); \ - goto fail; \ + goto _lbl; \ } while ( 0 ) static inline int @@ -58,23 +59,38 @@ put_maptrack_handle( } static int -__gnttab_map_grant_ref( - gnttab_map_grant_ref_t *uop, - unsigned long *va) +__gnttab_activate_grant_ref( + struct domain *mapping_d, /* IN */ + struct exec_domain *mapping_ed, + struct domain *granting_d, + grant_ref_t ref, + u16 dev_hst_ro_flags, + unsigned long host_virt_addr, + unsigned long *pframe ) /* OUT */ { - domid_t dom, sdom; - grant_ref_t ref; - struct domain *ld, *rd; - struct exec_domain *led; - u16 flags, sflags; - int handle; + domid_t sdom; + u16 sflags; active_grant_entry_t *act; grant_entry_t *sha; - s16 rc = 0; - unsigned long frame = 0, host_virt_addr; + s16 rc = 1; + unsigned long frame = 0; + int retries = 0; - /* Returns 0 if TLB flush / invalidate required by caller. - * va will indicate the address to be invalidated. */ + /* + * Objectives of this function: + * . Make the record ( granting_d, ref ) active, if not already. + * . Update shared grant entry of owner, indicating frame is mapped. + * . Increment the owner act->pin reference counts. + * . get_page on shared frame if new mapping. + * . get_page_type if this is first RW mapping of frame. + * . Add PTE to virtual address space of mapping_d, if necessary. 
+ * Returns: + * . -ve: error + * . 1: ok + * . 0: ok and TLB invalidate of host_virt_addr needed. + * + * On success, *pframe contains mfn. + */ /* * We bound the number of times we retry CMPXCHG on memory locations that @@ -84,62 +100,11 @@ __gnttab_map_grant_ref( * the guest to race our updates (e.g., to change the GTF_readonly flag), * so we allow a few retries before failing. */ - int retries = 0; - - led = current; - ld = led->domain; - - /* Bitwise-OR avoids short-circuiting which screws control flow. */ - if ( unlikely(__get_user(dom, &uop->dom) | - __get_user(ref, &uop->ref) | - __get_user(host_virt_addr, &uop->host_virt_addr) | - __get_user(flags, &uop->flags)) ) - { - DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n"); - return -EFAULT; /* don't set status */ - } - - if ( ((host_virt_addr != 0) || (flags & GNTMAP_host_map) ) && - unlikely(!__addr_ok(host_virt_addr))) - { - DPRINTK("Bad virtual address (%x) or flags (%x).\n", - host_virt_addr, flags); - (void)__put_user(GNTST_bad_virt_addr, &uop->handle); - return GNTST_bad_gntref; - } - if ( unlikely(ref >= NR_GRANT_ENTRIES) || - unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) ) - { - DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags); - (void)__put_user(GNTST_bad_gntref, &uop->handle); - return GNTST_bad_gntref; - } + act = &granting_d->grant_table->active[ref]; + sha = &granting_d->grant_table->shared[ref]; - if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || - unlikely(ld == rd) ) - { - if ( rd != NULL ) - put_domain(rd); - DPRINTK("Could not find domain %d\n", dom); - (void)__put_user(GNTST_bad_domain, &uop->handle); - return GNTST_bad_domain; - } - - if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) - { - put_domain(rd); - DPRINTK("No more map handles available\n"); - (void)__put_user(GNTST_no_device_space, &uop->handle); - return GNTST_no_device_space; - } - DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n", - ref, dom, flags); - - 
act = &rd->grant_table->active[ref]; - sha = &rd->grant_table->shared[ref]; - - spin_lock(&rd->grant_table->lock); + spin_lock(&granting_d->grant_table->lock); if ( act->pin == 0 ) { @@ -153,21 +118,21 @@ __gnttab_map_grant_ref( u32 scombo, prev_scombo, new_scombo; if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || - unlikely(sdom != ld->id) ) - PIN_FAIL(GNTST_general_error, + unlikely(sdom != mapping_d->id) ) + PIN_FAIL(unlock_out, GNTST_general_error, "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", - sflags, sdom, ld->id); + sflags, sdom, mapping_d->id); /* Merge two 16-bit values into a 32-bit combined update. */ /* NB. Endianness! */ prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; new_scombo = scombo | GTF_reading; - if ( !(flags & GNTMAP_readonly) ) + if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) { new_scombo |= GTF_writing; if ( unlikely(sflags & GTF_readonly) ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); } @@ -175,7 +140,7 @@ __gnttab_map_grant_ref( if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, new_scombo)) ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Fault while modifying shared flags and domid.\n"); /* Did the combined update work (did we see what we expected?). */ @@ -183,7 +148,7 @@ __gnttab_map_grant_ref( break; if ( retries++ == 4 ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Shared grant entry is unstable.\n"); /* Didn't see what we expected. Split out the seen flags & dom. */ @@ -194,25 +159,25 @@ __gnttab_map_grant_ref( /* rmb(); */ /* not on x86 */ - frame = __translate_gpfn_to_mfn(rd, sha->frame); + frame = __translate_gpfn_to_mfn(granting_d, sha->frame); if ( unlikely(!pfn_is_ram(frame)) || - unlikely(!((flags & GNTMAP_readonly) ? - get_page(&frame_table[frame], rd) : - get_page_and_type(&frame_table[frame], rd, + unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ? 
+ get_page(&frame_table[frame], granting_d) : + get_page_and_type(&frame_table[frame], granting_d, PGT_writable_page))) ) { clear_bit(_GTF_writing, &sha->flags); clear_bit(_GTF_reading, &sha->flags); - PIN_FAIL(GNTST_general_error, - "Could not pin the granted frame!\n"); + PIN_FAIL(unlock_out, GNTST_general_error, + "Could not pin the granted frame (%lx)!\n", frame); } - if ( flags & GNTMAP_device_map ) - act->pin += (flags & GNTMAP_readonly) ? + if ( dev_hst_ro_flags & GNTMAP_device_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? GNTPIN_devr_inc : GNTPIN_devw_inc; - if ( flags & GNTMAP_host_map ) - act->pin += (flags & GNTMAP_readonly) ? + if ( dev_hst_ro_flags & GNTMAP_host_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; act->domid = sdom; act->frame = frame; @@ -226,11 +191,11 @@ __gnttab_map_grant_ref( * A more accurate check cannot be done with a single comparison. */ if ( (act->pin & 0x80808080U) != 0 ) - PIN_FAIL(ENOSPC, "Risk of counter overflow %08x\n", act->pin); + PIN_FAIL(unlock_out, ENOSPC, "Risk of counter overflow %08x\n", act->pin); frame = act->frame; - if ( !(flags & GNTMAP_readonly) && + if ( !(dev_hst_ro_flags & GNTMAP_readonly) && !((sflags = sha->flags) & GTF_writing) ) { for ( ; ; ) @@ -238,7 +203,7 @@ __gnttab_map_grant_ref( u16 prev_sflags; if ( unlikely(sflags & GTF_readonly) ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Attempt to write-pin a r/o grant entry.\n"); prev_sflags = sflags; @@ -246,14 +211,14 @@ __gnttab_map_grant_ref( /* NB. prev_sflags is updated in place to seen value. 
*/ if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, prev_sflags | GTF_writing)) ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Fault while modifying shared flags.\n"); if ( likely(prev_sflags == sflags) ) break; if ( retries++ == 4 ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Shared grant entry is unstable.\n"); sflags = prev_sflags; @@ -263,99 +228,179 @@ __gnttab_map_grant_ref( PGT_writable_page)) ) { clear_bit(_GTF_writing, &sha->flags); - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unlock_out, GNTST_general_error, "Attempt to write-pin a unwritable page.\n"); } } - if ( flags & GNTMAP_device_map ) - act->pin += (flags & GNTMAP_readonly) ? + if ( dev_hst_ro_flags & GNTMAP_device_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? GNTPIN_devr_inc : GNTPIN_devw_inc; - if ( flags & GNTMAP_host_map ) - act->pin += (flags & GNTMAP_readonly) ? + if ( dev_hst_ro_flags & GNTMAP_host_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; } /* At this point: - * act->pin updated to reflect mapping - * sha->flags updated to indicate to granting domain mapping done - * frame contains the mfn + * act->pin updated to reflect mapping. + * sha->flags updated to indicate to granting domain mapping done. + * frame contains the mfn. */ - if ( (host_virt_addr != 0) && (flags & GNTMAP_host_map) ) + spin_unlock(&granting_d->grant_table->lock); + + if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) ) { /* Write update into the pagetable */ - /* cwc22: TODO: check locking... */ - - spin_unlock(&rd->grant_table->lock); - rc = update_grant_va_mapping( host_virt_addr, (frame << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | - ((flags & GNTMAP_readonly) ? 0 : _PAGE_RW), - ld, led ); + ((dev_hst_ro_flags & GNTMAP_readonly) ? 
0 : _PAGE_RW), + mapping_d, mapping_ed ); - spin_lock(&rd->grant_table->lock); + /* IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB. + * This is done in the outer gnttab_map_grant_ref. + */ if ( 0 > rc ) { /* Abort. */ - act->pin -= (flags & GNTMAP_readonly) ? - GNTPIN_hstr_inc : GNTPIN_hstw_inc; - if ( flags & GNTMAP_readonly ) + spin_lock(&granting_d->grant_table->lock); + + if ( dev_hst_ro_flags & GNTMAP_readonly ) act->pin -= GNTPIN_hstr_inc; else { act->pin -= GNTPIN_hstw_inc; if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) { - put_page_type(&frame_table[frame]); clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); } } if ( act->pin == 0 ) { - put_page(&frame_table[frame]); clear_bit(_GTF_reading, &sha->flags); + put_page(&frame_table[frame]); } - goto fail; + + spin_unlock(&granting_d->grant_table->lock); } - rc = 0; - *va = host_virt_addr; + } + *pframe = frame; + return rc; - /* IMPORTANT: must flush / invalidate entry in TLB. - * This is done in the outer gnttab_map_grant_ref when return 0. - */ + unlock_out: + spin_unlock(&granting_d->grant_table->lock); + return rc; +} + +static int +__gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop, + unsigned long *va) +{ + domid_t dom; + grant_ref_t ref; + struct domain *ld, *rd; + struct exec_domain *led; + u16 dev_hst_ro_flags; + int handle; + unsigned long frame, host_virt_addr; + int rc; + + /* Returns 0 if TLB flush / invalidate required by caller. + * va will indicate the address to be invalidated. */ + + led = current; + ld = led->domain; + + /* Bitwise-OR avoids short-circuiting which screws control flow. 
*/ + if ( unlikely(__get_user(dom, &uop->dom) | + __get_user(ref, &uop->ref) | + __get_user(host_virt_addr, &uop->host_virt_addr) | + __get_user(dev_hst_ro_flags, &uop->flags)) ) + { + DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n"); + return -EFAULT; /* don't set status */ } - /* - * Only make the maptrack live _after_ writing the pte, in case we - * overwrite the same frame number, causing a maptrack walk to find it. - */ - ld->grant_table->maptrack[handle].domid = dom; - ld->grant_table->maptrack[handle].ref_and_flags = - (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK); - /* Unchecked and unconditional writes to user uop. */ - if ( flags & GNTMAP_device_map ) - (void)__put_user(frame, &uop->dev_bus_addr); + if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map) ) && + unlikely(!__addr_ok(host_virt_addr))) + { + DPRINTK("Bad virtual address (%x) or flags (%x).\n", + host_virt_addr, dev_hst_ro_flags); + (void)__put_user(GNTST_bad_virt_addr, &uop->handle); + return GNTST_bad_gntref; + } - (void)__put_user(handle, &uop->handle); + if ( unlikely(ref >= NR_GRANT_ENTRIES) || + unlikely((dev_hst_ro_flags & (GNTMAP_device_map|GNTMAP_host_map)) == +0) ) + { + DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags); + (void)__put_user(GNTST_bad_gntref, &uop->handle); + return GNTST_bad_gntref; + } - spin_unlock(&rd->grant_table->lock); - put_domain(rd); - return 0; + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + DPRINTK("Could not find domain %d\n", dom); + (void)__put_user(GNTST_bad_domain, &uop->handle); + return GNTST_bad_domain; + } + + /* get a maptrack handle */ + if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) + { + put_domain(rd); + DPRINTK("No more map handles available\n"); + (void)__put_user(GNTST_no_device_space, &uop->handle); + return GNTST_no_device_space; + } + +#ifdef GRANT_DEBUG_VERBOSE + DPRINTK("Mapping grant ref 
(%hu) for domain (%hu) with flags (%x)\n", + ref, dom, dev_hst_ro_flags); +#endif + + if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref, + dev_hst_ro_flags, + host_virt_addr, &frame))) + { + /* Only make the maptrack live _after_ writing the pte, + * in case we overwrite the same frame number, causing a + * maptrack walk to find it + */ + ld->grant_table->maptrack[handle].domid = dom; + + ld->grant_table->maptrack[handle].ref_and_flags + = (ref << MAPTRACK_REF_SHIFT) | + (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK); + + (void)__put_user(frame, &uop->dev_bus_addr); + + if ( dev_hst_ro_flags & GNTMAP_host_map ) + *va = host_virt_addr; + + (void)__put_user(handle, &uop->handle); + } + else + { + (void)__put_user(rc, &uop->handle); + put_maptrack_handle(ld->grant_table, handle); + } - fail: - (void)__put_user(rc, &uop->handle); - spin_unlock(&rd->grant_table->lock); put_domain(rd); - put_maptrack_handle(ld->grant_table, handle); return rc; } @@ -364,17 +409,21 @@ gnttab_map_grant_ref( gnttab_map_grant_ref_t *uop, unsigned int count) { int i, flush = 0; - unsigned long va = 0; + unsigned long va[8]; for ( i = 0; i < count; i++ ) - if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 ) + if ( __gnttab_map_grant_ref(&uop[i], + &va[ (flush < 8 ? flush : 0) ] ) == 0) flush++; - /* XXX KAF: I think we are probably flushing too much here. 
*/ - if ( flush == 1 ) - flush_tlb_one_mask(current->domain->cpuset, va); - else if ( flush != 0 ) - flush_tlb_mask(current->domain->cpuset); + if ( flush != 0 ) + { + if ( flush <= 8 ) + for ( i = 0; i < flush; i++ ) + flush_tlb_one_mask(current->domain->cpuset, va[i]); + else + local_flush_tlb(); + } return 0; } @@ -392,6 +441,7 @@ __gnttab_unmap_grant_ref( active_grant_entry_t *act; grant_entry_t *sha; grant_mapping_t *map; + u16 flags; s16 rc = 1; unsigned long frame, virt; @@ -416,8 +466,9 @@ __gnttab_unmap_grant_ref( return GNTST_bad_handle; } - dom = map->domid; - ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK; if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || unlikely(ld == rd) ) @@ -428,45 +479,56 @@ __gnttab_unmap_grant_ref( (void)__put_user(GNTST_bad_domain, &uop->status); return GNTST_bad_domain; } +#ifdef GRANT_DEBUG_VERBOSE DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n", ref, dom, handle); +#endif act = &rd->grant_table->active[ref]; sha = &rd->grant_table->shared[ref]; spin_lock(&rd->grant_table->lock); - if ( frame != 0 ) + if ( frame == 0 ) + frame = act->frame; + else if ( frame == GNTUNMAP_DEV_FROM_VIRT ) + { + if ( !( flags & GNTMAP_device_map ) ) + PIN_FAIL(unmap_out, GNTST_bad_dev_addr, + "Bad frame number: frame not mapped for device access.\n"); + frame = act->frame; + + /* frame will be unmapped for device access below if virt addr ok */ + } + else { if ( unlikely(frame != act->frame) ) - PIN_FAIL(GNTST_general_error, + PIN_FAIL(unmap_out, GNTST_general_error, "Bad frame number doesn't match gntref.\n"); - if ( map->ref_and_flags & GNTMAP_device_map ) - act->pin -= (map->ref_and_flags & GNTMAP_readonly) ? - GNTPIN_devr_inc : GNTPIN_devw_inc; + if ( flags & GNTMAP_device_map ) + act->pin -= (flags & GNTMAP_readonly) ? 
GNTPIN_devr_inc + : GNTPIN_devw_inc; map->ref_and_flags &= ~GNTMAP_device_map; (void)__put_user(0, &uop->dev_bus_addr); - } - else - frame = act->frame; - /* frame is now unmapped for device access */ + /* frame is now unmapped for device access */ + } if ( (virt != 0) && - (map->ref_and_flags & GNTMAP_host_map) && + (flags & GNTMAP_host_map) && ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0)) { l1_pgentry_t *pl1e; unsigned long _ol1e; pl1e = &linear_pg_table[l1_linear_offset(virt)]; - + if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) { DPRINTK("Could not find PTE entry for address %x\n", virt); rc = -EINVAL; - goto fail; + goto unmap_out; } /* check that the virtual address supplied is actually @@ -477,7 +539,7 @@ __gnttab_unmap_grant_ref( DPRINTK("PTE entry %x for address %x doesn't match frame %x\n", _ol1e, virt, frame); rc = -EINVAL; - goto fail; + goto unmap_out; } /* Delete pagetable entry @@ -487,35 +549,53 @@ __gnttab_unmap_grant_ref( DPRINTK("Cannot delete PTE entry at %x for virtual address %x\n", pl1e, virt); rc = -EINVAL; - goto fail; + goto unmap_out; } map->ref_and_flags &= ~GNTMAP_host_map; - act->pin -= (map->ref_and_flags & GNTMAP_readonly) ? - GNTPIN_hstr_inc : GNTPIN_hstw_inc; + act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc + : GNTPIN_hstw_inc; + + if ( frame == GNTUNMAP_DEV_FROM_VIRT ) + { + act->pin -= (flags & GNTMAP_readonly) ? 
GNTPIN_devr_inc + : GNTPIN_devw_inc; + + map->ref_and_flags &= ~GNTMAP_device_map; + (void)__put_user(0, &uop->dev_bus_addr); + } + rc = 0; *va = virt; } if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) + { + map->ref_and_flags = 0; put_maptrack_handle(ld->grant_table, handle); + } + + /* If just unmapped a writable mapping, mark as dirtied */ + if ( unlikely(shadow_mode_log_dirty(rd)) && + !( flags & GNTMAP_readonly ) ) + mark_dirty(rd, frame); /* If the last writable mapping has been removed, put_page_type */ - if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) && - !(map->ref_and_flags & GNTMAP_readonly) ) + if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) && + ( !( flags & GNTMAP_readonly ) ) ) { - put_page_type(&frame_table[frame]); clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); } if ( act->pin == 0 ) { - put_page(&frame_table[frame]); clear_bit(_GTF_reading, &sha->flags); + put_page(&frame_table[frame]); } - fail: + unmap_out: (void)__put_user(rc, &uop->status); spin_unlock(&rd->grant_table->lock); put_domain(rd); @@ -527,16 +607,21 @@ gnttab_unmap_grant_ref( gnttab_unmap_grant_ref_t *uop, unsigned int count) { int i, flush = 0; - unsigned long va = 0; + unsigned long va[8]; for ( i = 0; i < count; i++ ) - if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 ) + if ( __gnttab_unmap_grant_ref(&uop[i], + &va[ (flush < 8 ? 
flush : 0) ] ) == 0) flush++; - if ( flush == 1 ) - flush_tlb_one_mask(current->domain->cpuset, va); - else if ( flush != 0 ) - flush_tlb_mask(current->domain->cpuset); + if ( flush != 0 ) + { + if ( flush <= 8 ) + for ( i = 0; i < flush; i++ ) + flush_tlb_one_mask(current->domain->cpuset, va[i]); + else + local_flush_tlb(); + } return 0; } @@ -547,6 +632,7 @@ gnttab_setup_table( { gnttab_setup_table_t op; struct domain *d; + int i; if ( count != 1 ) return -EINVAL; @@ -557,9 +643,10 @@ gnttab_setup_table( return -EFAULT; } - if ( unlikely(op.nr_frames > 1) ) + if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) ) { - DPRINTK("Xen only supports one grant-table frame per domain.\n"); + DPRINTK("Xen only supports at most %d grant-table frames per domain.\n", + NR_GRANT_FRAMES); (void)put_user(GNTST_general_error, &uop->status); return 0; } @@ -581,12 +668,15 @@ gnttab_setup_table( return 0; } - if ( op.nr_frames == 1 ) + if ( op.nr_frames <= NR_GRANT_FRAMES ) { ASSERT(d->grant_table != NULL); (void)put_user(GNTST_okay, &uop->status); - (void)put_user(virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT, - &uop->frame_list[0]); + + for ( i = 0; i < op.nr_frames; i++ ) + (void)put_user( ( + virt_to_phys( (char*)(d->grant_table->shared)+(i*PAGE_SIZE) ) + >> PAGE_SHIFT ), &uop->frame_list[i]); } put_domain(d); @@ -634,29 +724,33 @@ gnttab_dump_table(gnttab_dump_table_t *uop) DPRINTK("Grant table for dom (%hu) MFN (%x)\n", op.dom, shared_mfn); - spin_lock(&gt->lock); - ASSERT(d->grant_table->active != NULL); ASSERT(d->grant_table->shared != NULL); + ASSERT(d->grant_table->maptrack != NULL); for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) { - act = &gt->active[i]; sha_copy = gt->shared[i]; - if ( act->pin || act->domid || act->frame || - sha_copy.flags || sha_copy.domid || sha_copy.frame ) + if ( sha_copy.flags ) { - DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) dom:(%hu) frame:(%lx)\n", - op.dom, i, act->pin, act->domid, act->frame); DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) 
dom:(%hu) frame:(%lx)\n", op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame); - } - } - ASSERT(d->grant_table->maptrack != NULL); + spin_lock(&gt->lock); + + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + { + act = &gt->active[i]; + + if ( act->pin ) + { + DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) dom:(%hu) frame:(%lx)\n", + op.dom, i, act->pin, act->domid, act->frame); + } + } for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ ) { @@ -746,17 +840,18 @@ gnttab_check_unmap( lgt = ld->grant_table; - /* Fast exit if we're not mapping anything using grant tables */ - if ( lgt->map_count == 0 ) - return 0; - -#ifdef GRANT_DEBUG - if ( ld->id != 0 ) { +#ifdef GRANT_DEBUG_VERBOSE + if ( ld->id != 0 ) + { DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n", rd->id, ld->id, frame, readonly); } #endif + /* Fast exit if we're not mapping anything using grant tables */ + if ( lgt->map_count == 0 ) + return 0; + if ( get_domain(rd) == 0 ) { DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", rd->id); @@ -809,15 +904,15 @@ gnttab_check_unmap( /* any more granted writable mappings? 
*/ if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) { - put_page_type(&frame_table[frame]); clear_bit(_GTF_writing, &rgt->shared[ref].flags); + put_page_type(&frame_table[frame]); } } if ( act->pin == 0 ) { - put_page(&frame_table[frame]); clear_bit(_GTF_reading, &rgt->shared[ref].flags); + put_page(&frame_table[frame]); } spin_unlock(&rgt->lock); @@ -839,29 +934,41 @@ int gnttab_prepare_for_transfer( struct domain *rd, struct domain *ld, grant_ref_t ref) { - grant_table_t *t; - grant_entry_t *e; + grant_table_t *rgt; + grant_entry_t *sha; domid_t sdom; u16 sflags; u32 scombo, prev_scombo; int retries = 0; + unsigned long target_pfn; + + DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n", + rd->id, ld->id, ref); - if ( unlikely((t = rd->grant_table) == NULL) || + if ( unlikely((rgt = rd->grant_table) == NULL) || unlikely(ref >= NR_GRANT_ENTRIES) ) { DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", rd->id, ref); return 0; } - spin_lock(&t->lock); + spin_lock(&rgt->lock); - e = &t->shared[ref]; + sha = &rgt->shared[ref]; - sflags = e->flags; - sdom = e->domid; + sflags = sha->flags; + sdom = sha->domid; for ( ; ; ) { + target_pfn = sha->frame; + + if ( unlikely(target_pfn >= max_page ) ) + { + DPRINTK("Bad pfn (%x)\n", target_pfn); + goto fail; + } + if ( unlikely(sflags != GTF_accept_transfer) || unlikely(sdom != ld->id) ) { @@ -875,7 +982,7 @@ gnttab_prepare_for_transfer( prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; /* NB. prev_scombo is updated in place to seen value. 
*/ - if ( unlikely(cmpxchg_user((u32 *)&e->flags, prev_scombo, + if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, prev_scombo | GTF_transfer_committed)) ) { DPRINTK("Fault while modifying shared flags and domid.\n"); @@ -898,29 +1005,50 @@ gnttab_prepare_for_transfer( sdom = (u16)(prev_scombo >> 16); } - spin_unlock(&t->lock); + spin_unlock(&rgt->lock); return 1; fail: - spin_unlock(&t->lock); + spin_unlock(&rgt->lock); return 0; } void gnttab_notify_transfer( - struct domain *rd, grant_ref_t ref, unsigned long sframe) + struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame) { - unsigned long frame; + grant_entry_t *sha; + unsigned long pfn; - /* cwc22 - * TODO: this requires that the machine_to_phys_mapping - * has already been updated, so the accept_transfer hypercall - * must do this. - */ - frame = __mfn_to_gpfn(rd, sframe); + DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n", + rd->id, ld->id, ref); + + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + pfn = sha->frame; + + if ( unlikely(pfn >= max_page ) ) + DPRINTK("Bad pfn (%x)\n", pfn); + else + { + machine_to_phys_mapping[frame] = pfn; + + if ( unlikely(shadow_mode_log_dirty(ld))) + mark_dirty(ld, frame); - wmb(); /* Ensure that the reassignment is globally visible. 
*/ - rd->grant_table->shared[ref].frame = frame; + if (shadow_mode_translate(ld)) + __phys_to_machine_mapping[pfn] = frame; + } + sha->frame = __mfn_to_gpfn(rd, frame); + sha->domid = rd->id; + wmb(); + sha->flags = ( GTF_accept_transfer | GTF_transfer_completed ); + + spin_unlock(&rd->grant_table->lock); + + return; } int @@ -943,6 +1071,7 @@ grant_table_create( goto no_mem; memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES); + /* Tracking of mapped foreign frames table */ if ( (t->maptrack = (void *)alloc_xenheap_page()) == NULL ) goto no_mem; memset(t->maptrack, 0, PAGE_SIZE); @@ -950,10 +1079,16 @@ grant_table_create( t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; /* Shared grant table. */ - if ( (t->shared = (void *)alloc_xenheap_page()) == NULL ) + if ( (t->shared = (void *)alloc_xenheap_pages(ORDER_GRANT_FRAMES)) == NULL ) goto no_mem; - memset(t->shared, 0, PAGE_SIZE); - SHARE_PFN_WITH_DOMAIN(virt_to_page(t->shared), d); + memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE); + + for ( i = 0; i < NR_GRANT_FRAMES; i++ ) + { + SHARE_PFN_WITH_DOMAIN(virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d); + machine_to_phys_mapping[ (virt_to_phys((char*)(t->shared)+(i*PAGE_SIZE)) + >> PAGE_SHIFT) ] = INVALID_M2P_ENTRY; + } /* Okay, install the structure. */ wmb(); /* avoid races with lock-free access to d->grant_table */ @@ -1055,7 +1190,7 @@ grant_table_destroy( { /* Free memory relating to this grant table. 
*/ d->grant_table = NULL; - free_xenheap_page((unsigned long)t->shared); + free_xenheap_pages((unsigned long)t->shared, ORDER_GRANT_FRAMES); free_xenheap_page((unsigned long)t->maptrack); xfree(t->active); xfree(t); diff --git a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h index b00642db7c..79c87a7cff 100644 --- a/xen/include/public/grant_table.h +++ b/xen/include/public/grant_table.h @@ -185,6 +185,8 @@ typedef struct { u32 __pad; } PACKED gnttab_unmap_grant_ref_t; /* 24 bytes */ +#define GNTUNMAP_DEV_FROM_VIRT (~0U) + /* * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least * <nr_frames> pages. The frame addresses are written to the <frame_list>. @@ -248,8 +250,9 @@ typedef struct { #define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ #define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ #define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ -#define GNTST_no_device_space (-6) /* Out of space in I/O MMU. */ -#define GNTST_permission_denied (-7) /* Not enough privilege for operation. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. 
*/ #define GNTTABOP_error_msgs { \ "okay", \ @@ -258,6 +261,7 @@ typedef struct { "invalid grant reference", \ "invalid mapping handle", \ "invalid virtual address", \ + "invalid device address", \ "no spare translation slot in the I/O MMU", \ "permission denied" \ } diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h index 836b8936e7..0f91912bbc 100644 --- a/xen/include/public/io/blkif.h +++ b/xen/include/public/io/blkif.h @@ -34,16 +34,24 @@ typedef struct { blkif_vdev_t device; /* 2: only for read/write requests */ unsigned long id; /* 4: private guest value, echoed in resp */ blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */ - /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect ; @f_a_s[:12]=frame. */ + /* @f_a_s[2:0]=last_sect ; @f_a_s[5:3]=first_sect */ +#ifdef CONFIG_XEN_BLKDEV_GRANT + /* @f_a_s[:16]= grant reference (16 bits) */ +#else + /* @f_a_s[:12]=@frame: machine page frame number. */ +#endif /* @first_sect: first sector in frame to transfer (inclusive). */ /* @last_sect: last sector in frame to transfer (inclusive). */ - /* @frame: machine page frame number. 
*/ unsigned long frame_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } PACKED blkif_request_t; #define blkif_first_sect(_fas) (((_fas)>>3)&7) #define blkif_last_sect(_fas) ((_fas)&7) +#ifdef CONFIG_XEN_BLKDEV_GRANT +#define blkif_gref_from_fas(_fas) ((_fas)>>16) +#endif + typedef struct { unsigned long id; /* copied from request */ u8 operation; /* copied from request */ diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h index b05ca702ce..f37d6d4e56 100644 --- a/xen/include/xen/grant_table.h +++ b/xen/include/xen/grant_table.h @@ -51,7 +51,10 @@ typedef struct { #define GNTPIN_devr_inc (1 << GNTPIN_devr_shift) #define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift) -#define NR_GRANT_ENTRIES (PAGE_SIZE / sizeof(grant_entry_t)) +#define ORDER_GRANT_FRAMES 2 +#define NR_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES) +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) + /* * Tracks a mapping of another domain's grant reference. Each domain has a @@ -104,7 +107,7 @@ gnttab_prepare_for_transfer( /* Notify 'rd' of a completed transfer via an already-locked grant entry. */ void gnttab_notify_transfer( - struct domain *rd, grant_ref_t ref, unsigned long frame); + struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame); /* Pre-domain destruction release of granted device mappings of other domains.*/ void |