/****************************************************************************** * arch/x86/mm.c * * Copyright (c) 2002-2005 K A Fraser * Copyright (c) 2004 Christian Limpach * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * A description of the x86 page table API: * * Domains trap to do_mmu_update with a list of update requests. * This is a list of (ptr, val) pairs, where the requested operation * is *ptr = val. * * Reference counting of pages: * ---------------------------- * Each page has two refcounts: tot_count and type_count. * * TOT_COUNT is the obvious reference count. It counts all uses of a * physical page frame by a domain, including uses as a page directory, * a page table, or simple mappings via a PTE. This count prevents a * domain from releasing a frame back to the free pool when it still holds * a reference to it. * * TYPE_COUNT is more subtle. A frame can be put to one of three * mutually-exclusive uses: it might be used as a page directory, or a * page table, or it may be mapped writable by the domain [of course, a * frame may not be used in any of these three ways!]. * So, type_count is a count of the number of times a frame is being * referred to in its current incarnation. Therefore, a page can only * change its type when its type count is zero. * * Pinning the page type: * ---------------------- * The type of a page can be pinned/unpinned with the commands * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is, * pinning is not reference counted, so it can't be nested). * This is useful to prevent a page's type count falling to zero, at which * point safety checks would need to be carried out next time the count * is increased again. * * A further note on writable page mappings: * ----------------------------------------- * For simplicity, the count of writable mappings for a page may not * correspond to reality. The 'writable count' is incremented for every * PTE which maps the page with the _PAGE_RW flag set. However, for * write access to be possible the page directory entry must also have * its _PAGE_RW bit set. We do not check this as it complicates the * reference counting considerably [consider the case of multiple * directory entries referencing a single page table, some with the RW * bit set, others not -- it starts getting a bit messy]. * In normal use, this simplification shouldn't be a problem. * However, the logic can be added if required. * * One more note on read-only page mappings: * ----------------------------------------- * We want domains to be able to map pages for read-only access. The * main reason is that page tables and directories should be readable * by a domain, but it would not be safe for them to be writable. * However, domains have free access to rings 1 & 2 of the Intel * privilege model. In terms of page protection, these are considered * to be part of 'supervisor mode'. 
 * The WP bit in CR0 controls whether read-only restrictions are respected
 * in supervisor mode -- if the bit is clear then any mapped page is
 * writable.
 *
 * We get round this by always setting the WP bit and disallowing
 * updates to it. This is very unlikely to cause a problem for guest
 * OS's, which will generally use the WP bit to simplify copy-on-write
 * implementation (in that case, the OS wants a fault when it writes to
 * an application-supplied buffer).
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/kernel.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/errno.h>
#include <xen/perfc.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/event.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <asm/shadow.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/ldt.h>
#include <asm/x86_emulate.h>
#include <asm/e820.h>
#include <public/memory.h>

#ifdef VERBOSE
#define MEM_LOG(_f, _a...)                                 \
  printk("DOM%u: (file=mm.c, line=%d) " _f "\n",           \
         current->domain->domain_id , __LINE__ , ## _a )
#else
#define MEM_LOG(_f, _a...) ((void)0)
#endif

/*
 * PTE updates can be done with ordinary writes except:
 *  1. Debug builds get extra checking by using CMPXCHG[8B].
 *  2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
 */
#if !defined(NDEBUG) || defined(CONFIG_X86_PAE)
#define PTE_UPDATE_WITH_CMPXCHG
#endif

/*
 * Both do_mmuext_op() and do_mmu_update():
 * We steal the m.s.b. of the @count parameter to indicate whether this
 * invocation of do_mmu_update() is resuming a previously preempted call.
 */
#define MMU_UPDATE_PREEMPTED          (~(~0U>>1))

static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
                        unsigned long type);

static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t, unsigned long gl1mfn);

/* Used to defer flushing of memory structures. */
struct percpu_mm_info {
#define DOP_FLUSH_TLB      (1<<0) /* Flush the local TLB.                    */
#define DOP_FLUSH_ALL_TLBS (1<<1) /* Flush TLBs of all VCPUs of current dom. */
#define DOP_RELOAD_LDT     (1<<2) /* Reload the LDT shadow mapping.          */
    unsigned int   deferred_ops;
    /* If non-NULL, specifies a foreign subject domain for some operations. */
    struct domain *foreign;
};
static DEFINE_PER_CPU(struct percpu_mm_info, percpu_mm_info);

/*
 * Returns the current foreign domain; defaults to the currently-executing
 * domain if a foreign override hasn't been specified.
 */
#define FOREIGNDOM (this_cpu(percpu_mm_info).foreign ?: current->domain)

/* Private domain structs for DOMID_XEN and DOMID_IO. */
static struct domain *dom_xen, *dom_io;

/* Frame table and its size in pages. */
struct page_info *frame_table;
unsigned long max_page;
unsigned long total_pages;

void __init init_frametable(void)
{
    unsigned long nr_pages, page_step, i, mfn;

    frame_table = (struct page_info *)FRAMETABLE_VIRT_START;

    nr_pages  = PFN_UP(max_page * sizeof(*frame_table));
    page_step = (1 << L2_PAGETABLE_SHIFT) >> PAGE_SHIFT;

    for ( i = 0; i < nr_pages; i += page_step )
    {
        mfn = alloc_boot_pages(min(nr_pages - i, page_step), page_step);
        if ( mfn == 0 )
            panic("Not enough memory for frame table\n");
        map_pages_to_xen(
            FRAMETABLE_VIRT_START + (i << PAGE_SHIFT),
            mfn, page_step, PAGE_HYPERVISOR);
    }

    memset(frame_table, 0, nr_pages << PAGE_SHIFT);
}

void arch_init_memory(void)
{
    extern void subarch_init_memory(void);

    unsigned long i, pfn, rstart_pfn, rend_pfn;

    /*
     * Initialise our DOMID_XEN domain.
     * Any Xen-heap pages that we will allow to be mapped will have
     * their domain field set to dom_xen.
     */
    dom_xen = alloc_domain(DOMID_XEN);
    BUG_ON(dom_xen == NULL);

    /*
     * Initialise our DOMID_IO domain.
     * This domain owns I/O pages that are within the range of the page_info
     * array. Mappings occur at the priv of the caller.
     */
    dom_io = alloc_domain(DOMID_IO);
    BUG_ON(dom_io == NULL);

    /* First 1MB of RAM is historically marked as I/O. */
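    /*
     * Worked example (illustrative, with made-up numbers): the e820 walk
     * below rounds each RAM region inward, so partial frames at either end
     * are treated as I/O. A region { addr = 0x100C00, size = 0x3FF400 }
     * yields
     *     rstart_pfn = PFN_UP(0x100C00)   = 0x101;
     *     rend_pfn   = PFN_DOWN(0x500000) = 0x500;
     * frames 0x101..0x4FF are left as RAM, while the straddled frame 0x100
     * is shared with dom_io like any other non-RAM frame.
     */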
    for ( i = 0; i < 0x100; i++ )
        share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);

    /* Any areas not specified as RAM by the e820 map are considered I/O. */
    for ( i = 0, pfn = 0; i < e820.nr_map; i++ )
    {
        if ( e820.map[i].type != E820_RAM )
            continue;
        /* Every page from cursor to start of next RAM region is I/O. */
        rstart_pfn = PFN_UP(e820.map[i].addr);
        rend_pfn   = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
        for ( ; pfn < rstart_pfn; pfn++ )
        {
            BUG_ON(!mfn_valid(pfn));
            share_xen_page_with_guest(
                mfn_to_page(pfn), dom_io, XENSHARE_writable);
        }
        /* Skip the RAM region. */
        pfn = rend_pfn;
    }
    BUG_ON(pfn != max_page);

    subarch_init_memory();
}

int memory_is_conventional_ram(paddr_t p)
{
    int i;

    for ( i = 0; i < e820.nr_map; i++ )
    {
        if ( (e820.map[i].type == E820_RAM) &&
             (e820.map[i].addr <= p) &&
             ((e820.map[i].addr + e820.map[i].size) > p) )
            return 1;
    }

    return 0;
}

void share_xen_page_with_guest(
    struct page_info *page, struct domain *d, int readonly)
{
    if ( page_get_owner(page) == d )
        return;

    set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);

    spin_lock(&d->page_alloc_lock);

    /* The incremented type count pins as writable or read-only. */
    page->u.inuse.type_info  = (readonly ? PGT_none : PGT_writable_page);
    page->u.inuse.type_info |= PGT_validated | 1;

    page_set_owner(page, d);
    wmb(); /* install valid domain ptr before updating refcnt. */
    ASSERT(page->count_info == 0);
    page->count_info |= PGC_allocated | 1;

    if ( unlikely(d->xenheap_pages++ == 0) )
        get_knownalive_domain(d);
    list_add_tail(&page->list, &d->xenpage_list);

    spin_unlock(&d->page_alloc_lock);
}

void share_xen_page_with_privileged_guests(
    struct page_info *page, int readonly)
{
    share_xen_page_with_guest(page, dom_xen, readonly);
}

#if defined(CONFIG_X86_PAE)

#ifdef NDEBUG
/* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
#define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
#else
/*
 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
 * We cannot safely shadow the idle page table, nor shadow (v1) page tables
 * (detected by lack of an owning domain). As required for correctness, we
 * always shadow PDPTs above 4GB.
 */
#define l3tab_needs_shadow(mfn)                         \
    (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
      (page_get_owner(mfn_to_page(mfn)) != NULL) &&     \
      ((mfn) & 1)) || /* odd MFNs are shadowed */       \
     ((mfn) >= 0x100000))
#endif

static l1_pgentry_t *fix_pae_highmem_pl1e;

/* Cache the address of PAE high-memory fixmap page tables. */
static int __init cache_pae_fixmap_address(void)
{
    unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
    l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
    fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
    return 0;
}
__initcall(cache_pae_fixmap_address);

static DEFINE_PER_CPU(u32, make_cr3_timestamp);

/*
 * Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
 * necessary, and sets v->arch.cr3 to the value to load in CR3.
 */
void make_cr3(struct vcpu *v, unsigned long mfn)
{
    l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
    struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
    unsigned int cpu = smp_processor_id();

    /* Fast path: does this mfn need a shadow at all? */
    if ( !l3tab_needs_shadow(mfn) )
    {
        v->arch.cr3 = mfn << PAGE_SHIFT;
        /* Cache is no longer in use or valid. */
        cache->high_mfn = 0;
        return;
    }

    /* Caching logic is not interrupt safe. */
    ASSERT(!in_irq());

    /* Protects against pae_flush_pgd(). */
    spin_lock(&cache->lock);

    cache->inuse_idx ^= 1;
    cache->high_mfn   = mfn;

    /* Map the guest L3 table and copy to the chosen low-memory cache.
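     *
     * (Editor's sketch, not from the original source: the cache holds two
     * low-memory copies, and flipping inuse_idx above means the fresh copy
     * is built in the slot that a previously-loaded CR3 cannot still be
     * referencing; pae_flush_pgd() takes the same lock and patches the
     * in-use copy when the guest modifies its pgdir.)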
*/ *(fix_pae_highmem_pl1e - cpu) = l1e_from_pfn(mfn, __PAGE_HYPERVISOR); /* First check the previous high mapping can't be in the TLB. * (i.e. have we loaded CR3 since we last did this?) */ if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) ) local_flush_tlb_one(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu)); highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu); lowmem_l3tab = cache->table[cache->inuse_idx]; memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0])); *(fix_pae_highmem_pl1e - cpu) = l1e_empty(); this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time); v->arch.cr3 = __pa(lowmem_l3tab); spin_unlock(&cache->lock); } #else /* !CONFIG_X86_PAE */ void make_cr3(struct vcpu *v, unsigned long mfn) { v->arch.cr3 = mfn << PAGE_SHIFT; } #endif /* !CONFIG_X86_PAE */ void write_ptbase(struct vcpu *v) { write_cr3(v->arch.cr3); } void invalidate_shadow_ldt(struct vcpu *v) { int i; unsigned long pfn; struct page_info *page; if ( v->arch.shadow_ldt_mapcnt == 0 ) return; v->arch.shadow_ldt_mapcnt = 0; for ( i = 16; i < 32; i++ ) { pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]); if ( pfn == 0 ) continue; v->arch.perdomain_ptes[i] = l1e_empty(); page = mfn_to_page(pfn); ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page); ASSERT_PAGE_IS_DOMAIN(page, v->domain); put_page_and_type(page); } /* Dispose of the (now possibly invalid) mappings from the TLB. */ ASSERT(v->processor == smp_processor_id()); this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; } static int alloc_segdesc_page(struct page_info *page) { struct desc_struct *descs; int i; descs = map_domain_page(page_to_mfn(page)); for ( i = 0; i < 512; i++ ) if ( unlikely(!check_descriptor(&descs[i])) ) goto fail; unmap_domain_page(descs); return 1; fail: unmap_domain_page(descs); return 0; } /* Map shadow page at offset @off. */ int map_ldt_shadow_page(unsigned int off) { struct vcpu *v = current; struct domain *d = v->domain; unsigned long gmfn, mfn; l1_pgentry_t l1e, nl1e; unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT); int res; #if defined(__x86_64__) /* If in user mode, switch to kernel mode just to read LDT mapping. 
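     *
     * (Editor's note, illustrative: the shadow LDT occupies the 16
     * perdomain_ptes slots 16..31 -- enough for the 8192-entry limit
     * enforced by MMUEXT_SET_LDT, since 8192 entries * 8 bytes = 16
     * pages -- which is why invalidate_shadow_ldt() above walks slots
     * 16..31 and the code below installs at 'off + 16'.)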
*/ int user_mode = !(v->arch.flags & TF_kernel_mode); #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v) #elif defined(__i386__) #define TOGGLE_MODE() ((void)0) #endif BUG_ON(unlikely(in_irq())); TOGGLE_MODE(); __copy_from_user(&l1e, &linear_pg_table[l1_linear_offset(gva)], sizeof(l1e)); TOGGLE_MODE(); if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) ) return 0; gmfn = l1e_get_pfn(l1e); mfn = gmfn_to_mfn(d, gmfn); if ( unlikely(!VALID_MFN(mfn)) ) return 0; res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); if ( !res && unlikely(shadow_mode_refcounts(d)) ) { shadow_lock(d); shadow_remove_write_access(d->vcpu[0], _mfn(mfn), 0, 0); res = get_page_and_type(mfn_to_page(mfn), d, PGT_ldt_page); shadow_unlock(d); } if ( unlikely(!res) ) return 0; nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW); v->arch.perdomain_ptes[off + 16] = nl1e; v->arch.shadow_ldt_mapcnt++; return 1; } static int get_page_from_pagenr(unsigned long page_nr, struct domain *d) { struct page_info *page = mfn_to_page(page_nr); if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) ) { MEM_LOG("Could not get page ref for pfn %lx", page_nr); return 0; } return 1; } static int get_page_and_type_from_pagenr(unsigned long page_nr, unsigned long type, struct domain *d) { struct page_info *page = mfn_to_page(page_nr); if ( unlikely(!get_page_from_pagenr(page_nr, d)) ) return 0; if ( unlikely(!get_page_type(page, type)) ) { put_page(page); return 0; } return 1; } #ifndef CONFIG_X86_PAE /* We do not support guest linear mappings on PAE. */ /* * We allow root tables to map each other (a.k.a. linear page tables). It * needs some special care with reference counts and access permissions: * 1. The mapping entry must be read-only, or the guest may get write access * to its own PTEs. * 2. We must only bump the reference counts for an *already validated* * L2 table, or we can end up in a deadlock in get_page_type() by waiting * on a validation that is required to complete that validation. * 3. We only need to increment the reference counts for the mapped page * frame if it is mapped by a different root table. This is sufficient and * also necessary to allow validation of a root table mapping itself. */ static int get_linear_pagetable( root_pgentry_t re, unsigned long re_pfn, struct domain *d) { unsigned long x, y; struct page_info *page; unsigned long pfn; ASSERT( !shadow_mode_refcounts(d) ); if ( (root_get_flags(re) & _PAGE_RW) ) { MEM_LOG("Attempt to create linear p.t. with write perms"); return 0; } if ( (pfn = root_get_pfn(re)) != re_pfn ) { /* Make sure the mapped frame belongs to the correct domain. */ if ( unlikely(!get_page_from_pagenr(pfn, d)) ) return 0; /* * Make sure that the mapped frame is an already-validated L2 table. * If so, atomically increment the count (checking for overflow). 
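         *
         * (Editor's worked example, added for exposition: suppose an L2 at
         * mfn X has a read-only entry pointing at a distinct, already-
         * validated L2 at mfn Y. Rule 1 is satisfied by the _PAGE_RW check
         * above; rule 2 by insisting below on PGT_validated, so we never
         * wait on a validation that is itself waiting on us; rule 3 is why
         * the self-map case pfn == re_pfn was short-circuited above without
         * taking any reference.)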
*/ page = mfn_to_page(pfn); y = page->u.inuse.type_info; do { x = y; if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || unlikely((x & (PGT_type_mask|PGT_validated)) != (PGT_root_page_table|PGT_validated)) ) { put_page(page); return 0; } } while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); } return 1; } #endif /* !CONFIG_X86_PAE */ int get_page_from_l1e( l1_pgentry_t l1e, struct domain *d) { unsigned long mfn = l1e_get_pfn(l1e); struct page_info *page = mfn_to_page(mfn); int okay; if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) ) return 1; if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) ) { MEM_LOG("Bad L1 flags %x", l1e_get_flags(l1e) & L1_DISALLOW_MASK); return 0; } if ( unlikely(!mfn_valid(mfn)) || unlikely(page_get_owner(page) == dom_io) ) { /* DOMID_IO reverts to caller for privilege checks. */ if ( d == dom_io ) d = current->domain; if ( !iomem_access_permitted(d, mfn, mfn) ) { MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx", d->domain_id, mfn); return 0; } /* No reference counting for out-of-range I/O pages. */ if ( !mfn_valid(mfn) ) return 1; d = dom_io; } /* Foreign mappings into guests in shadow external mode don't * contribute to writeable mapping refcounts. (This allows the * qemu-dm helper process in dom0 to map the domain's memory without * messing up the count of "real" writable mappings.) */ okay = (((l1e_get_flags(l1e) & _PAGE_RW) && !(unlikely(shadow_mode_external(d) && (d != current->domain)))) ? get_page_and_type(page, d, PGT_writable_page) : get_page(page, d)); if ( !okay ) { MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte " for dom%d", mfn, get_gpfn_from_mfn(mfn), l1e_get_intpte(l1e), d->domain_id); } return okay; } /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. 
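 *
 * (Editor's note, illustrative: the 'vaddr >>= L2_PAGETABLE_SHIFT;
 * vaddr <<= PGT_va_shift' dance below folds the mapping slot into the
 * referenced L1 page's type word as a 'va backpointer'. E.g. an L1
 * mapped only at L2 slot 5 records backptr 5, meaning the virtual
 * region it serves starts at 5 << L2_PAGETABLE_SHIFT; on PAE and
 * 4-level builds the higher-level slots are folded in by the
 * l1_backptr()/l2_backptr() helpers.)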
*/ static int get_page_from_l2e( l2_pgentry_t l2e, unsigned long pfn, struct domain *d, unsigned long vaddr) { int rc; if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) return 1; if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) { MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); return 0; } vaddr >>= L2_PAGETABLE_SHIFT; vaddr <<= PGT_va_shift; rc = get_page_and_type_from_pagenr( l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d); #if CONFIG_PAGING_LEVELS == 2 if ( unlikely(!rc) ) rc = get_linear_pagetable(l2e, pfn, d); #endif return rc; } #if CONFIG_PAGING_LEVELS >= 3 static int get_page_from_l3e( l3_pgentry_t l3e, unsigned long pfn, struct domain *d, unsigned long vaddr) { int rc; if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) return 1; if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) ) { MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & L3_DISALLOW_MASK); return 0; } vaddr >>= L3_PAGETABLE_SHIFT; vaddr <<= PGT_va_shift; rc = get_page_and_type_from_pagenr( l3e_get_pfn(l3e), PGT_l2_page_table | vaddr, d); return rc; } #endif /* 3 level */ #if CONFIG_PAGING_LEVELS >= 4 static int get_page_from_l4e( l4_pgentry_t l4e, unsigned long pfn, struct domain *d, unsigned long vaddr) { int rc; if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) return 1; if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) { MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK); return 0; } vaddr >>= L4_PAGETABLE_SHIFT; vaddr <<= PGT_va_shift; rc = get_page_and_type_from_pagenr( l4e_get_pfn(l4e), PGT_l3_page_table | vaddr, d); if ( unlikely(!rc) ) rc = get_linear_pagetable(l4e, pfn, d); return rc; } #endif /* 4 level */ #ifdef __x86_64__ #define adjust_guest_l1e(pl1e) \ do { \ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ l1e_add_flags((pl1e), _PAGE_USER); \ } while ( 0 ) #define adjust_guest_l2e(pl2e) \ do { \ if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) ) \ l2e_add_flags((pl2e), _PAGE_USER); \ } while ( 0 ) #define adjust_guest_l3e(pl3e) \ do { \ if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \ l3e_add_flags((pl3e), _PAGE_USER); \ } while ( 0 ) #define adjust_guest_l4e(pl4e) \ do { \ if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \ l4e_add_flags((pl4e), _PAGE_USER); \ } while ( 0 ) #else #define adjust_guest_l1e(_p) ((void)0) #define adjust_guest_l2e(_p) ((void)0) #define adjust_guest_l3e(_p) ((void)0) #endif void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) { unsigned long pfn = l1e_get_pfn(l1e); struct page_info *page = mfn_to_page(pfn); struct domain *e; struct vcpu *v; if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(pfn) ) return; e = page_get_owner(page); /* * Check if this is a mapping that was established via a grant reference. * If it was then we should not be here: we require that such mappings are * explicitly destroyed via the grant-table interface. * * The upshot of this is that the guest can end up with active grants that * it cannot destroy (because it no longer has a PTE to present to the * grant-table interface). This can lead to subtle hard-to-catch bugs, * hence a special grant PTE flag can be enabled to catch the bug early. * * (Note that the undestroyable active grants are not a security hole in * Xen. All active grants can safely be cleaned up when the domain dies.) 
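     *
     * (Editor's sketch of the correct teardown order, illustrative:
     * a guest should first issue GNTTABOP_unmap_grant_ref for the
     * mapping -- which clears the PTE and releases the grant -- rather
     * than overwrite the PTE itself and leave the grant entry active.)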
 */
    if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
         !(d->domain_flags & (DOMF_shutdown|DOMF_dying)) )
    {
        MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
                l1e_get_intpte(l1e));
        domain_crash(d);
    }

    /*
     * Remember we didn't take a type-count of foreign writable mappings
     * to shadow external domains.
     */
    if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
         !(unlikely((e != d) && shadow_mode_external(e))) )
    {
        put_page_and_type(page);
    }
    else
    {
        /* We expect this is rare so we blow the entire shadow LDT. */
        if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
                       PGT_ldt_page)) &&
             unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
             (d == e) )
        {
            for_each_vcpu ( d, v )
                invalidate_shadow_ldt(v);
        }
        put_page(page);
    }
}

/*
 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
 * Note also that this automatically deals correctly with linear p.t.'s.
 */
static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
    if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
         (l2e_get_pfn(l2e) != pfn) )
        put_page_and_type(mfn_to_page(l2e_get_pfn(l2e)));
}

#if CONFIG_PAGING_LEVELS >= 3
static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
{
    if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
         (l3e_get_pfn(l3e) != pfn) )
        put_page_and_type(mfn_to_page(l3e_get_pfn(l3e)));
}
#endif

#if CONFIG_PAGING_LEVELS >= 4
static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
{
    if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
         (l4e_get_pfn(l4e) != pfn) )
        put_page_and_type(mfn_to_page(l4e_get_pfn(l4e)));
}
#endif

static int alloc_l1_table(struct page_info *page)
{
    struct domain *d = page_get_owner(page);
    unsigned long  pfn = page_to_mfn(page);
    l1_pgentry_t  *pl1e;
    int            i;

    ASSERT(!shadow_mode_refcounts(d));

    pl1e = map_domain_page(pfn);

    for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
    {
        if ( is_guest_l1_slot(i) &&
             unlikely(!get_page_from_l1e(pl1e[i], d)) )
            goto fail;

        adjust_guest_l1e(pl1e[i]);
    }

    unmap_domain_page(pl1e);
    return 1;

 fail:
    MEM_LOG("Failure in alloc_l1_table: entry %d", i);
    while ( i-- > 0 )
        if ( is_guest_l1_slot(i) )
            put_page_from_l1e(pl1e[i], d);

    unmap_domain_page(pl1e);
    return 0;
}

#ifdef CONFIG_X86_PAE
static int create_pae_xen_mappings(l3_pgentry_t *pl3e)
{
    struct page_info *page;
    l2_pgentry_t     *pl2e;
    l3_pgentry_t      l3e3;
    int               i;

    pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);

    /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
    l3e3 = pl3e[3];
    if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
    {
        MEM_LOG("PAE L3 3rd slot is empty");
        return 0;
    }

    /*
     * The Xen-private mappings include linear mappings. The L2 thus cannot
     * be shared by multiple L3 tables. The test here is adequate because:
     *  1. Cannot appear in slots != 3 because the page would then have an
     *     unknown va backpointer, which get_page_type() explicitly disallows.
     *  2. Cannot appear in another page table's L3:
     *     a. alloc_l3_table() calls this function and this check will fail
     *     b. mod_l3_entry() disallows updates to slot 3 in an existing table
     *
     * XXX -- this needs revisiting for shadow_mode_refcount()==true...
     */
    page = l3e_get_page(l3e3);
    BUG_ON(page->u.inuse.type_info & PGT_pinned);
    BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
    if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
    {
        MEM_LOG("PAE L3 3rd slot is shared");
        return 0;
    }

    /* Xen private mappings.
*/ pl2e = map_domain_page(l3e_get_pfn(l3e3)); memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = l2e_from_page( virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i, __PAGE_HYPERVISOR); for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ? l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR) : l2e_empty(); unmap_domain_page(pl2e); return 1; } /* Flush a pgdir update into low-memory caches. */ static void pae_flush_pgd( unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e) { struct domain *d = page_get_owner(mfn_to_page(mfn)); struct vcpu *v; intpte_t _ol3e, _nl3e, _pl3e; l3_pgentry_t *l3tab_ptr; struct pae_l3_cache *cache; /* If below 4GB then the pgdir is not shadowed in low memory. */ if ( !l3tab_needs_shadow(mfn) ) return; for_each_vcpu ( d, v ) { cache = &v->arch.pae_l3_cache; spin_lock(&cache->lock); if ( cache->high_mfn == mfn ) { l3tab_ptr = &cache->table[cache->inuse_idx][idx]; _ol3e = l3e_get_intpte(*l3tab_ptr); _nl3e = l3e_get_intpte(nl3e); _pl3e = cmpxchg((intpte_t *)l3tab_ptr, _ol3e, _nl3e); BUG_ON(_pl3e != _ol3e); } spin_unlock(&cache->lock); } flush_tlb_mask(d->domain_dirty_cpumask); } static inline int l1_backptr( unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) { unsigned long l2_backptr = l2_type & PGT_va_mask; ASSERT(l2_backptr != PGT_va_unknown); ASSERT(l2_backptr != PGT_va_mutable); *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | (offset_in_l2 << L2_PAGETABLE_SHIFT); return 1; } #elif CONFIG_X86_64 # define create_pae_xen_mappings(pl3e) (1) # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) static inline int l1_backptr( unsigned long *backptr, unsigned long offset_in_l2, unsigned long l2_type) { unsigned long l2_backptr = l2_type & PGT_va_mask; ASSERT(l2_backptr != PGT_va_unknown); ASSERT(l2_backptr != PGT_va_mutable); *backptr = ((l2_backptr >> PGT_va_shift) << L3_PAGETABLE_SHIFT) | (offset_in_l2 << L2_PAGETABLE_SHIFT); return 1; } static inline int l2_backptr( unsigned long *backptr, unsigned long offset_in_l3, unsigned long l3_type) { unsigned long l3_backptr = l3_type & PGT_va_mask; ASSERT(l3_backptr != PGT_va_unknown); ASSERT(l3_backptr != PGT_va_mutable); *backptr = ((l3_backptr >> PGT_va_shift) << L4_PAGETABLE_SHIFT) | (offset_in_l3 << L3_PAGETABLE_SHIFT); return 1; } static inline int l3_backptr( unsigned long *backptr, unsigned long offset_in_l4, unsigned long l4_type) { *backptr = (offset_in_l4 << L4_PAGETABLE_SHIFT); return 1; } #else # define create_pae_xen_mappings(pl3e) (1) # define l1_backptr(bp,l2o,l2t) \ ({ *(bp) = (unsigned long)(l2o) << L2_PAGETABLE_SHIFT; 1; }) #endif static int alloc_l2_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); unsigned long vaddr; l2_pgentry_t *pl2e; int i; ASSERT(!shadow_mode_refcounts(d)); pl2e = map_domain_page(pfn); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { if ( !l1_backptr(&vaddr, i, type) ) goto fail; if ( is_guest_l2_slot(type, i) && unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) ) goto fail; adjust_guest_l2e(pl2e[i]); } #if CONFIG_PAGING_LEVELS == 2 /* Xen private mappings. 
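     *
     * (Editor's note, illustrative: besides copying the idle table's Xen
     * slots, the code below installs the linear self-map, i.e.
     *     pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] points at this L2,
     * after which the PTE governing any virtual address va is itself
     * addressable as linear_pg_table[l1_linear_offset(va)] -- the idiom
     * used by mod_l1_entry() callers such as do_update_va_mapping().)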
*/ memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT], &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT], L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = l2e_from_pfn(pfn, __PAGE_HYPERVISOR); for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = l2e_from_page( virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i, __PAGE_HYPERVISOR); #endif unmap_domain_page(pl2e); return 1; fail: MEM_LOG("Failure in alloc_l2_table: entry %d", i); while ( i-- > 0 ) if ( is_guest_l2_slot(type, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); return 0; } #if CONFIG_PAGING_LEVELS >= 3 static int alloc_l3_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); unsigned long vaddr; l3_pgentry_t *pl3e; int i; ASSERT(!shadow_mode_refcounts(d)); #ifdef CONFIG_X86_PAE /* * PAE pgdirs above 4GB are unacceptable if the guest does not understand * the weird 'extended cr3' format for dealing with high-order address * bits. We cut some slack for control tools (before vcpu0 is initialised). */ if ( (pfn >= 0x100000) && unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) && d->vcpu[0] && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) ) { MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); return 0; } #endif pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) { #if CONFIG_PAGING_LEVELS >= 4 if ( !l2_backptr(&vaddr, i, type) ) goto fail; #else vaddr = (unsigned long)i << L3_PAGETABLE_SHIFT; #endif if ( is_guest_l3_slot(i) && unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) ) goto fail; adjust_guest_l3e(pl3e[i]); } if ( !create_pae_xen_mappings(pl3e) ) goto fail; unmap_domain_page(pl3e); return 1; fail: MEM_LOG("Failure in alloc_l3_table: entry %d", i); while ( i-- > 0 ) if ( is_guest_l3_slot(i) ) put_page_from_l3e(pl3e[i], pfn); unmap_domain_page(pl3e); return 0; } #else #define alloc_l3_table(page, type) (0) #endif #if CONFIG_PAGING_LEVELS >= 4 static int alloc_l4_table(struct page_info *page, unsigned long type) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l4_pgentry_t *pl4e = page_to_virt(page); unsigned long vaddr; int i; ASSERT(!shadow_mode_refcounts(d)); for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) { if ( !l3_backptr(&vaddr, i, type) ) goto fail; if ( is_guest_l4_slot(i) && unlikely(!get_page_from_l4e(pl4e[i], pfn, d, vaddr)) ) goto fail; adjust_guest_l4e(pl4e[i]); } /* Xen private mappings. 
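     *
     * (Editor's note, illustrative: three fixed L4 slots are set up just
     * below -- the Xen-private range copied from idle_pg_table, the
     * linear self-link at l4_table_offset(LINEAR_PT_VIRT_START), and the
     * per-domain slot at l4_table_offset(PERDOMAIN_VIRT_START) mapping
     * mm_perdomain_l3, which is what backs the perdomain_ptes used for
     * the GDT and shadow-LDT mappings elsewhere in this file.)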
*/ memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT], &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t)); pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] = l4e_from_pfn(pfn, __PAGE_HYPERVISOR); pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] = l4e_from_page( virt_to_page(page_get_owner(page)->arch.mm_perdomain_l3), __PAGE_HYPERVISOR); return 1; fail: MEM_LOG("Failure in alloc_l4_table: entry %d", i); while ( i-- > 0 ) if ( is_guest_l4_slot(i) ) put_page_from_l4e(pl4e[i], pfn); return 0; } #else #define alloc_l4_table(page, type) (0) #endif static void free_l1_table(struct page_info *page) { struct domain *d = page_get_owner(page); unsigned long pfn = page_to_mfn(page); l1_pgentry_t *pl1e; int i; pl1e = map_domain_page(pfn); for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l1_slot(i) ) put_page_from_l1e(pl1e[i], d); unmap_domain_page(pl1e); } static void free_l2_table(struct page_info *page) { unsigned long pfn = page_to_mfn(page); l2_pgentry_t *pl2e; int i; pl2e = map_domain_page(pfn); for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l2_slot(page->u.inuse.type_info, i) ) put_page_from_l2e(pl2e[i], pfn); unmap_domain_page(pl2e); } #if CONFIG_PAGING_LEVELS >= 3 static void free_l3_table(struct page_info *page) { unsigned long pfn = page_to_mfn(page); l3_pgentry_t *pl3e; int i; pl3e = map_domain_page(pfn); for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l3_slot(i) ) put_page_from_l3e(pl3e[i], pfn); unmap_domain_page(pl3e); } #endif #if CONFIG_PAGING_LEVELS >= 4 static void free_l4_table(struct page_info *page) { unsigned long pfn = page_to_mfn(page); l4_pgentry_t *pl4e = page_to_virt(page); int i; for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) if ( is_guest_l4_slot(i) ) put_page_from_l4e(pl4e[i], pfn); } #endif static inline int update_l1e(l1_pgentry_t *pl1e, l1_pgentry_t ol1e, l1_pgentry_t nl1e, unsigned long gl1mfn, struct vcpu *v) { int rv = 1; if ( unlikely(shadow_mode_enabled(v->domain)) ) shadow_lock(v->domain); #ifndef PTE_UPDATE_WITH_CMPXCHG rv = (!__copy_to_user(pl1e, &nl1e, sizeof(nl1e))); #else { intpte_t o = l1e_get_intpte(ol1e); intpte_t n = l1e_get_intpte(nl1e); for ( ; ; ) { if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ) { MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte ": saw %" PRIpte, l1e_get_intpte(ol1e), l1e_get_intpte(nl1e), o); rv = 0; break; } if ( o == l1e_get_intpte(ol1e) ) break; /* Allowed to change in Accessed/Dirty flags only. */ BUG_ON((o ^ l1e_get_intpte(ol1e)) & ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); ol1e = l1e_from_intpte(o); } } #endif if ( unlikely(shadow_mode_enabled(v->domain)) ) { shadow_validate_guest_entry(v, _mfn(gl1mfn), pl1e); shadow_unlock(v->domain); } return rv; } /* Update the L1 entry at pl1e to new value nl1e. */ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, unsigned long gl1mfn) { l1_pgentry_t ol1e; struct domain *d = current->domain; if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) return 0; if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) ) { MEM_LOG("Bad L1 flags %x", l1e_get_flags(nl1e) & L1_DISALLOW_MASK); return 0; } adjust_guest_l1e(nl1e); /* Fast path for identical mapping, r/w and presence. 
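         *
         * (Editor's example, illustrative: if the new PTE differs from
         * the old only in flags outside {_PAGE_RW, _PAGE_PRESENT} --
         * say the guest is toggling _PAGE_ACCESSED or _PAGE_DIRTY --
         * no reference counts can change, so the get/put pair below is
         * skipped and the entry is written directly.)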
*/ if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) return update_l1e(pl1e, ol1e, nl1e, gl1mfn, current); if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) return 0; if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) { put_page_from_l1e(nl1e, d); return 0; } } else { if ( unlikely(!update_l1e(pl1e, ol1e, nl1e, gl1mfn, current)) ) return 0; } put_page_from_l1e(ol1e, d); return 1; } #ifndef PTE_UPDATE_WITH_CMPXCHG #define _UPDATE_ENTRY(_t,_p,_o,_n) ({ (*(_p) = (_n)); 1; }) #else #define _UPDATE_ENTRY(_t,_p,_o,_n) ({ \ for ( ; ; ) \ { \ intpte_t __o = cmpxchg((intpte_t *)(_p), \ _t ## e_get_intpte(_o), \ _t ## e_get_intpte(_n)); \ if ( __o == _t ## e_get_intpte(_o) ) \ break; \ /* Allowed to change in Accessed/Dirty flags only. */ \ BUG_ON((__o ^ _t ## e_get_intpte(_o)) & \ ~(int)(_PAGE_ACCESSED|_PAGE_DIRTY)); \ _o = _t ## e_from_intpte(__o); \ } \ 1; }) #endif #define UPDATE_ENTRY(_t,_p,_o,_n,_m) ({ \ int rv; \ if ( unlikely(shadow_mode_enabled(current->domain)) ) \ shadow_lock(current->domain); \ rv = _UPDATE_ENTRY(_t, _p, _o, _n); \ if ( unlikely(shadow_mode_enabled(current->domain)) ) \ { \ shadow_validate_guest_entry(current, _mfn(_m), (_p)); \ shadow_unlock(current->domain); \ } \ rv; \ }) /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ static int mod_l2_entry(l2_pgentry_t *pl2e, l2_pgentry_t nl2e, unsigned long pfn, unsigned long type) { l2_pgentry_t ol2e; unsigned long vaddr = 0; if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) ) { MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e); return 0; } if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) ) return 0; if ( l2e_get_flags(nl2e) & _PAGE_PRESENT ) { if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) ) { MEM_LOG("Bad L2 flags %x", l2e_get_flags(nl2e) & L2_DISALLOW_MASK); return 0; } adjust_guest_l2e(nl2e); /* Fast path for identical mapping and presence. */ if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT)) return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn); if ( unlikely(!l1_backptr(&vaddr, pgentry_ptr_to_slot(pl2e), type)) || unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) { put_page_from_l2e(nl2e, pfn); return 0; } } else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn)) ) { return 0; } put_page_from_l2e(ol2e, pfn); return 1; } #if CONFIG_PAGING_LEVELS >= 3 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */ static int mod_l3_entry(l3_pgentry_t *pl3e, l3_pgentry_t nl3e, unsigned long pfn, unsigned long type) { l3_pgentry_t ol3e; unsigned long vaddr; int okay; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) { MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e); return 0; } #ifdef CONFIG_X86_PAE /* * Disallow updates to final L3 slot. It contains Xen mappings, and it * would be a pain to ensure they remain continuously valid throughout. */ if ( pgentry_ptr_to_slot(pl3e) >= 3 ) return 0; #endif if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) return 0; if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) { if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) ) { MEM_LOG("Bad L3 flags %x", l3e_get_flags(nl3e) & L3_DISALLOW_MASK); return 0; } adjust_guest_l3e(nl3e); /* Fast path for identical mapping and presence. 
*/ if (!l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT)) return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn); #if CONFIG_PAGING_LEVELS >= 4 if ( unlikely(!l2_backptr(&vaddr, pgentry_ptr_to_slot(pl3e), type)) || unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) return 0; #else vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t)) << L3_PAGETABLE_SHIFT; if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) ) return 0; #endif if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) { put_page_from_l3e(nl3e, pfn); return 0; } } else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn)) ) { return 0; } okay = create_pae_xen_mappings(pl3e); BUG_ON(!okay); pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); put_page_from_l3e(ol3e, pfn); return 1; } #endif #if CONFIG_PAGING_LEVELS >= 4 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */ static int mod_l4_entry(l4_pgentry_t *pl4e, l4_pgentry_t nl4e, unsigned long pfn, unsigned long type) { l4_pgentry_t ol4e; unsigned long vaddr; if ( unlikely(!is_guest_l4_slot(pgentry_ptr_to_slot(pl4e))) ) { MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e); return 0; } if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) ) return 0; if ( l4e_get_flags(nl4e) & _PAGE_PRESENT ) { if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) ) { MEM_LOG("Bad L4 flags %x", l4e_get_flags(nl4e) & L4_DISALLOW_MASK); return 0; } adjust_guest_l4e(nl4e); /* Fast path for identical mapping and presence. */ if (!l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT)) return UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn); if ( unlikely(!l3_backptr(&vaddr, pgentry_ptr_to_slot(pl4e), type)) || unlikely(!get_page_from_l4e(nl4e, pfn, current->domain, vaddr)) ) return 0; if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) { put_page_from_l4e(nl4e, pfn); return 0; } } else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn)) ) { return 0; } put_page_from_l4e(ol4e, pfn); return 1; } #endif int alloc_page_type(struct page_info *page, unsigned long type) { struct domain *owner = page_get_owner(page); if ( owner != NULL ) mark_dirty(owner, page_to_mfn(page)); switch ( type & PGT_type_mask ) { case PGT_l1_page_table: return alloc_l1_table(page); case PGT_l2_page_table: return alloc_l2_table(page, type); case PGT_l3_page_table: return alloc_l3_table(page, type); case PGT_l4_page_table: return alloc_l4_table(page, type); case PGT_gdt_page: case PGT_ldt_page: return alloc_segdesc_page(page); default: printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", type, page->u.inuse.type_info, page->count_info); BUG(); } return 0; } void free_page_type(struct page_info *page, unsigned long type) { struct domain *owner = page_get_owner(page); unsigned long gmfn; if ( likely(owner != NULL) ) { /* * We have to flush before the next use of the linear mapping * (e.g., update_va_mapping()) or we could end up modifying a page * that is no longer a page table (and hence screw up ref counts). */ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS; if ( unlikely(shadow_mode_enabled(owner) && !shadow_lock_is_acquired(owner)) ) { /* Raw page tables are rewritten during save/restore. 
*/ if ( !shadow_mode_translate(owner) ) mark_dirty(owner, page_to_mfn(page)); if ( shadow_mode_refcounts(owner) ) return; gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); ASSERT(VALID_M2P(gmfn)); shadow_lock(owner); shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn)); shadow_unlock(owner); } } switch ( type & PGT_type_mask ) { case PGT_l1_page_table: free_l1_table(page); break; case PGT_l2_page_table: free_l2_table(page); break; #if CONFIG_PAGING_LEVELS >= 3 case PGT_l3_page_table: free_l3_table(page); break; #endif #if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: free_l4_table(page); break; #endif default: printk("%s: type %lx pfn %lx\n",__FUNCTION__, type, page_to_mfn(page)); BUG(); } } void put_page_type(struct page_info *page) { unsigned long nx, x, y = page->u.inuse.type_info; again: do { x = y; nx = x - 1; ASSERT((x & PGT_count_mask) != 0); /* * The page should always be validated while a reference is held. The * exception is during domain destruction, when we forcibly invalidate * page-table pages if we detect a referential loop. * See domain.c:relinquish_list(). */ ASSERT((x & PGT_validated) || test_bit(_DOMF_dying, &page_get_owner(page)->domain_flags)); if ( unlikely((nx & PGT_count_mask) == 0) ) { if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && likely(nx & PGT_validated) ) { /* * Page-table pages must be unvalidated when count is zero. The * 'free' is safe because the refcnt is non-zero and validated * bit is clear => other ops will spin or fail. */ if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated)) != x) ) goto again; /* We cleared the 'valid bit' so we do the clean up. */ free_page_type(page, x); /* Carry on, but with the 'valid bit' now clear. */ x &= ~PGT_validated; nx &= ~PGT_validated; } /* Record TLB information for flush later. */ page->tlbflush_timestamp = tlbflush_current_time(); } else if ( unlikely((nx & (PGT_pinned|PGT_type_mask|PGT_count_mask)) == (PGT_pinned|PGT_l1_page_table|1)) ) { /* Page is now only pinned. Make the back pointer mutable again. */ nx |= PGT_va_mutable; } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); } int get_page_type(struct page_info *page, unsigned long type) { unsigned long nx, x, y = page->u.inuse.type_info; again: do { x = y; nx = x + 1; if ( unlikely((nx & PGT_count_mask) == 0) ) { MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); return 0; } else if ( unlikely((x & PGT_count_mask) == 0) ) { if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) { if ( (x & PGT_type_mask) != (type & PGT_type_mask) ) { /* * On type change we check to flush stale TLB * entries. This may be unnecessary (e.g., page * was GDT/LDT) but those circumstances should be * very rare. */ cpumask_t mask = page_get_owner(page)->domain_dirty_cpumask; tlbflush_filter(mask, page->tlbflush_timestamp); if ( unlikely(!cpus_empty(mask)) ) { perfc_incrc(need_flush_tlb_flush); flush_tlb_mask(mask); } } /* We lose existing type, back pointer, and validity. */ nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); nx |= type; /* No special validation needed for writable pages. */ /* Page tables and GDT/LDT need to be scanned for validity. 
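                 *
                 * (Editor's outline of the validation handshake, added
                 * for exposition: the cmpxchg at the bottom of the loop
                 * installs the new type with PGT_validated clear;
                 * concurrent get_page_type() callers then spin in the
                 * cpu_relax() loop below until alloc_page_type() has
                 * scanned the page and set _PGT_validated. Only
                 * PGT_writable_page skips the scan, as done here.)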
*/ if ( type == PGT_writable_page ) nx |= PGT_validated; } } else { if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) { if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) { if ( ((x & PGT_type_mask) != PGT_l2_page_table) || ((type & PGT_type_mask) != PGT_l1_page_table) ) MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") " "for mfn %lx (pfn %lx)", x, type, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page))); return 0; } else if ( (x & PGT_va_mask) == PGT_va_mutable ) { /* The va backpointer is mutable, hence we update it. */ nx &= ~PGT_va_mask; nx |= type; /* we know the actual type is correct */ } else if ( (type & PGT_va_mask) != PGT_va_mutable ) { ASSERT((type & PGT_va_mask) != (x & PGT_va_mask)); #ifdef CONFIG_X86_PAE /* We use backptr as extra typing. Cannot be unknown. */ if ( (type & PGT_type_mask) == PGT_l2_page_table ) return 0; #endif /* Fixme: add code to propagate va_unknown to subtables. */ if ( ((type & PGT_type_mask) >= PGT_l2_page_table) && !shadow_mode_refcounts(page_get_owner(page)) ) return 0; /* This table is possibly mapped at multiple locations. */ nx &= ~PGT_va_mask; nx |= PGT_va_unknown; } } if ( unlikely(!(x & PGT_validated)) ) { /* Someone else is updating validation of this page. Wait... */ while ( (y = page->u.inuse.type_info) == x ) cpu_relax(); goto again; } } } while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); if ( unlikely(!(nx & PGT_validated)) ) { /* Try to validate page type; drop the new reference on failure. */ if ( unlikely(!alloc_page_type(page, type)) ) { MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" PRtype_info ": caf=%08x taf=%" PRtype_info, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), type, page->count_info, page->u.inuse.type_info); /* Noone else can get a reference. We hold the only ref. */ page->u.inuse.type_info = 0; return 0; } /* Noone else is updating simultaneously. */ __set_bit(_PGT_validated, &page->u.inuse.type_info); } return 1; } int new_guest_cr3(unsigned long mfn) { struct vcpu *v = current; struct domain *d = v->domain; int okay; unsigned long old_base_mfn; if ( hvm_guest(v) && !hvm_paging_enabled(v) ) domain_crash_synchronous(); if ( shadow_mode_refcounts(d) ) { okay = get_page_from_pagenr(mfn, d); if ( unlikely(!okay) ) { MEM_LOG("Error while installing new baseptr %lx", mfn); return 0; } } else { okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); if ( unlikely(!okay) ) { /* Switch to idle pagetable: this VCPU has no active p.t. now. */ MEM_LOG("New baseptr %lx: slow path via idle pagetables", mfn); old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_null(); update_cr3(v); write_cr3(__pa(idle_pg_table)); if ( old_base_mfn != 0 ) put_page_and_type(mfn_to_page(old_base_mfn)); /* Retry the validation with no active p.t. for this VCPU. */ okay = get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); if ( !okay ) { /* Failure here is unrecoverable: the VCPU has no pagetable! 
*/ MEM_LOG("Fatal error while installing new baseptr %lx", mfn); domain_crash(d); ASSERT(v->processor == smp_processor_id()); this_cpu(percpu_mm_info).deferred_ops = 0; return 0; } } } invalidate_shadow_ldt(v); old_base_mfn = pagetable_get_pfn(v->arch.guest_table); v->arch.guest_table = pagetable_from_pfn(mfn); update_cr3(v); /* update shadow_table and cr3 fields of vcpu struct */ write_ptbase(v); if ( likely(old_base_mfn != 0) ) { if ( shadow_mode_refcounts(d) ) put_page(mfn_to_page(old_base_mfn)); else put_page_and_type(mfn_to_page(old_base_mfn)); } return 1; } static void process_deferred_ops(void) { unsigned int deferred_ops; struct domain *d = current->domain; struct percpu_mm_info *info = &this_cpu(percpu_mm_info); deferred_ops = info->deferred_ops; info->deferred_ops = 0; if ( deferred_ops & (DOP_FLUSH_ALL_TLBS|DOP_FLUSH_TLB) ) { if ( deferred_ops & DOP_FLUSH_ALL_TLBS ) flush_tlb_mask(d->domain_dirty_cpumask); else local_flush_tlb(); } if ( deferred_ops & DOP_RELOAD_LDT ) (void)map_ldt_shadow_page(0); if ( unlikely(info->foreign != NULL) ) { put_domain(info->foreign); info->foreign = NULL; } } static int set_foreigndom(domid_t domid) { struct domain *e, *d = current->domain; struct percpu_mm_info *info = &this_cpu(percpu_mm_info); int okay = 1; ASSERT(info->foreign == NULL); if ( likely(domid == DOMID_SELF) ) goto out; if ( domid == d->domain_id ) { MEM_LOG("Dom %u tried to specify itself as foreign domain", d->domain_id); okay = 0; } else if ( !IS_PRIV(d) ) { switch ( domid ) { case DOMID_IO: get_knownalive_domain(dom_io); info->foreign = dom_io; break; default: MEM_LOG("Dom %u cannot set foreign dom", d->domain_id); okay = 0; break; } } else { info->foreign = e = find_domain_by_id(domid); if ( e == NULL ) { switch ( domid ) { case DOMID_XEN: get_knownalive_domain(dom_xen); info->foreign = dom_xen; break; case DOMID_IO: get_knownalive_domain(dom_io); info->foreign = dom_io; break; default: MEM_LOG("Unknown domain '%u'", domid); okay = 0; break; } } } out: return okay; } static inline cpumask_t vcpumask_to_pcpumask( struct domain *d, unsigned long vmask) { unsigned int vcpu_id; cpumask_t pmask = CPU_MASK_NONE; struct vcpu *v; while ( vmask != 0 ) { vcpu_id = find_first_set_bit(vmask); vmask &= ~(1UL << vcpu_id); if ( (vcpu_id < MAX_VIRT_CPUS) && ((v = d->vcpu[vcpu_id]) != NULL) ) cpus_or(pmask, pmask, v->vcpu_dirty_cpumask); } return pmask; } int do_mmuext_op( XEN_GUEST_HANDLE(mmuext_op_t) uops, unsigned int count, XEN_GUEST_HANDLE(uint) pdone, unsigned int foreigndom) { struct mmuext_op op; int rc = 0, i = 0, okay; unsigned long mfn, type; unsigned int done = 0; struct page_info *page; struct vcpu *v = current; struct domain *d = v->domain; LOCK_BIGLOCK(d); if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { count &= ~MMU_UPDATE_PREEMPTED; if ( unlikely(!guest_handle_is_null(pdone)) ) (void)copy_from_guest(&done, pdone, 1); } if ( !set_foreigndom(foreigndom) ) { rc = -ESRCH; goto out; } if ( unlikely(!guest_handle_okay(uops, count)) ) { rc = -EFAULT; goto out; } for ( i = 0; i < count; i++ ) { if ( hypercall_preempt_check() ) { rc = hypercall_create_continuation( __HYPERVISOR_mmuext_op, "hihi", uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); break; } if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) ) { MEM_LOG("Bad __copy_from_guest"); rc = -EFAULT; break; } okay = 1; mfn = op.arg1.mfn; page = mfn_to_page(mfn); switch ( op.cmd ) { case MMUEXT_PIN_L1_TABLE: type = PGT_l1_page_table | PGT_va_mutable; goto pin_page; case MMUEXT_PIN_L2_TABLE: case MMUEXT_PIN_L3_TABLE: case 
MMUEXT_PIN_L4_TABLE: /* Ignore pinning of subdirectories. */ if ( (op.cmd - MMUEXT_PIN_L1_TABLE) != (CONFIG_PAGING_LEVELS - 1) ) break; type = PGT_root_page_table; pin_page: if ( shadow_mode_refcounts(FOREIGNDOM) ) break; okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); if ( unlikely(!okay) ) { MEM_LOG("Error while pinning mfn %lx", mfn); break; } if ( unlikely(test_and_set_bit(_PGT_pinned, &page->u.inuse.type_info)) ) { MEM_LOG("Mfn %lx already pinned", mfn); put_page_and_type(page); okay = 0; break; } break; case MMUEXT_UNPIN_TABLE: if ( shadow_mode_refcounts(d) ) break; if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) { MEM_LOG("Mfn %lx bad domain (dom=%p)", mfn, page_get_owner(page)); } else if ( likely(test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info)) ) { put_page_and_type(page); put_page(page); if ( shadow_mode_enabled(d) ) { shadow_lock(d); shadow_remove_all_shadows(v, _mfn(mfn)); shadow_unlock(d); } } else { okay = 0; put_page(page); MEM_LOG("Mfn %lx not pinned", mfn); } break; case MMUEXT_NEW_BASEPTR: mfn = gmfn_to_mfn(current->domain, mfn); okay = new_guest_cr3(mfn); this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB; break; #ifdef __x86_64__ case MMUEXT_NEW_USER_BASEPTR: okay = 1; if (likely(mfn != 0)) okay = get_page_and_type_from_pagenr( mfn, PGT_root_page_table, d); if ( unlikely(!okay) ) { MEM_LOG("Error while installing new mfn %lx", mfn); } else { unsigned long old_mfn = pagetable_get_pfn(v->arch.guest_table_user); v->arch.guest_table_user = pagetable_from_pfn(mfn); if ( old_mfn != 0 ) put_page_and_type(mfn_to_page(old_mfn)); } break; #endif case MMUEXT_TLB_FLUSH_LOCAL: this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB; break; case MMUEXT_INVLPG_LOCAL: if ( !shadow_mode_enabled(d) || shadow_invlpg(v, op.arg1.linear_addr) != 0 ) local_flush_tlb_one(op.arg1.linear_addr); break; case MMUEXT_TLB_FLUSH_MULTI: case MMUEXT_INVLPG_MULTI: { unsigned long vmask; cpumask_t pmask; if ( unlikely(get_user(vmask, (unsigned long *)op.arg2.vcpumask)) ) { okay = 0; break; } pmask = vcpumask_to_pcpumask(d, vmask); if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI ) flush_tlb_mask(pmask); else flush_tlb_one_mask(pmask, op.arg1.linear_addr); break; } case MMUEXT_TLB_FLUSH_ALL: flush_tlb_mask(d->domain_dirty_cpumask); break; case MMUEXT_INVLPG_ALL: flush_tlb_one_mask(d->domain_dirty_cpumask, op.arg1.linear_addr); break; case MMUEXT_FLUSH_CACHE: if ( unlikely(!cache_flush_permitted(d)) ) { MEM_LOG("Non-physdev domain tried to FLUSH_CACHE."); okay = 0; } else { wbinvd(); } break; case MMUEXT_SET_LDT: { unsigned long ptr = op.arg1.linear_addr; unsigned long ents = op.arg2.nr_ents; if ( shadow_mode_external(d) ) { MEM_LOG("ignoring SET_LDT hypercall from external " "domain %u", d->domain_id); okay = 0; } else if ( ((ptr & (PAGE_SIZE-1)) != 0) || (ents > 8192) || !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) ) { okay = 0; MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents); } else if ( (v->arch.guest_context.ldt_ents != ents) || (v->arch.guest_context.ldt_base != ptr) ) { invalidate_shadow_ldt(v); v->arch.guest_context.ldt_base = ptr; v->arch.guest_context.ldt_ents = ents; load_LDT(v); this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT; if ( ents != 0 ) this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT; } break; } default: MEM_LOG("Invalid extended pt command 0x%x", op.cmd); okay = 0; break; } if ( unlikely(!okay) ) { rc = -EINVAL; break; } guest_handle_add_offset(uops, 1); } out: process_deferred_ops(); /* Add incremental work we have done to the @done 
output parameter. */ done += i; if ( unlikely(!guest_handle_is_null(pdone)) ) copy_to_guest(pdone, &done, 1); UNLOCK_BIGLOCK(d); return rc; } int do_mmu_update( XEN_GUEST_HANDLE(mmu_update_t) ureqs, unsigned int count, XEN_GUEST_HANDLE(uint) pdone, unsigned int foreigndom) { struct mmu_update req; void *va; unsigned long gpfn, gmfn, mfn; struct page_info *page; int rc = 0, okay = 1, i = 0; unsigned int cmd, done = 0; struct vcpu *v = current; struct domain *d = v->domain; unsigned long type_info; struct domain_mmap_cache mapcache, sh_mapcache; LOCK_BIGLOCK(d); if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { count &= ~MMU_UPDATE_PREEMPTED; if ( unlikely(!guest_handle_is_null(pdone)) ) (void)copy_from_guest(&done, pdone, 1); } domain_mmap_cache_init(&mapcache); domain_mmap_cache_init(&sh_mapcache); if ( !set_foreigndom(foreigndom) ) { rc = -ESRCH; goto out; } perfc_incrc(calls_to_mmu_update); perfc_addc(num_page_updates, count); if ( unlikely(!guest_handle_okay(ureqs, count)) ) { rc = -EFAULT; goto out; } for ( i = 0; i < count; i++ ) { if ( hypercall_preempt_check() ) { rc = hypercall_create_continuation( __HYPERVISOR_mmu_update, "hihi", ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); break; } if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) ) { MEM_LOG("Bad __copy_from_guest"); rc = -EFAULT; break; } cmd = req.ptr & (sizeof(l1_pgentry_t)-1); okay = 0; switch ( cmd ) { /* * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table. */ case MMU_NORMAL_PT_UPDATE: gmfn = req.ptr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) ) { MEM_LOG("Could not get page for normal update"); break; } va = map_domain_page_with_cache(mfn, &mapcache); va = (void *)((unsigned long)va + (unsigned long)(req.ptr & ~PAGE_MASK)); page = mfn_to_page(mfn); switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask ) { case PGT_l1_page_table: case PGT_l2_page_table: case PGT_l3_page_table: case PGT_l4_page_table: { if ( shadow_mode_refcounts(d) ) { DPRINTK("mmu update on shadow-refcounted domain!"); break; } if ( unlikely(!get_page_type( page, type_info & (PGT_type_mask|PGT_va_mask))) ) goto not_a_pt; switch ( type_info & PGT_type_mask ) { case PGT_l1_page_table: { l1_pgentry_t l1e = l1e_from_intpte(req.val); okay = mod_l1_entry(va, l1e, mfn); } break; case PGT_l2_page_table: { l2_pgentry_t l2e = l2e_from_intpte(req.val); okay = mod_l2_entry( (l2_pgentry_t *)va, l2e, mfn, type_info); } break; #if CONFIG_PAGING_LEVELS >= 3 case PGT_l3_page_table: { l3_pgentry_t l3e = l3e_from_intpte(req.val); okay = mod_l3_entry(va, l3e, mfn, type_info); } break; #endif #if CONFIG_PAGING_LEVELS >= 4 case PGT_l4_page_table: { l4_pgentry_t l4e = l4e_from_intpte(req.val); okay = mod_l4_entry(va, l4e, mfn, type_info); } break; #endif } put_page_type(page); } break; default: not_a_pt: { if ( unlikely(!get_page_type(page, PGT_writable_page)) ) break; if ( unlikely(shadow_mode_enabled(d)) ) shadow_lock(d); *(intpte_t *)va = req.val; okay = 1; if ( unlikely(shadow_mode_enabled(d)) ) { shadow_validate_guest_entry(v, _mfn(mfn), va); shadow_unlock(d); } put_page_type(page); } break; } unmap_domain_page_with_cache(va, &mapcache); put_page(page); break; case MMU_MACHPHYS_UPDATE: mfn = req.ptr >> PAGE_SHIFT; gpfn = req.val; if ( unlikely(!get_page_from_pagenr(mfn, FOREIGNDOM)) ) { MEM_LOG("Could not get page for mach->phys update"); break; } if ( shadow_mode_translate(FOREIGNDOM) ) shadow_guest_physmap_add_page(FOREIGNDOM, gpfn, mfn); else set_gpfn_from_mfn(mfn, gpfn); 
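        /*
         * Editor's sketch (illustrative, with hypothetical numbers): a
         * privileged tool populating a guest's physmap would encode one
         * request per frame, e.g. to bind machine frame 0x1a2b3 to guest
         * pfn 0x42:
         *
         *     req.ptr = (0x1a2b3UL << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
         *     req.val = 0x42;
         *
         * which the decode above turns back into mfn/gpfn for
         * set_gpfn_from_mfn() (or the shadow-translate equivalent).
         */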
okay = 1; // Mark the new gfn dirty... mark_dirty(FOREIGNDOM, mfn); put_page(mfn_to_page(mfn)); break; default: MEM_LOG("Invalid page update command %x", cmd); break; } if ( unlikely(!okay) ) { rc = -EINVAL; break; } guest_handle_add_offset(ureqs, 1); } out: domain_mmap_cache_destroy(&mapcache); domain_mmap_cache_destroy(&sh_mapcache); process_deferred_ops(); /* Add incremental work we have done to the @done output parameter. */ done += i; if ( unlikely(!guest_handle_is_null(pdone)) ) copy_to_guest(pdone, &done, 1); UNLOCK_BIGLOCK(d); return rc; } static int create_grant_pte_mapping( unsigned long pte_addr, l1_pgentry_t nl1e, struct vcpu *v) { int rc = GNTST_okay; void *va; unsigned long gmfn, mfn; struct page_info *page; u32 type_info; l1_pgentry_t ol1e; struct domain *d = v->domain; ASSERT(spin_is_locked(&d->big_lock)); adjust_guest_l1e(nl1e); gmfn = pte_addr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) ) { MEM_LOG("Could not get page for normal update"); return GNTST_general_error; } va = map_domain_page(mfn); va = (void *)((unsigned long)va + (pte_addr & ~PAGE_MASK)); page = mfn_to_page(mfn); type_info = page->u.inuse.type_info; if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; goto failed; } ol1e = *(l1_pgentry_t *)va; if ( !update_l1e(va, ol1e, nl1e, mfn, v) ) { put_page_type(page); rc = GNTST_general_error; goto failed; } if ( !shadow_mode_refcounts(d) ) put_page_from_l1e(ol1e, d); put_page_type(page); failed: unmap_domain_page(va); put_page(page); return rc; } static int destroy_grant_pte_mapping( unsigned long addr, unsigned long frame, struct domain *d) { int rc = GNTST_okay; void *va; unsigned long gmfn, mfn; struct page_info *page; u32 type_info; l1_pgentry_t ol1e; gmfn = addr >> PAGE_SHIFT; mfn = gmfn_to_mfn(d, gmfn); if ( unlikely(!get_page_from_pagenr(mfn, current->domain)) ) { MEM_LOG("Could not get page for normal update"); return GNTST_general_error; } va = map_domain_page(mfn); va = (void *)((unsigned long)va + (addr & ~PAGE_MASK)); page = mfn_to_page(mfn); type_info = page->u.inuse.type_info; if ( ((type_info & PGT_type_mask) != PGT_l1_page_table) || !get_page_type(page, type_info & (PGT_type_mask|PGT_va_mask)) ) { MEM_LOG("Grant map attempted to update a non-L1 page"); rc = GNTST_general_error; goto failed; } if ( __copy_from_user(&ol1e, (l1_pgentry_t *)va, sizeof(ol1e)) ) { put_page_type(page); rc = GNTST_general_error; goto failed; } /* Check that the virtual address supplied is actually mapped to frame. */ if ( unlikely((l1e_get_intpte(ol1e) >> PAGE_SHIFT) != frame) ) { MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx", (unsigned long)l1e_get_intpte(ol1e), addr, frame); put_page_type(page); rc = GNTST_general_error; goto failed; } /* Delete pagetable entry. */ if ( unlikely(!update_l1e( (l1_pgentry_t *)va, ol1e, l1e_empty(), mfn, d->vcpu[0] /* Change if we go to per-vcpu shadows. 
                          */)) )
    {
        MEM_LOG("Cannot delete PTE entry at %p", va);
        put_page_type(page);
        rc = GNTST_general_error;
        goto failed;
    }

    put_page_type(page);

 failed:
    unmap_domain_page(va);
    put_page(page);
    return rc;
}

static int create_grant_va_mapping(
    unsigned long va, l1_pgentry_t nl1e, struct vcpu *v)
{
    l1_pgentry_t *pl1e, ol1e;
    struct domain *d = v->domain;

    ASSERT(spin_is_locked(&d->big_lock));

    adjust_guest_l1e(nl1e);

    pl1e = &linear_pg_table[l1_linear_offset(va)];

    if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ||
         !update_l1e(pl1e, ol1e, nl1e,
                     l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]), v) )
        return GNTST_general_error;

    if ( !shadow_mode_refcounts(d) )
        put_page_from_l1e(ol1e, d);

    return GNTST_okay;
}

static int destroy_grant_va_mapping(
    unsigned long addr, unsigned long frame, struct domain *d)
{
    l1_pgentry_t *pl1e, ol1e;

    pl1e = &linear_pg_table[l1_linear_offset(addr)];

    if ( unlikely(__get_user(ol1e.l1, &pl1e->l1) != 0) )
    {
        MEM_LOG("Could not find PTE entry for address %lx", addr);
        return GNTST_general_error;
    }

    /*
     * Check that the virtual address supplied is actually mapped to
     * frame.
     */
    if ( unlikely(l1e_get_pfn(ol1e) != frame) )
    {
        MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
                l1e_get_pfn(ol1e), addr, frame);
        return GNTST_general_error;
    }

    /* Delete pagetable entry. */
    if ( unlikely(!update_l1e(pl1e, ol1e, l1e_empty(),
                      l2e_get_pfn(__linear_l2_table[l2_linear_offset(addr)]),
                      d->vcpu[0] /* Change for per-vcpu shadows */)) )
    {
        MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
        return GNTST_general_error;
    }

    return GNTST_okay;
}

int create_grant_host_mapping(
    unsigned long addr, unsigned long frame, unsigned int flags)
{
    l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);

    if ( (flags & GNTMAP_application_map) )
        l1e_add_flags(pte, _PAGE_USER);
    if ( !(flags & GNTMAP_readonly) )
        l1e_add_flags(pte, _PAGE_RW);

    if ( flags & GNTMAP_contains_pte )
        return create_grant_pte_mapping(addr, pte, current);
    return create_grant_va_mapping(addr, pte, current);
}

int destroy_grant_host_mapping(
    unsigned long addr, unsigned long frame, unsigned int flags)
{
    if ( flags & GNTMAP_contains_pte )
        return destroy_grant_pte_mapping(addr, frame, current->domain);
    return destroy_grant_va_mapping(addr, frame, current->domain);
}

int steal_page(
    struct domain *d, struct page_info *page, unsigned int memflags)
{
    u32 _d, _nd, x, y;

    spin_lock(&d->page_alloc_lock);

    /*
     * The tricky bit: atomically release ownership while there is just one
     * benign reference to the page (PGC_allocated). If that reference
     * disappears then the deallocation routine will safely spin.
     */
    _d  = pickle_domptr(d);
    _nd = page->u.inuse._domain;
    y   = page->count_info;
    do {
        x = y;
        if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
                      (1 | PGC_allocated)) ||
             unlikely(_nd != _d) )
        {
            DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
                    " caf=%08x, taf=%" PRtype_info "\n",
                    (void *)page_to_mfn(page), d, d->domain_id,
                    unpickle_domptr(_nd), x, page->u.inuse.type_info);
            spin_unlock(&d->page_alloc_lock);
            return -1;
        }
        __asm__ __volatile__(
            LOCK_PREFIX "cmpxchg8b %2"
            : "=d" (_nd), "=a" (y),
              "=m" (*(volatile u64 *)(&page->count_info))
            : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
    } while ( unlikely(_nd != _d) || unlikely(y != x) );

    /*
     * Unlink from 'd'. At least one reference remains (now anonymous), so
     * no one else is spinning to try to delete this page from 'd'.
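     *
     * (The cmpxchg8b above treats the adjacent 32-bit count_info and
     * _domain fields as a single 64-bit quantity: EDX:EAX carries the
     * expected owner and count, ECX:EBX the replacement -- a NULL owner
     * with the count left as-is -- so ownership is released only if
     * neither field changed under our feet.)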
*/ if ( !(memflags & MEMF_no_refcount) ) d->tot_pages--; list_del(&page->list); spin_unlock(&d->page_alloc_lock); return 0; } int do_update_va_mapping(unsigned long va, u64 val64, unsigned long flags) { l1_pgentry_t val = l1e_from_intpte(val64); struct vcpu *v = current; struct domain *d = v->domain; unsigned long vmask, bmap_ptr; cpumask_t pmask; int rc = 0; perfc_incrc(calls_to_update_va); if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) return -EINVAL; if ( unlikely(shadow_mode_refcounts(d)) ) { DPRINTK("Grant op on a shadow-refcounted domain\n"); return -EINVAL; } LOCK_BIGLOCK(d); if ( likely(rc == 0) && unlikely(shadow_mode_enabled(d)) ) { if ( unlikely(this_cpu(percpu_mm_info).foreign && (shadow_mode_translate(d) || shadow_mode_translate( this_cpu(percpu_mm_info).foreign))) ) { /* * The foreign domain's pfn's are in a different namespace. There's * not enough information in just a gpte to figure out how to * (re-)shadow this entry. */ domain_crash(d); } } if ( unlikely(!mod_l1_entry( &linear_pg_table[l1_linear_offset(va)], val, l2e_get_pfn(__linear_l2_table[l2_linear_offset(va)]))) ) rc = -EINVAL; switch ( flags & UVMF_FLUSHTYPE_MASK ) { case UVMF_TLB_FLUSH: switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: local_flush_tlb(); break; case UVMF_ALL: flush_tlb_mask(d->domain_dirty_cpumask); break; default: if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) ) rc = -EFAULT; pmask = vcpumask_to_pcpumask(d, vmask); flush_tlb_mask(pmask); break; } break; case UVMF_INVLPG: switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) { case UVMF_LOCAL: if ( !shadow_mode_enabled(d) || (shadow_invlpg(current, va) != 0) ) local_flush_tlb_one(va); break; case UVMF_ALL: flush_tlb_one_mask(d->domain_dirty_cpumask, va); break; default: if ( unlikely(get_user(vmask, (unsigned long *)bmap_ptr)) ) rc = -EFAULT; pmask = vcpumask_to_pcpumask(d, vmask); flush_tlb_one_mask(pmask, va); break; } break; } process_deferred_ops(); UNLOCK_BIGLOCK(d); return rc; } int do_update_va_mapping_otherdomain(unsigned long va, u64 val64, unsigned long flags, domid_t domid) { int rc; if ( unlikely(!IS_PRIV(current->domain)) ) return -EPERM; if ( !set_foreigndom(domid) ) return -ESRCH; rc = do_update_va_mapping(va, val64, flags); return rc; } /************************* * Descriptor Tables */ void destroy_gdt(struct vcpu *v) { int i; unsigned long pfn; v->arch.guest_context.gdt_ents = 0; for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ ) { if ( (pfn = l1e_get_pfn(v->arch.perdomain_ptes[i])) != 0 ) put_page_and_type(mfn_to_page(pfn)); v->arch.perdomain_ptes[i] = l1e_empty(); v->arch.guest_context.gdt_frames[i] = 0; } } long set_gdt(struct vcpu *v, unsigned long *frames, unsigned int entries) { struct domain *d = v->domain; /* NB. There are 512 8-byte entries per GDT page. */ int i, nr_pages = (entries + 511) / 512; unsigned long mfn; if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; /* Check the pages in the new GDT. */ for ( i = 0; i < nr_pages; i++ ) { mfn = frames[i] = gmfn_to_mfn(d, frames[i]); if ( !mfn_valid(mfn) || !get_page_and_type(mfn_to_page(mfn), d, PGT_gdt_page) ) goto fail; } /* Tear down the old GDT. */ destroy_gdt(v); /* Install the new GDT. 
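       Each frame was validated and typed above with get_page_and_type(...,
       PGT_gdt_page), so the type system guarantees the guest cannot also
       hold a writable mapping of these frames while they are installed
       here.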
*/ v->arch.guest_context.gdt_ents = entries; for ( i = 0; i < nr_pages; i++ ) { v->arch.guest_context.gdt_frames[i] = frames[i]; v->arch.perdomain_ptes[i] = l1e_from_pfn(frames[i], __PAGE_HYPERVISOR); } return 0; fail: while ( i-- > 0 ) put_page_and_type(mfn_to_page(frames[i])); return -EINVAL; } long do_set_gdt(XEN_GUEST_HANDLE(ulong) frame_list, unsigned int entries) { int nr_pages = (entries + 511) / 512; unsigned long frames[16]; long ret; /* Rechecked in set_gdt, but ensures a sane limit for copy_from_user(). */ if ( entries > FIRST_RESERVED_GDT_ENTRY ) return -EINVAL; if ( copy_from_guest((unsigned long *)frames, frame_list, nr_pages) ) return -EFAULT; LOCK_BIGLOCK(current->domain); if ( (ret = set_gdt(current, frames, entries)) == 0 ) local_flush_tlb(); UNLOCK_BIGLOCK(current->domain); return ret; } long do_update_descriptor(u64 pa, u64 desc) { struct domain *dom = current->domain; unsigned long gmfn = pa >> PAGE_SHIFT; unsigned long mfn; unsigned int offset; struct desc_struct *gdt_pent, d; struct page_info *page; long ret = -EINVAL; offset = ((unsigned int)pa & ~PAGE_MASK) / sizeof(struct desc_struct); *(u64 *)&d = desc; LOCK_BIGLOCK(dom); if ( !VALID_MFN(mfn = gmfn_to_mfn(dom, gmfn)) || (((unsigned int)pa % sizeof(struct desc_struct)) != 0) || !mfn_valid(mfn) || !check_descriptor(&d) ) { UNLOCK_BIGLOCK(dom); return -EINVAL; } page = mfn_to_page(mfn); if ( unlikely(!get_page(page, dom)) ) { UNLOCK_BIGLOCK(dom); return -EINVAL; } /* Check if the given frame is in use in an unsafe context. */ switch ( page->u.inuse.type_info & PGT_type_mask ) { case PGT_gdt_page: if ( unlikely(!get_page_type(page, PGT_gdt_page)) ) goto out; break; case PGT_ldt_page: if ( unlikely(!get_page_type(page, PGT_ldt_page)) ) goto out; break; default: if ( unlikely(!get_page_type(page, PGT_writable_page)) ) goto out; break; } mark_dirty(dom, mfn); /* All is good so make the update. */ gdt_pent = map_domain_page(mfn); memcpy(&gdt_pent[offset], &d, 8); unmap_domain_page(gdt_pent); put_page_type(page); ret = 0; /* success */ out: put_page(page); UNLOCK_BIGLOCK(dom); return ret; } typedef struct e820entry e820entry_t; DEFINE_XEN_GUEST_HANDLE(e820entry_t); long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) { switch ( op ) { case XENMEM_add_to_physmap: { struct xen_add_to_physmap xatp; unsigned long prev_mfn, mfn = 0, gpfn; struct domain *d; if ( copy_from_guest(&xatp, arg, 1) ) return -EFAULT; if ( xatp.domid == DOMID_SELF ) { d = current->domain; get_knownalive_domain(d); } else if ( !IS_PRIV(current->domain) ) return -EPERM; else if ( (d = find_domain_by_id(xatp.domid)) == NULL ) return -ESRCH; switch ( xatp.space ) { case XENMAPSPACE_shared_info: if ( xatp.idx == 0 ) mfn = virt_to_mfn(d->shared_info); break; case XENMAPSPACE_grant_table: if ( xatp.idx < NR_GRANT_FRAMES ) mfn = virt_to_mfn(d->grant_table->shared) + xatp.idx; break; default: break; } if ( !shadow_mode_translate(d) || (mfn == 0) ) { put_domain(d); return -EINVAL; } LOCK_BIGLOCK(d); /* Remove previously mapped page if it was present. */ prev_mfn = gmfn_to_mfn(d, xatp.gpfn); if ( mfn_valid(prev_mfn) ) { if ( IS_XEN_HEAP_FRAME(mfn_to_page(prev_mfn)) ) /* Xen heap frames are simply unhooked from this phys slot. */ guest_physmap_remove_page(d, xatp.gpfn, prev_mfn); else /* Normal domain memory is freed, to avoid leaking memory. */ guest_remove_page(d, xatp.gpfn); } /* Unmap from old location, if any. */ gpfn = get_gpfn_from_mfn(mfn); if ( gpfn != INVALID_M2P_ENTRY ) guest_physmap_remove_page(d, gpfn, mfn); /* Map at new location. 
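       Both the target gpfn slot and the page's previous physmap entry
       (if any) were unhooked above, so adding the mapping here cannot
       leave a stale gpfn-to-mfn alias behind.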
     */
        guest_physmap_add_page(d, xatp.gpfn, mfn);

        UNLOCK_BIGLOCK(d);

        put_domain(d);

        break;
    }

    case XENMEM_memory_map:
    {
        return -ENOSYS;
    }

    case XENMEM_machine_memory_map:
    {
        struct xen_memory_map memmap;
        XEN_GUEST_HANDLE(e820entry_t) buffer;
        int count;

        if ( !IS_PRIV(current->domain) )
            return -EINVAL;

        if ( copy_from_guest(&memmap, arg, 1) )
            return -EFAULT;

        if ( memmap.nr_entries < e820.nr_map + 1 )
            return -EINVAL;

        buffer = guest_handle_cast(memmap.buffer, e820entry_t);

        count = min((unsigned int)e820.nr_map, memmap.nr_entries);
        /* copy_to_guest() yields an unsigned "amount not copied", so a
         * '<0' test can never observe a fault; test for non-zero. */
        if ( copy_to_guest(buffer, &e820.map[0], count) != 0 )
            return -EFAULT;

        memmap.nr_entries = count;

        if ( copy_to_guest(arg, &memmap, 1) )
            return -EFAULT;

        return 0;
    }

    case XENMEM_machphys_mapping:
    {
        struct xen_machphys_mapping mapping = {
            .v_start = MACH2PHYS_VIRT_START,
            .v_end   = MACH2PHYS_VIRT_END,
            .max_mfn = MACH2PHYS_NR_ENTRIES - 1
        };

        if ( copy_to_guest(arg, &mapping, 1) )
            return -EFAULT;

        return 0;
    }

    default:
        return subarch_memory_op(op, arg);
    }

    return 0;
}

/*************************
 * Writable Pagetables
 */

static int ptwr_emulated_update(
    unsigned long addr,
    paddr_t old,
    paddr_t val,
    unsigned int bytes,
    unsigned int do_cmpxchg)
{
    unsigned long pfn;
    struct page_info *page;
    l1_pgentry_t pte, ol1e, nl1e, *pl1e;
    struct vcpu *v = current;
    struct domain *d = v->domain;

    /* Aligned access only, thank you. */
    if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
    {
        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
                bytes, addr);
        return X86EMUL_UNHANDLEABLE;
    }

    /* Turn a sub-word access into a full-word access. */
    if ( bytes != sizeof(paddr_t) )
    {
        paddr_t      full;
        unsigned int rc, offset = addr & (sizeof(paddr_t)-1);

        /* Align address; read full word. */
        addr &= ~(sizeof(paddr_t)-1);
        if ( (rc = copy_from_user(&full, (void *)addr,
                                  sizeof(paddr_t))) != 0 )
        {
            propagate_page_fault(addr+sizeof(paddr_t)-rc, 0); /* read fault */
            return X86EMUL_PROPAGATE_FAULT;
        }
        /* Mask out bits provided by caller. */
        full &= ~((((paddr_t)1 << (bytes*8)) - 1) << (offset*8));
        /* Shift the caller value and OR in the missing bits. */
        val  &= (((paddr_t)1 << (bytes*8)) - 1);
        val <<= (offset)*8;
        val  |= full;
        /* Also fill in missing parts of the cmpxchg old value. */
        old  &= (((paddr_t)1 << (bytes*8)) - 1);
        old <<= (offset)*8;
        old  |= full;
    }

    /* Read the PTE that maps the page being updated. */
    if ( __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)],
                          sizeof(pte)) )
    {
        MEM_LOG("ptwr_emulate: Cannot read thru linear_pg_table");
        return X86EMUL_UNHANDLEABLE;
    }

    pfn  = l1e_get_pfn(pte);
    page = mfn_to_page(pfn);

    /* We are looking only for read-only mappings of p.t. pages. */
    ASSERT((l1e_get_flags(pte) & (_PAGE_RW|_PAGE_PRESENT)) == _PAGE_PRESENT);
    ASSERT((page->u.inuse.type_info & PGT_type_mask) == PGT_l1_page_table);
    ASSERT((page->u.inuse.type_info & PGT_count_mask) != 0);
    ASSERT(page_get_owner(page) == d);

    /* Check the new PTE. */
    nl1e = l1e_from_intpte(val);
    if ( unlikely(!get_page_from_l1e(nl1e, d)) )
    {
        if ( (CONFIG_PAGING_LEVELS == 3) &&
             (bytes == 4) &&
             !do_cmpxchg &&
             (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
        {
            /*
             * If this is a half-write to a PAE PTE then we assume that the
             * guest has simply got the two writes the wrong way round. We
             * zap the PRESENT bit on the assumption the bottom half will be
             * written immediately after we return to the guest.
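             *
             * Concretely, the sequence being tolerated is an update of a
             * present PTE with two 32-bit stores, high word first: merged
             * with the stale low word, the intermediate entry is PRESENT
             * but names a bogus frame, so it fails get_page_from_l1e()
             * above. Demoting it to not-present keeps the window harmless;
             * the low-word store that follows is emulated and validated as
             * a complete, coherent PTE.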
*/ MEM_LOG("ptwr_emulate: fixing up invalid PAE PTE %"PRIpte"\n", l1e_get_intpte(nl1e)); l1e_remove_flags(nl1e, _PAGE_PRESENT); } else { MEM_LOG("ptwr_emulate: could not get_page_from_l1e()"); return X86EMUL_UNHANDLEABLE; } } adjust_guest_l1e(nl1e); /* Checked successfully: do the update (write or cmpxchg). */ pl1e = map_domain_page(page_to_mfn(page)); pl1e = (l1_pgentry_t *)((unsigned long)pl1e + (addr & ~PAGE_MASK)); if ( do_cmpxchg ) { if ( shadow_mode_enabled(d) ) shadow_lock(d); ol1e = l1e_from_intpte(old); if ( cmpxchg((intpte_t *)pl1e, old, val) != old ) { if ( shadow_mode_enabled(d) ) shadow_unlock(d); unmap_domain_page(pl1e); put_page_from_l1e(nl1e, d); return X86EMUL_CMPXCHG_FAILED; } if ( unlikely(shadow_mode_enabled(v->domain)) ) { shadow_validate_guest_entry(v, _mfn(page_to_mfn(page)), pl1e); shadow_unlock(v->domain); } } else { ol1e = *pl1e; if ( !update_l1e(pl1e, ol1e, nl1e, page_to_mfn(page), v) ) BUG(); } unmap_domain_page(pl1e); /* Finally, drop the old PTE. */ put_page_from_l1e(ol1e, d); return X86EMUL_CONTINUE; } static int ptwr_emulated_write( unsigned long addr, unsigned long val, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { return ptwr_emulated_update(addr, 0, val, bytes, 0); } static int ptwr_emulated_cmpxchg( unsigned long addr, unsigned long old, unsigned long new, unsigned int bytes, struct x86_emulate_ctxt *ctxt) { return ptwr_emulated_update(addr, old, new, bytes, 1); } static int ptwr_emulated_cmpxchg8b( unsigned long addr, unsigned long old, unsigned long old_hi, unsigned long new, unsigned long new_hi, struct x86_emulate_ctxt *ctxt) { if ( CONFIG_PAGING_LEVELS == 2 ) return X86EMUL_UNHANDLEABLE; else return ptwr_emulated_update( addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1); } static struct x86_emulate_ops ptwr_emulate_ops = { .read_std = x86_emulate_read_std, .write_std = x86_emulate_write_std, .read_emulated = x86_emulate_read_std, .write_emulated = ptwr_emulated_write, .cmpxchg_emulated = ptwr_emulated_cmpxchg, .cmpxchg8b_emulated = ptwr_emulated_cmpxchg8b }; /* Write page fault handler: check if guest is trying to modify a PTE. */ int ptwr_do_page_fault(struct domain *d, unsigned long addr, struct cpu_user_regs *regs) { unsigned long pfn; struct page_info *page; l1_pgentry_t pte; l2_pgentry_t *pl2e, l2e; struct x86_emulate_ctxt emul_ctxt; LOCK_BIGLOCK(d); /* * Attempt to read the PTE that maps the VA being accessed. By checking for * PDE validity in the L2 we avoid many expensive fixups in __get_user(). */ pl2e = &__linear_l2_table[l2_linear_offset(addr)]; if ( __copy_from_user(&l2e, pl2e, sizeof(l2e)) || !(l2e_get_flags(l2e) & _PAGE_PRESENT) || __copy_from_user(&pte, &linear_pg_table[l1_linear_offset(addr)], sizeof(pte)) ) goto bail; pfn = l1e_get_pfn(pte); page = mfn_to_page(pfn); /* We are looking only for read-only mappings of p.t. pages. 
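     * Four things must hold: the mapping is PRESENT but not RW (so the
     * fault is ours to fix), the frame's current type is an L1 page
     * table, that type is live (non-zero type count), and the faulting
     * domain actually owns the frame.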
     */
    if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
         ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
         ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
         (page_get_owner(page) != d) )
        goto bail;

    emul_ctxt.regs = guest_cpu_user_regs();
    emul_ctxt.cr2  = addr;
    emul_ctxt.mode = X86EMUL_MODE_HOST;
    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
        goto bail;

    UNLOCK_BIGLOCK(d);
    perfc_incrc(ptwr_emulations);
    return EXCRET_fault_fixed;

 bail:
    UNLOCK_BIGLOCK(d);
    return 0;
}

int map_pages_to_xen(
    unsigned long virt,
    unsigned long mfn,
    unsigned long nr_mfns,
    unsigned long flags)
{
    l2_pgentry_t *pl2e, ol2e;
    l1_pgentry_t *pl1e, ol1e;
    unsigned int  i;

    unsigned int  map_small_pages = !!(flags & MAP_SMALL_PAGES);
    flags &= ~MAP_SMALL_PAGES;

    while ( nr_mfns != 0 )
    {
        pl2e = virt_to_xen_l2e(virt);

        if ( ((((virt>>PAGE_SHIFT) | mfn) & ((1<<PAGETABLE_ORDER)-1)) == 0) &&
             (nr_mfns >= (1<<PAGETABLE_ORDER)) &&
             !map_small_pages )
        {
            /* Super-page mapping. */
            ol2e  = *pl2e;
            *pl2e = l2e_from_pfn(mfn, flags|_PAGE_PSE);

            if ( (l2e_get_flags(ol2e) & _PAGE_PRESENT) )
            {
                local_flush_tlb_pge();
                if ( !(l2e_get_flags(ol2e) & _PAGE_PSE) )
                    free_xen_pagetable(l2e_get_page(ol2e));
            }

            virt    += 1UL << L2_PAGETABLE_SHIFT;
            mfn     += 1UL << PAGETABLE_ORDER;
            nr_mfns -= 1UL << PAGETABLE_ORDER;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
            {
                pl1e = page_to_virt(alloc_xen_pagetable());
                clear_page(pl1e);
                *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR);
            }
            else if ( l2e_get_flags(*pl2e) & _PAGE_PSE )
            {
                /* Shatter the super page into an L1 table before editing. */
                pl1e = page_to_virt(alloc_xen_pagetable());
                for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
                    pl1e[i] = l1e_from_pfn(
                        l2e_get_pfn(*pl2e) + i,
                        l2e_get_flags(*pl2e) & ~_PAGE_PSE);
                *pl2e = l2e_from_page(virt_to_page(pl1e), __PAGE_HYPERVISOR);
                local_flush_tlb_pge();
            }

            pl1e  = l2e_to_l1e(*pl2e) + l1_table_offset(virt);
            ol1e  = *pl1e;
            *pl1e = l1e_from_pfn(mfn, flags);
            if ( (l1e_get_flags(ol1e) & _PAGE_PRESENT) )
                local_flush_tlb_one(virt);

            virt    += 1UL << PAGE_SHIFT;
            mfn     += 1UL;
            nr_mfns -= 1UL;
        }
    }

    return 0;
}

void __set_fixmap(
    enum fixed_addresses idx, unsigned long mfn, unsigned long flags)
{
    BUG_ON(idx >= __end_of_fixed_addresses);
    map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags);
}

#ifdef MEMORY_GUARD

void memguard_init(void)
{
    map_pages_to_xen(
        PAGE_OFFSET, 0, xenheap_phys_end >> PAGE_SHIFT,
        __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
}

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;
    unsigned long flags = __PAGE_HYPERVISOR | MAP_SMALL_PAGES;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    if ( guard )
        flags &= ~_PAGE_PRESENT;

    map_pages_to_xen(
        _p, virt_to_maddr(p) >> PAGE_SHIFT, _l >> PAGE_SHIFT, flags);
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif

void memguard_guard_stack(void *p)
{
    BUILD_BUG_ON((DEBUG_STACK_SIZE + PAGE_SIZE) > STACK_SIZE);
    p = (void *)((unsigned long)p + STACK_SIZE -
                 DEBUG_STACK_SIZE - PAGE_SIZE);
    memguard_guard_range(p, PAGE_SIZE);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */