/******************************************************************************
 * memory.c
 *
 * Code to handle memory-related requests.
 *
 * Copyright (c) 2003-2004, B Dragovic
 * Copyright (c) 2003-2005, K A Fraser
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/paging.h>
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/errno.h>
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <asm/current.h>
#include <asm/hardirq.h>
#include <asm/p2m.h>
#include <xen/numa.h>
#include <public/memory.h>
#include <xsm/xsm.h>
#include <xen/trace.h>

struct memop_args {
    /* INPUT */
    struct domain *domain;     /* Domain to be affected. */
    XEN_GUEST_HANDLE(xen_pfn_t) extent_list; /* List of extent base addrs. */
    unsigned int nr_extents;   /* Number of extents to allocate or free. */
    unsigned int extent_order; /* Size of each extent. */
    unsigned int memflags;     /* Allocation flags. */

    /* INPUT/OUTPUT */
    unsigned int nr_done;    /* Number of extents processed so far. */
    int          preempted;  /* Was the hypercall preempted? */
};

/*
 * XENMEM_increase_reservation: allocate a->nr_extents extents of order
 * a->extent_order for the target domain and, if the caller supplied an
 * extent list, report each new extent's starting MFN back through it.
 */
static void increase_reservation(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i;
    xen_pfn_t mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_is_null(a->extent_list) &&
         !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( !multipage_allocation_permitted(current->domain, a->extent_order) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        page = alloc_domheap_pages(d, a->extent_order, a->memflags);
        if ( unlikely(page == NULL) )
        {
            gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
                     "id=%d memflags=%x (%ld of %d)\n",
                     a->extent_order, d->domain_id, a->memflags,
                     i, a->nr_extents);
            goto out;
        }

        /* Inform the domain of the new page's machine address. */
        if ( !guest_handle_is_null(a->extent_list) )
        {
            mfn = page_to_mfn(page);
            if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
                goto out;
        }
    }

 out:
    a->nr_done = i;
}

/*
 * XENMEM_populate_physmap: allocate extents (or mark them as
 * populate-on-demand) and install them in the target domain's physmap at
 * the GPFNs supplied in the extent list.
 */
static void populate_physmap(struct memop_args *a)
{
    struct page_info *page;
    unsigned long i, j;
    xen_pfn_t gpfn, mfn;
    struct domain *d = a->domain;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) )
        return;

    if ( a->memflags & MEMF_populate_on_demand ? a->extent_order > MAX_ORDER :
         !multipage_allocation_permitted(current->domain, a->extent_order) )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
            goto out;

        if ( a->memflags & MEMF_populate_on_demand )
        {
            if ( guest_physmap_mark_populate_on_demand(d, gpfn,
                                                       a->extent_order) < 0 )
                goto out;
        }
        else
        {
            page = alloc_domheap_pages(d, a->extent_order, a->memflags);
            if ( unlikely(page == NULL) )
            {
                if ( !opt_tmem || (a->extent_order != 0) )
                    gdprintk(XENLOG_INFO, "Could not allocate order=%d extent:"
                             " id=%d memflags=%x (%ld of %d)\n",
                             a->extent_order, d->domain_id, a->memflags,
                             i, a->nr_extents);
                goto out;
            }

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( j = 0; j < (1 << a->extent_order); j++ )
                    set_gpfn_from_mfn(mfn + j, gpfn + j);

                /* Inform the domain of the new page's machine address. */
                if ( unlikely(__copy_to_guest_offset(a->extent_list, i,
                                                     &mfn, 1)) )
                    goto out;
            }
        }
    }

 out:
    a->nr_done = i;
}
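/*
 * Remove the page at GFN 'gmfn' from domain 'd': drop its physmap entry
 * and release the allocation/type references so the page can be freed.
 * Returns 1 on success, 0 on failure.
 */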
int guest_remove_page(struct domain *d, unsigned long gmfn)
{
    struct page_info *page;
#ifdef CONFIG_X86
    p2m_type_t p2mt;
#endif
    unsigned long mfn;

#ifdef CONFIG_X86
    mfn = mfn_x(get_gfn_query(d, gmfn, &p2mt));
    if ( unlikely(p2m_is_paging(p2mt)) )
    {
        guest_physmap_remove_page(d, gmfn, mfn, 0);
        put_gfn(d, gmfn);
        /* If the page hasn't yet been paged out, there is an
         * actual page that needs to be released. */
        if ( p2mt == p2m_ram_paging_out )
        {
            ASSERT(mfn_valid(mfn));
            page = mfn_to_page(mfn);
            if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
                put_page(page);
        }
        p2m_mem_paging_drop_page(d, gmfn, p2mt);
        return 1;
    }
#else
    mfn = gmfn_to_mfn(d, gmfn);
#endif
    if ( unlikely(!mfn_valid(mfn)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Domain %u page number %lx invalid\n",
                 d->domain_id, gmfn);
        return 0;
    }

#ifdef CONFIG_X86
    if ( p2m_is_shared(p2mt) )
    {
        /* Unshare the page, bail out on error. We unshare because
         * we might be the only one using this shared page, and we
         * need to trigger proper cleanup. Once done, this is
         * like any other page. */
        if ( mem_sharing_unshare_page(d, gmfn, 0) )
        {
            put_gfn(d, gmfn);
            (void)mem_sharing_notify_enomem(d, gmfn, 0);
            return 0;
        }
        /* Maybe the mfn changed */
        mfn = mfn_x(get_gfn_query_unlocked(d, gmfn, &p2mt));
        ASSERT(!p2m_is_shared(p2mt));
    }
#endif /* CONFIG_X86 */

    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
    {
        put_gfn(d, gmfn);
        gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
        return 0;
    }

    if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
        put_page_and_type(page);

    if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
        put_page(page);

    guest_physmap_remove_page(d, gmfn, mfn, 0);

    put_page(page);
    put_gfn(d, gmfn);

    return 1;
}

/*
 * XENMEM_decrease_reservation: release the extents named in the guest's
 * extent list, giving populate-on-demand a chance to reclaim each range
 * before falling back to guest_remove_page().
 */
static void decrease_reservation(struct memop_args *a)
{
    unsigned long i, j;
    xen_pfn_t gmfn;

    if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
                                     a->nr_extents-1) ||
         a->extent_order > MAX_ORDER )
        return;

    for ( i = a->nr_done; i < a->nr_extents; i++ )
    {
        if ( hypercall_preempt_check() )
        {
            a->preempted = 1;
            goto out;
        }

        if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
            goto out;

        if ( tb_init_done )
        {
            struct {
                u64 gfn;
                int d:16,order:16;
            } t;

            t.gfn = gmfn;
            t.d = a->domain->domain_id;
            t.order = a->extent_order;

            __trace_var(TRC_MEM_DECREASE_RESERVATION, 0, sizeof(t), &t);
        }

        /* See if populate-on-demand wants to handle this */
        if ( is_hvm_domain(a->domain)
             && p2m_pod_decrease_reservation(a->domain, gmfn, a->extent_order) )
            continue;

        for ( j = 0; j < (1 << a->extent_order); j++ )
            if ( !guest_remove_page(a->domain, gmfn + j) )
                goto out;
    }

 out:
    a->nr_done = i;
}
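/*
 * XENMEM_exchange: trade the caller-supplied input extents for freshly
 * allocated output extents of a (possibly different) order, keeping the
 * total amount of memory unchanged. The work is done chunk by chunk so
 * the hypercall can be preempted and continued.
 */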
static long memory_exchange(XEN_GUEST_HANDLE_PARAM(xen_memory_exchange_t) arg)
{
    struct xen_memory_exchange exch;
    PAGE_LIST_HEAD(in_chunk_list);
    PAGE_LIST_HEAD(out_chunk_list);
    unsigned long in_chunk_order, out_chunk_order;
    xen_pfn_t     gpfn, gmfn, mfn;
    unsigned long i, j, k = 0; /* gcc ... */
    unsigned int  memflags = 0;
    long          rc = 0;
    struct domain *d;
    struct page_info *page;

    if ( copy_from_guest(&exch, arg, 1) )
        return -EFAULT;

    /* Various sanity checks. */
    if ( (exch.nr_exchanged > exch.in.nr_extents) ||
         /* Input and output domain identifiers match? */
         (exch.in.domid != exch.out.domid) ||
         /* Extent orders are sensible? */
         (exch.in.extent_order > MAX_ORDER) ||
         (exch.out.extent_order > MAX_ORDER) ||
         /* Sizes of input and output lists do not overflow a long? */
         ((~0UL >> exch.in.extent_order) < exch.in.nr_extents) ||
         ((~0UL >> exch.out.extent_order) < exch.out.nr_extents) ||
         /* Sizes of input and output lists match? */
         ((exch.in.nr_extents << exch.in.extent_order) !=
          (exch.out.nr_extents << exch.out.extent_order)) )
    {
        rc = -EINVAL;
        goto fail_early;
    }

    if ( !guest_handle_okay(exch.in.extent_start, exch.in.nr_extents) ||
         !guest_handle_okay(exch.out.extent_start, exch.out.nr_extents) )
    {
        rc = -EFAULT;
        goto fail_early;
    }

    /* Only privileged guests can allocate multi-page contiguous extents. */
    if ( !multipage_allocation_permitted(current->domain,
                                         exch.in.extent_order) ||
         !multipage_allocation_permitted(current->domain,
                                         exch.out.extent_order) )
    {
        rc = -EPERM;
        goto fail_early;
    }

    if ( exch.in.extent_order <= exch.out.extent_order )
    {
        in_chunk_order  = exch.out.extent_order - exch.in.extent_order;
        out_chunk_order = 0;
    }
    else
    {
        in_chunk_order  = 0;
        out_chunk_order = exch.in.extent_order - exch.out.extent_order;
    }

    d = rcu_lock_domain_by_any_id(exch.in.domid);
    if ( d == NULL )
    {
        rc = -ESRCH;
        goto fail_early;
    }

    rc = xsm_memory_exchange(XSM_TARGET, d);
    if ( rc )
    {
        rcu_unlock_domain(d);
        goto fail_early;
    }

    memflags |= MEMF_bits(domain_clamp_alloc_bitsize(
        d,
        XENMEMF_get_address_bits(exch.out.mem_flags) ? :
        (BITS_PER_LONG+PAGE_SHIFT)));
    memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags));

    for ( i = (exch.nr_exchanged >> in_chunk_order);
          i < (exch.in.nr_extents >> in_chunk_order);
          i++ )
    {
        if ( hypercall_preempt_check() )
        {
            exch.nr_exchanged = i << in_chunk_order;
            rcu_unlock_domain(d);
            if ( __copy_field_to_guest(arg, &exch, nr_exchanged) )
                return -EFAULT;
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh", XENMEM_exchange, arg);
        }

        /* Steal a chunk's worth of input pages from the domain. */
        for ( j = 0; j < (1UL << in_chunk_order); j++ )
        {
            if ( unlikely(__copy_from_guest_offset(
                &gmfn, exch.in.extent_start, (i<<in_chunk_order)+j, 1)) )
            {
                rc = -EFAULT;
                goto fail;
            }

            for ( k = 0; k < (1UL << exch.in.extent_order); k++ )
            {
#ifdef CONFIG_X86
                p2m_type_t p2mt;

                /* Shared pages cannot be exchanged. */
                mfn = mfn_x(get_gfn_unshare(d, gmfn + k, &p2mt));
                if ( p2m_is_shared(p2mt) )
                {
                    put_gfn(d, gmfn + k);
                    rc = -ENOMEM;
                    goto fail;
                }
#else /* !CONFIG_X86 */
                mfn = gmfn_to_mfn(d, gmfn + k);
#endif
                if ( unlikely(!mfn_valid(mfn)) )
                {
                    put_gfn(d, gmfn + k);
                    rc = -EINVAL;
                    goto fail;
                }

                page = mfn_to_page(mfn);

                if ( unlikely(steal_page(d, page, MEMF_no_refcount)) )
                {
                    put_gfn(d, gmfn + k);
                    rc = -EINVAL;
                    goto fail;
                }

                page_list_add(page, &in_chunk_list);
                put_gfn(d, gmfn + k);
            }
        }

        /* Allocate a chunk's worth of anonymous output pages. */
        for ( j = 0; j < (1UL << out_chunk_order); j++ )
        {
            page = alloc_domheap_pages(d, exch.out.extent_order, memflags);
            if ( unlikely(page == NULL) )
            {
                rc = -ENOMEM;
                goto fail;
            }

            page_list_add(page, &out_chunk_list);
        }

        /*
         * Success! Beyond this point we cannot fail for this chunk.
         */

        /* Destroy final reference to each input page. */
        while ( (page = page_list_remove_head(&in_chunk_list)) )
        {
            unsigned long gfn;

            if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
                BUG();
            mfn = page_to_mfn(page);
            gfn = mfn_to_gmfn(d, mfn);
            /* Pages were unshared above */
            BUG_ON(SHARED_M2P(gfn));
            guest_physmap_remove_page(d, gfn, mfn, 0);
            put_page(page);
        }

        /* Assign each output page to the domain. */
        for ( j = 0; (page = page_list_remove_head(&out_chunk_list)); ++j )
        {
            if ( assign_pages(d, page, exch.out.extent_order,
                              MEMF_no_refcount) )
            {
                unsigned long dec_count;
                bool_t drop_dom_ref;

                /*
                 * Pages in in_chunk_list are stolen without decreasing
                 * tot_pages. If the domain is dying when we assign the
                 * pages, we must decrease the count here. Pages that have
                 * already been assigned are covered by
                 * domain_relinquish_resources().
                 */
                dec_count = (((1UL << exch.in.extent_order) *
                              (1UL << in_chunk_order)) -
                             (j * (1UL << exch.out.extent_order)));

                spin_lock(&d->page_alloc_lock);
                domain_adjust_tot_pages(d, -dec_count);
                drop_dom_ref = (dec_count && !d->tot_pages);
                spin_unlock(&d->page_alloc_lock);

                if ( drop_dom_ref )
                    put_domain(d);

                free_domheap_pages(page, exch.out.extent_order);
                goto dying;
            }

            if ( __copy_from_guest_offset(&gpfn, exch.out.extent_start,
                                          (i << out_chunk_order) + j, 1) )
            {
                rc = -EFAULT;
                continue;
            }

            mfn = page_to_mfn(page);
            guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);

            if ( !paging_mode_translate(d) )
            {
                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
                    set_gpfn_from_mfn(mfn + k, gpfn + k);
                if ( __copy_to_guest_offset(exch.out.extent_start,
                                            (i << out_chunk_order) + j,
                                            &mfn, 1) )
                    rc = -EFAULT;
            }
        }
        BUG_ON( !(d->is_dying) && (j != (1UL << out_chunk_order)) );
    }

    exch.nr_exchanged = exch.in.nr_extents;
    if ( __copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    rcu_unlock_domain(d);
    return rc;

    /*
     * Failed a chunk! Free any partial chunk work. Tell caller how many
     * chunks succeeded.
     */
 fail:
    /* Reassign any input pages we managed to steal. */
    while ( (page = page_list_remove_head(&in_chunk_list)) )
    {
        put_gfn(d, gmfn + k--);
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
    }

 dying:
    rcu_unlock_domain(d);
    /* Free any output pages we managed to allocate. */
    while ( (page = page_list_remove_head(&out_chunk_list)) )
        free_domheap_pages(page, exch.out.extent_order);

    exch.nr_exchanged = i << in_chunk_order;

 fail_early:
    if ( __copy_field_to_guest(arg, &exch, nr_exchanged) )
        rc = -EFAULT;
    return rc;
}
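/*
 * Top-level handler for the XENMEM_* hypercalls. The low bits of 'cmd'
 * select the operation; for the reservation operations the remaining bits
 * carry the continuation point (start extent).
 */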
long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct domain *d;
    long rc;
    unsigned int address_bits;
    unsigned long start_extent;
    struct xen_memory_reservation reservation;
    struct memop_args args;
    domid_t domid;
    int op = cmd & MEMOP_CMD_MASK;

    switch ( op )
    {
    case XENMEM_increase_reservation:
    case XENMEM_decrease_reservation:
    case XENMEM_populate_physmap:
        start_extent = cmd >> MEMOP_EXTENT_SHIFT;

        if ( copy_from_guest(&reservation, arg, 1) )
            return start_extent;

        /* Is size too large for us to encode a continuation? */
        if ( reservation.nr_extents > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
            return start_extent;

        if ( unlikely(start_extent >= reservation.nr_extents) )
            return start_extent;

        args.extent_list  = reservation.extent_start;
        args.nr_extents   = reservation.nr_extents;
        args.extent_order = reservation.extent_order;
        args.nr_done      = start_extent;
        args.preempted    = 0;
        args.memflags     = 0;

        address_bits = XENMEMF_get_address_bits(reservation.mem_flags);
        if ( (address_bits != 0) &&
             (address_bits < (get_order_from_pages(max_page) + PAGE_SHIFT)) )
        {
            if ( address_bits <= PAGE_SHIFT )
                return start_extent;
            args.memflags = MEMF_bits(address_bits);
        }

        args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
        if ( reservation.mem_flags & XENMEMF_exact_node_request )
            args.memflags |= MEMF_exact_node;

        if ( op == XENMEM_populate_physmap
             && (reservation.mem_flags & XENMEMF_populate_on_demand) )
            args.memflags |= MEMF_populate_on_demand;

        d = rcu_lock_domain_by_any_id(reservation.domid);
        if ( d == NULL )
            return start_extent;
        args.domain = d;

        rc = xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_increase_reservation:
            increase_reservation(&args);
            break;
        case XENMEM_decrease_reservation:
            decrease_reservation(&args);
            break;
        default: /* XENMEM_populate_physmap */
            populate_physmap(&args);
            break;
        }

        rcu_unlock_domain(d);

        rc = args.nr_done;

        if ( args.preempted )
            return hypercall_create_continuation(
                __HYPERVISOR_memory_op, "lh",
                op | (rc << MEMOP_EXTENT_SHIFT), arg);

        break;

    case XENMEM_exchange:
        rc = memory_exchange(guest_handle_cast(arg, xen_memory_exchange_t));
        break;

    case XENMEM_maximum_ram_page:
        rc = max_page;
        break;

    case XENMEM_current_reservation:
    case XENMEM_maximum_reservation:
    case XENMEM_maximum_gpfn:
        if ( copy_from_guest(&domid, arg, 1) )
            return -EFAULT;

        d = rcu_lock_domain_by_any_id(domid);
        if ( d == NULL )
            return -ESRCH;

        rc = xsm_memory_stat_reservation(XSM_TARGET, current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        switch ( op )
        {
        case XENMEM_current_reservation:
            rc = d->tot_pages;
            break;
        case XENMEM_maximum_reservation:
            rc = d->max_pages;
            break;
        default:
            ASSERT(op == XENMEM_maximum_gpfn);
            rc = domain_get_maximum_gpfn(d);
            break;
        }

        rcu_unlock_domain(d);

        break;

    case XENMEM_remove_from_physmap:
    {
        struct xen_remove_from_physmap xrfp;
        struct page_info *page;
        struct domain *d;

        if ( copy_from_guest(&xrfp, arg, 1) )
            return -EFAULT;

        d = rcu_lock_domain_by_any_id(xrfp.domid);
        if ( d == NULL )
            return -ESRCH;

        rc = xsm_remove_from_physmap(XSM_TARGET, current->domain, d);
        if ( rc )
        {
            rcu_unlock_domain(d);
            return rc;
        }

        page = get_page_from_gfn(d, xrfp.gpfn, NULL, P2M_ALLOC);
        if ( page )
        {
            guest_physmap_remove_page(d, xrfp.gpfn, page_to_mfn(page), 0);
            put_page(page);
        }
        else
            rc = -ENOENT;

        rcu_unlock_domain(d);

        break;
    }

    case XENMEM_claim_pages:
        if ( copy_from_guest(&reservation, arg, 1) )
            return -EFAULT;

        if ( !guest_handle_is_null(reservation.extent_start) )
            return -EINVAL;

        if ( reservation.extent_order != 0 )
            return -EINVAL;

        if ( reservation.mem_flags != 0 )
            return -EINVAL;

        d = rcu_lock_domain_by_id(reservation.domid);
        if ( d == NULL )
            return -EINVAL;

        rc = xsm_claim_pages(XSM_PRIV, d);

        if ( !rc )
            rc = domain_set_outstanding_pages(d, reservation.nr_extents);

        rcu_unlock_domain(d);

        break;

    default:
        rc = arch_memory_op(op, arg);
        break;
    }

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */