| author | kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> | 2005-06-23 10:14:23 +0000 |
|---|---|---|
| committer | kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk> | 2005-06-23 10:14:23 +0000 |
| commit | c3f032a16658db9e84f787a15b7e8e2180843a7a (patch) | |
| tree | 12c671089771e1cd94e1a78d3ad15465ce1ad8e8 | |
| parent | 5ce68b82c6e12f09d361dc9d3bc3cc965ce6d1cd (diff) | |
| parent | 60ca0a28155157a77bb87db6858b42e9e95757c4 (diff) | |
| download | xen-c3f032a16658db9e84f787a15b7e8e2180843a7a.tar.gz xen-c3f032a16658db9e84f787a15b7e8e2180843a7a.tar.bz2 xen-c3f032a16658db9e84f787a15b7e8e2180843a7a.zip | |
bitkeeper revision 1.1742 (42ba8b7fiNx5Cu0J91l9HCIo_TwvXQ)
Merge firebug.cl.cam.ac.uk:/local/scratch/kaf24/xen-unstable.bk
into firebug.cl.cam.ac.uk:/local/scratch/kaf24/xeno-unstable-ia64.bk
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | .rootkeys | 5 |
| -rw-r--r-- | xen/arch/ia64/Makefile | 2 |
| -rw-r--r-- | xen/arch/ia64/asm-offsets.c | 8 |
| -rw-r--r-- | xen/arch/ia64/dom0_ops.c | 157 |
| -rw-r--r-- | xen/arch/ia64/domain.c | 2 |
| -rw-r--r-- | xen/arch/ia64/grant_table.c | 1291 |
| -rw-r--r-- | xen/arch/ia64/hypercall.c | 17 |
| -rw-r--r-- | xen/arch/ia64/hyperprivop.S | 507 |
| -rw-r--r-- | xen/arch/ia64/mm.c | 141 |
| -rw-r--r-- | xen/arch/ia64/patch/linux-2.6.11/uaccess.h | 27 |
| -rw-r--r-- | xen/arch/ia64/privop.c | 22 |
| -rw-r--r-- | xen/arch/ia64/regionreg.c | 39 |
| -rw-r--r-- | xen/arch/ia64/vmmu.c | 52 |
| -rw-r--r-- | xen/arch/ia64/vmx_hypercall.c | 186 |
| -rw-r--r-- | xen/arch/ia64/xenmisc.c | 31 |
| -rw-r--r-- | xen/include/asm-ia64/config.h | 3 |
| -rw-r--r-- | xen/include/asm-ia64/domain.h | 5 |
| -rw-r--r-- | xen/include/asm-ia64/event.h | 1 |
| -rw-r--r-- | xen/include/asm-ia64/mm.h | 30 |
| -rw-r--r-- | xen/include/asm-ia64/shadow.h | 1 |
| -rw-r--r-- | xen/include/asm-ia64/vmx_uaccess.h | 156 |
| -rw-r--r-- | xen/include/asm-ia64/xensystem.h | 32 |
| -rw-r--r-- | xen/include/public/arch-ia64.h | 26 |
23 files changed, 2677 insertions, 64 deletions
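The bulk of this merge is an ia64 port of the grant-table mechanism (the new xen/arch/ia64/grant_table.c) plus its hook-up in the hypercall dispatcher, which passes the command, argument pointer, and count through r14–r16 into do_grant_table_op() and returns the result in r8. As orientation before the patch body, here is a minimal guest-side sketch; the HYPERVISOR_grant_table_op wrapper is an assumption (it is a guest-kernel helper, not part of this patch), while GNTTABOP_setup_table, DOMID_SELF, GNTST_okay, and the gnttab_setup_table_t fields are the ones the new code below actually handles:

```c
/* Hypothetical guest-side call into the hypercall path added below.
 * On ia64 it reaches do_grant_table_op(cmd=r14, uop=r15, count=r16). */
unsigned long frames[1];
gnttab_setup_table_t setup = {
    .dom        = DOMID_SELF,   /* special-cased in gnttab_setup_table() */
    .nr_frames  = 1,            /* must not exceed NR_GRANT_FRAMES       */
    .frame_list = frames,       /* filled in with machine frame numbers  */
};

long rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
if (rc == 0 && setup.status == GNTST_okay) {
    /* frames[0] can now be mapped and populated with grant_entry_t
     * records that other domains map via GNTTABOP_map_grant_ref. */
}
```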
diff --git a/.rootkeys b/.rootkeys
--- a/.rootkeys
+++ b/.rootkeys
@@ -1132,6 +1132,7 @@
 421098b2PHgzf_Gg4R65YRNi_QzMKQ xen/arch/ia64/dom0_ops.c
 421098b2O7jsNfzQXA1v3rbAc1QhpA xen/arch/ia64/dom_fw.c
 421098b2ZlaBcyiuuPr3WpzaSDwg6Q xen/arch/ia64/domain.c
+42b33bb9GLR-tzcaHalk8fz9cgK0aA xen/arch/ia64/grant_table.c
 42a08294zRikvZk_CR1iVojHjcVFZw xen/arch/ia64/hpsimserial.c
 4239e98a_HX-FCIcXtVqY0BbrDqVug xen/arch/ia64/hypercall.c
 4295e18f42gf1T-8W97A3KSlBaY1tA xen/arch/ia64/hyperprivop.S
@@ -1139,6 +1140,7 @@
 421098b3ys5GAr4z6_H1jD33oem82g xen/arch/ia64/irq.c
 4272a8e4lavI6DrTvqaIhXeR5RuKBw xen/arch/ia64/ivt.S
 421098b3Heh72KuoVlND3CH6c0B0aA xen/arch/ia64/lib/Makefile
+42b2eaeez20voHWlBDjrqORiNg6uhg xen/arch/ia64/mm.c
 421098b3O0MYMUsmYVFy84VV_1gFwQ xen/arch/ia64/mm_init.c
 428b9f38Gp0KcPokG9Nq5v1rGk2FkA xen/arch/ia64/mmio.c
 425ae516maKAsHBJVSzs19cdRgt3Nw xen/arch/ia64/patch/linux-2.6.11/cpumask.h
@@ -1237,6 +1239,7 @@
 428b9f38PglyXM-mJJfo19ycuQrEhw xen/arch/ia64/vlsapic.c
 428b9f38EmpBsMHL3WbOZoieteBGdQ xen/arch/ia64/vmmu.c
 428b9f38hU-X5aX0MIY3EU0Yw4PjcA xen/arch/ia64/vmx_entry.S
+42b2eaf3YR7Sfx76IvKeqfHJiU6qXw xen/arch/ia64/vmx_hypercall.c
 428b9f38S76bWI96g7uPLmE-uAcmdg xen/arch/ia64/vmx_init.c
 428b9f385AMSyCRYBsckQClQY4ZgHA xen/arch/ia64/vmx_interrupt.c
 428b9f380IOjPmj0N6eelH-WJjl1xg xen/arch/ia64/vmx_ivt.S
@@ -1405,6 +1408,7 @@
 421098b7Z6OwjZnrTZkh34DoDfcjrA xen/include/asm-ia64/regionreg.h
 421098b707cY5YluUcWK5Pc-71ETVw xen/include/asm-ia64/regs.h
 4214e2f3fbO_n9Z1kIcBR83d7W4OJw xen/include/asm-ia64/serial.h
+42b89683nnFDAElJewfm2JxG-pv1BQ xen/include/asm-ia64/shadow.h
 429fb3bc53qJOyKJCBfhDNmTasj8Gw xen/include/asm-ia64/slab.h
 421098b7GkWOnlzSmPvNAhByOSZ1Dw xen/include/asm-ia64/time.h
 421098b7FK3xgShpnH0I0Ou3O4fJ2Q xen/include/asm-ia64/tlb.h
@@ -1418,6 +1422,7 @@
 428b9f38is0zTsIm96_BKo4MLw0SzQ xen/include/asm-ia64/vmx_pal_vsa.h
 428b9f38iDqbugHUheJrcTCD7zlb4g xen/include/asm-ia64/vmx_phy_mode.h
 428b9f38grd_B0AGB1yp0Gi2befHaQ xen/include/asm-ia64/vmx_platform.h
+42b8e0d63B41CDo2Nqmf8Vt0_RercA xen/include/asm-ia64/vmx_uaccess.h
 428b9f38XgwHchZEpOzRtWfz0agFNQ xen/include/asm-ia64/vmx_vcpu.h
 428b9f38tDTTJbkoONcAB9ODP8CiVg xen/include/asm-ia64/vmx_vpd.h
 428b9f38_o0U5uJqmxZf_bqi6_PqVw xen/include/asm-ia64/vtm.h
diff --git a/xen/arch/ia64/Makefile b/xen/arch/ia64/Makefile
index 03f56326fb..d2be0994ab 100644
--- a/xen/arch/ia64/Makefile
+++ b/xen/arch/ia64/Makefile
@@ -10,7 +10,7 @@ OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \
 	extable.o linuxextable.o xenirq.o xentime.o \
 	regionreg.o entry.o unaligned.o privop.o vcpu.o \
 	irq_ia64.o irq_lsapic.o vhpt.o xenasm.o hyperprivop.o dom_fw.o \
-	sn_console.o
+	grant_table.o sn_console.o
 
 ifeq ($(CONFIG_VTI),y)
 OBJS += vmx_init.o vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o \
diff --git a/xen/arch/ia64/asm-offsets.c b/xen/arch/ia64/asm-offsets.c
index 4b019209d5..ca8daa2f87 100644
--- a/xen/arch/ia64/asm-offsets.c
+++ b/xen/arch/ia64/asm-offsets.c
@@ -54,11 +54,14 @@ void foo(void)
 	DEFINE(XSI_BANKNUM_OFS, offsetof(vcpu_info_t, arch.banknum));
 	DEFINE(XSI_BANK0_OFS, offsetof(vcpu_info_t, arch.bank0_regs[0]));
 	DEFINE(XSI_BANK1_OFS, offsetof(vcpu_info_t, arch.bank1_regs[0]));
+	DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
 	DEFINE(XSI_METAPHYS_OFS, offsetof(vcpu_info_t, arch.metaphysical_mode));
 	DEFINE(XSI_PRECOVER_IFS_OFS, offsetof(vcpu_info_t, arch.precover_ifs));
 	DEFINE(XSI_INCOMPL_REG_OFS, offsetof(vcpu_info_t, arch.incomplete_regframe));
 	DEFINE(XSI_PEND_OFS, offsetof(vcpu_info_t, arch.pending_interruption));
 	DEFINE(XSI_RR0_OFS, offsetof(vcpu_info_t, arch.rrs[0]));
+
DEFINE(XSI_TPR_OFS, offsetof(vcpu_info_t, arch.tpr)); + DEFINE(XSI_ITV_OFS, offsetof(vcpu_info_t, arch.itv)); //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); //DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); @@ -78,8 +81,13 @@ void foo(void) DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0])); DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3])); DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3])); + DEFINE(IA64_VCPU_STARTING_RID_OFFSET, offsetof (struct vcpu, arch.starting_rid)); + DEFINE(IA64_VCPU_ENDING_RID_OFFSET, offsetof (struct vcpu, arch.ending_rid)); + DEFINE(IA64_VCPU_DOMAIN_ITM_OFFSET, offsetof (struct vcpu, arch.domain_itm)); + DEFINE(IA64_VCPU_DOMAIN_ITM_LAST_OFFSET, offsetof (struct vcpu, arch.domain_itm_last)); BLANK(); + DEFINE(IA64_CPUINFO_ITM_NEXT_OFFSET, offsetof (struct cpuinfo_ia64, itm_next)); //DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock)); diff --git a/xen/arch/ia64/dom0_ops.c b/xen/arch/ia64/dom0_ops.c index c1b1d5c241..10bbd60957 100644 --- a/xen/arch/ia64/dom0_ops.c +++ b/xen/arch/ia64/dom0_ops.c @@ -27,6 +27,128 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) switch ( op->cmd ) { + case DOM0_GETPAGEFRAMEINFO: + { + struct pfn_info *page; + unsigned long pfn = op->u.getpageframeinfo.pfn; + domid_t dom = op->u.getpageframeinfo.domain; + struct domain *d; + + ret = -EINVAL; + + if ( unlikely(pfn >= max_page) || + unlikely((d = find_domain_by_id(dom)) == NULL) ) + break; + + page = &frame_table[pfn]; + + if ( likely(get_page(page, d)) ) + { + ret = 0; + + op->u.getpageframeinfo.type = NOTAB; + + if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) + { + switch ( page->u.inuse.type_info & PGT_type_mask ) + { + default: + panic("No such page type\n"); + break; + } + } + + put_page(page); + } + + put_domain(d); + + copy_to_user(u_dom0_op, op, sizeof(*op)); + } + break; + + case DOM0_GETPAGEFRAMEINFO2: + { +#define GPF2_BATCH 128 + int n,j; + int num = op->u.getpageframeinfo2.num; + domid_t dom = op->u.getpageframeinfo2.domain; + unsigned long *s_ptr = (unsigned long*) op->u.getpageframeinfo2.array; + struct domain *d; + unsigned long *l_arr; + ret = -ESRCH; + + if ( unlikely((d = find_domain_by_id(dom)) == NULL) ) + break; + + if ( unlikely(num > 1024) ) + { + ret = -E2BIG; + break; + } + + l_arr = (unsigned long *)alloc_xenheap_page(); + + ret = 0; + for( n = 0; n < num; ) + { + int k = ((num-n)>GPF2_BATCH)?GPF2_BATCH:(num-n); + + if ( copy_from_user(l_arr, &s_ptr[n], k*sizeof(unsigned long)) ) + { + ret = -EINVAL; + break; + } + + for( j = 0; j < k; j++ ) + { + struct pfn_info *page; + unsigned long mfn = l_arr[j]; + + if ( unlikely(mfn >= max_page) ) + goto e2_err; + + page = &frame_table[mfn]; + + if ( likely(get_page(page, d)) ) + { + unsigned long type = 0; + + switch( page->u.inuse.type_info & PGT_type_mask ) + { + default: + panic("No such page type\n"); + break; + } + + if ( page->u.inuse.type_info & PGT_pinned ) + type |= LPINTAB; + l_arr[j] |= type; + put_page(page); + } + else + { + e2_err: + l_arr[j] |= XTAB; + } + + } + + if ( copy_to_user(&s_ptr[n], l_arr, k*sizeof(unsigned long)) ) + { + ret = -EINVAL; + break; + } + + n += j; + } + + free_xenheap_page((unsigned long)l_arr); + + put_domain(d); + } + break; +#ifndef CONFIG_VTI /* * NOTE: DOM0_GETMEMLIST has somewhat different semantics on IA64 - * it actually 
allocates and maps pages. @@ -70,7 +192,42 @@ long arch_do_dom0_op(dom0_op_t *op, dom0_op_t *u_dom0_op) } } break; +#else + case DOM0_GETMEMLIST: + { + int i; + struct domain *d = find_domain_by_id(op->u.getmemlist.domain); + unsigned long max_pfns = op->u.getmemlist.max_pfns; + unsigned long pfn; + unsigned long *buffer = op->u.getmemlist.buffer; + struct list_head *list_ent; + + ret = -EINVAL; + if (!d) { + ret = 0; + + spin_lock(&d->page_alloc_lock); + list_ent = d->page_list.next; + for (i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++) { + pfn = list_entry(list_ent, struct pfn_info, list) - + frame_table; + if (put_user(pfn, buffer)) { + ret = -EFAULT; + break; + } + buffer++; + list_ent = frame_table[pfn].list.next; + } + spin_unlock(&d->page_alloc_lock); + op->u.getmemlist.num_pfns = i; + copy_to_user(u_dom0_op, op, sizeof(*op)); + + put_domain(d); + } + } + break; +#endif // CONFIG_VTI default: ret = -ENOSYS; diff --git a/xen/arch/ia64/domain.c b/xen/arch/ia64/domain.c index 40a38b2e07..6903c3532a 100644 --- a/xen/arch/ia64/domain.c +++ b/xen/arch/ia64/domain.c @@ -258,6 +258,8 @@ void arch_do_createdomain(struct vcpu *v) #define DOMAIN_RID_BITS_DEFAULT 18 if (!allocate_rid_range(d,DOMAIN_RID_BITS_DEFAULT)) // FIXME BUG(); + v->arch.starting_rid = d->arch.starting_rid; + v->arch.ending_rid = d->arch.ending_rid; // the following will eventually need to be negotiated dynamically d->xen_vastart = 0xf000000000000000; d->xen_vaend = 0xf300000000000000; diff --git a/xen/arch/ia64/grant_table.c b/xen/arch/ia64/grant_table.c new file mode 100644 index 0000000000..9f19152749 --- /dev/null +++ b/xen/arch/ia64/grant_table.c @@ -0,0 +1,1291 @@ +#ifndef CONFIG_VTI +// temporarily in arch/ia64 until can merge into common/grant_table.c +/****************************************************************************** + * common/grant_table.c + * + * Mechanism for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Copyright (c) 2005 Christopher Clark + * Copyright (c) 2004 K A Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GRANT_DEBUG 0 +#define GRANT_DEBUG_VERBOSE 0 + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/sched.h> +#include <xen/shadow.h> +#include <xen/mm.h> +#ifdef __ia64__ +#define __addr_ok(a) 1 // FIXME-ia64: a variant of access_ok?? +// FIXME-ia64: need to implement real cmpxchg_user on ia64 +//#define cmpxchg_user(_p,_o,_n) ((*_p == _o) ? ((*_p = _n), 0) : ((_o = *_p), 0)) +// FIXME-ia64: these belong in an asm/grant_table.h... PAGE_SIZE different +#undef ORDER_GRANT_FRAMES +//#undef NUM_GRANT_FRAMES +#define ORDER_GRANT_FRAMES 0 +//#define NUM_GRANT_FRAMES (1U << ORDER_GRANT_FRAMES) +#endif + +#define PIN_FAIL(_lbl, _rc, _f, _a...) 
\ + do { \ + DPRINTK( _f, ## _a ); \ + rc = (_rc); \ + goto _lbl; \ + } while ( 0 ) + +static inline int +get_maptrack_handle( + grant_table_t *t) +{ + unsigned int h; + if ( unlikely((h = t->maptrack_head) == t->maptrack_limit) ) + return -1; + t->maptrack_head = t->maptrack[h].ref_and_flags >> MAPTRACK_REF_SHIFT; + t->map_count++; + return h; +} + +static inline void +put_maptrack_handle( + grant_table_t *t, int handle) +{ + t->maptrack[handle].ref_and_flags = t->maptrack_head << MAPTRACK_REF_SHIFT; + t->maptrack_head = handle; + t->map_count--; +} + +static int +__gnttab_activate_grant_ref( + struct domain *mapping_d, /* IN */ + struct vcpu *mapping_ed, + struct domain *granting_d, + grant_ref_t ref, + u16 dev_hst_ro_flags, + unsigned long host_virt_addr, + unsigned long *pframe ) /* OUT */ +{ + domid_t sdom; + u16 sflags; + active_grant_entry_t *act; + grant_entry_t *sha; + s16 rc = 1; + unsigned long frame = 0; + int retries = 0; + + /* + * Objectives of this function: + * . Make the record ( granting_d, ref ) active, if not already. + * . Update shared grant entry of owner, indicating frame is mapped. + * . Increment the owner act->pin reference counts. + * . get_page on shared frame if new mapping. + * . get_page_type if this is first RW mapping of frame. + * . Add PTE to virtual address space of mapping_d, if necessary. + * Returns: + * . -ve: error + * . 1: ok + * . 0: ok and TLB invalidate of host_virt_addr needed. + * + * On success, *pframe contains mfn. + */ + + /* + * We bound the number of times we retry CMPXCHG on memory locations that + * we share with a guest OS. The reason is that the guest can modify that + * location at a higher rate than we can read-modify-CMPXCHG, so the guest + * could cause us to livelock. There are a few cases where it is valid for + * the guest to race our updates (e.g., to change the GTF_readonly flag), + * so we allow a few retries before failing. + */ + + act = &granting_d->grant_table->active[ref]; + sha = &granting_d->grant_table->shared[ref]; + + spin_lock(&granting_d->grant_table->lock); + + if ( act->pin == 0 ) + { + /* CASE 1: Activating a previously inactive entry. */ + + sflags = sha->flags; + sdom = sha->domid; + + for ( ; ; ) + { + u32 scombo, prev_scombo, new_scombo; + + if ( unlikely((sflags & GTF_type_mask) != GTF_permit_access) || + unlikely(sdom != mapping_d->domain_id) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", + sflags, sdom, mapping_d->domain_id); + + /* Merge two 16-bit values into a 32-bit combined update. */ + /* NB. Endianness! */ + prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + + new_scombo = scombo | GTF_reading; + if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) + { + new_scombo |= GTF_writing; + if ( unlikely(sflags & GTF_readonly) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Attempt to write-pin a r/o grant entry.\n"); + } + + /* NB. prev_scombo is updated in place to seen value. */ + if ( unlikely(cmpxchg_user((u32 *)&sha->flags, + prev_scombo, + new_scombo)) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Fault while modifying shared flags and domid.\n"); + + /* Did the combined update work (did we see what we expected?). */ + if ( likely(prev_scombo == scombo) ) + break; + + if ( retries++ == 4 ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Shared grant entry is unstable.\n"); + + /* Didn't see what we expected. Split out the seen flags & dom. */ + /* NB. Endianness! 
*/ + sflags = (u16)prev_scombo; + sdom = (u16)(prev_scombo >> 16); + } + + /* rmb(); */ /* not on x86 */ + + frame = __gpfn_to_mfn_foreign(granting_d, sha->frame); + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( unlikely(!pfn_valid(frame)) || + unlikely(!((dev_hst_ro_flags & GNTMAP_readonly) ? + get_page(&frame_table[frame], granting_d) : + get_page_and_type(&frame_table[frame], granting_d, + PGT_writable_page))) ) + { + clear_bit(_GTF_writing, &sha->flags); + clear_bit(_GTF_reading, &sha->flags); + PIN_FAIL(unlock_out, GNTST_general_error, + "Could not pin the granted frame (%lx)!\n", frame); + } +#endif + + if ( dev_hst_ro_flags & GNTMAP_device_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_devr_inc : GNTPIN_devw_inc; + if ( dev_hst_ro_flags & GNTMAP_host_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_hstr_inc : GNTPIN_hstw_inc; + act->domid = sdom; + act->frame = frame; + } + else + { + /* CASE 2: Active modications to an already active entry. */ + + /* + * A cheesy check for possible pin-count overflow. + * A more accurate check cannot be done with a single comparison. + */ + if ( (act->pin & 0x80808080U) != 0 ) + PIN_FAIL(unlock_out, ENOSPC, + "Risk of counter overflow %08x\n", act->pin); + + frame = act->frame; + + if ( !(dev_hst_ro_flags & GNTMAP_readonly) && + !((sflags = sha->flags) & GTF_writing) ) + { + for ( ; ; ) + { + u16 prev_sflags; + + if ( unlikely(sflags & GTF_readonly) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Attempt to write-pin a r/o grant entry.\n"); + + prev_sflags = sflags; + + /* NB. prev_sflags is updated in place to seen value. */ + if ( unlikely(cmpxchg_user(&sha->flags, prev_sflags, + prev_sflags | GTF_writing)) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Fault while modifying shared flags.\n"); + + if ( likely(prev_sflags == sflags) ) + break; + + if ( retries++ == 4 ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Shared grant entry is unstable.\n"); + + sflags = prev_sflags; + } + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( unlikely(!get_page_type(&frame_table[frame], + PGT_writable_page)) ) + { + clear_bit(_GTF_writing, &sha->flags); + PIN_FAIL(unlock_out, GNTST_general_error, + "Attempt to write-pin a unwritable page.\n"); + } +#endif + } + + if ( dev_hst_ro_flags & GNTMAP_device_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_devr_inc : GNTPIN_devw_inc; + + if ( dev_hst_ro_flags & GNTMAP_host_map ) + act->pin += (dev_hst_ro_flags & GNTMAP_readonly) ? + GNTPIN_hstr_inc : GNTPIN_hstw_inc; + } + + /* + * At this point: + * act->pin updated to reflect mapping. + * sha->flags updated to indicate to granting domain mapping done. + * frame contains the mfn. + */ + + spin_unlock(&granting_d->grant_table->lock); + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( (host_virt_addr != 0) && (dev_hst_ro_flags & GNTMAP_host_map) ) + { + /* Write update into the pagetable. */ + l1_pgentry_t pte; + pte = l1e_from_pfn(frame, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY); + if ( !(dev_hst_ro_flags & GNTMAP_readonly) ) + l1e_add_flags(pte,_PAGE_RW); + rc = update_grant_va_mapping( host_virt_addr, pte, + mapping_d, mapping_ed ); + + /* + * IMPORTANT: (rc == 0) => must flush / invalidate entry in TLB. + * This is done in the outer gnttab_map_grant_ref. + */ + + if ( rc < 0 ) + { + /* Failure: undo and abort. 
*/ + + spin_lock(&granting_d->grant_table->lock); + + if ( dev_hst_ro_flags & GNTMAP_readonly ) + { + act->pin -= GNTPIN_hstr_inc; + } + else + { + act->pin -= GNTPIN_hstw_inc; + if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) + { + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); + } + } + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &sha->flags); + put_page(&frame_table[frame]); + } + + spin_unlock(&granting_d->grant_table->lock); + } + + } +#endif + + *pframe = frame; + return rc; + + unlock_out: + spin_unlock(&granting_d->grant_table->lock); + return rc; +} + +/* + * Returns 0 if TLB flush / invalidate required by caller. + * va will indicate the address to be invalidated. + */ +static int +__gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop, + unsigned long *va) +{ + domid_t dom; + grant_ref_t ref; + struct domain *ld, *rd; + struct vcpu *led; + u16 dev_hst_ro_flags; + int handle; + unsigned long frame = 0, host_virt_addr; + int rc; + + led = current; + ld = led->domain; + + /* Bitwise-OR avoids short-circuiting which screws control flow. */ + if ( unlikely(__get_user(dom, &uop->dom) | + __get_user(ref, &uop->ref) | + __get_user(host_virt_addr, &uop->host_virt_addr) | + __get_user(dev_hst_ro_flags, &uop->flags)) ) + { + DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n"); + return -EFAULT; /* don't set status */ + } + + + if ( ((host_virt_addr != 0) || (dev_hst_ro_flags & GNTMAP_host_map)) && + unlikely(!__addr_ok(host_virt_addr))) + { + DPRINTK("Bad virtual address (%lx) or flags (%x).\n", + host_virt_addr, dev_hst_ro_flags); + (void)__put_user(GNTST_bad_virt_addr, &uop->handle); + return GNTST_bad_gntref; + } + + if ( unlikely(ref >= NR_GRANT_ENTRIES) || + unlikely((dev_hst_ro_flags & + (GNTMAP_device_map|GNTMAP_host_map)) == 0) ) + { + DPRINTK("Bad ref (%d) or flags (%x).\n", ref, dev_hst_ro_flags); + (void)__put_user(GNTST_bad_gntref, &uop->handle); + return GNTST_bad_gntref; + } + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + DPRINTK("Could not find domain %d\n", dom); + (void)__put_user(GNTST_bad_domain, &uop->handle); + return GNTST_bad_domain; + } + + /* Get a maptrack handle. */ + if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) ) + { + int i; + grant_mapping_t *new_mt; + grant_table_t *lgt = ld->grant_table; + + /* Grow the maptrack table. 
*/ + new_mt = alloc_xenheap_pages(lgt->maptrack_order + 1); + if ( new_mt == NULL ) + { + put_domain(rd); + DPRINTK("No more map handles available\n"); + (void)__put_user(GNTST_no_device_space, &uop->handle); + return GNTST_no_device_space; + } + + memcpy(new_mt, lgt->maptrack, PAGE_SIZE << lgt->maptrack_order); + for ( i = lgt->maptrack_limit; i < (lgt->maptrack_limit << 1); i++ ) + new_mt[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + + free_xenheap_pages(lgt->maptrack, lgt->maptrack_order); + lgt->maptrack = new_mt; + lgt->maptrack_order += 1; + lgt->maptrack_limit <<= 1; + + printk("Doubled maptrack size\n"); + handle = get_maptrack_handle(ld->grant_table); + } + +#if GRANT_DEBUG_VERBOSE + DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n", + ref, dom, dev_hst_ro_flags); +#endif + + if ( 0 <= ( rc = __gnttab_activate_grant_ref( ld, led, rd, ref, + dev_hst_ro_flags, + host_virt_addr, &frame))) + { + /* + * Only make the maptrack live _after_ writing the pte, in case we + * overwrite the same frame number, causing a maptrack walk to find it + */ + ld->grant_table->maptrack[handle].domid = dom; + + ld->grant_table->maptrack[handle].ref_and_flags + = (ref << MAPTRACK_REF_SHIFT) | + (dev_hst_ro_flags & MAPTRACK_GNTMAP_MASK); + + (void)__put_user(frame, &uop->dev_bus_addr); + + if ( dev_hst_ro_flags & GNTMAP_host_map ) + *va = host_virt_addr; + + (void)__put_user(handle, &uop->handle); + } + else + { + (void)__put_user(rc, &uop->handle); + put_maptrack_handle(ld->grant_table, handle); + } + + put_domain(rd); + return rc; +} + +static long +gnttab_map_grant_ref( + gnttab_map_grant_ref_t *uop, unsigned int count) +{ + int i, flush = 0; + unsigned long va = 0; + + for ( i = 0; i < count; i++ ) + if ( __gnttab_map_grant_ref(&uop[i], &va) == 0 ) + flush++; + +#ifdef __ia64__ +// FIXME-ia64: probably need to do something here to avoid stale mappings? +#else + if ( flush == 1 ) + flush_tlb_one_mask(current->domain->cpumask, va); + else if ( flush != 0 ) + flush_tlb_mask(current->domain->cpumask); +#endif + + return 0; +} + +static int +__gnttab_unmap_grant_ref( + gnttab_unmap_grant_ref_t *uop, + unsigned long *va) +{ + domid_t dom; + grant_ref_t ref; + u16 handle; + struct domain *ld, *rd; + + active_grant_entry_t *act; + grant_entry_t *sha; + grant_mapping_t *map; + u16 flags; + s16 rc = 1; + unsigned long frame, virt; + + ld = current->domain; + + /* Bitwise-OR avoids short-circuiting which screws control flow. 
*/ + if ( unlikely(__get_user(virt, &uop->host_virt_addr) | + __get_user(frame, &uop->dev_bus_addr) | + __get_user(handle, &uop->handle)) ) + { + DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n"); + return -EFAULT; /* don't set status */ + } + + map = &ld->grant_table->maptrack[handle]; + + if ( unlikely(handle >= ld->grant_table->maptrack_limit) || + unlikely(!(map->ref_and_flags & MAPTRACK_GNTMAP_MASK)) ) + { + DPRINTK("Bad handle (%d).\n", handle); + (void)__put_user(GNTST_bad_handle, &uop->status); + return GNTST_bad_handle; + } + + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + flags = map->ref_and_flags & MAPTRACK_GNTMAP_MASK; + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + DPRINTK("Could not find domain %d\n", dom); + (void)__put_user(GNTST_bad_domain, &uop->status); + return GNTST_bad_domain; + } + +#if GRANT_DEBUG_VERBOSE + DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n", + ref, dom, handle); +#endif + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + if ( frame == 0 ) + { + frame = act->frame; + } + else if ( frame == GNTUNMAP_DEV_FROM_VIRT ) + { + if ( !( flags & GNTMAP_device_map ) ) + PIN_FAIL(unmap_out, GNTST_bad_dev_addr, + "Bad frame number: frame not mapped for dev access.\n"); + frame = act->frame; + + /* Frame will be unmapped for device access below if virt addr okay. */ + } + else + { + if ( unlikely(frame != act->frame) ) + PIN_FAIL(unmap_out, GNTST_general_error, + "Bad frame number doesn't match gntref.\n"); + if ( flags & GNTMAP_device_map ) + act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc + : GNTPIN_devw_inc; + + map->ref_and_flags &= ~GNTMAP_device_map; + (void)__put_user(0, &uop->dev_bus_addr); + + /* Frame is now unmapped for device access. */ + } + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? +#else + if ( (virt != 0) && + (flags & GNTMAP_host_map) && + ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0)) + { + l1_pgentry_t *pl1e; + unsigned long _ol1e; + + pl1e = &linear_pg_table[l1_linear_offset(virt)]; + + if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) + { + DPRINTK("Could not find PTE entry for address %lx\n", virt); + rc = -EINVAL; + goto unmap_out; + } + + /* + * Check that the virtual address supplied is actually mapped to + * act->frame. + */ + if ( unlikely((_ol1e >> PAGE_SHIFT) != frame )) + { + DPRINTK("PTE entry %lx for address %lx doesn't match frame %lx\n", + _ol1e, virt, frame); + rc = -EINVAL; + goto unmap_out; + } + + /* Delete pagetable entry. */ + if ( unlikely(__put_user(0, (unsigned long *)pl1e))) + { + DPRINTK("Cannot delete PTE entry at %p for virtual address %lx\n", + pl1e, virt); + rc = -EINVAL; + goto unmap_out; + } + + map->ref_and_flags &= ~GNTMAP_host_map; + + act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_hstr_inc + : GNTPIN_hstw_inc; + + if ( frame == GNTUNMAP_DEV_FROM_VIRT ) + { + act->pin -= (flags & GNTMAP_readonly) ? GNTPIN_devr_inc + : GNTPIN_devw_inc; + + map->ref_and_flags &= ~GNTMAP_device_map; + (void)__put_user(0, &uop->dev_bus_addr); + } + + rc = 0; + *va = virt; + } +#endif + + if ( (map->ref_and_flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) + { + map->ref_and_flags = 0; + put_maptrack_handle(ld->grant_table, handle); + } + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? 
I think not and then +// this can probably be macro-ized into nothingness +#else + /* If just unmapped a writable mapping, mark as dirtied */ + if ( unlikely(shadow_mode_log_dirty(rd)) && + !( flags & GNTMAP_readonly ) ) + mark_dirty(rd, frame); +#endif + + /* If the last writable mapping has been removed, put_page_type */ + if ( ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask) ) == 0) && + ( !( flags & GNTMAP_readonly ) ) ) + { + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); + } + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &sha->flags); + put_page(&frame_table[frame]); + } + + unmap_out: + (void)__put_user(rc, &uop->status); + spin_unlock(&rd->grant_table->lock); + put_domain(rd); + return rc; +} + +static long +gnttab_unmap_grant_ref( + gnttab_unmap_grant_ref_t *uop, unsigned int count) +{ + int i, flush = 0; + unsigned long va = 0; + + for ( i = 0; i < count; i++ ) + if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0 ) + flush++; + +#ifdef __ia64__ +// FIXME-ia64: probably need to do something here to avoid stale mappings? +#else + if ( flush == 1 ) + flush_tlb_one_mask(current->domain->cpumask, va); + else if ( flush != 0 ) + flush_tlb_mask(current->domain->cpumask); +#endif + + return 0; +} + +static long +gnttab_setup_table( + gnttab_setup_table_t *uop, unsigned int count) +{ + gnttab_setup_table_t op; + struct domain *d; + int i; + + if ( count != 1 ) + return -EINVAL; + + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + { + DPRINTK("Fault while reading gnttab_setup_table_t.\n"); + return -EFAULT; + } + + if ( unlikely(op.nr_frames > NR_GRANT_FRAMES) ) + { + DPRINTK("Xen only supports up to %d grant-table frames per domain.\n", + NR_GRANT_FRAMES); + (void)put_user(GNTST_general_error, &uop->status); + return 0; + } + + if ( op.dom == DOMID_SELF ) + { + op.dom = current->domain->domain_id; + } + else if ( unlikely(!IS_PRIV(current->domain)) ) + { + (void)put_user(GNTST_permission_denied, &uop->status); + return 0; + } + + if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) + { + DPRINTK("Bad domid %d.\n", op.dom); + (void)put_user(GNTST_bad_domain, &uop->status); + return 0; + } + + if ( op.nr_frames <= NR_GRANT_FRAMES ) + { + ASSERT(d->grant_table != NULL); + (void)put_user(GNTST_okay, &uop->status); + for ( i = 0; i < op.nr_frames; i++ ) + (void)put_user( + (virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT) + i, + &uop->frame_list[i]); + } + + put_domain(d); + return 0; +} + +#if GRANT_DEBUG +static int +gnttab_dump_table(gnttab_dump_table_t *uop) +{ + grant_table_t *gt; + gnttab_dump_table_t op; + struct domain *d; + u32 shared_mfn; + active_grant_entry_t *act; + grant_entry_t sha_copy; + grant_mapping_t *maptrack; + int i; + + + if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) + { + DPRINTK("Fault while reading gnttab_dump_table_t.\n"); + return -EFAULT; + } + + if ( op.dom == DOMID_SELF ) + { + op.dom = current->domain->domain_id; + } + + if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) ) + { + DPRINTK("Bad domid %d.\n", op.dom); + (void)put_user(GNTST_bad_domain, &uop->status); + return 0; + } + + ASSERT(d->grant_table != NULL); + gt = d->grant_table; + (void)put_user(GNTST_okay, &uop->status); + + shared_mfn = virt_to_phys(d->grant_table->shared); + + DPRINTK("Grant table for dom (%hu) MFN (%x)\n", + op.dom, shared_mfn); + + ASSERT(d->grant_table->active != NULL); + ASSERT(d->grant_table->shared != NULL); + ASSERT(d->grant_table->maptrack != NULL); + + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + { + 
sha_copy = gt->shared[i]; + + if ( sha_copy.flags ) + { + DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) " + "dom:(%hu) frame:(%lx)\n", + op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame); + } + } + + spin_lock(>->lock); + + for ( i = 0; i < NR_GRANT_ENTRIES; i++ ) + { + act = >->active[i]; + + if ( act->pin ) + { + DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) " + "dom:(%hu) frame:(%lx)\n", + op.dom, i, act->pin, act->domid, act->frame); + } + } + + for ( i = 0; i < gt->maptrack_limit; i++ ) + { + maptrack = >->maptrack[i]; + + if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK ) + { + DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) " + "dom:(%hu)\n", + op.dom, i, + maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT, + maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK, + maptrack->domid); + } + } + + spin_unlock(>->lock); + + put_domain(d); + return 0; +} +#endif + +long +do_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + long rc; + + if ( count > 512 ) + return -EINVAL; + + LOCK_BIGLOCK(current->domain); + + rc = -EFAULT; + switch ( cmd ) + { + case GNTTABOP_map_grant_ref: + if ( unlikely(!array_access_ok( + uop, count, sizeof(gnttab_map_grant_ref_t))) ) + goto out; + rc = gnttab_map_grant_ref((gnttab_map_grant_ref_t *)uop, count); + break; + case GNTTABOP_unmap_grant_ref: + if ( unlikely(!array_access_ok( + uop, count, sizeof(gnttab_unmap_grant_ref_t))) ) + goto out; + rc = gnttab_unmap_grant_ref((gnttab_unmap_grant_ref_t *)uop, count); + break; + case GNTTABOP_setup_table: + rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count); + break; +#if GRANT_DEBUG + case GNTTABOP_dump_table: + rc = gnttab_dump_table((gnttab_dump_table_t *)uop); + break; +#endif + default: + rc = -ENOSYS; + break; + } + +out: + UNLOCK_BIGLOCK(current->domain); + + return rc; +} + +int +gnttab_check_unmap( + struct domain *rd, struct domain *ld, unsigned long frame, int readonly) +{ + /* Called when put_page is invoked on a page belonging to a foreign domain. + * Instead of decrementing the frame table ref count, locate the grant + * table entry, if any, and if found, decrement that count. + * Called a _lot_ at domain creation because pages mapped by priv domains + * also traverse this. + */ + + /* Note: If the same frame is mapped multiple times, and then one of + * the ptes is overwritten, which maptrack handle gets invalidated? + * Advice: Don't do it. Explicitly unmap. + */ + + unsigned int handle, ref, refcount; + grant_table_t *lgt, *rgt; + active_grant_entry_t *act; + grant_mapping_t *map; + int found = 0; + + lgt = ld->grant_table; + +#if GRANT_DEBUG_VERBOSE + if ( ld->domain_id != 0 ) + { + DPRINTK("Foreign unref rd(%d) ld(%d) frm(%x) flgs(%x).\n", + rd->domain_id, ld->domain_id, frame, readonly); + } +#endif + + /* Fast exit if we're not mapping anything using grant tables */ + if ( lgt->map_count == 0 ) + return 0; + + if ( get_domain(rd) == 0 ) + { + DPRINTK("gnttab_check_unmap: couldn't get_domain rd(%d)\n", + rd->domain_id); + return 0; + } + + rgt = rd->grant_table; + + for ( handle = 0; handle < lgt->maptrack_limit; handle++ ) + { + map = &lgt->maptrack[handle]; + + if ( ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK ) && + ( readonly ? 1 : (!(map->ref_and_flags & GNTMAP_readonly)))) + { + ref = (map->ref_and_flags >> MAPTRACK_REF_SHIFT); + act = &rgt->active[ref]; + + spin_lock(&rgt->lock); + + if ( act->frame != frame ) + { + spin_unlock(&rgt->lock); + continue; + } + + refcount = act->pin & ( readonly ? 
GNTPIN_hstr_mask + : GNTPIN_hstw_mask ); + if ( refcount == 0 ) + { + spin_unlock(&rgt->lock); + continue; + } + + /* gotcha */ + DPRINTK("Grant unref rd(%d) ld(%d) frm(%lx) flgs(%x).\n", + rd->domain_id, ld->domain_id, frame, readonly); + + if ( readonly ) + act->pin -= GNTPIN_hstr_inc; + else + { + act->pin -= GNTPIN_hstw_inc; + + /* any more granted writable mappings? */ + if ( (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) == 0 ) + { + clear_bit(_GTF_writing, &rgt->shared[ref].flags); + put_page_type(&frame_table[frame]); + } + } + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &rgt->shared[ref].flags); + put_page(&frame_table[frame]); + } + spin_unlock(&rgt->lock); + + clear_bit(GNTMAP_host_map, &map->ref_and_flags); + + if ( !(map->ref_and_flags & GNTMAP_device_map) ) + put_maptrack_handle(lgt, handle); + + found = 1; + break; + } + } + put_domain(rd); + + return found; +} + +int +gnttab_prepare_for_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref) +{ + grant_table_t *rgt; + grant_entry_t *sha; + domid_t sdom; + u16 sflags; + u32 scombo, prev_scombo; + int retries = 0; + unsigned long target_pfn; + + DPRINTK("gnttab_prepare_for_transfer rd(%hu) ld(%hu) ref(%hu).\n", + rd->domain_id, ld->domain_id, ref); + + if ( unlikely((rgt = rd->grant_table) == NULL) || + unlikely(ref >= NR_GRANT_ENTRIES) ) + { + DPRINTK("Dom %d has no g.t., or ref is bad (%d).\n", + rd->domain_id, ref); + return 0; + } + + spin_lock(&rgt->lock); + + sha = &rgt->shared[ref]; + + sflags = sha->flags; + sdom = sha->domid; + + for ( ; ; ) + { + target_pfn = sha->frame; + + if ( unlikely(target_pfn >= max_page ) ) + { + DPRINTK("Bad pfn (%lx)\n", target_pfn); + goto fail; + } + + if ( unlikely(sflags != GTF_accept_transfer) || + unlikely(sdom != ld->domain_id) ) + { + DPRINTK("Bad flags (%x) or dom (%d). (NB. expected dom %d)\n", + sflags, sdom, ld->domain_id); + goto fail; + } + + /* Merge two 16-bit values into a 32-bit combined update. */ + /* NB. Endianness! */ + prev_scombo = scombo = ((u32)sdom << 16) | (u32)sflags; + + /* NB. prev_scombo is updated in place to seen value. */ + if ( unlikely(cmpxchg_user((u32 *)&sha->flags, prev_scombo, + prev_scombo | GTF_transfer_committed)) ) + { + DPRINTK("Fault while modifying shared flags and domid.\n"); + goto fail; + } + + /* Did the combined update work (did we see what we expected?). */ + if ( likely(prev_scombo == scombo) ) + break; + + if ( retries++ == 4 ) + { + DPRINTK("Shared grant entry is unstable.\n"); + goto fail; + } + + /* Didn't see what we expected. Split out the seen flags & dom. */ + /* NB. Endianness! */ + sflags = (u16)prev_scombo; + sdom = (u16)(prev_scombo >> 16); + } + + spin_unlock(&rgt->lock); + return 1; + + fail: + spin_unlock(&rgt->lock); + return 0; +} + +void +gnttab_notify_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref, unsigned long frame) +{ + grant_entry_t *sha; + unsigned long pfn; + + DPRINTK("gnttab_notify_transfer rd(%hu) ld(%hu) ref(%hu).\n", + rd->domain_id, ld->domain_id, ref); + + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + +#ifdef __ia64__ +// FIXME-ia64: any error checking need to be done here? 
+#else + pfn = sha->frame; + + if ( unlikely(pfn >= max_page ) ) + DPRINTK("Bad pfn (%lx)\n", pfn); + else + { + machine_to_phys_mapping[frame] = pfn; + + if ( unlikely(shadow_mode_log_dirty(ld))) + mark_dirty(ld, frame); + + if (shadow_mode_translate(ld)) + __phys_to_machine_mapping[pfn] = frame; + } +#endif + sha->frame = __mfn_to_gpfn(rd, frame); + sha->domid = rd->domain_id; + wmb(); + sha->flags = ( GTF_accept_transfer | GTF_transfer_completed ); + + spin_unlock(&rd->grant_table->lock); + + return; +} + +int +grant_table_create( + struct domain *d) +{ + grant_table_t *t; + int i; + + if ( (t = xmalloc(grant_table_t)) == NULL ) + goto no_mem; + + /* Simple stuff. */ + memset(t, 0, sizeof(*t)); + spin_lock_init(&t->lock); + + /* Active grant table. */ + if ( (t->active = xmalloc_array(active_grant_entry_t, NR_GRANT_ENTRIES)) + == NULL ) + goto no_mem; + memset(t->active, 0, sizeof(active_grant_entry_t) * NR_GRANT_ENTRIES); + + /* Tracking of mapped foreign frames table */ + if ( (t->maptrack = alloc_xenheap_page()) == NULL ) + goto no_mem; + t->maptrack_order = 0; + t->maptrack_limit = PAGE_SIZE / sizeof(grant_mapping_t); + memset(t->maptrack, 0, PAGE_SIZE); + for ( i = 0; i < t->maptrack_limit; i++ ) + t->maptrack[i].ref_and_flags = (i+1) << MAPTRACK_REF_SHIFT; + + /* Shared grant table. */ + t->shared = alloc_xenheap_pages(ORDER_GRANT_FRAMES); + if ( t->shared == NULL ) + goto no_mem; + memset(t->shared, 0, NR_GRANT_FRAMES * PAGE_SIZE); + +#ifdef __ia64__ +// I don't think there's anything to do here on ia64?... +#else + for ( i = 0; i < NR_GRANT_FRAMES; i++ ) + { + SHARE_PFN_WITH_DOMAIN( + virt_to_page((char *)(t->shared)+(i*PAGE_SIZE)), d); + machine_to_phys_mapping[(virt_to_phys(t->shared) >> PAGE_SHIFT) + i] = + INVALID_M2P_ENTRY; + } +#endif + + /* Okay, install the structure. 
*/ + wmb(); /* avoid races with lock-free access to d->grant_table */ + d->grant_table = t; + return 0; + + no_mem: + if ( t != NULL ) + { + xfree(t->active); + if ( t->maptrack != NULL ) + free_xenheap_page(t->maptrack); + xfree(t); + } + return -ENOMEM; +} + +void +gnttab_release_dev_mappings(grant_table_t *gt) +{ + grant_mapping_t *map; + domid_t dom; + grant_ref_t ref; + u16 handle; + struct domain *ld, *rd; + unsigned long frame; + active_grant_entry_t *act; + grant_entry_t *sha; + + ld = current->domain; + + for ( handle = 0; handle < gt->maptrack_limit; handle++ ) + { + map = >->maptrack[handle]; + + if ( map->ref_and_flags & GNTMAP_device_map ) + { + dom = map->domid; + ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT; + + DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n", + handle, ref, + map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom); + + if ( unlikely((rd = find_domain_by_id(dom)) == NULL) || + unlikely(ld == rd) ) + { + if ( rd != NULL ) + put_domain(rd); + + printk(KERN_WARNING "Grant release: No dom%d\n", dom); + continue; + } + + act = &rd->grant_table->active[ref]; + sha = &rd->grant_table->shared[ref]; + + spin_lock(&rd->grant_table->lock); + + if ( act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask) ) + { + frame = act->frame; + + if ( ( (act->pin & GNTPIN_hstw_mask) == 0 ) && + ( (act->pin & GNTPIN_devw_mask) > 0 ) ) + { + clear_bit(_GTF_writing, &sha->flags); + put_page_type(&frame_table[frame]); + } + + act->pin &= ~(GNTPIN_devw_mask | GNTPIN_devr_mask); + + if ( act->pin == 0 ) + { + clear_bit(_GTF_reading, &sha->flags); + map->ref_and_flags = 0; + put_page(&frame_table[frame]); + } + else + map->ref_and_flags &= ~GNTMAP_device_map; + } + + spin_unlock(&rd->grant_table->lock); + + put_domain(rd); + } + } +} + + +void +grant_table_destroy( + struct domain *d) +{ + grant_table_t *t; + + if ( (t = d->grant_table) != NULL ) + { + /* Free memory relating to this grant table. */ + d->grant_table = NULL; + free_xenheap_pages(t->shared, ORDER_GRANT_FRAMES); + free_xenheap_page(t->maptrack); + xfree(t->active); + xfree(t); + } +} + +void +grant_table_init( + void) +{ + /* Nothing. */ +} +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/arch/ia64/hypercall.c b/xen/arch/ia64/hypercall.c index 648bbfbbe8..b6f3167c09 100644 --- a/xen/arch/ia64/hypercall.c +++ b/xen/arch/ia64/hypercall.c @@ -40,6 +40,17 @@ ia64_hypercall (struct pt_regs *regs) #endif x = pal_emulator_static(regs->r28); if (regs->r28 == PAL_HALT_LIGHT) { +#if 1 +#define SPURIOUS_VECTOR 15 + if (vcpu_check_pending_interrupts(v)!=SPURIOUS_VECTOR) { +//printf("Domain trying to go idle when interrupt pending!\n"); +//this shouldn't happen, but it apparently does quite a bit! so don't +//allow it to happen... i.e. 
if a domain has an interrupt pending and +//it tries to halt itself because it thinks it is idle, just return here +//as deliver_pending_interrupt is called on the way out and will deliver it + } + else +#endif do_sched_op(SCHEDOP_yield); //break; } @@ -138,6 +149,12 @@ ia64_hypercall (struct pt_regs *regs) regs->r8 = do_event_channel_op(regs->r14); break; +#ifndef CONFIG_VTI + case __HYPERVISOR_grant_table_op: + regs->r8 = do_grant_table_op(regs->r14, regs->r15, regs->r16); + break; +#endif + case __HYPERVISOR_console_io: regs->r8 = do_console_io(regs->r14, regs->r15, regs->r16); break; diff --git a/xen/arch/ia64/hyperprivop.S b/xen/arch/ia64/hyperprivop.S index 235c8322eb..ab6069c5ba 100644 --- a/xen/arch/ia64/hyperprivop.S +++ b/xen/arch/ia64/hyperprivop.S @@ -66,10 +66,13 @@ GLOBAL_ENTRY(fast_hyperprivop) cmp.eq p7,p6=XEN_HYPER_RFI,r17 (p7) br.sptk.many hyper_rfi;; + // HYPERPRIVOP_GET_IVR? + cmp.eq p7,p6=XEN_HYPER_GET_IVR,r17 +(p7) br.sptk.many hyper_get_ivr;; + cmp.ne p7,p0=r20,r0 (p7) br.spnt.many dispatch_break_fault ;; -// hard to test, because only called from rbs_switch // HYPERPRIVOP_COVER? cmp.eq p7,p6=XEN_HYPER_COVER,r17 (p7) br.sptk.many hyper_cover;; @@ -82,6 +85,42 @@ GLOBAL_ENTRY(fast_hyperprivop) cmp.eq p7,p6=XEN_HYPER_RSM_DT,r17 (p7) br.sptk.many hyper_rsm_dt;; + // HYPERPRIVOP_GET_TPR? + cmp.eq p7,p6=XEN_HYPER_GET_TPR,r17 +(p7) br.sptk.many hyper_get_tpr;; + + // HYPERPRIVOP_SET_TPR? + cmp.eq p7,p6=XEN_HYPER_SET_TPR,r17 +(p7) br.sptk.many hyper_set_tpr;; + + // HYPERPRIVOP_EOI? + cmp.eq p7,p6=XEN_HYPER_EOI,r17 +(p7) br.sptk.many hyper_eoi;; + + // HYPERPRIVOP_SET_ITM? + cmp.eq p7,p6=XEN_HYPER_SET_ITM,r17 +(p7) br.sptk.many hyper_set_itm;; + + // HYPERPRIVOP_SET_RR? + cmp.eq p7,p6=XEN_HYPER_SET_RR,r17 +(p7) br.sptk.many hyper_set_rr;; + + // HYPERPRIVOP_GET_RR? + cmp.eq p7,p6=XEN_HYPER_GET_RR,r17 +(p7) br.sptk.many hyper_get_rr;; + + // HYPERPRIVOP_PTC_GA? + cmp.eq p7,p6=XEN_HYPER_PTC_GA,r17 +(p7) br.sptk.many hyper_ptc_ga;; + + // HYPERPRIVOP_ITC_D? + cmp.eq p7,p6=XEN_HYPER_ITC_D,r17 +(p7) br.sptk.many hyper_itc_d;; + + // HYPERPRIVOP_ITC_I? + cmp.eq p7,p6=XEN_HYPER_ITC_I,r17 +(p7) br.sptk.many hyper_itc_i;; + // if not one of the above, give up for now and do it the slow way br.sptk.many dispatch_break_fault ;; @@ -343,14 +382,15 @@ GLOBAL_ENTRY(fast_break_reflect) // ensure that, if giving up, registers at entry to fast_hyperprivop unchanged ENTRY(hyper_rfi) // if no interrupts pending, proceed + mov r30=r0 cmp.eq p7,p0=r20,r0 (p7) br.sptk.many 1f - // interrupts pending, if rfi'ing to interrupts on, go slow way + ;; adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r21=[r20];; // r21 = vcr.ipsr extr.u r22=r21,IA64_PSR_I_BIT,1 ;; - cmp.ne p7,p0=r22,r0 ;; -(p7) br.spnt.many dispatch_break_fault ;; + mov r30=r22 + // r30 determines whether we might deliver an immediate extint 1: adds r20=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r21=[r20];; // r21 = vcr.ipsr @@ -384,13 +424,17 @@ ENTRY(hyper_rfi) (p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high) (p7) br.sptk.many dispatch_break_fault ;; - // OK now, let's do an rfi. +1: // OK now, let's do an rfi. 
#ifdef FAST_HYPERPRIVOP_CNT movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_RFI);; ld8 r23=[r20];; adds r23=1,r23;; st8 [r20]=r23;; #endif + cmp.ne p6,p0=r30,r0 +(p6) br.cond.sptk.many check_extint; + ;; +just_do_rfi: // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip mov cr.iip=r22;; adds r20=XSI_INCOMPL_REG_OFS-XSI_PSR_IC_OFS,r18 ;; @@ -403,11 +447,12 @@ ENTRY(hyper_rfi) dep r21=-1,r21,IA64_PSR_CPL1_BIT,1 ;; // vpsr.i = vcr.ipsr.i; vpsr.ic = vcr.ipsr.ic mov r19=r0 ;; - extr.u r22=r21,IA64_PSR_I_BIT,1 ;; - cmp.ne p7,p6=r22,r0 ;; + extr.u r23=r21,IA64_PSR_I_BIT,1 ;; + cmp.ne p7,p6=r23,r0 ;; + // not done yet (p7) dep r19=-1,r19,32,1 - extr.u r22=r21,IA64_PSR_IC_BIT,1 ;; - cmp.ne p7,p6=r22,r0 ;; + extr.u r23=r21,IA64_PSR_IC_BIT,1 ;; + cmp.ne p7,p6=r23,r0 ;; (p7) dep r19=-1,r19,0,1 ;; st8 [r18]=r19 ;; // force on psr.ic, i, dt, rt, it, bn @@ -421,6 +466,80 @@ ENTRY(hyper_rfi) rfi ;; +check_extint: + br.sptk.many dispatch_break_fault ;; + + // r18=&vpsr.i|vpsr.ic, r21==vpsr, r22=vcr.iip + mov r30=IA64_KR(CURRENT);; + adds r24=IA64_VCPU_INSVC3_OFFSET,r30;; + mov r25=192 + adds r22=IA64_VCPU_IRR3_OFFSET,r30;; + ld8 r23=[r22];; + cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; + cmp.eq p6,p0=r23,r0 +(p6) br.cond.sptk.many 1f; // this is actually an error + // r22 points to non-zero element of irr, r23 has value + // r24 points to corr element of insvc, r25 has elt*64 + ld8 r26=[r24];; + cmp.geu p6,p0=r26,r23 +(p6) br.cond.spnt.many 1f; + // not masked by insvc, get vector number + shr.u r26=r23,1;; + or r26=r23,r26;; + shr.u r27=r26,2;; + or r26=r26,r27;; + shr.u r27=r26,4;; + or r26=r26,r27;; + shr.u r27=r26,8;; + or r26=r26,r27;; + shr.u r27=r26,16;; + or r26=r26,r27;; + shr.u r27=r26,32;; + or r26=r26,r27;; + andcm r26=0xffffffffffffffff,r26;; + popcnt r26=r26;; + sub r26=63,r26;; + // r26 now contains the bit index (mod 64) + mov r27=1;; + shl r27=r27,r26;; + // r27 now contains the (within the proper word) bit mask + add r26=r25,r26 + // r26 now contains the vector [0..255] + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20] ;; + extr.u r28=r20,16,1 + extr.u r29=r20,4,4 ;; + cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS +(p6) br.cond.sptk.many 1f; + shl r29=r29,4;; + adds r29=15,r29;; + cmp.ge p6,p0=r29,r26 +(p6) br.cond.sptk.many 1f; + // OK, have an unmasked vector to process/return + ld8 r25=[r24];; + or r25=r25,r27;; + st8 [r24]=r25;; + ld8 r25=[r22];; + andcm r25=r25,r27;; + st8 [r22]=r25;; + mov r8=r26;; + // not done yet + ENTRY(hyper_cover) #ifdef FAST_HYPERPRIVOP_CNT movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_COVER);; @@ -455,7 +574,6 @@ ENTRY(hyper_cover) rfi ;; -#if 1 // return from metaphysical mode (meta=1) to virtual mode (meta=0) ENTRY(hyper_ssm_dt) #ifdef FAST_HYPERPRIVOP_CNT @@ -526,4 +644,373 @@ ENTRY(hyper_rsm_dt) mov pr=r31,-1 ;; rfi ;; + +ENTRY(hyper_get_tpr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_TPR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r8=[r20];; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep 
r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_get_tpr) + +// if we get to here, there are no interrupts pending so we +// can change virtual tpr to any value without fear of provoking +// (or accidentally missing) delivering an interrupt +ENTRY(hyper_set_tpr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_TPR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r24=cr.ipsr + mov r25=cr.iip;; + movl r27=0xff00;; + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + andcm r8=r8,r27;; + st8 [r20]=r8;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_set_tpr) + +ENTRY(hyper_get_ivr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r22=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_IVR);; + ld8 r21=[r22];; + adds r21=1,r21;; + st8 [r22]=r21;; #endif + mov r8=15;; + // when we get to here r20=~=interrupts pending + cmp.eq p7,p0=r20,r0;; +(p7) adds r20=XSI_PEND_OFS-XSI_PSR_IC_OFS,r18 ;; +(p7) st4 [r20]=r0;; +(p7) br.spnt.many 1f ;; + mov r30=IA64_KR(CURRENT);; + adds r24=IA64_VCPU_INSVC3_OFFSET,r30;; + mov r25=192 + adds r22=IA64_VCPU_IRR3_OFFSET,r30;; + ld8 r23=[r22];; + cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) adds r24=-8,r24;; +(p6) adds r25=-64,r25;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; + cmp.eq p6,p0=r23,r0 +(p6) br.cond.sptk.many 1f; // this is actually an error + // r22 points to non-zero element of irr, r23 has value + // r24 points to corr element of insvc, r25 has elt*64 + ld8 r26=[r24];; + cmp.geu p6,p0=r26,r23 +(p6) br.cond.spnt.many 1f; + // not masked by insvc, get vector number + shr.u r26=r23,1;; + or r26=r23,r26;; + shr.u r27=r26,2;; + or r26=r26,r27;; + shr.u r27=r26,4;; + or r26=r26,r27;; + shr.u r27=r26,8;; + or r26=r26,r27;; + shr.u r27=r26,16;; + or r26=r26,r27;; + shr.u r27=r26,32;; + or r26=r26,r27;; + andcm r26=0xffffffffffffffff,r26;; + popcnt r26=r26;; + sub r26=63,r26;; + // r26 now contains the bit index (mod 64) + mov r27=1;; + shl r27=r27,r26;; + // r27 now contains the (within the proper word) bit mask + add r26=r25,r26 + // r26 now contains the vector [0..255] + adds r20=XSI_TPR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20] ;; + extr.u r28=r20,16,1 + extr.u r29=r20,4,4 ;; + cmp.ne p6,p0=r28,r0 // if tpr.mmi is set, return SPURIOUS +(p6) br.cond.sptk.many 1f; + shl r29=r29,4;; + adds r29=15,r29;; + cmp.ge p6,p0=r29,r26 +(p6) br.cond.sptk.many 1f; + // OK, have an unmasked vector to process/return + ld8 r25=[r24];; + or r25=r25,r27;; + st8 [r24]=r25;; + ld8 r25=[r22];; + andcm r25=r25,r27;; + st8 [r22]=r25;; + mov r8=r26;; + // if its a clock tick, remember itm to avoid delivering it twice + adds r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r20=[r20];; + extr.u r20=r20,0,8;; + cmp.eq p6,p0=r20,r8 + adds r22=IA64_VCPU_DOMAIN_ITM_LAST_OFFSET,r30 + adds r23=IA64_VCPU_DOMAIN_ITM_OFFSET,r30;; + ld8 r23=[r23];; +(p6) st8 [r22]=r23;; + // all done +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + 
rfi + ;; +END(hyper_get_ivr) + +ENTRY(hyper_eoi) + // when we get to here r20=~=interrupts pending + cmp.ne p7,p0=r20,r0 +(p7) br.spnt.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_EOI);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + mov r22=IA64_KR(CURRENT);; + adds r22=IA64_VCPU_INSVC3_OFFSET,r22;; + ld8 r23=[r22];; + cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; +(p6) adds r22=-8,r22;; +(p6) ld8 r23=[r22];; +(p6) cmp.eq p6,p0=r23,r0;; + cmp.eq p6,p0=r23,r0 +(p6) br.cond.sptk.many 1f; // this is actually an error + // r22 points to non-zero element of insvc, r23 has value + shr.u r24=r23,1;; + or r24=r23,r24;; + shr.u r25=r24,2;; + or r24=r24,r25;; + shr.u r25=r24,4;; + or r24=r24,r25;; + shr.u r25=r24,8;; + or r24=r24,r25;; + shr.u r25=r24,16;; + or r24=r24,r25;; + shr.u r25=r24,32;; + or r24=r24,r25;; + andcm r24=0xffffffffffffffff,r24;; + popcnt r24=r24;; + sub r24=63,r24;; + // r24 now contains the bit index + mov r25=1;; + shl r25=r25,r24;; + andcm r23=r23,r25;; + st8 [r22]=r23;; +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_eoi) + +ENTRY(hyper_set_itm) + // when we get to here r20=~=interrupts pending + cmp.ne p7,p0=r20,r0 +(p7) br.spnt.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_ITM);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + movl r20=(PERCPU_ADDR)+IA64_CPUINFO_ITM_NEXT_OFFSET;; + ld8 r21=[r20];; + mov r20=IA64_KR(CURRENT);; + adds r20=IA64_VCPU_DOMAIN_ITM_OFFSET,r20;; + st8 [r20]=r8;; + cmp.geu p6,p0=r21,r8;; +(p6) mov r21=r8;; + // now "safe set" cr.itm=r21 + mov r23=100;; +2: mov cr.itm=r21;; + srlz.d;; + mov r22=ar.itc ;; + cmp.leu p6,p0=r21,r22;; + add r21=r21,r23;; + shl r23=r23,1;; +(p6) br.cond.spnt.few 2b;; +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_set_itm) + +ENTRY(hyper_get_rr) +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_GET_RR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + extr.u r25=r8,61,3;; + adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; + shl r25=r25,3;; + add r20=r20,r25;; + ld8 r8=[r20];; +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_get_rr) + +ENTRY(hyper_set_rr) + extr.u r25=r8,61,3;; + cmp.leu p7,p0=7,r25 // punt on setting rr7 +(p7) br.spnt.many dispatch_break_fault ;; +#ifdef FAST_HYPERPRIVOP_CNT + movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_SET_RR);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif + extr.u r26=r9,8,24 // r26 = r9.rid + mov r20=IA64_KR(CURRENT);; + adds r21=IA64_VCPU_STARTING_RID_OFFSET,r20;; + ld4 r22=[r21];; + adds r21=IA64_VCPU_ENDING_RID_OFFSET,r20;; + ld4 r23=[r21];; + adds r24=IA64_VCPU_META_SAVED_RR0_OFFSET,r20;; + add r22=r26,r22;; + cmp.geu p6,p0=r22,r23 // if r9.rid + 
starting_rid >= ending_rid +(p6) br.cond.sptk.many 1f; // this is an error, but just ignore/return + // r21=starting_rid + adds r20=XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 ;; + shl r25=r25,3;; + add r20=r20,r25;; + st8 [r20]=r9;; // store away exactly what was passed + // but adjust value actually placed in rr[r8] + // r22 contains adjusted rid, "mangle" it (see regionreg.c) + // and set ps to PAGE_SHIFT and ve to 1 + extr.u r27=r22,0,8 + extr.u r28=r22,8,8 + extr.u r29=r22,16,8;; + dep.z r23=PAGE_SHIFT,2,6;; + dep r23=-1,r23,0,1;; // mangling is swapping bytes 1 & 3 + dep r23=r27,r23,24,8;; + dep r23=r28,r23,16,8;; + dep r23=r29,r23,8,8 + cmp.eq p6,p0=r25,r0;; // if rr0, save for metaphysical +(p6) st4 [r24]=r23 + mov rr[r8]=r23;; + // done, mosey on back +1: mov r24=cr.ipsr + mov r25=cr.iip;; + extr.u r26=r24,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r25=16,r25 +(p7) adds r26=1,r26 + ;; + dep r24=r26,r24,41,2 + ;; + mov cr.ipsr=r24 + mov cr.iip=r25 + mov pr=r31,-1 ;; + rfi + ;; +END(hyper_set_rr) + +ENTRY(hyper_ptc_ga) + br.spnt.many dispatch_break_fault ;; +END(hyper_ptc_ga) + +ENTRY(hyper_itc_d) + br.spnt.many dispatch_break_fault ;; +END(hyper_itc_d) + +ENTRY(hyper_itc_i) + br.spnt.many dispatch_break_fault ;; +END(hyper_itc_i) + diff --git a/xen/arch/ia64/mm.c b/xen/arch/ia64/mm.c new file mode 100644 index 0000000000..755596f89e --- /dev/null +++ b/xen/arch/ia64/mm.c @@ -0,0 +1,141 @@ +/****************************************************************************** + * arch/ia64/mm.c + * + * Copyright (c) 2002-2005 K A Fraser + * Copyright (c) 2004 Christian Limpach + * Copyright (c) 2005, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * A description of the x86 page table API: + * + * Domains trap to do_mmu_update with a list of update requests. + * This is a list of (ptr, val) pairs, where the requested operation + * is *ptr = val. + * + * Reference counting of pages: + * ---------------------------- + * Each page has two refcounts: tot_count and type_count. + * + * TOT_COUNT is the obvious reference count. It counts all uses of a + * physical page frame by a domain, including uses as a page directory, + * a page table, or simple mappings via a PTE. This count prevents a + * domain from releasing a frame back to the free pool when it still holds + * a reference to it. + * + * TYPE_COUNT is more subtle. A frame can be put to one of three + * mutually-exclusive uses: it might be used as a page directory, or a + * page table, or it may be mapped writable by the domain [of course, a + * frame may not be used in any of these three ways!]. + * So, type_count is a count of the number of times a frame is being + * referred to in its current incarnation. Therefore, a page can only + * change its type when its type count is zero. 
diff --git a/xen/arch/ia64/mm.c b/xen/arch/ia64/mm.c
new file mode 100644
index 0000000000..755596f89e
--- /dev/null
+++ b/xen/arch/ia64/mm.c
@@ -0,0 +1,141 @@
+/******************************************************************************
+ * arch/ia64/mm.c
+ *
+ * Copyright (c) 2002-2005 K A Fraser
+ * Copyright (c) 2004 Christian Limpach
+ * Copyright (c) 2005, Intel Corporation.
+ *	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * A description of the x86 page table API:
+ *
+ * Domains trap to do_mmu_update with a list of update requests.
+ * This is a list of (ptr, val) pairs, where the requested operation
+ * is *ptr = val.
+ *
+ * Reference counting of pages:
+ * ----------------------------
+ * Each page has two refcounts: tot_count and type_count.
+ *
+ * TOT_COUNT is the obvious reference count. It counts all uses of a
+ * physical page frame by a domain, including uses as a page directory,
+ * a page table, or simple mappings via a PTE. This count prevents a
+ * domain from releasing a frame back to the free pool when it still holds
+ * a reference to it.
+ *
+ * TYPE_COUNT is more subtle. A frame can be put to one of three
+ * mutually-exclusive uses: it might be used as a page directory, or a
+ * page table, or it may be mapped writable by the domain [of course, a
+ * frame may not be used in any of these three ways!].
+ * So, type_count is a count of the number of times a frame is being
+ * referred to in its current incarnation. Therefore, a page can only
+ * change its type when its type count is zero.
+ *
+ * Pinning the page type:
+ * ----------------------
+ * The type of a page can be pinned/unpinned with the commands
+ * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
+ * pinning is not reference counted, so it can't be nested).
+ * This is useful to prevent a page's type count falling to zero, at which
+ * point safety checks would need to be carried out next time the count
+ * is increased again.
+ *
+ * A further note on writable page mappings:
+ * -----------------------------------------
+ * For simplicity, the count of writable mappings for a page may not
+ * correspond to reality. The 'writable count' is incremented for every
+ * PTE which maps the page with the _PAGE_RW flag set. However, for
+ * write access to be possible the page directory entry must also have
+ * its _PAGE_RW bit set. We do not check this as it complicates the
+ * reference counting considerably [consider the case of multiple
+ * directory entries referencing a single page table, some with the RW
+ * bit set, others not -- it starts getting a bit messy].
+ * In normal use, this simplification shouldn't be a problem.
+ * However, the logic can be added if required.
+ *
+ * One more note on read-only page mappings:
+ * -----------------------------------------
+ * We want domains to be able to map pages for read-only access. The
+ * main reason is that page tables and directories should be readable
+ * by a domain, but it would not be safe for them to be writable.
+ * However, domains have free access to rings 1 & 2 of the Intel
+ * privilege model. In terms of page protection, these are considered
+ * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
+ * read-only restrictions are respected in supervisor mode -- if the
+ * bit is clear then any mapped page is writable.
+ *
+ * We get round this by always setting the WP bit and disallowing
+ * updates to it. This is very unlikely to cause a problem for guest
+ * OS's, which will generally use the WP bit to simplify copy-on-write
+ * implementation (in that case, OS wants a fault when it writes to
+ * an application-supplied buffer).
+ */
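
The two-counter discipline described above is compact enough to restate as code. A hedged sketch (editorial; the struct and function names here are invented, and the real x86 implementation folds type and count into tagged bitfields with pinning and validation on top):

/* Illustrative only: a frame may change type only while no holder
 * of its current type remains. */
struct frame_info {
    unsigned long tot_count;   /* every use: PD, PT, writable mapping */
    unsigned long type_count;  /* uses in the current incarnation */
    unsigned int  type;        /* current mutually-exclusive use */
};

static int frame_get_type(struct frame_info *f, unsigned int type)
{
    if (f->type_count == 0)
        f->type = type;        /* count is zero: free to re-type */
    else if (f->type != type)
        return 0;              /* busy under a conflicting type */
    f->type_count++;
    return 1;
}
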
+
+#include <xen/config.h>
+#include <public/xen.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/errno.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/vmmu.h>
+#include <asm/regionreg.h>
+
+/*
+ * uregs->ptr is the virtual address
+ * uregs->val is the pte value
+ */
+#ifdef CONFIG_VTI
+int do_mmu_update(mmu_update_t *ureqs, u64 count, u64 *pdone, u64 foreigndom)
+{
+    int i, cmd;
+    u64 mfn, gpfn;
+    VCPU *vcpu;
+    mmu_update_t req;
+    ia64_rr rr;
+    thash_cb_t *hcb;
+    thash_data_t entry={0};
+    vcpu = current;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    for ( i = 0; i < count; i++ )
+    {
+        copy_from_user(&req, ureqs, sizeof(req));
+        cmd = req.ptr&3;
+        req.ptr &= ~3;
+        if(cmd == MMU_NORMAL_PT_UPDATE){
+            entry.page_flags = req.val;
+            entry.locked = 1;
+            entry.tc = 1;
+            entry.cl = DSIDE_TLB;
+            rr = vmx_vcpu_rr(vcpu, req.ptr);
+            entry.ps = rr.ps;
+            entry.rid = rr.rid;
+            vtlb_insert(hcb, &entry, req.ptr);
+        }else if(cmd == MMU_MACHPHYS_UPDATE){
+            mfn = req.ptr >> PAGE_SHIFT;
+            gpfn = req.val;
+            set_machinetophys(mfn, gpfn);
+        }else{
+            printf("Unknown command of mmu_update: ptr: %lx, val: %lx\n", req.ptr, req.val);
+            while(1);
+        }
+        ureqs++;
+    }
+    return 0;
+}
+#endif
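
do_mmu_update consumes an array of (ptr, val) requests with the command encoded in the low two bits of ptr, as the header comment above describes. A guest-side sketch of building one request of each kind the handler accepts (hypothetical helper; assumes the usual public MMU_NORMAL_PT_UPDATE / MMU_MACHPHYS_UPDATE encodings occupy those low bits):

/* Hypothetical illustration: requests shaped as do_mmu_update expects. */
static void build_example_requests(mmu_update_t req[2], unsigned long va,
                                   unsigned long pte, unsigned long mfn,
                                   unsigned long gpfn)
{
    /* vTLB insertion: ptr is the guest virtual address, val the pte. */
    req[0].ptr = va | MMU_NORMAL_PT_UPDATE;
    req[0].val = pte;

    /* Machine-to-physical update: ptr carries the mfn, val the gpfn. */
    req[1].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
    req[1].val = gpfn;
}
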
diff --git a/xen/arch/ia64/patch/linux-2.6.11/uaccess.h b/xen/arch/ia64/patch/linux-2.6.11/uaccess.h
index def5aaac47..a81d3aec3f 100644
--- a/xen/arch/ia64/patch/linux-2.6.11/uaccess.h
+++ b/xen/arch/ia64/patch/linux-2.6.11/uaccess.h
@@ -1,6 +1,17 @@
---- ../../linux-2.6.11/include/asm-ia64/uaccess.h	2005-06-06 10:36:23.000000000 -0600
-+++ include/asm-ia64/uaccess.h	2005-06-10 18:08:06.000000000 -0600
-@@ -60,6 +60,11 @@
+--- ../../linux-2.6.11/include/asm-ia64/uaccess.h	2005-03-02 00:37:53.000000000 -0700
++++ include/asm-ia64/uaccess.h	2005-06-21 21:53:20.000000000 -0600
+@@ -32,6 +32,10 @@
+  * David Mosberger-Tang <davidm@hpl.hp.com>
+  */
+ 
++#ifdef CONFIG_VTI
++#include <asm/vmx_uaccess.h>
++#else // CONFIG_VTI
++
+ #include <linux/compiler.h>
+ #include <linux/errno.h>
+ #include <linux/sched.h>
+@@ -60,6 +64,11 @@
  * address TASK_SIZE is never valid.  We also need to make sure that the address doesn't
  * point inside the virtually mapped linear page table.
  */
@@ -12,7 +23,7 @@
 #define __access_ok(addr, size, segment)				\
 ({									\
 	__chk_user_ptr(addr);						\
-@@ -67,6 +72,7 @@
+@@ -67,6 +76,7 @@
 	  && ((segment).seg == KERNEL_DS.seg				\
 	    || likely(REGION_OFFSET((unsigned long) (addr)) < RGN_MAP_LIMIT)));	\
 })
@@ -20,3 +31,11 @@
 #define access_ok(type, addr, size)	__access_ok((addr), (size), get_fs())
 
 static inline int
+@@ -343,6 +353,7 @@
+ 	__su_ret;						\
+ })
+ 
++#endif // CONFIG_VTI
+ /* Generic code can't deal with the location-relative format that we use for compactness.  */
+ #define ARCH_HAS_SORT_EXTABLE
+ #define ARCH_HAS_SEARCH_EXTABLE
diff --git a/xen/arch/ia64/privop.c b/xen/arch/ia64/privop.c
index c4fbcca45c..a45631b226 100644
--- a/xen/arch/ia64/privop.c
+++ b/xen/arch/ia64/privop.c
@@ -205,8 +205,7 @@ IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst)
 		return(IA64_ILLOP_FAULT);
 	if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
 		return(IA64_ILLOP_FAULT);
-	if (!inst.inst) pte = vcpu_get_tmp(vcpu,0);
-	else pte = vcpu_get_gr(vcpu,inst.M41.r2);
+	pte = vcpu_get_gr(vcpu,inst.M41.r2);
 	return (vcpu_itc_d(vcpu,pte,itir,ifa));
 }
 
@@ -220,8 +219,7 @@ IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst)
 		return(IA64_ILLOP_FAULT);
 	if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT)
 		return(IA64_ILLOP_FAULT);
-	if (!inst.inst) pte = vcpu_get_tmp(vcpu,0);
-	else pte = vcpu_get_gr(vcpu,inst.M41.r2);
+	pte = vcpu_get_gr(vcpu,inst.M41.r2);
 	return (vcpu_itc_i(vcpu,pte,itir,ifa));
 }
 
@@ -800,12 +798,14 @@ ia64_hyperprivop(unsigned long iim, REGS *regs)
 		(void)vcpu_cover(v);
 		return 1;
 	    case HYPERPRIVOP_ITC_D:
-		inst.inst = 0;
-		(void)priv_itc_d(v,inst);
+		(void)vcpu_get_itir(v,&itir);
+		(void)vcpu_get_ifa(v,&ifa);
+		(void)vcpu_itc_d(v,regs->r8,itir,ifa);
 		return 1;
 	    case HYPERPRIVOP_ITC_I:
-		inst.inst = 0;
-		(void)priv_itc_i(v,inst);
+		(void)vcpu_get_itir(v,&itir);
+		(void)vcpu_get_ifa(v,&ifa);
+		(void)vcpu_itc_i(v,regs->r8,itir,ifa);
 		return 1;
 	    case HYPERPRIVOP_SSM_I:
 		(void)vcpu_set_psr_i(v);
@@ -832,10 +832,8 @@ ia64_hyperprivop(unsigned long iim, REGS *regs)
 		regs->r8 = val;
 		return 1;
 	    case HYPERPRIVOP_PTC_GA:
-		// FIXME: this doesn't seem to work yet, turned off
-		//(void)vcpu_ptc_ga(v,regs->r8,regs->r9);
-		//return 1;
-		break;
+		(void)vcpu_ptc_ga(v,regs->r8,(1L << ((regs->r9 & 0xfc) >> 2)));
+		return 1;
 	    case HYPERPRIVOP_ITR_D:
 		(void)vcpu_get_itir(v,&itir);
 		(void)vcpu_get_ifa(v,&ifa);
diff --git a/xen/arch/ia64/regionreg.c b/xen/arch/ia64/regionreg.c
index a40d0098e3..10b26d9cba 100644
--- a/xen/arch/ia64/regionreg.c
+++ b/xen/arch/ia64/regionreg.c
@@ -148,11 +148,10 @@ int allocate_rid_range(struct domain *d, unsigned long ridbits)
 	for (j = i; j < i + n_rid_blocks; ++j) ridblock_owner[j] = d;
 
 	// setup domain struct
-	d->rid_bits = ridbits;
-	d->starting_rid = i << IA64_MIN_IMPL_RID_BITS;
-	d->ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
+	d->arch.rid_bits = ridbits;
+	d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS;
+	d->arch.ending_rid = (i+n_rid_blocks) << IA64_MIN_IMPL_RID_BITS;
 printf("###allocating rid_range, domain %p: starting_rid=%lx, ending_rid=%lx\n",
-d,d->starting_rid, d->ending_rid);
+d,d->arch.starting_rid, d->arch.ending_rid);
 	
 	return 1;
 }
 
@@ -161,14 +160,14 @@ d,d->starting_rid, d->ending_rid);
 int deallocate_rid_range(struct domain *d)
 {
 	int i;
-	int rid_block_end = d->ending_rid >> IA64_MIN_IMPL_RID_BITS;
-	int rid_block_start = d->starting_rid >> IA64_MIN_IMPL_RID_BITS;
+	int rid_block_end = d->arch.ending_rid >> IA64_MIN_IMPL_RID_BITS;
+	int rid_block_start = d->arch.starting_rid >> IA64_MIN_IMPL_RID_BITS;
 
 	return 1;  // KLUDGE ALERT
 	//
 	// not all domains will have allocated RIDs (physical mode loaders for instance)
 	//
-	if (d->rid_bits == 0) return 1;
+	if (d->arch.rid_bits == 0) return 1;
 
 #ifdef DEBUG
 	for (i = rid_block_start; i < rid_block_end; ++i) {
@@ -179,9 +178,9 @@ int deallocate_rid_range(struct domain *d)
 	for (i = rid_block_start; i < rid_block_end; ++i)
 		ridblock_owner[i] = NULL;
 
-	d->rid_bits = 0;
-	d->starting_rid = 0;
-	d->ending_rid = 0;
+	d->arch.rid_bits = 0;
+	d->arch.starting_rid = 0;
+	d->arch.ending_rid = 0;
 	return 1;
 }
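
The HYPERPRIVOP_PTC_GA arm enabled in the privop.c hunk above decodes its purge size from r9, where the ptc.ga address-range operand keeps log2(size) in bits 2..7. Spelled out (editorial restatement of the expression in that hunk, not new code):

/* How (1L << ((regs->r9 & 0xfc) >> 2)) recovers the purge size. */
unsigned long log2_size = (regs->r9 & 0xfc) >> 2;  /* bits 2..7 of r9 */
unsigned long size = 1UL << log2_size;             /* bytes to purge  */
(void)vcpu_ptc_ga(v, regs->r8, size);              /* r8 = start va   */
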
@@ -193,9 +192,8 @@ int deallocate_rid_range(struct domain *d)
 // a region register; anytime it is "viewable" outside of this module,
 // it should be unmangled
 
-//This appears to work in Xen... turn it on later so no complications yet
-#define CONFIG_MANGLE_RIDS
-#ifdef CONFIG_MANGLE_RIDS
+// NOTE: this function is also implemented in assembly code in hyper_set_rr!!
+// Must ensure these two remain consistent!
 static inline unsigned long
 vmMangleRID(unsigned long RIDVal)
 {
@@ -214,11 +212,6 @@ vmMangleRID(unsigned long RIDVal)
 // since vmMangleRID is symmetric, use it for unmangling also
 #define vmUnmangleRID(x)	vmMangleRID(x)
 
-#else
-// no mangling/unmangling
-#define vmMangleRID(x)	(x)
-#define vmUnmangleRID(x)	(x)
-#endif
 
 static inline void
 set_rr_no_srlz(unsigned long rr, unsigned long rrval)
@@ -265,12 +258,12 @@ int set_one_rr(unsigned long rr, unsigned long val)
 
 	rrv.rrval = val;
 	newrrv.rrval = 0;
-	newrid = v->domain->starting_rid + rrv.rid;
+	newrid = v->arch.starting_rid + rrv.rid;
 
-	if (newrid > v->domain->ending_rid) {
+	if (newrid > v->arch.ending_rid) {
 		printk("can't set rr%d to %lx, starting_rid=%lx,"
 			"ending_rid=%lx, val=%lx\n", rreg, newrid,
-			v->domain->starting_rid,v->domain->ending_rid,val);
+			v->arch.starting_rid,v->arch.ending_rid,val);
 		return 0;
 	}
 
@@ -358,7 +351,7 @@ unsigned long physicalize_rid(struct vcpu *v, unsigned long rrval)
 	ia64_rr rrv;
 
 	rrv.rrval = rrval;
-	rrv.rid += v->domain->starting_rid;
+	rrv.rid += v->arch.starting_rid;
 	return rrv.rrval;
 }
 
@@ -368,7 +361,7 @@ virtualize_rid(struct vcpu *v, unsigned long rrval)
 	ia64_rr rrv;
 
 	rrv.rrval = rrval;
-	rrv.rid -= v->domain->starting_rid;
+	rrv.rid -= v->arch.starting_rid;
 	return rrv.rrval;
 }
 
diff --git a/xen/arch/ia64/vmmu.c b/xen/arch/ia64/vmmu.c
index 60126b23b2..078b1663aa 100644
--- a/xen/arch/ia64/vmmu.c
+++ b/xen/arch/ia64/vmmu.c
@@ -792,3 +792,55 @@ IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key)
     return IA64_NO_FAULT;
 }
 
+/*
+ * [FIXME] Is there any effective way to move this routine
+ * into vmx_uaccess.h? struct exec_domain is an incomplete type
+ * in that context...
+ *
+ * This is the interface to look up the virtual TLB and then
+ * return the corresponding machine address in the 2nd parameter.
+ * The 3rd parameter contains how many bytes are mapped by the
+ * matched vTLB entry, thus allowing the caller to copy more at once.
+ *
+ * If the lookup fails, -EFAULT is returned; otherwise 0. All upper
+ * domain access utilities rely on this routine to determine the real
+ * machine address.
+ *
+ * Yes, put_user and get_user do seem somewhat slow on top of this.
+ * However, these are the necessary steps for any vmx domain virtual
+ * address, since that is a different address space from the HV's.
+ * Later some short-circuit may be created for special cases.
+ */
+long
+__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len)
+{
+    unsigned long mpfn, gpfn, m, n = *len;
+    thash_cb_t *vtlb;
+    unsigned long end;   /* end of the area mapped by current entry */
+    thash_data_t *entry;
+    struct vcpu *v = current;
+    ia64_rr vrr;
+
+    vtlb = vmx_vcpu_get_vtlb(v);
+    vrr = vmx_vcpu_rr(v, va);
+    entry = vtlb_lookup_ex(vtlb, vrr.rid, va, DSIDE_TLB);
+    if (entry == NULL)
+        return -EFAULT;
+
+    gpfn = (entry->ppn >> (PAGE_SHIFT - 12));
+    gpfn = PAGEALIGN(gpfn, (entry->ps - PAGE_SHIFT));
+    gpfn = gpfn | POFFSET(va >> PAGE_SHIFT, (entry->ps - PAGE_SHIFT));
+
+    mpfn = __gpfn_to_mfn(v->domain, gpfn);
+    m = (mpfn << PAGE_SHIFT) | (va & (PAGE_SIZE - 1));
+    /* machine address may be not continuous */
+    end = PAGEALIGN(m, PAGE_SHIFT) + PAGE_SIZE;
+    /*end = PAGEALIGN(m, entry->ps) + PSIZE(entry->ps);*/
+    /* Current entry can't map all requested area */
+    if ((m + n) > end)
+        n = end - m;
+
+    *ma = m;
+    *len = n;
+    return 0;
+}
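
Because __domain_va_to_ma trims *len to what the current entry maps, callers simply loop until the request is satisfied; the copy helpers added to vmx_uaccess.h later in this changeset all follow this shape. Condensed here for reference:

/* Condensed from __copy_from_user in vmx_uaccess.h (this changeset):
 * translate, copy what this vTLB entry covers, advance, and repeat. */
static unsigned long copy_from_domain_va(void *to, void *from, unsigned long n)
{
    unsigned long ma, chunk = n;

    while (!__domain_va_to_ma((unsigned long)from, &ma, &chunk)) {
        memcpy(to, (void *)__va(ma), chunk);  /* identity-mapped access */
        n -= chunk;
        if (!n)
            break;         /* everything copied */
        from += chunk;     /* advance past the translated piece */
        to += chunk;
        chunk = n;         /* ask for the remainder next round */
    }
    return n;              /* bytes left uncopied; 0 on full success */
}
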
diff --git a/xen/arch/ia64/vmx_hypercall.c b/xen/arch/ia64/vmx_hypercall.c
new file mode 100644
index 0000000000..5e0d8917eb
--- /dev/null
+++ b/xen/arch/ia64/vmx_hypercall.c
@@ -0,0 +1,186 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_hypercall.c: handling hypercalls from domains
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <asm/vmx_vcpu.h>
+#include <public/xen.h>
+#include <public/event_channel.h>
+#include <asm/vmmu.h>
+#include <asm/tlb.h>
+#include <asm/regionreg.h>
+#include <asm/page.h>
+#include <xen/mm.h>
+
+
+void hyper_not_support(void)
+{
+    VCPU *vcpu=current;
+    vmx_vcpu_set_gr(vcpu, 8, -1, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_mmu_update(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,r33,r34,r35,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    vmx_vcpu_get_gr(vcpu,17,&r33);
+    vmx_vcpu_get_gr(vcpu,18,&r34);
+    vmx_vcpu_get_gr(vcpu,19,&r35);
+    ret=do_mmu_update((mmu_update_t*)r32,r33,r34,r35);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_dom_mem_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,r33,r34,r35,r36;
+    u64 ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    vmx_vcpu_get_gr(vcpu,17,&r33);
+    vmx_vcpu_get_gr(vcpu,18,&r34);
+    vmx_vcpu_get_gr(vcpu,19,&r35);
+    vmx_vcpu_get_gr(vcpu,20,&r36);
+    ret=do_dom_mem_op(r32,(u64 *)r33,r34,r35,r36);
+    printf("do_dom_mem return value: %lx\n", ret);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+
+void hyper_sched_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_sched_op(r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_dom0_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_dom0_op((dom0_op_t *)r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_event_channel_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_event_channel_op((evtchn_op_t *)r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+void hyper_xen_version(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    ret=do_xen_version((int )r32);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+static int do_lock_page(VCPU *vcpu, u64 va, u64 lock)
+{
+    ia64_rr rr;
+    thash_cb_t *hcb;
+    hcb = vmx_vcpu_get_vtlb(vcpu);
+    rr = vmx_vcpu_rr(vcpu, va);
+    return thash_lock_tc(hcb, va, 1U << rr.ps, rr.rid, DSIDE_TLB, lock);
+}
+
+/*
+ * Lock guest page in vTLB, so that it's not relinquished by the recycle
+ * session while HV is servicing that hypercall.
+ */
+void hyper_lock_page(void)
+{
+//TODO:
+    VCPU *vcpu=current;
+    u64 va,lock, ret;
+    vmx_vcpu_get_gr(vcpu,16,&va);
+    vmx_vcpu_get_gr(vcpu,17,&lock);
+    ret=do_lock_page(vcpu, va, lock);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+static int do_set_shared_page(VCPU *vcpu, u64 gpa)
+{
+    u64 shared_info, o_info;
+    if(vcpu->domain!=dom0)
+        return -EPERM;
+    shared_info = __gpa_to_mpa(vcpu->domain, gpa);
+    o_info = (u64)vcpu->domain->shared_info;
+    vcpu->domain->shared_info= (shared_info_t *)__va(shared_info);
+
+    /* Copy existing shared info into new page */
+    if (o_info) {
+        memcpy((void*)vcpu->domain->shared_info, (void*)o_info, PAGE_SIZE);
+        /* If the original page belongs to the xen heap, then relinquish
+         * it back to the xen heap. Otherwise, leave it to the domain
+         * itself to decide.
+         */
+        if (likely(IS_XEN_HEAP_FRAME(virt_to_page(o_info))))
+            free_xenheap_page(o_info);
+    }
+    return 0;
+}
+
+void hyper_set_shared_page(void)
+{
+    VCPU *vcpu=current;
+    u64 gpa,ret;
+    vmx_vcpu_get_gr(vcpu,16,&gpa);
+
+    ret=do_set_shared_page(vcpu, gpa);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+
+    vmx_vcpu_increment_iip(vcpu);
+}
+
+/*
+void hyper_grant_table_op(void)
+{
+    VCPU *vcpu=current;
+    u64 r32,r33,r34,ret;
+    vmx_vcpu_get_gr(vcpu,16,&r32);
+    vmx_vcpu_get_gr(vcpu,17,&r33);
+    vmx_vcpu_get_gr(vcpu,18,&r34);
+
+    ret=do_grant_table_op((unsigned int)r32, (void *)r33, (unsigned int)r34);
+    vmx_vcpu_set_gr(vcpu, 8, ret, 0);
+}
+*/
diff --git a/xen/arch/ia64/xenmisc.c b/xen/arch/ia64/xenmisc.c
index 6703b397ab..da9c034e96 100644
--- a/xen/arch/ia64/xenmisc.c
+++ b/xen/arch/ia64/xenmisc.c
@@ -62,8 +62,10 @@ void sync_lazy_execstate_cpu(unsigned int cpu) {}
 void sync_lazy_execstate_mask(cpumask_t mask) {}
 void sync_lazy_execstate_all(void) {}
 
+#ifdef CONFIG_VTI
 int grant_table_create(struct domain *d) { return 0; }
 void grant_table_destroy(struct domain *d) { return; }
+#endif
 
 struct pt_regs *guest_cpu_user_regs(void) { return ia64_task_regs(current); }
 
@@ -72,6 +74,35 @@ void raise_actimer_softirq(void)
 	raise_softirq(AC_TIMER_SOFTIRQ);
 }
 
+#ifndef CONFIG_VTI
+unsigned long
+__gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+{
+	if (d == dom0)
+		return(gpfn);
+	else {
+		unsigned long pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT);
+		if (!pte) {
+printk("__gpfn_to_mfn_foreign: bad gpfn. spinning...\n");
+while(1);
+			return 0;
+		}
+		return ((pte & _PFN_MASK) >> PAGE_SHIFT);
+	}
+}
+
+u32
+__mfn_to_gpfn(struct domain *d, unsigned long frame)
+{
+	// FIXME: is this right?
+if ((frame << PAGE_SHIFT) & _PAGE_PPN_MASK) {
+printk("__mfn_to_gpfn: bad frame. spinning...\n");
+while(1);
+}
+	return frame;
+}
+#endif
+
 unsigned long
 __hypercall_create_continuation(
     unsigned int op, unsigned int nr_args, ...)
 {
diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h
index 9df0d907aa..e64246d418 100644
--- a/xen/include/asm-ia64/config.h
+++ b/xen/include/asm-ia64/config.h
@@ -103,6 +103,9 @@ extern char _end[]; /* standard ELF symbol */
 #define get_cpu() 0
 #define put_cpu() do {} while(0)
 
+// needed for common/dom0_ops.c until hyperthreading is supported
+#define smp_num_siblings 1
+
 // from linux/include/linux/mm.h
 struct page;
 
diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h
index 27ff16e560..717a7fcf23 100644
--- a/xen/include/asm-ia64/domain.h
+++ b/xen/include/asm-ia64/domain.h
@@ -54,9 +54,6 @@ struct arch_domain {
     u64 entry;
 #endif
 };
-#define starting_rid arch.starting_rid
-#define ending_rid arch.ending_rid
-#define rid_bits arch.rid_bits
 #define xen_vastart arch.xen_vastart
 #define xen_vaend arch.xen_vaend
 #define shared_info_va arch.shared_info_va
@@ -83,6 +80,8 @@ struct arch_vcpu {
     int metaphysical_rr0;		// from arch_domain (so is pinned)
     int metaphysical_saved_rr0;	// from arch_domain (so is pinned)
     int breakimm;			// from arch_domain (so is pinned)
+    int starting_rid;		/* first RID assigned to domain */
+    int ending_rid;		/* one beyond highest RID assigned to domain */
     struct mm_struct *active_mm;
     struct thread_struct _thread;	// this must be last
 #ifdef CONFIG_VTI
diff --git a/xen/include/asm-ia64/event.h b/xen/include/asm-ia64/event.h
index e7b5cda8b1..b643684762 100644
--- a/xen/include/asm-ia64/event.h
+++ b/xen/include/asm-ia64/event.h
@@ -11,6 +11,7 @@
 
 static inline void evtchn_notify(struct vcpu *v)
 {
+    vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector);
 }
 
 #endif
diff --git a/xen/include/asm-ia64/mm.h b/xen/include/asm-ia64/mm.h
index c84a7c781a..1bb283873d 100644
--- a/xen/include/asm-ia64/mm.h
+++ b/xen/include/asm-ia64/mm.h
@@ -132,6 +132,7 @@ void add_to_domain_alloc_list(unsigned long ps, unsigned long pe);
 
 static inline void put_page(struct pfn_info *page)
 {
+#ifdef CONFIG_VTI	// doesn't work with non-VTI in grant tables yet
     u32 nx, x, y = page->count_info;
 
     do {
@@ -142,12 +143,14 @@ static inline void put_page(struct pfn_info *page)
 
     if (unlikely((nx & PGC_count_mask) == 0))
 	free_domheap_page(page);
+#endif
 }
 
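
get_page just below (compiled only under CONFIG_VTI for now) takes its reference with a single 64-bit cmpxchg over the fused (count_info, owner) word, so the ownership check and the increment commit atomically. The shape of that lock-free loop, as an editorial sketch (the layout of the fused word here is invented for illustration; the real code overlays pfn_info's count_info and pickled domain pointer):

/* Sketch of a cmpxchg acquire loop: snapshot, validate, increment,
 * and retry if another CPU changed the word in between. */
static int sketch_get_ref(volatile u64 *fused, u32 owner)
{
    u64 x, nx, y = *fused;

    do {
        x = y;
        nx = x + 1;                   /* count lives in the low bits */
        if ((u32)(x >> 32) != owner)  /* wrong or no owner: refuse */
            return 0;
        y = cmpxchg(fused, x, nx);    /* returns the value actually seen */
    } while (y != x);                 /* raced: retry with the fresh y */
    return 1;
}
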
 /* count_info and ownership are checked atomically. */
 static inline int get_page(struct pfn_info *page,
                            struct domain *domain)
 {
+#ifdef CONFIG_VTI
     u64 x, nx, y = *((u64*)&page->count_info);
     u32 _domain = pickle_domptr(domain);
 
@@ -164,13 +167,34 @@ static inline int get_page(struct pfn_info *page,
         }
     }
     while(unlikely(y = cmpxchg(&page->count_info, x, nx)) != x);
-
+#endif
     return 1;
 }
 
 /* No type info now */
-#define put_page_and_type(page)		put_page((page))
-#define get_page_and_type(page, domain, type)	get_page((page))
+#define put_page_type(page)
+#define get_page_type(page, type)	1
+static inline void put_page_and_type(struct pfn_info *page)
+{
+    put_page_type(page);
+    put_page(page);
+}
+
+
+static inline int get_page_and_type(struct pfn_info *page,
+                                    struct domain *domain,
+                                    u32 type)
+{
+    int rc = get_page(page, domain);
+
+    if ( likely(rc) && unlikely(!get_page_type(page, type)) )
+    {
+        put_page(page);
+        rc = 0;
+    }
+
+    return rc;
+}
 
 #define set_machinetophys(_mfn, _pfn) do { } while(0);
diff --git a/xen/include/asm-ia64/shadow.h b/xen/include/asm-ia64/shadow.h
new file mode 100644
index 0000000000..40a8c178f1
--- /dev/null
+++ b/xen/include/asm-ia64/shadow.h
@@ -0,0 +1 @@
+/* empty */
diff --git a/xen/include/asm-ia64/vmx_uaccess.h b/xen/include/asm-ia64/vmx_uaccess.h
new file mode 100644
index 0000000000..a6e27425f6
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_uaccess.h
@@ -0,0 +1,156 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_uaccess.h: Defines vmx specific macros to transfer memory areas
+ * across the domain/hypervisor boundary.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Note: For a vmx enabled environment, the poor man's policy is actually
+ * useless, since the HV resides in a completely different address space
+ * from the domain. So the only way to do the access is to search the
+ * vTLB first, and access the identity mapped address on a hit.
+ *
+ * Copyright (c) 2004, Intel Corporation.
+ *	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+
+#ifndef __ASM_IA64_VMX_UACCESS_H__
+#define __ASM_IA64_VMX_UACCESS_H__
+
+#include <xen/compiler.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+
+#include <asm/intrinsics.h>
+#include <asm/vmmu.h>
+
+/* Since HV never accesses domain space directly, most security checks
+ * can be dummies for now
+ */
+asm (".section \"__ex_table\", \"a\"\n\t.previous");
+
+/* For backward compatibility */
+#define __access_ok(addr, size, segment)	1
+#define access_ok(addr, size, segment)	__access_ok((addr), (size), (segment))
+
+/*
+ * These are the main single-value transfer routines. They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * Careful to not
+ * (a) re-use the arguments for side effects (sizeof/typeof is ok)
+ * (b) require any knowledge of processes at this stage
+ */
+#define put_user(x, ptr)	__put_user((x), (ptr))
+#define get_user(x, ptr)	__get_user((x), (ptr))
+
+#define __put_user(x, ptr)	__do_put_user((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr)	__do_get_user((x), (ptr), sizeof(*(ptr)))
+
+/* TODO: add specific unaligned access later. Assuming alignment at
+ * 1, 2, 4, or 8 bytes for now, it's impossible for an operand to span
+ * two vTLB entries
+ */
+extern long
+__domain_va_to_ma(unsigned long va, unsigned long* ma, unsigned long *len);
+
+#define __do_put_user(x, ptr, size)				\
+({								\
+    __typeof__ (x) __pu_x = (x);				\
+    __typeof__ (*(ptr)) __user *__pu_ptr = (ptr);		\
+    __typeof__ (size) __pu_size = (size);			\
+    unsigned long __pu_ma;					\
+    long __pu_err;						\
+								\
+    __pu_err = __domain_va_to_ma((unsigned long)__pu_ptr,	\
+				&__pu_ma, &__pu_size);		\
+    __pu_err ? (__pu_err = -EFAULT) :				\
+	(*((__typeof__ (*(ptr)) *)__va(__pu_ma)) = x);		\
+    __pu_err;							\
+})
+
+#define __do_get_user(x, ptr, size)				\
+({								\
+    __typeof__ (x) __gu_x = (x);				\
+    __typeof__ (*(ptr)) __user *__gu_ptr = (ptr);		\
+    __typeof__ (size) __gu_size = (size);			\
+    unsigned long __gu_ma;					\
+    long __gu_err;						\
+								\
+    __gu_err = __domain_va_to_ma((unsigned long)__gu_ptr,	\
+				&__gu_ma, &__gu_size);		\
+    __gu_err ? (__gu_err = -EFAULT) :				\
+	(x = *((__typeof__ (*(ptr)) *)__va(__gu_ma)));		\
+    __gu_err;							\
+})
+
+/* More complex copy from domain */
+#define copy_from_user(to, from, n)	__copy_from_user((to), (from), (n))
+#define copy_to_user(to, from, n)	__copy_to_user((to), (from), (n))
+#define clear_user(to, n)		__clear_user((to), (n))
+
+static inline unsigned long
+__copy_from_user(void *to, void *from, unsigned long n)
+{
+    unsigned long ma, i;
+
+    i = n;
+    while(!__domain_va_to_ma((unsigned long)from, &ma, &i)) {
+        memcpy(to, (void *)__va(ma), i);
+        n -= i;
+        if (!n)
+            break;
+        from += i;
+        to += i;
+        i = n;
+    }
+    return n;
+}
+
+static inline unsigned long
+__copy_to_user(void *to, void *from, unsigned long n)
+{
+    unsigned long ma, i;
+
+    i = n;
+    while(!__domain_va_to_ma((unsigned long)to, &ma, &i)) {
+        memcpy((void *)__va(ma), from, i);
+        n -= i;
+        if (!n)
+            break;
+        from += i;
+        to += i;
+        i = n;
+    }
+    return n;
+}
+
+static inline unsigned long
+__clear_user(void *to, unsigned long n)
+{
+    unsigned long ma, i;
+
+    i = n;
+    while(!__domain_va_to_ma((unsigned long)to, &ma, &i)) {
+        memset((void *)__va(ma), 0, i);
+        n -= i;
+        if (!n)
+            break;
+        to += i;
+        i = n;
+    }
+    return n;
+}
+
+#endif // __ASM_IA64_VMX_UACCESS_H__
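
With these definitions, hypervisor code addresses VTI guest memory in the familiar Linux style, except that a vTLB miss surfaces as -EFAULT rather than a hypervisor fault. A small hypothetical call site (not part of this changeset):

/* Hypothetical: write a status word back to a guest-supplied pointer.
 * put_user resolves the guest va through the vTLB and, on a hit,
 * stores through the identity-mapped machine address. */
static long report_status_to_guest(unsigned long *guest_ptr, unsigned long status)
{
    if (put_user(status, guest_ptr))
        return -EFAULT;   /* no vTLB entry covers the destination */
    return 0;
}
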
diff --git a/xen/include/asm-ia64/xensystem.h b/xen/include/asm-ia64/xensystem.h
index c7c9771fee..c1915646f2 100644
--- a/xen/include/asm-ia64/xensystem.h
+++ b/xen/include/asm-ia64/xensystem.h
@@ -61,5 +61,37 @@ extern struct task_struct *vmx_ia64_switch_to (void *next_task);
 } while (0)
 #endif // CONFIG_VTI
 
+#define __cmpxchg_user(ptr, new, old, _size)				\
+({									\
+	register long __gu_r8 asm ("r8");				\
+	register long __gu_r9 asm ("r9");				\
+	asm volatile ("mov ar.ccv=%0;;" :: "rO"(old));			\
+	asm volatile ("mov %2=r0;;\n"					\
+		"[1:]\tcmpxchg"_size".acq %0=[%3],%4,ar.ccv\n"		\
+		"\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n"		\
+		"[1:]"							\
+		: "=r"(old), "=r"(__gu_r9), "=r"(__gu_r8) :		\
+		"r"(ptr), "r"(new) : "memory");				\
+	(old) = __gu_r9;						\
+	__gu_r8;							\
+})
+
+
+// NOTE: Xen defines args as pointer,old,new whereas ia64 uses pointer,new,old
+// so reverse them here
+#define cmpxchg_user(_p,_o,_n)					\
+({								\
+	register long _rc;					\
+	ia64_mf();						\
+	switch ( sizeof(*(_p)) ) {				\
+	case 1: _rc = __cmpxchg_user(_p,_n,_o,"1"); break;	\
+	case 2: _rc = __cmpxchg_user(_p,_n,_o,"2"); break;	\
+	case 4: _rc = __cmpxchg_user(_p,_n,_o,"4"); break;	\
+	case 8: _rc = __cmpxchg_user(_p,_n,_o,"8"); break;	\
+	}							\
+	ia64_mf();						\
+	_rc;							\
+})
+
 #endif // __ASSEMBLY__
 #endif // _ASM_IA64_XENSYSTEM_H
diff --git a/xen/include/public/arch-ia64.h b/xen/include/public/arch-ia64.h
index cd259c2e04..e6cb8959e3 100644
--- a/xen/include/public/arch-ia64.h
+++ b/xen/include/public/arch-ia64.h
@@ -182,11 +182,11 @@ typedef struct {
 	unsigned long krs[8];	// kernel registers
 	unsigned long pkrs[8];	// protection key registers
 	unsigned long tmp[8];	// temp registers (e.g. for hyperprivops)
+	int evtchn_vector;
 //} PACKED arch_vcpu_info_t;
 } arch_vcpu_info_t;		// DON'T PACK
 
 typedef struct {
-	int evtchn_vector;
 	int domain_controller_evtchn;
 	unsigned int flags;
 //} PACKED arch_shared_info_t;
@@ -200,12 +200,22 @@ typedef struct vcpu_guest_context {
 #endif /* !__ASSEMBLY__ */
 
-#define	XEN_HYPER_RFI			1
-#define	XEN_HYPER_RSM_DT		2
-#define	XEN_HYPER_SSM_DT		3
-#define	XEN_HYPER_COVER			4
-#define	XEN_HYPER_ITC_D			5
-#define	XEN_HYPER_ITC_I			6
-#define	XEN_HYPER_SSM_I			7
+#define	XEN_HYPER_RFI			0x1
+#define	XEN_HYPER_RSM_DT		0x2
+#define	XEN_HYPER_SSM_DT		0x3
+#define	XEN_HYPER_COVER			0x4
+#define	XEN_HYPER_ITC_D			0x5
+#define	XEN_HYPER_ITC_I			0x6
+#define	XEN_HYPER_SSM_I			0x7
+#define	XEN_HYPER_GET_IVR		0x8
+#define	XEN_HYPER_GET_TPR		0x9
+#define	XEN_HYPER_SET_TPR		0xa
+#define	XEN_HYPER_EOI			0xb
+#define	XEN_HYPER_SET_ITM		0xc
+#define	XEN_HYPER_THASH			0xd
+#define	XEN_HYPER_PTC_GA		0xe
+#define	XEN_HYPER_ITR_D			0xf
+#define	XEN_HYPER_GET_RR		0x10
+#define	XEN_HYPER_SET_RR		0x11
 
 #endif /* __HYPERVISOR_IF_IA64_H__ */
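
These XEN_HYPER_* numbers reach the hypervisor as the immediate of a break instruction and are dispatched by ia64_hyperprivop (see the privop.c hunk earlier), with operands and results passed in r8/r9. A guest-side stub for SET_RR might look like the sketch below, assuming that break-immediate convention holds exactly as the dispatcher implies (hypothetical code, not part of this changeset):

/* Hypothetical guest stub: fast SET_RR hyperprivop.
 * r8 = virtual address selecting the region, r9 = new rr value. */
static inline void xen_set_rr(unsigned long va, unsigned long rrval)
{
    register unsigned long r8 asm("r8") = va;
    register unsigned long r9 asm("r9") = rrval;

    asm volatile ("break %2"
                  : "+r" (r8)
                  : "r" (r9), "i" (XEN_HYPER_SET_RR)
                  : "memory");
}
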