73 files changed, 15524 insertions(+), 608 deletions(-)
@@ -1091,12 +1091,14 @@ 421098b2PHgzf_Gg4R65YRNi_QzMKQ xen/arch/ia64/dom0_ops.c 421098b2O7jsNfzQXA1v3rbAc1QhpA xen/arch/ia64/dom_fw.c 421098b2ZlaBcyiuuPr3WpzaSDwg6Q xen/arch/ia64/domain.c +428b9f38j9LG7X1Ask6iE6pWTTT2xw xen/arch/ia64/entry.h 4239e98a_HX-FCIcXtVqY0BbrDqVug xen/arch/ia64/hypercall.c 421098b3LYAS8xJkQiGP7tiTlyBt0Q xen/arch/ia64/idle0_task.c 421098b3ys5GAr4z6_H1jD33oem82g xen/arch/ia64/irq.c 4272a8e4lavI6DrTvqaIhXeR5RuKBw xen/arch/ia64/ivt.S 421098b3Heh72KuoVlND3CH6c0B0aA xen/arch/ia64/lib/Makefile 421098b3O0MYMUsmYVFy84VV_1gFwQ xen/arch/ia64/mm_init.c +428b9f38Gp0KcPokG9Nq5v1rGk2FkA xen/arch/ia64/mmio.c 425ae516maKAsHBJVSzs19cdRgt3Nw xen/arch/ia64/patch/linux-2.6.11/cpumask.h 425ae516cGqvMzGtihTEsQXAXsuOhQ xen/arch/ia64/patch/linux-2.6.11/efi.c 425ae516Y1A4q4_Kfre3qnDj7lbHJg xen/arch/ia64/patch/linux-2.6.11/entry.S @@ -1171,6 +1173,7 @@ 41a26ebcJ30TFl1v2kR8rqpEBvOtVw xen/arch/ia64/regionreg.c 421098b69pUiIJrqu_w0JMUnZ2uc2A xen/arch/ia64/smp.c 421098b6_ToSGrf6Pk1Uwg5aMAIBxg xen/arch/ia64/smpboot.c +428b9f38JJDW35iDn5DlfXTu700rkQ xen/arch/ia64/tools/README.RunVT 421098b6AUdbxR3wyn1ATcmNuTao_Q xen/arch/ia64/tools/README.xenia64 42376c6dfyY0eq8MS2dK3BW2rFuEGg xen/arch/ia64/tools/README.xenia64linux 421098b6rQ2BQ103qu1n1HNofbS2Og xen/arch/ia64/tools/mkbuildtree @@ -1181,6 +1184,20 @@ 4252ace74lKUPFnO8PmF0Dtpk7Xkng xen/arch/ia64/tools/privify/privify_elf64.c 41a26ebc--sjlYZQxmIxyCx3jw70qA xen/arch/ia64/vcpu.c 421098b6M2WhsJ_ZMzFamAQcdc5gzw xen/arch/ia64/vhpt.c +428b9f38PglyXM-mJJfo19ycuQrEhw xen/arch/ia64/vlsapic.c +428b9f38EmpBsMHL3WbOZoieteBGdQ xen/arch/ia64/vmmu.c +428b9f38hU-X5aX0MIY3EU0Yw4PjcA xen/arch/ia64/vmx_entry.S +428b9f38S76bWI96g7uPLmE-uAcmdg xen/arch/ia64/vmx_init.c +428b9f385AMSyCRYBsckQClQY4ZgHA xen/arch/ia64/vmx_interrupt.c +428b9f380IOjPmj0N6eelH-WJjl1xg xen/arch/ia64/vmx_ivt.S +428b9f38Y7tp9uyNRdru3lPDXLjOCA xen/arch/ia64/vmx_minstate.h +428b9f38H9Pz0ZhRUT0-11A6jceE1w xen/arch/ia64/vmx_phy_mode.c +428b9f38pXU56r2OjoFW2Z8H1XY17w xen/arch/ia64/vmx_process.c +428b9f38GmZxD-GMDnQB3m7tOoukTA xen/arch/ia64/vmx_utility.c +428b9f38Pflg6Z4CtXeVGv7dyEOM4g xen/arch/ia64/vmx_vcpu.c +428b9f38Y7p7hXHWx9QF_oYUjdD__g xen/arch/ia64/vmx_virt.c +428b9f38EL7qKbbKkhBNr0KzMLS4Gg xen/arch/ia64/vmx_vsa.S +428b9f3805WejQ1E-OqAPANPAu8vPw xen/arch/ia64/vtlb.c 41a26ebc4jSBGQOuyNIPDST58mNbBw xen/arch/ia64/xenasm.S 4272adaeit9raZ9KnjO_wR4Ii9LJNQ xen/arch/ia64/xenirq.c 427162263zDUiPmTj-lP4eGyXs5eIg xen/arch/ia64/xenmem.c @@ -1307,13 +1324,17 @@ 421098b6Nn0I7hGB8Mkd1Cis0KMkhA xen/include/asm-ia64/domain.h 4241e879ry316Y_teC18DuK7mGKaQw xen/include/asm-ia64/domain_page.h 4241e880hAyo_dk0PPDYj3LsMIvf-Q xen/include/asm-ia64/flushtlb.h +428b9f38zjEw15Jew-3EoMb_9H0cSQ xen/include/asm-ia64/gcc_intrin.h 421098b6X3Fs2yht42TE2ufgKqt2Fw xen/include/asm-ia64/ia64_int.h +428b9f38-opAgufQ4qbh8V176C3-3w xen/include/asm-ia64/ia64regs.h 421098b7psFAn8kbeR-vcRCdc860Vw xen/include/asm-ia64/init.h 421098b7XC1A5PhA-lrU9pIO3sSSmA xen/include/asm-ia64/mm.h 421098b7c0Dx0ABuW_yHQdAqKhUoiQ xen/include/asm-ia64/mmu_context.h 421098b7C2dr3O7lgc_oeC9TEE9GKw xen/include/asm-ia64/multicall.h 421098b7dX_56NCV9zjftqm1yIqC8w xen/include/asm-ia64/offsets.h +428b9f38Z3b5V7I8eOE0i3lN0DNg3Q xen/include/asm-ia64/pal.h 421098b72bPUyviWloEAIB85dGCm2Q xen/include/asm-ia64/privop.h +428b9f38_TmnCXJN3CN6wKMdpHy4Yg xen/include/asm-ia64/ptrace.h 421098b7Z6OwjZnrTZkh34DoDfcjrA xen/include/asm-ia64/regionreg.h 421098b707cY5YluUcWK5Pc-71ETVw xen/include/asm-ia64/regs.h 4214e2f3fbO_n9Z1kIcBR83d7W4OJw xen/include/asm-ia64/serial.h @@ -1321,6 
+1342,18 @@ 421098b7FK3xgShpnH0I0Ou3O4fJ2Q xen/include/asm-ia64/tlb.h 421098b78IGdFOGUlPmpS7h_QBmoFg xen/include/asm-ia64/vcpu.h 421098b7PiAencgmBFGAqALU-V5rqQ xen/include/asm-ia64/vhpt.h +428b9f38_b0DgWwkJcBEsTdEmO9WNQ xen/include/asm-ia64/virt_event.h +428b9f38B0KbUj3o2FBQJ5tmIIMDHg xen/include/asm-ia64/vmmu.h +428b9f38ewjoJ-RL-2lsXFT04H2aag xen/include/asm-ia64/vmx.h +428b9f38coGlYeXx-7hpvfCTAPOd7w xen/include/asm-ia64/vmx_mm_def.h +428b9f387tov0OtOEeF8fVWSR2v5Pg xen/include/asm-ia64/vmx_pal.h +428b9f38is0zTsIm96_BKo4MLw0SzQ xen/include/asm-ia64/vmx_pal_vsa.h +428b9f38iDqbugHUheJrcTCD7zlb4g xen/include/asm-ia64/vmx_phy_mode.h +428b9f38grd_B0AGB1yp0Gi2befHaQ xen/include/asm-ia64/vmx_platform.h +428b9f38lm0ntDBusHggeQXkx1-1HQ xen/include/asm-ia64/vmx_ptrace.h +428b9f38XgwHchZEpOzRtWfz0agFNQ xen/include/asm-ia64/vmx_vcpu.h +428b9f38tDTTJbkoONcAB9ODP8CiVg xen/include/asm-ia64/vmx_vpd.h +428b9f38_o0U5uJqmxZf_bqi6_PqVw xen/include/asm-ia64/vtm.h 421098b7LfwIHQ2lRYWhO4ruEXqIuQ xen/include/asm-ia64/xenserial.h 40715b2dWe0tDhx9LkLXzTQkvD49RA xen/include/asm-x86/acpi.h 3ddb79c3l4IiQtf6MS2jIzcd-hJS8g xen/include/asm-x86/apic.h diff --git a/xen/arch/ia64/Makefile b/xen/arch/ia64/Makefile index 36d7b845b3..6c0e371fc4 100644 --- a/xen/arch/ia64/Makefile +++ b/xen/arch/ia64/Makefile @@ -10,6 +10,12 @@ OBJS = xensetup.o setup.o time.o irq.o ia64_ksyms.o process.o smp.o \ extable.o linuxextable.o xenirq.o xentime.o \ regionreg.o entry.o unaligned.o privop.o vcpu.o \ irq_ia64.o irq_lsapic.o vhpt.o xenasm.o dom_fw.o + +ifeq ($(CONFIG_VTI),y) +OBJS += vmx_init.o vmx_virt.o vmx_vcpu.o vmx_process.o vmx_vsa.o vmx_ivt.o \ + vmx_phy_mode.o vmx_utility.o vmx_interrupt.o vmx_entry.o vmmu.o \ + vtlb.o mmio.o vlsapic.o +endif # perfmon.o # unwind.o needed for kernel unwinding (rare) @@ -38,6 +44,7 @@ ia64lib.o: clean: rm -f *.o *~ core xen.lds.s $(BASEDIR)/include/asm-ia64/.offsets.h.stamp rm -f lib/*.o + $(MAKE) -C lib clean # setup.o contains bits of compile.h so it must be blown away delete-unfresh-files: diff --git a/xen/arch/ia64/Rules.mk b/xen/arch/ia64/Rules.mk index d4ac827b9a..9e33585df1 100644 --- a/xen/arch/ia64/Rules.mk +++ b/xen/arch/ia64/Rules.mk @@ -1,6 +1,7 @@ ######################################## # ia64-specific definitions +CONFIG_VTI ?= y ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux- endif @@ -17,4 +18,7 @@ CFLAGS += -Wno-pointer-arith -Wredundant-decls CFLAGS += -DIA64 -DXEN -DLINUX_2_6 CFLAGS += -ffixed-r13 -mfixed-range=f12-f15,f32-f127 CFLAGS += -w -g +ifeq ($(CONFIG_VTI),y) +CFLAGS += -DCONFIG_VTI +endif LDFLAGS := -g diff --git a/xen/arch/ia64/acpi.c b/xen/arch/ia64/acpi.c index c49371537a..39ae5fe986 100644 --- a/xen/arch/ia64/acpi.c +++ b/xen/arch/ia64/acpi.c @@ -1,9 +1,16 @@ /* * acpi.c - Architecture-Specific Low-Level ACPI Support * - * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> - * Copyright (C) 2001 Jun Nakajima <jun.nakajima@intel.com> - * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000,2001 J.I. 
Lee <jung-ik.lee@intel.com> + * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> + * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com> + * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com> + * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * @@ -19,667 +26,651 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -#include <xen/config.h> -#include <xen/kernel.h> -#include <xen/init.h> -#include <xen/types.h> -/*#include <xen/stddef.h>*/ -#include <xen/slab.h> -#include <xen/pci.h> -/*#include <xen/bootmem.h>*/ -#include <xen/irq.h> -#include <xen/acpi.h> -//#include <asm/mpspec.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/irq.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/mmzone.h> #include <asm/io.h> -//#include <asm/apic.h> -//#include <asm/apicdef.h> +//#include <asm/iosapic.h> +#include <asm/machvec.h> #include <asm/page.h> -/*#include <asm/pgtable.h>*/ -#include <asm/pgalloc.h> -//#include <asm/io_apic.h> -#include <asm/acpi.h> -/*#include <asm/save_state.h>*/ -//#include <asm/smpboot.h> +#include <asm/system.h> +#include <asm/numa.h> +#include <asm/sal.h> +//#include <asm/cyclone.h> +#define BAD_MADT_ENTRY(entry, end) ( \ + (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ + ((acpi_table_entry_header *)entry)->length != sizeof(*entry)) #define PREFIX "ACPI: " -int acpi_lapic = 0; -int acpi_ioapic = 0; +void (*pm_idle) (void); +EXPORT_SYMBOL(pm_idle); +void (*pm_power_off) (void); -/* -------------------------------------------------------------------------- - Boot-time Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI_BOOT -//int acpi_noirq __initdata = 0; /* skip ACPI IRQ initialization */ -int acpi_ht __initdata = 1; /* enable HT */ +unsigned char acpi_kbd_controller_present = 1; +unsigned char acpi_legacy_devices; -enum acpi_irq_model_id acpi_irq_model; - - -/* - * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, - * to map the target physical address. The problem is that set_fixmap() - * provides a single page, and it is possible that the page is not - * sufficient. - * By using this area, we can map up to MAX_IO_APICS pages temporarily, - * i.e. until the next __va_range() call. - * - * Important Safety Note: The fixed I/O APIC page numbers are *subtracted* - * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and - * count idx down while incrementing the phys address. 
- */ -char *__acpi_map_table(unsigned long phys, unsigned long size) +const char * +acpi_get_sysname (void) { - unsigned long base, offset, mapped_size; - int idx; +#ifdef CONFIG_IA64_GENERIC + unsigned long rsdp_phys; + struct acpi20_table_rsdp *rsdp; + struct acpi_table_xsdt *xsdt; + struct acpi_table_header *hdr; + + rsdp_phys = acpi_find_rsdp(); + if (!rsdp_phys) { + printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n"); + return "dig"; + } - if (phys + size < 8*1024*1024) - return __va(phys); + rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); + if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { + printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); + return "dig"; + } - offset = phys & (PAGE_SIZE - 1); - mapped_size = PAGE_SIZE - offset; -#ifndef XEN -// where is FIX_ACPI_*? hack for now, FIXME later - set_fixmap(FIX_ACPI_END, phys); - base = fix_to_virt(FIX_ACPI_END); + xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); + hdr = &xsdt->header; + if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { + printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); + return "dig"; + } - /* - * Most cases can be covered by the below. - */ - idx = FIX_ACPI_END; - while (mapped_size < size) { - if (--idx < FIX_ACPI_BEGIN) - return 0; /* cannot handle this */ - phys += PAGE_SIZE; - set_fixmap(idx, phys); - mapped_size += PAGE_SIZE; + if (!strcmp(hdr->oem_id, "HP")) { + return "hpzx1"; + } + else if (!strcmp(hdr->oem_id, "SGI")) { + return "sn2"; } -#endif - return ((unsigned char *) base + offset); + return "dig"; +#else +# if defined (CONFIG_IA64_HP_SIM) + return "hpsim"; +# elif defined (CONFIG_IA64_HP_ZX1) + return "hpzx1"; +# elif defined (CONFIG_IA64_SGI_SN2) + return "sn2"; +# elif defined (CONFIG_IA64_DIG) + return "dig"; +# else +# error Unknown platform. Fix acpi.c. +# endif +#endif } +#ifdef CONFIG_ACPI_BOOT -#ifdef CONFIG_X86_LOCAL_APIC +#define ACPI_MAX_PLATFORM_INTERRUPTS 256 -static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +#if 0 +/* Array to record platform interrupt vectors for generic interrupt routing. */ +int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { + [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 +}; +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; -static int __init -acpi_parse_madt ( - unsigned long phys_addr, - unsigned long size) +/* + * Interrupt routing API for device drivers. Provides interrupt vector for + * a generic platform event. Currently only CPEI is implemented. 
+ */ +int +acpi_request_vector (u32 int_type) { - struct acpi_table_madt *madt = NULL; + int vector = -1; + + if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { + /* corrected platform error interrupt */ + vector = platform_intr_list[int_type]; + } else + printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n"); + return vector; +} +#endif +char * +__acpi_map_table (unsigned long phys_addr, unsigned long size) +{ + return __va(phys_addr); +} - if (!phys_addr || !size) - return -EINVAL; +/* -------------------------------------------------------------------------- + Boot-time Table Parsing + -------------------------------------------------------------------------- */ - madt = (struct acpi_table_madt *) __acpi_map_table(phys_addr, size); - if (!madt) { - printk(KERN_WARNING PREFIX "Unable to map MADT\n"); - return -ENODEV; - } +static int total_cpus __initdata; +static int available_cpus __initdata; +struct acpi_table_madt * acpi_madt __initdata; +static u8 has_8259; - if (madt->lapic_address) - acpi_lapic_addr = (u64) madt->lapic_address; +#if 0 +static int __init +acpi_parse_lapic_addr_ovr ( + acpi_table_entry_header *header, const unsigned long end) +{ + struct acpi_table_lapic_addr_ovr *lapic; + + lapic = (struct acpi_table_lapic_addr_ovr *) header; - printk(KERN_INFO PREFIX "Local APIC address 0x%08x\n", - madt->lapic_address); + if (BAD_MADT_ENTRY(lapic, end)) + return -EINVAL; - detect_clustered_apic(madt->header.oem_id, madt->header.oem_table_id); + acpi_table_print_madt_entry(header); + if (lapic->address) { + iounmap((void *) ipi_base_addr); + ipi_base_addr = (unsigned long) ioremap(lapic->address, 0); + } return 0; } static int __init -acpi_parse_lapic ( - acpi_table_entry_header *header) +acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end) { - struct acpi_table_lapic *processor = NULL; + struct acpi_table_lsapic *lsapic; - processor = (struct acpi_table_lapic*) header; - if (!processor) + lsapic = (struct acpi_table_lsapic *) header; + + if (BAD_MADT_ENTRY(lsapic, end)) return -EINVAL; acpi_table_print_madt_entry(header); - mp_register_lapic ( - processor->id, /* APIC ID */ - processor->flags.enabled); /* Enabled? 
*/ + printk(KERN_INFO "CPU %d (0x%04x)", total_cpus, (lsapic->id << 8) | lsapic->eid); + + if (!lsapic->flags.enabled) + printk(" disabled"); + else { + printk(" enabled"); +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid; + if (hard_smp_processor_id() + == (unsigned int) smp_boot_data.cpu_phys_id[available_cpus]) + printk(" (BSP)"); +#endif + ++available_cpus; + } + + printk("\n"); + total_cpus++; return 0; } static int __init -acpi_parse_lapic_addr_ovr ( - acpi_table_entry_header *header) +acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end) { - struct acpi_table_lapic_addr_ovr *lapic_addr_ovr = NULL; + struct acpi_table_lapic_nmi *lacpi_nmi; - lapic_addr_ovr = (struct acpi_table_lapic_addr_ovr*) header; - if (!lapic_addr_ovr) + lacpi_nmi = (struct acpi_table_lapic_nmi*) header; + + if (BAD_MADT_ENTRY(lacpi_nmi, end)) return -EINVAL; - acpi_lapic_addr = lapic_addr_ovr->address; + acpi_table_print_madt_entry(header); + /* TBD: Support lapic_nmi entries */ return 0; } + static int __init -acpi_parse_lapic_nmi ( - acpi_table_entry_header *header) +acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end) { - struct acpi_table_lapic_nmi *lapic_nmi = NULL; + struct acpi_table_iosapic *iosapic; + + iosapic = (struct acpi_table_iosapic *) header; - lapic_nmi = (struct acpi_table_lapic_nmi*) header; - if (!lapic_nmi) + if (BAD_MADT_ENTRY(iosapic, end)) return -EINVAL; acpi_table_print_madt_entry(header); - if (lapic_nmi->lint != 1) - printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); + iosapic_init(iosapic->address, iosapic->global_irq_base); return 0; } -#endif /*CONFIG_X86_LOCAL_APIC*/ - -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) static int __init -acpi_parse_ioapic ( - acpi_table_entry_header *header) +acpi_parse_plat_int_src ( + acpi_table_entry_header *header, const unsigned long end) { - struct acpi_table_ioapic *ioapic = NULL; + struct acpi_table_plat_int_src *plintsrc; + int vector; + + plintsrc = (struct acpi_table_plat_int_src *) header; - ioapic = (struct acpi_table_ioapic*) header; - if (!ioapic) + if (BAD_MADT_ENTRY(plintsrc, end)) return -EINVAL; - + acpi_table_print_madt_entry(header); - mp_register_ioapic ( - ioapic->id, - ioapic->address, - ioapic->global_irq_base); - + /* + * Get vector assignment for this interrupt, set attributes, + * and program the IOSAPIC routing table. + */ + vector = iosapic_register_platform_intr(plintsrc->type, + plintsrc->global_irq, + plintsrc->iosapic_vector, + plintsrc->eid, + plintsrc->id, + (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); + + platform_intr_list[plintsrc->type] = vector; return 0; } static int __init acpi_parse_int_src_ovr ( - acpi_table_entry_header *header) + acpi_table_entry_header *header, const unsigned long end) { - struct acpi_table_int_src_ovr *intsrc = NULL; + struct acpi_table_int_src_ovr *p; - intsrc = (struct acpi_table_int_src_ovr*) header; - if (!intsrc) + p = (struct acpi_table_int_src_ovr *) header; + + if (BAD_MADT_ENTRY(p, end)) return -EINVAL; acpi_table_print_madt_entry(header); - mp_override_legacy_irq ( - intsrc->bus_irq, - intsrc->flags.polarity, - intsrc->flags.trigger, - intsrc->global_irq); - + iosapic_override_isa_irq(p->bus_irq, p->global_irq, + (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (p->flags.trigger == 1) ? 
IOSAPIC_EDGE : IOSAPIC_LEVEL); return 0; } static int __init -acpi_parse_nmi_src ( - acpi_table_entry_header *header) +acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end) { - struct acpi_table_nmi_src *nmi_src = NULL; + struct acpi_table_nmi_src *nmi_src; nmi_src = (struct acpi_table_nmi_src*) header; - if (!nmi_src) + + if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; acpi_table_print_madt_entry(header); - /* TBD: Support nimsrc entries? */ - + /* TBD: Support nimsrc entries */ return 0; } - -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ - - -static unsigned long __init -acpi_scan_rsdp ( - unsigned long start, - unsigned long length) +/* Hook from generic ACPI tables.c */ +void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - unsigned long offset = 0; - unsigned long sig_len = sizeof("RSD PTR ") - 1; + if (!strncmp(oem_id, "IBM", 3) && + (!strncmp(oem_table_id, "SERMOW", 6))){ - /* - * Scan all 16-byte boundaries of the physical memory region for the - * RSDP signature. - */ - for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *) (start + offset), "RSD PTR ", sig_len)) - continue; - return (start + offset); - } + /* Unfortunately ITC_DRIFT is not yet part of the + * official SAL spec, so the ITC_DRIFT bit is not + * set by the BIOS on this hardware. + */ + sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; - return 0; + /* Start cyclone clock */ + cyclone_setup(0); + } } - -unsigned long __init -acpi_find_rsdp (void) +static int __init +acpi_parse_madt (unsigned long phys_addr, unsigned long size) { - unsigned long rsdp_phys = 0; + if (!phys_addr || !size) + return -EINVAL; - /* - * Scan memory looking for the RSDP signature. First search EBDA (low - * memory) paragraphs and then search upper memory (E0000-FFFFF). - */ - rsdp_phys = acpi_scan_rsdp (0, 0x400); - if (!rsdp_phys) - rsdp_phys = acpi_scan_rsdp (0xE0000, 0xFFFFF); + acpi_madt = (struct acpi_table_madt *) __va(phys_addr); - return rsdp_phys; -} + /* remember the value for reference after free_initmem() */ +#ifdef CONFIG_ITANIUM + has_8259 = 1; /* Firmware on old Itanium systems is broken */ +#else + has_8259 = acpi_madt->flags.pcat_compat; +#endif + iosapic_system_init(has_8259); + /* Get base address of IPI Message Block */ -/* - * acpi_boot_init() - * called from setup_arch(), always. - * 1. maps ACPI tables for later use - * 2. enumerates lapics - * 3. enumerates io-apics - * - * side effects: - * acpi_lapic = 1 if LAPIC found - * acpi_ioapic = 1 if IOAPIC found - * if (acpi_lapic && acpi_ioapic) smp_found_config = 1; - * if acpi_blacklisted() acpi_disabled = 1; - * acpi_irq_model=... - * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure - */ -int __init -acpi_boot_init (void) -{ - int result = 0; + if (acpi_madt->lapic_address) + ipi_base_addr = (unsigned long) ioremap(acpi_madt->lapic_address, 0); - if (acpi_disabled && !acpi_ht) - return(1); + printk(KERN_INFO PREFIX "Local APIC address 0x%lx\n", ipi_base_addr); - /* - * The default interrupt routing model is PIC (8259). This gets - * overriden if IOAPICs are enumerated (below). - */ - acpi_irq_model = ACPI_IRQ_MODEL_PIC; + acpi_madt_oem_check(acpi_madt->header.oem_id, + acpi_madt->header.oem_table_id); - /* - * Initialize the ACPI boot-time table parser.
- */ - result = acpi_table_init(); - if (result) { -#ifndef XEN -// hack for now, FIXME later - acpi_disabled = 1; + return 0; +} #endif - return result; - } - result = acpi_blacklisted(); - if (result) { - printk(KERN_NOTICE PREFIX "BIOS listed in blacklist, disabling ACPI support\n"); -#ifndef XEN -// hack for now, FIXME later - acpi_disabled = 1; -#endif - return result; - } +#ifdef CONFIG_ACPI_NUMA -#ifdef CONFIG_X86_LOCAL_APIC +#undef SLIT_DEBUG - /* - * MADT - * ---- - * Parse the Multiple APIC Description Table (MADT), if exists. - * Note that this table provides platform SMP configuration - * information -- the successor to MPS tables. - */ +#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) - result = acpi_table_parse(ACPI_APIC, acpi_parse_madt); - if (!result) { - return 0; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing MADT\n"); - return result; - } - else if (result > 1) - printk(KERN_WARNING PREFIX "Multiple MADT tables exist\n"); - - /* - * Local APIC - * ---------- - * Note that the LAPIC address is obtained from the MADT (32-bit value) - * and (optionally) overriden by a LAPIC_ADDR_OVR entry (64-bit value). - */ +static int __initdata srat_num_cpus; /* number of cpus */ +static u32 __initdata pxm_flag[PXM_FLAG_LEN]; +#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) +#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) +/* maps to convert between proximity domain and logical node ID */ +int __initdata pxm_to_nid_map[MAX_PXM_DOMAINS]; +int __initdata nid_to_pxm_map[MAX_NUMNODES]; +static struct acpi_table_slit __initdata *slit_table; - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); - return result; - } - - mp_register_lapic_address(acpi_lapic_addr); - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC, acpi_parse_lapic); - if (!result) { - printk(KERN_ERR PREFIX "No LAPIC entries present\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return -ENODEV; - } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } - - result = acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + */ +void __init +acpi_numa_slit_init (struct acpi_table_slit *slit) +{ + u32 len; + + len = sizeof(struct acpi_table_header) + 8 + + slit->localities * slit->localities; + if (slit->header.length != len) { + printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", + len, slit->header.length); + memset(numa_slit, 10, sizeof(numa_slit)); + return; } + slit_table = slit; +} - acpi_lapic = 1; +void __init +acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa) +{ + /* record this node in proximity bitmap */ + pxm_bit_set(pa->proximity_domain); -#endif /*CONFIG_X86_LOCAL_APIC*/ + node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); + /* nid should be overridden as logical node id later */ + node_cpuid[srat_num_cpus].nid = pa->proximity_domain; + srat_num_cpus++; +} -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +void __init +acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity 
*ma) +{ + unsigned long paddr, size; + u8 pxm; + struct node_memblk_s *p, *q, *pend; - /* - * I/O APIC - * -------- - */ + pxm = ma->proximity_domain; - /* - * ACPI interpreter is required to complete interrupt setup, - * so if it is off, don't enumerate the io-apics with ACPI. - * If MPS is present, it will handle them, - * otherwise the system will stay in PIC mode - */ - if (acpi_disabled || acpi_noirq) { - return 1; - } + /* fill node memory chunk structure */ + paddr = ma->base_addr_hi; + paddr = (paddr << 32) | ma->base_addr_lo; + size = ma->length_hi; + size = (size << 32) | ma->length_lo; - /* - * if "noapic" boot option, don't look for IO-APICs - */ - if (ioapic_setup_disabled()) { - printk(KERN_INFO PREFIX "Skipping IOAPIC probe " - "due to 'noapic' option.\n"); - return 1; - } + /* Ignore disabled entries */ + if (!ma->flags.enabled) + return; + /* record this node in proximity bitmap */ + pxm_bit_set(pxm); - result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic); - if (!result) { - printk(KERN_ERR PREFIX "No IOAPIC entries present\n"); - return -ENODEV; + /* Insertion sort based on base address */ + pend = &node_memblk[num_node_memblks]; + for (p = &node_memblk[0]; p < pend; p++) { + if (paddr < p->start_paddr) + break; } - else if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing IOAPIC entry\n"); - return result; + if (p < pend) { + for (q = pend - 1; q >= p; q--) + *(q + 1) = *q; } + p->start_paddr = paddr; + p->size = size; + p->nid = pxm; + num_node_memblks++; +} - /* Build a default routing table for legacy (ISA) interrupts. */ - mp_config_acpi_legacy_irqs(); - - result = acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; - } +void __init +acpi_numa_arch_fixup (void) +{ + int i, j, node_from, node_to; - result = acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src); - if (result < 0) { - printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); - /* TBD: Cleanup to allow fallback to MPS */ - return result; + /* If there's no SRAT, fix the phys_id */ + if (srat_num_cpus == 0) { + node_cpuid[0].phys_id = hard_smp_processor_id(); + return; } - acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; + /* calculate total number of nodes in system from PXM bitmap */ + numnodes = 0; /* init total nodes in system */ - acpi_irq_balance_set(NULL); + memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); + memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); + for (i = 0; i < MAX_PXM_DOMAINS; i++) { + if (pxm_bit_test(i)) { + pxm_to_nid_map[i] = numnodes; + node_set_online(numnodes); + nid_to_pxm_map[numnodes++] = i; + } + } - acpi_ioapic = 1; + /* set logical node id in memory chunk structure */ + for (i = 0; i < num_node_memblks; i++) + node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; - if (acpi_lapic && acpi_ioapic) - smp_found_config = 1; + /* assign memory bank numbers for each chunk on each node */ + for (i = 0; i < numnodes; i++) { + int bank; -#endif /*CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER*/ + bank = 0; + for (j = 0; j < num_node_memblks; j++) + if (node_memblk[j].nid == i) + node_memblk[j].bank = bank++; + } - return 0; -} + /* set logical node id in cpu structure */ + for (i = 0; i < srat_num_cpus; i++) + node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; -#endif /*CONFIG_ACPI_BOOT*/ + printk(KERN_INFO "Number of logical nodes in system = %d\n", numnodes); + 
printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); -#ifdef CONFIG_ACPI_BUS -/* - * "acpi_pic_sci=level" (current default) - * programs the PIC-mode SCI to Level Trigger. - * (NO-OP if the BIOS set Level Trigger already) - * - * If a PIC-mode SCI is not recogznied or gives spurious IRQ7's - * it may require Edge Trigger -- use "acpi_pic_sci=edge" - * (NO-OP if the BIOS set Edge Trigger already) - * - * Port 0x4d0-4d1 are ECLR1 and ECLR2, the Edge/Level Control Registers - * for the 8259 PIC. bit[n] = 1 means irq[n] is Level, otherwise Edge. - * ECLR1 is IRQ's 0-7 (IRQ 0, 1, 2 must be 0) - * ECLR2 is IRQ's 8-15 (IRQ 8, 13 must be 0) - */ - -static __initdata int acpi_pic_sci_trigger; /* 0: level, 1: edge */ - -void __init -acpi_pic_sci_set_trigger(unsigned int irq) -{ - unsigned char mask = 1 << (irq & 7); - unsigned int port = 0x4d0 + (irq >> 3); - unsigned char val = inb(port); - - - printk(PREFIX "IRQ%d SCI:", irq); - if (!(val & mask)) { - printk(" Edge"); - - if (!acpi_pic_sci_trigger) { - printk(" set to Level"); - outb(val | mask, port); + if (!slit_table) return; + memset(numa_slit, -1, sizeof(numa_slit)); + for (i=0; i<slit_table->localities; i++) { + if (!pxm_bit_test(i)) + continue; + node_from = pxm_to_nid_map[i]; + for (j=0; j<slit_table->localities; j++) { + if (!pxm_bit_test(j)) + continue; + node_to = pxm_to_nid_map[j]; + node_distance(node_from, node_to) = + slit_table->entry[i*slit_table->localities + j]; } - } else { - printk(" Level"); + } - if (acpi_pic_sci_trigger) { - printk(" set to Edge"); - outb(val | mask, port); - } +#ifdef SLIT_DEBUG + printk("ACPI 2.0 SLIT locality table:\n"); + for (i = 0; i < numnodes; i++) { + for (j = 0; j < numnodes; j++) + printk("%03d ", node_distance(i,j)); + printk("\n"); } - printk(" Trigger.\n"); +#endif } +#endif /* CONFIG_ACPI_NUMA */ -int __init -acpi_pic_sci_setup(char *str) +#if 0 +unsigned int +acpi_register_gsi (u32 gsi, int polarity, int trigger) { - while (str && *str) { - if (strncmp(str, "level", 5) == 0) - acpi_pic_sci_trigger = 0; /* force level trigger */ - if (strncmp(str, "edge", 4) == 0) - acpi_pic_sci_trigger = 1; /* force edge trigger */ - str = strchr(str, ','); - if (str) - str += strspn(str, ", \t"); - } - return 1; + return acpi_register_irq(gsi, polarity, trigger); } +EXPORT_SYMBOL(acpi_register_gsi); +static int __init +acpi_parse_fadt (unsigned long phys_addr, unsigned long size) +{ + struct acpi_table_header *fadt_header; + struct fadt_descriptor_rev2 *fadt; -__setup("acpi_pic_sci=", acpi_pic_sci_setup); - -#endif /* CONFIG_ACPI_BUS */ - + if (!phys_addr || !size) + return -EINVAL; + fadt_header = (struct acpi_table_header *) __va(phys_addr); + if (fadt_header->revision != 3) + return -ENODEV; /* Only deal with ACPI 2.0 FADT */ -/* -------------------------------------------------------------------------- - Low-Level Sleep Support - -------------------------------------------------------------------------- */ + fadt = (struct fadt_descriptor_rev2 *) fadt_header; -#ifdef CONFIG_ACPI_SLEEP + if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) + acpi_kbd_controller_present = 0; -#define DEBUG + if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) + acpi_legacy_devices = 1; -#ifdef DEBUG -#include <xen/serial.h> + acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE); + return 0; +} #endif -/* address in low memory of the wakeup routine. 
*/ -unsigned long acpi_wakeup_address = 0; +unsigned long __init +acpi_find_rsdp (void) +{ + unsigned long rsdp_phys = 0; + + if (efi.acpi20) + rsdp_phys = __pa(efi.acpi20); + else if (efi.acpi) + printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); + return rsdp_phys; +} -/* new page directory that we will be using */ -static pmd_t *pmd; +#if 0 +int __init +acpi_boot_init (void) +{ -/* saved page directory */ -static pmd_t saved_pmd; + /* + * MADT + * ---- + * Parse the Multiple APIC Description Table (MADT), if exists. + * Note that this table provides platform SMP configuration + * information -- the successor to MPS tables. + */ -/* page which we'll use for the new page directory */ -static pte_t *ptep; + if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + goto skip_madt; + } -extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); + /* Local APIC */ -/* - * acpi_create_identity_pmd - * - * Create a new, identity mapped pmd. - * - * Do this by creating new page directory, and marking all the pages as R/W - * Then set it as the new Page Middle Directory. - * And, of course, flush the TLB so it takes effect. - * - * We save the address of the old one, for later restoration. - */ -static void acpi_create_identity_pmd (void) -{ - pgd_t *pgd; - int i; + if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); - ptep = (pte_t*)__get_free_page(GFP_KERNEL); + if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1) + printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n"); - /* fill page with low mapping */ - for (i = 0; i < PTRS_PER_PTE; i++) - set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED)); + if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); - pgd = pgd_offset(current->active_mm, 0); - pmd = pmd_alloc(current->mm,pgd, 0); + /* I/O APIC */ - /* save the old pmd */ - saved_pmd = *pmd; + if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) + printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n"); - /* set the new one */ - set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep))); + /* System-Level Interrupt Routing */ - /* flush the TLB */ - local_flush_tlb(); -} + if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0) + printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n"); -/* - * acpi_restore_pmd - * - * Restore the old pmd saved by acpi_create_identity_pmd and - * free the page that said function alloc'd - */ -static void acpi_restore_pmd (void) -{ - set_pmd(pmd, saved_pmd); - local_flush_tlb(); - free_page((unsigned long)ptep); -} + if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); -/** - * acpi_save_state_mem - save kernel state - * - * Create an identity mapped page table and copy the wakeup routine to - * low memory. - */ -int acpi_save_state_mem (void) -{ - acpi_create_identity_pmd(); - acpi_copy_wakeup_routine(acpi_wakeup_address); + if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + skip_madt: + /* + * FADT says whether a legacy keyboard controller is present. 
+ * The FADT also contains an SCI_INT line, by which the system + * gets interrupts such as power and sleep buttons. If it's not + * on a Legacy interrupt, it needs to be setup. + */ + if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1) + printk(KERN_ERR PREFIX "Can't find FADT\n"); + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); + printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); + smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id(); + available_cpus = 1; /* We've got at least one of these, no? */ + } + smp_boot_data.cpu_count = available_cpus; + + smp_build_cpu_map(); +# ifdef CONFIG_ACPI_NUMA + if (srat_num_cpus == 0) { + int cpu, i = 1; + for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) + if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) + node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; + } + build_cpu_to_node_map(); +# endif +#endif + /* Make boot-up look pretty */ + printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); return 0; } - -/** - * acpi_save_state_disk - save kernel state to disk - * - */ -int acpi_save_state_disk (void) +int +acpi_gsi_to_irq (u32 gsi, unsigned int *irq) { - return 1; -} + int vector; -/* - * acpi_restore_state - */ -void acpi_restore_state_mem (void) -{ - acpi_restore_pmd(); -} + if (has_8259 && gsi < 16) + *irq = isa_irq_to_vector(gsi); + else { + vector = gsi_to_vector(gsi); + if (vector == -1) + return -1; -/** - * acpi_reserve_bootmem - do _very_ early ACPI initialisation - * - * We allocate a page in low memory for the wakeup - * routine for when we come back from a sleep state. The - * runtime allocator allows specification of <16M pages, but not - * <1M pages. - */ -void __init acpi_reserve_bootmem(void) -{ - acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); - printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address); -} - -void do_suspend_lowlevel_s4bios(int resume) -{ - if (!resume) { - save_processor_context(); - acpi_save_register_state((unsigned long)&&acpi_sleep_done); - acpi_enter_sleep_state_s4bios(); - return; + *irq = vector; } -acpi_sleep_done: - restore_processor_context(); + return 0; } +int +acpi_register_irq (u32 gsi, u32 polarity, u32 trigger) +{ + if (has_8259 && gsi < 16) + return isa_irq_to_vector(gsi); -#endif /*CONFIG_ACPI_SLEEP*/ - + return iosapic_register_intr(gsi, + (polarity == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + (trigger == ACPI_EDGE_SENSITIVE) ? 
IOSAPIC_EDGE : IOSAPIC_LEVEL);
+}
+EXPORT_SYMBOL(acpi_register_irq);
+#endif
+#endif /* CONFIG_ACPI_BOOT */
diff --git a/xen/arch/ia64/asm-offsets.c b/xen/arch/ia64/asm-offsets.c
index 4326ea0078..a45bb76b88 100644
--- a/xen/arch/ia64/asm-offsets.c
+++ b/xen/arch/ia64/asm-offsets.c
@@ -9,6 +9,9 @@
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <public/xen.h>
+#ifdef CONFIG_VTI
+#include <asm/tlb.h>
+#endif // CONFIG_VTI
 #define task_struct exec_domain
@@ -93,6 +96,24 @@ void foo(void)
     DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
     DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
     DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
+#ifdef CONFIG_VTI
+    DEFINE(IA64_PT_REGS_R4_OFFSET, offsetof (struct xen_regs, r4));
+    DEFINE(IA64_PT_REGS_R5_OFFSET, offsetof (struct xen_regs, r5));
+    DEFINE(IA64_PT_REGS_R6_OFFSET, offsetof (struct xen_regs, r6));
+    DEFINE(IA64_PT_REGS_R7_OFFSET, offsetof (struct xen_regs, r7));
+    DEFINE(IA64_PT_REGS_CR_IIPA_OFFSET, offsetof (struct xen_regs, cr_iipa));
+    DEFINE(IA64_PT_REGS_CR_ISR_OFFSET, offsetof (struct xen_regs, cr_isr));
+    DEFINE(IA64_PT_REGS_EML_UNAT_OFFSET, offsetof (struct xen_regs, eml_unat));
+    DEFINE(IA64_PT_REGS_RFI_PFS_OFFSET, offsetof (struct xen_regs, rfi_pfs));
+    DEFINE(RFI_IIP_OFFSET, offsetof(struct exec_domain, arch.arch_vmx.rfi_iip));
+    DEFINE(RFI_IPSR_OFFSET, offsetof(struct exec_domain, arch.arch_vmx.rfi_ipsr));
+    DEFINE(RFI_IFS_OFFSET,offsetof(struct exec_domain ,arch.arch_vmx.rfi_ifs));
+    DEFINE(RFI_PFS_OFFSET,offsetof(struct exec_domain ,arch.arch_vmx.rfi_pfs));
+    DEFINE(SWITCH_MRR5_OFFSET,offsetof(struct exec_domain ,arch.arch_vmx.mrr5));
+    DEFINE(SWITCH_MRR6_OFFSET,offsetof(struct exec_domain ,arch.arch_vmx.mrr6));
+    DEFINE(SWITCH_MRR7_OFFSET,offsetof(struct exec_domain ,arch.arch_vmx.mrr7));
+    DEFINE(SWITCH_MPTA_OFFSET,offsetof(struct exec_domain ,arch.arch_vmx.mpta));
+#endif //CONFIG_VTI
     DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
     DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
     DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
@@ -164,6 +185,13 @@ void foo(void)
     BLANK();
+#ifdef CONFIG_VTI
+    DEFINE(IA64_VPD_BASE_OFFSET, offsetof (struct exec_domain, arch.arch_vmx.vpd));
+    DEFINE(IA64_VPD_CR_VPTA_OFFSET, offsetof (cr_t, pta));
+    DEFINE(XXX_THASH_SIZE, sizeof (thash_data_t));
+
+    BLANK();
+#endif //CONFIG_VTI
     //DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
     //DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
     //DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
diff --git a/xen/arch/ia64/dom_fw.c b/xen/arch/ia64/dom_fw.c
index 9b91950d23..7726154c88 100644
--- a/xen/arch/ia64/dom_fw.c
+++ b/xen/arch/ia64/dom_fw.c
@@ -17,6 +17,7 @@
 #include <asm/io.h>
 #include <asm/pal.h>
 #include <asm/sal.h>
+#include <xen/acpi.h>
 #include <asm/dom_fw.h>
@@ -301,6 +302,71 @@ void print_md(efi_memory_desc_t *md)
 #endif
 }
+#define LSAPIC_NUM 16   // TEMP
+static u32 lsapic_flag=1;
+
+/* Provide only one LP to guest */
+static int
+acpi_update_lsapic (acpi_table_entry_header *header)
+{
+    struct acpi_table_lsapic *lsapic;
+
+    lsapic = (struct acpi_table_lsapic *) header;
+    if (!lsapic)
+        return -EINVAL;
+
+    if (lsapic->flags.enabled && lsapic_flag) {
+        printk("enable lsapic entry: 0x%lx\n", (u64)lsapic);
+        lsapic_flag = 0; /* disable all the following processors */
+    } else if (lsapic->flags.enabled) {
+        printk("DISABLE lsapic entry: 0x%lx\n", (u64)lsapic);
+        lsapic->flags.enabled = 0;
+    } else
+        printk("lsapic entry is already disabled: 0x%lx\n", (u64)lsapic);
+
+    return 0;
+}
+
+static int
+acpi_update_madt_checksum (unsigned long phys_addr, unsigned long size)
+{
+    u8 checksum=0;
+    u8* ptr;
+    int len;
+    struct acpi_table_madt* acpi_madt;
+
+    if (!phys_addr || !size)
+        return -EINVAL;
+
+    acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
+    acpi_madt->header.checksum=0;
+
+    /* re-calculate MADT checksum */
+    ptr = (u8*)acpi_madt;
+    len = acpi_madt->header.length;
+    while (len>0){
+        checksum = (u8)( checksum + (*ptr++) );
+        len--;
+    }
+    acpi_madt->header.checksum = 0x0 - checksum;
+
+    return 0;
+}
+
+/* base is physical address of acpi table */
+void touch_acpi_table(void)
+{
+    u64 count = 0;
+    count = acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_update_lsapic, NR_CPUS);
+    if ( count < 1)
+        printk("Error parsing MADT - no LAPIC entries\n");
+    printk("Total %lu lsapic entries\n", count);
+    acpi_table_parse(ACPI_APIC, acpi_update_madt_checksum);
+
+    return;
+}
+
+
 struct ia64_boot_param *
 dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_size)
 {
@@ -418,6 +484,9 @@ dom_fw_init (struct domain *d, char *args, int arglen, char *fw_mem, int fw_mem_
     printf(" MPS=%0xlx",efi_tables[i].table);
     i++;
 }
+
+    touch_acpi_table();
+
 if (efi.acpi20) {
     efi_tables[i].guid = ACPI_20_TABLE_GUID;
     efi_tables[i].table = __pa(efi.acpi20);
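The checksum update above works because every ACPI table is defined to sum to zero, byte-wise, over its full length: acpi_update_madt_checksum() zeroes the stored checksum byte, adds up all bytes of the table, and stores the two's complement of the sum. A minimal sketch of the invariant this re-establishes (the helper name is mine, not part of the patch):

    /* Illustrative only: the property acpi_update_madt_checksum restores.
     * After the update, the unsigned bytes of the MADT sum to 0 mod 256,
     * which is what firmware and OS checksum verifiers test for. */
    static int acpi_checksum_ok(const void *table, int len)
    {
        const u8 *p = table;
        u8 sum = 0;

        while (len-- > 0)
            sum += *p++;
        return sum == 0;
    }
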
diff --git a/xen/arch/ia64/domain.c b/xen/arch/ia64/domain.c
index eb342e7021..2a5258dcb8 100644
--- a/xen/arch/ia64/domain.c
+++ b/xen/arch/ia64/domain.c
@@ -3,6 +3,11 @@
  *
  * Pentium III FXSR, SSE support
  * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * Copyright (C) 2005 Intel Co
+ *	Kun Tian (Kevin Tian) <kevin.tian@intel.com>
+ *
+ * 05/04/29 Kun Tian (Kevin Tian) <kevin.tian@intel.com> Add CONFIG_VTI domain support
  */
 #include <xen/config.h>
@@ -32,12 +37,22 @@
 #include <asm/asm-offsets.h>  /* for IA64_THREAD_INFO_SIZE */
 #include <asm/vcpu.h>   /* for function declarations */
+#ifdef CONFIG_VTI
+#include <asm/vmx.h>
+#include <asm/vmx_vcpu.h>
+#endif // CONFIG_VTI
 #define CONFIG_DOMAIN0_CONTIGUOUS
 unsigned long dom0_start = -1L;
+#ifdef CONFIG_VTI
 unsigned long dom0_size = 512*1024*1024; //FIXME: Should be configurable
 //FIXME: alignment should be 256MB, lest Linux use a 256MB page size
+unsigned long dom0_align = 256*1024*1024;
+#else // CONFIG_VTI
+unsigned long dom0_size = 256*1024*1024; //FIXME: Should be configurable
+//FIXME: alignment should be 256MB, lest Linux use a 256MB page size
 unsigned long dom0_align = 64*1024*1024;
+#endif // CONFIG_VTI
 #ifdef DOMU_BUILD_STAGING
 unsigned long domU_staging_size = 32*1024*1024; //FIXME: Should be configurable
 unsigned long domU_staging_start;
@@ -151,6 +166,58 @@ void arch_free_exec_domain_struct(struct exec_domain *ed)
     free_xenheap_pages(ed, KERNEL_STACK_SIZE_ORDER);
 }
+#ifdef CONFIG_VTI
+void arch_do_createdomain(struct exec_domain *ed)
+{
+    struct domain *d = ed->domain;
+    struct thread_info *ti = alloc_thread_info(ed);
+
+    /* If domain is VMX domain, shared info area is created
+     * by domain and then domain notifies HV by specific hypercall.
+     * If domain is xenolinux, shared info area is created by
+     * HV.
+     * Since we have no idea about whether domain is VMX now,
+     * (dom0 when parse and domN when build), postpone possible
+     * allocation.
+     */
+
+    /* FIXME: Because full virtual cpu info is placed in this area,
+     * it's unlikely to fit into one shared_info page. Later we need to
+     * split vcpu context from vcpu_info and conform to the
+     * normal Xen convention.
+     */
+    d->shared_info = NULL;
+    ed->vcpu_info = (void *)alloc_xenheap_page();
+    if (!ed->vcpu_info) {
+        printk("ERROR/HALTING: CAN'T ALLOC PAGE\n");
+        while (1);
+    }
+    memset(ed->vcpu_info, 0, PAGE_SIZE);
+
+    /* Clear thread_info to clear some important fields, like preempt_count */
+    memset(ti, 0, sizeof(struct thread_info));
+
+    /* Allocate per-domain vTLB and vhpt */
+    ed->arch.vtlb = init_domain_tlb(ed);
+
+    /* Physical->machine page table will be allocated at
+     * final setup, since we do not know the maximum pfn number
+     * at this stage
+     */
+
+    /* FIXME: This is identity mapped address for xenheap.
+     * Do we need it at all?
+     */
+    d->xen_vastart = 0xf000000000000000;
+    d->xen_vaend = 0xf300000000000000;
+    d->breakimm = 0x1000;
+
+    // stay on kernel stack because may get interrupts!
+    // ia64_ret_from_clone (which b0 gets in new_thread) switches
+    // to user stack
+    ed->arch._thread.on_ustack = 0;
+}
+#else // CONFIG_VTI
 void arch_do_createdomain(struct exec_domain *ed)
 {
     struct domain *d = ed->domain;
@@ -193,6 +260,7 @@ void arch_do_createdomain(struct exec_domain *ed)
     // to user stack
     ed->arch._thread.on_ustack = 0;
 }
+#endif // CONFIG_VTI
 void arch_do_boot_vcpu(struct exec_domain *p)
 {
@@ -216,6 +284,70 @@ void domain_relinquish_resources(struct domain *d)
     dummy();
 }
+#ifdef CONFIG_VTI
+void new_thread(struct exec_domain *ed,
+                unsigned long start_pc,
+                unsigned long start_stack,
+                unsigned long start_info)
+{
+    struct domain *d = ed->domain;
+    struct switch_stack *sw;
+    struct xen_regs *regs;
+    struct ia64_boot_param *bp;
+    extern char ia64_ret_from_clone;
+    extern char saved_command_line[];
+    //char *dom0_cmdline = "BOOT_IMAGE=scsi0:\EFI\redhat\xenlinux nomca root=/dev/sdb1 ro";
+
+
+#ifdef CONFIG_DOMAIN0_CONTIGUOUS
+    if (d == dom0) start_pc += dom0_start;
+#endif
+    regs = (struct xen_regs *) ((unsigned long) ed + IA64_STK_OFFSET) - 1;
+    sw = (struct switch_stack *) regs - 1;
+    /* Sanity Clear */
+    memset(sw, 0, sizeof(struct xen_regs) + sizeof(struct switch_stack));
+
+    if (VMX_DOMAIN(ed)) {
+        /* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
+        regs->cr_ipsr = 0x501008826008; /* Need to be expanded as macro */
+    } else {
+        regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
+            | IA64_PSR_BITS_TO_SET | IA64_PSR_BN
+            & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS);
+        regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; // domain runs at PL2
+    }
+    regs->cr_iip = start_pc;
+    regs->ar_rsc = 0x0;
+    regs->cr_ifs = 0x0;
+    regs->ar_fpsr = sw->ar_fpsr = FPSR_DEFAULT;
+    sw->ar_bspstore = (unsigned long)ed + IA64_RBS_OFFSET;
+    printf("new_thread: ed=%p, regs=%p, sw=%p, new_rbs=%p, IA64_STK_OFFSET=%p, &r8=%p\n",
+           ed,regs,sw,sw->ar_bspstore,IA64_STK_OFFSET,&regs->r8);
+    printf("iip:0x%lx,ipsr:0x%lx\n", regs->cr_iip, regs->cr_ipsr);
+
+    sw->b0 = (unsigned long) &ia64_ret_from_clone;
+    ed->arch._thread.ksp = (unsigned long) sw - 16;
+    printk("new_thread, about to call init_all_rr\n");
+    if (VMX_DOMAIN(ed)) {
+        vmx_init_all_rr(ed);
+    } else
+        init_all_rr(ed);
+    // set up boot parameters (and fake firmware)
+    printk("new_thread, about to call dom_fw_setup\n");
+    VMX_VPD(ed,vgr[12]) = dom_fw_setup(d,saved_command_line,256L);  //FIXME
+    printk("new_thread, done with dom_fw_setup\n");
+
+    if (VMX_DOMAIN(ed)) {
+        /* Virtual processor context setup */
+        VMX_VPD(ed, vpsr) = IA64_PSR_BN;
+        VPD_CR(ed, dcr) = 0;
+    } else {
+        // don't forget to set this!
+        ed->vcpu_info->arch.banknum = 1;
+    }
+}
+#else // CONFIG_VTI
+
 // heavily leveraged from linux/arch/ia64/kernel/process.c:copy_thread()
 // and linux/arch/ia64/kernel/process.c:kernel_thread()
 void new_thread(struct exec_domain *ed,
@@ -272,6 +404,7 @@ printk("new_thread, done with dom_fw_setup\n");
     // don't forget to set this!
     ed->vcpu_info->arch.banknum = 1;
 }
+#endif // CONFIG_VTI
 static struct page * map_new_domain0_page(unsigned long mpaddr)
 {
@@ -599,6 +732,214 @@ domU_staging_write_32(unsigned long at, unsigned long a, unsigned long b,
 }
 #endif
+#ifdef CONFIG_VTI
+/* Depending on whether the domain is a VMX one, different context
+ * may be set up here.
+ */
+void
+post_arch_do_create_domain(struct exec_domain *ed, int vmx_domain)
+{
+    struct domain *d = ed->domain;
+
+    if (!vmx_domain) {
+        d->shared_info = (void*)alloc_xenheap_page();
+        if (!d->shared_info)
+            panic("Allocate share info for non-vmx domain failed.\n");
+        d->shared_info_va = 0xfffd000000000000;
+
+        printk("Build shared info for non-vmx domain\n");
+        build_shared_info(d);
+        /* Setup start info area */
+    }
+}
+
+/* For a VMX domain, this is invoked when the guest kernel
+ * actively requests it
+ */
+void build_shared_info(struct domain *d)
+{
+    int i;
+
+    /* Set up shared-info area. */
+    update_dom_time(d);
+    d->shared_info->domain_time = 0;
+
+    /* Mask all upcalls... */
+    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
+        d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
+
+    /* ... */
+}
+
+extern unsigned long running_on_sim;
+unsigned int vmx_dom0 = 0;
+int construct_dom0(struct domain *d,
+                   unsigned long image_start, unsigned long image_len,
+                   unsigned long initrd_start, unsigned long initrd_len,
+                   char *cmdline)
+{
+    char *dst;
+    int i, rc;
+    unsigned long pfn, mfn;
+    unsigned long nr_pt_pages;
+    unsigned long count;
+    unsigned long alloc_start, alloc_end;
+    struct pfn_info *page = NULL;
+    start_info_t *si;
+    struct exec_domain *ed = d->exec_domain[0];
+    struct domain_setup_info dsi;
+    unsigned long p_start;
+    unsigned long pkern_start;
+    unsigned long pkern_entry;
+    unsigned long pkern_end;
+
+//printf("construct_dom0: starting\n");
+    /* Sanity! */
+#ifndef CLONE_DOMAIN0
+    if ( d != dom0 )
+        BUG();
+    if ( test_bit(DF_CONSTRUCTED, &d->flags) )
+        BUG();
+#endif
+
+    printk("##Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
+    memset(&dsi, 0, sizeof(struct domain_setup_info));
+
+    printk("*** LOADING DOMAIN 0 ***\n");
+
+    alloc_start = dom0_start;
+    alloc_end = dom0_start + dom0_size;
+    d->tot_pages = d->max_pages = (alloc_end - alloc_start)/PAGE_SIZE;
+    image_start = __va(ia64_boot_param->initrd_start);
+    image_len = ia64_boot_param->initrd_size;
+
+    dsi.image_addr = (unsigned long)image_start;
+    dsi.image_len  = image_len;
+    rc = parseelfimage(&dsi);
+    if ( rc != 0 )
+        return rc;
+
+    /* Temp workaround */
+    if (running_on_sim)
+        dsi.xen_elf_image = 1;
+
+    if ((!vmx_enabled) && !dsi.xen_elf_image) {
+        printk("Lack of hardware support for unmodified vmx dom0\n");
+        panic("");
+    }
+
+    if (vmx_enabled && !dsi.xen_elf_image) {
+        printk("Dom0 is vmx domain!\n");
+        vmx_dom0 = 1;
+    }
+
+    p_start = dsi.v_start;
+    pkern_start = dsi.v_kernstart;
+    pkern_end = dsi.v_kernend;
+    pkern_entry = dsi.v_kernentry;
+
+    printk("p_start=%lx, pkern_start=%lx, pkern_end=%lx, pkern_entry=%lx\n",
+           p_start,pkern_start,pkern_end,pkern_entry);
+
+    if ( (p_start & (PAGE_SIZE-1)) != 0 )
+    {
+        printk("Initial guest OS must load to a page boundary.\n");
+        return -EINVAL;
+    }
+
+    printk("METAPHYSICAL MEMORY ARRANGEMENT:\n"
+           " Kernel image:  %lx->%lx\n"
+           " Entry address: %lx\n"
+           " Init. ramdisk: (NOT IMPLEMENTED YET)\n",
+           pkern_start, pkern_end, pkern_entry);
+
+    if ( (pkern_end - pkern_start) > (d->max_pages * PAGE_SIZE) )
+    {
+        printk("Initial guest OS requires too much space\n"
+               "(%luMB is greater than %luMB limit)\n",
+               (pkern_end-pkern_start)>>20, (d->max_pages<<PAGE_SHIFT)>>20);
+        return -ENOMEM;
+    }
+
+    // Other sanity check about Dom0 image
+
+    /* Construct a frame-allocation list for the initial domain, since these
+     * pages are allocated by boot allocator and pfns are not set properly
+     */
+    for ( mfn = (alloc_start>>PAGE_SHIFT);
+          mfn < (alloc_end>>PAGE_SHIFT);
+          mfn++ )
+    {
+        page = &frame_table[mfn];
+        page_set_owner(page, d);
+        page->u.inuse.type_info = 0;
+        page->count_info = PGC_allocated | 1;
+        list_add_tail(&page->list, &d->page_list);
+
+        /* Construct 1:1 mapping */
+        machine_to_phys_mapping[mfn] = mfn;
+    }
+
+    post_arch_do_create_domain(ed, vmx_dom0);
+
+    /* Load Dom0 image to its own memory */
+    loaddomainelfimage(d,image_start);
+
+    /* Copy the initial ramdisk. */
+
+    /* Sync d/i cache conservatively */
+    {
+        unsigned long ret;
+        unsigned long progress;
+        ret = ia64_pal_cache_flush(4, 0, &progress, NULL);
+        if (ret != PAL_STATUS_SUCCESS)
+            panic("PAL CACHE FLUSH failed for dom0.\n");
+        printk("Sync i/d cache for dom0 image SUCC\n");
+    }
+    /* Physical mode emulation initialization, including
+     * emulation ID allocation and related memory request
+     */
+    physical_mode_init(ed);
+    /* Dom0's pfn is equal to mfn, so there's no need to allocate pmt
+     * for dom0
+     */
+    d->arch.pmt = NULL;
+
+    /* Give up the VGA console if DOM0 is configured to grab it. */
+    if (cmdline != NULL)
+        console_endboot(strstr(cmdline, "tty0") != NULL);
+
+    /* VMX specific construction for Dom0, if hardware supports VMX
+     * and Dom0 is unmodified image
+     */
+    printk("Dom0: 0x%lx, domain: 0x%lx\n", (u64)dom0, (u64)d);
+    if (vmx_dom0)
+        vmx_final_setup_domain(dom0);
+
+    /* vpd is ready now */
+    vlsapic_reset(ed);
+    vtm_init(ed);
+
+    set_bit(DF_CONSTRUCTED, &d->flags);
+
+    new_thread(ed, pkern_entry, 0, 0);
+
+    // FIXME: Hack for keyboard input
+#ifdef CLONE_DOMAIN0
+if (d == dom0)
+#endif
+    serial_input_init();
+    if (d == dom0) {
+        ed->vcpu_info->arch.delivery_mask[0] = -1L;
+        ed->vcpu_info->arch.delivery_mask[1] = -1L;
+        ed->vcpu_info->arch.delivery_mask[2] = -1L;
+        ed->vcpu_info->arch.delivery_mask[3] = -1L;
+    }
+    else __set_bit(0x30,ed->vcpu_info->arch.delivery_mask);
+
+    return 0;
+}
+#else //CONFIG_VTI
+
 int construct_dom0(struct domain *d,
                    unsigned long image_start, unsigned long image_len,
                    unsigned long initrd_start, unsigned long initrd_len,
@@ -771,6 +1112,7 @@ if (d == dom0)
     return 0;
 }
+#endif // CONFIG_VTI
 // FIXME: When dom0 can construct domains, this goes away (or is rewritten)
 int construct_domU(struct domain *d,
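The magic number in the VMX branch of new_thread() above is exactly the PSR image its comment describes. Decoded bit by bit, using the architected IA-64 PSR bit positions, it could become the macro that the code's own "Need to be expanded as macro" remark asks for (the macro name here is hypothetical):

    /* Hypothetical expansion of the 0x501008826008 literal; bit positions
     * are the architected IA-64 PSR fields named in the inline comment. */
    #define VMX_INITIAL_IPSR  ((1UL << 3)   /* ac: alignment check            */ \
                             | (1UL << 13)  /* ic: interruption collection    */ \
                             | (1UL << 14)  /* i:  external interrupts on     */ \
                             | (1UL << 17)  /* dt: data address translation   */ \
                             | (1UL << 23)  /* si: secure interval timer      */ \
                             | (1UL << 27)  /* rt: register stack translation */ \
                             | (1UL << 36)  /* it: instruction translation    */ \
                             | (1UL << 44)  /* bn: register bank 1            */ \
                             | (1UL << 46)) /* vm: processor virtualization   */
    /* == 0x501008826008, matching the literal and its comment. */
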
diff --git a/xen/arch/ia64/entry.h b/xen/arch/ia64/entry.h
new file mode 100644
index 0000000000..0aa2d0a36c
--- /dev/null
+++ b/xen/arch/ia64/entry.h
@@ -0,0 +1,97 @@
+#include <linux/config.h>
+
+/*
+ * Preserved registers that are shared between code in ivt.S and
+ * entry.S.  Be careful not to step on these!
+ */
+#define PRED_LEAVE_SYSCALL	1 /* TRUE iff leave from syscall */
+#define PRED_KERNEL_STACK	2 /* returning to kernel-stacks? */
+#define PRED_USER_STACK		3 /* returning to user-stacks? */
+#ifdef CONFIG_VTI
+#define PRED_EMUL		2 /* Need to save r4-r7 for inst emulation */
+#define PRED_NON_EMUL		3 /* No need to save r4-r7 for normal path */
+#define PRED_BN0		6 /* Guest is in bank 0 */
+#define PRED_BN1		7 /* Guest is in bank 1 */
+#endif // CONFIG_VTI
+#define PRED_SYSCALL		4 /* inside a system call? */
+#define PRED_NON_SYSCALL	5 /* complement of PRED_SYSCALL */
+
+#ifdef __ASSEMBLY__
+# define PASTE2(x,y)	x##y
+# define PASTE(x,y)	PASTE2(x,y)
+
+# define pLvSys		PASTE(p,PRED_LEAVE_SYSCALL)
+# define pKStk		PASTE(p,PRED_KERNEL_STACK)
+# define pUStk		PASTE(p,PRED_USER_STACK)
+#ifdef CONFIG_VTI
+# define pEml		PASTE(p,PRED_EMUL)
+# define pNonEml	PASTE(p,PRED_NON_EMUL)
+# define pBN0		PASTE(p,PRED_BN0)
+# define pBN1		PASTE(p,PRED_BN1)
+#endif // CONFIG_VTI
+# define pSys		PASTE(p,PRED_SYSCALL)
+# define pNonSys	PASTE(p,PRED_NON_SYSCALL)
+#endif
+
+#define PT(f)		(IA64_PT_REGS_##f##_OFFSET)
+#define SW(f)		(IA64_SWITCH_STACK_##f##_OFFSET)
+#ifdef CONFIG_VTI
+#define VPD(f)      (VPD_##f##_START_OFFSET)
+#endif // CONFIG_VTI
+
+#define PT_REGS_SAVES(off) \
+	.unwabi 3, 'i'; \
+	.fframe IA64_PT_REGS_SIZE+16+(off); \
+	.spillsp rp, PT(CR_IIP)+16+(off); \
+	.spillsp ar.pfs, PT(CR_IFS)+16+(off); \
+	.spillsp ar.unat, PT(AR_UNAT)+16+(off); \
+	.spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \
+	.spillsp pr, PT(PR)+16+(off);
+
+#define PT_REGS_UNWIND_INFO(off) \
+	.prologue; \
+	PT_REGS_SAVES(off); \
+	.body
+
+#define SWITCH_STACK_SAVES(off) \
+	.savesp ar.unat,SW(CALLER_UNAT)+16+(off); \
+	.savesp ar.fpsr,SW(AR_FPSR)+16+(off); \
+	.spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \
+	.spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \
+	.spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \
+	.spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \
+	.spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \
+	.spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \
+	.spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \
+	.spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \
+	.spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \
+	.spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \
+	.spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \
+	.spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \
+	.spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \
+	.spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \
+	.spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \
+	.spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \
+	.spillsp @priunat,SW(AR_UNAT)+16+(off); \
+	.spillsp ar.rnat,SW(AR_RNAT)+16+(off); \
+	.spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \
+	.spillsp pr,SW(PR)+16+(off))
+
+#define DO_SAVE_SWITCH_STACK \
+	movl r28=1f; \
+	;; \
+	.fframe IA64_SWITCH_STACK_SIZE; \
+	adds sp=-IA64_SWITCH_STACK_SIZE,sp; \
+	mov.ret.sptk b7=r28,1f; \
+	SWITCH_STACK_SAVES(0); \
+	br.cond.sptk.many save_switch_stack; \
+1:
+
+#define DO_LOAD_SWITCH_STACK \
+	movl r28=1f; \
+	;; \
+	invala; \
+	mov.ret.sptk b7=r28,1f; \
+	br.cond.sptk.many load_switch_stack; \
+1:	.restore sp; \
+	adds sp=IA64_SWITCH_STACK_SIZE,sp
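Note that under CONFIG_VTI the emulation predicates reuse predicate registers 2 and 3, the same numbers as PRED_KERNEL_STACK and PRED_USER_STACK, so the VTI paths share those scarce preserved predicates with the stack-switch logic rather than consuming two more. A sketch of what the token-pasting yields (illustrative; the branch target is a hypothetical label):

    /* Illustrative expansion of the PASTE() helpers under CONFIG_VTI:
     *   pEml    -> PASTE(p,2) -> p2   (aliases pKStk's register)
     *   pNonEml -> PASTE(p,3) -> p3   (aliases pUStk's register)
     *   pBN0    -> p6, pBN1 -> p7     (guest register-bank tracking)
     * Assembly can then branch on them symbolically, e.g.
     *   (pEml)  br.cond.sptk.many vmx_save_extra_regs   // hypothetical label
     */
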
diff --git a/xen/arch/ia64/mm_init.c b/xen/arch/ia64/mm_init.c
index 0e3ce45dec..d67915593f 100644
--- a/xen/arch/ia64/mm_init.c
+++ b/xen/arch/ia64/mm_init.c
@@ -301,6 +301,20 @@ ia64_mmu_init (void *my_cpu_data)
     pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), PERCPU_PAGE_SHIFT);
+#ifdef CONFIG_VTI
+    {
+        u64 base;
+        extern void vmx_switch_rr7(void);
+
+        base = (u64) &vmx_switch_rr7;
+        base = *((u64*)base);
+        ia64_itr(0x1, IA64_TR_RR7_SWITCH_STUB, XEN_RR7_SWITCH_STUB,
+                 pte_val(pfn_pte(__pa(base) >> PAGE_SHIFT, PAGE_KERNEL)),
+                 RR7_SWITCH_SHIFT);
+        printk("Add TR mapping for rr7 switch stub, with physical: 0x%lx\n", (u64)(__pa(base)));
+    }
+#endif // CONFIG_VTI
+
     ia64_set_psr(psr);
     ia64_srlz_i();
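The double indirection above (`base = *((u64*)base)`) is needed because on ia64 the address of a function is the address of its function descriptor, not of its first instruction; the first word of the descriptor holds the real entry point, which is the page the translation register must pin. A sketch of the layout being relied on (struct and field names are illustrative):

    /* Illustrative only: the ia64 function-descriptor layout that the
     * "base = *((u64*)base)" line depends on. */
    struct fdesc {
        u64 ip;   /* code entry point -- the page pinned with ia64_itr() */
        u64 gp;   /* global pointer of the owning module */
    };
    /* equivalent to: base = ((struct fdesc *)&vmx_switch_rr7)->ip; */
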
ia64_set_psr(psr); ia64_srlz_i(); diff --git a/xen/arch/ia64/mmio.c b/xen/arch/ia64/mmio.c new file mode 100644 index 0000000000..88146c8d8e --- /dev/null +++ b/xen/arch/ia64/mmio.c @@ -0,0 +1,325 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * mmio.c: MMIO emulation components. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) + */ + +#include <linux/sched.h> +#include <asm/tlb.h> +#include <asm/vmx_mm_def.h> +#include <asm/gcc_intrin.h> +#include <xen/interrupt.h> +#include <asm/vmx_vcpu.h> + +struct mmio_list *lookup_mmio(u64 gpa, struct mmio_list *mio_base) +{ + int i; + for (i=0; mio_base[i].iot != NOT_IO; i++ ) { + if ( gpa >= mio_base[i].start && gpa <= mio_base[i].end ) + return &mio_base[i]; + } + return NULL; +} + + +extern void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma); +static inline void mmio_write(VCPU *vcpu, void *src, u64 dest_pa, size_t s, int ma) +{ + struct virutal_platform_def *v_plat; + struct mmio_list *mio; + + v_plat = vmx_vcpu_get_plat(vcpu); + mio = lookup_mmio(dest_pa, v_plat->mmio); + if ( mio == NULL ) + panic ("Wrong address for MMIO\n"); + + switch (mio->iot) { + case PIB_MMIO: + pib_write(vcpu, src, dest_pa - v_plat->pib_base, s, ma); + break; + case VGA_BUFF: + case CHIPSET_IO: + case LOW_MMIO: + case LEGACY_IO: + case IO_SAPIC: + default: + break; + } + return; +} + +static inline void mmio_read(VCPU *vcpu, u64 src_pa, void *dest, size_t s, int ma) +{ + struct virutal_platform_def *v_plat; + struct mmio_list *mio; + + v_plat = vmx_vcpu_get_plat(vcpu); + mio = lookup_mmio(src_pa, v_plat->mmio); + if ( mio == NULL ) + panic ("Wrong address for MMIO\n"); + + switch (mio->iot) { + case PIB_MMIO: + pib_read(vcpu, src_pa - v_plat->pib_base, dest, s, ma); + break; + case VGA_BUFF: + case CHIPSET_IO: + case LOW_MMIO: + case LEGACY_IO: + case IO_SAPIC: + default: + break; + } + return; +} + +/* + * Read or write data in guest virtual address mode. 
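+ * The *_v variants below take the guest TLB entry (thash_data_t) that + * translated the access and reuse its memory attribute, while the *_p + * variants take a guest physical address directly and treat bit 63 as + * the UC hint: if set, the access is uncacheable (ma=4), otherwise WB.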
+ */ + +void +memwrite_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s) +{ + uint64_t pa; + + if (!vtlb->nomap) + panic("Normal memory write shouldn't go to this point!"); + pa = PPN_2_PA(vtlb->ppn); + pa += POFFSET((u64)dest, vtlb->ps); + mmio_write (vcpu, src, pa, s, vtlb->ma); +} + + +void +memwrite_p(VCPU *vcpu, void *src, void *dest, size_t s) +{ + uint64_t pa = (uint64_t)dest; + int ma; + + if ( pa & (1UL <<63) ) { + // UC + ma = 4; + pa <<=1; + pa >>=1; + } + else { + // WBL + ma = 0; // using WB for WBL + } + mmio_write (vcpu, src, pa, s, ma); +} + +void +memread_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s) +{ + uint64_t pa; + + if (!vtlb->nomap) + panic("Normal memory read shouldn't go to this point!"); + pa = PPN_2_PA(vtlb->ppn); + pa += POFFSET((u64)src, vtlb->ps); + + mmio_read(vcpu, pa, dest, s, vtlb->ma); +} + +void +memread_p(VCPU *vcpu, void *src, void *dest, size_t s) +{ + uint64_t pa = (uint64_t)src; + int ma; + + if ( pa & (1UL <<63) ) { + // UC + ma = 4; + pa <<=1; + pa >>=1; + } + else { + // WBL + ma = 0; // using WB for WBL + } + mmio_read(vcpu, pa, dest, s, ma); +} + +#define PIB_LOW_HALF(ofst) !(ofst&(1<<20)) +#define PIB_OFST_INTA 0x1E0000 +#define PIB_OFST_XTP 0x1E0008 + + +/* + * Deliver IPI message. (Only U-VP is supported now) + * offset: address offset to IPI space. + * value: deliver value. + */ +static void deliver_ipi (VCPU *vcpu, uint64_t dm, uint64_t vector) +{ +#ifdef IPI_DEBUG + printf ("deliver_ipi %lx %lx\n",dm,vector); +#endif + switch ( dm ) { + case 0: // INT + vmx_vcpu_pend_interrupt (vcpu, vector); + break; + case 2: // PMI + // TODO -- inject guest PMI + panic ("Inject guest PMI!\n"); + break; + case 4: // NMI + vmx_vcpu_pend_interrupt (vcpu, 2); + break; + case 5: // INIT + // TODO -- inject guest INIT + panic ("Inject guest INIT!\n"); + break; + case 7: // ExtINT + vmx_vcpu_pend_interrupt (vcpu, 0); + break; + + case 1: + case 3: + case 6: + default: + panic ("Deliver reserved IPI!\n"); + break; + } +} + +/* + * TODO: Use hash table for the lookup. + */ +static inline VCPU *lid_2_vcpu (struct domain *d, u64 id, u64 eid) +{ + int i; + VCPU *vcpu; + LID lid; + + for (i=0; i<MAX_VIRT_CPUS; i++) { + vcpu = d->exec_domain[i]; + lid.val = VPD_CR(vcpu, lid); + if ( lid.id == id && lid.eid == eid ) { + return vcpu; + } + } + return NULL; +} + +/* + * Execute write IPI op. 
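+ * The PIB offset selects the target LSAPIC via the (id, eid) fields of + * ipi_a_t, and the 8-byte payload carries (dm, vector) per ipi_d_t; e.g. + * value 0xd1 requests a fixed interrupt (dm=0) to vector 0xd1, while + * 0x2d1 would request a PMI (dm=2) with the same vector.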
+ */ +static int write_ipi (VCPU *vcpu, uint64_t addr, uint64_t value) +{ + VCPU *target_cpu; + + target_cpu = lid_2_vcpu(vcpu->domain, + ((ipi_a_t)addr).id, ((ipi_a_t)addr).eid); + if ( target_cpu == NULL ) panic("Unknown IPI cpu\n"); + if ( target_cpu == vcpu ) { + // IPI to self + deliver_ipi (vcpu, ((ipi_d_t)value).dm, + ((ipi_d_t)value).vector); + return 1; + } + else { + // TODO: send Host IPI to inject guest SMP IPI interruption + panic ("No SM-VP supported!\n"); + return 0; + } +} + +void pib_write(VCPU *vcpu, void *src, uint64_t pib_off, size_t s, int ma) +{ + + switch (pib_off) { + case PIB_OFST_INTA: + panic("Undefined write on PIB INTA\n"); + break; + case PIB_OFST_XTP: + if ( s == 1 && ma == 4 /* UC */) { + vmx_vcpu_get_plat(vcpu)->xtp = *(uint8_t *)src; + } + else { + panic("Undefined write on PIB XTP\n"); + } + break; + default: + if ( PIB_LOW_HALF(pib_off) ) { // lower half + if ( s != 8 || ma != 0x4 /* UC */ ) { + panic("Undefined IPI-LHF write!\n"); + } + else { + write_ipi(vcpu, pib_off, *(uint64_t *)src); + // TODO for SM-VP + } + } + else { // upper half + printf("IPI-UHF write %lx\n",pib_off); + panic("Not support yet for SM-VP\n"); + } + break; + } +} + +void pib_read(VCPU *vcpu, uint64_t pib_off, void *dest, size_t s, int ma) +{ + switch (pib_off) { + case PIB_OFST_INTA: + // todo --- emit on processor system bus. + if ( s == 1 && ma == 4) { // 1 byte load + // TODO: INTA read from IOSAPIC + } + else { + panic("Undefined read on PIB INTA\n"); + } + break; + case PIB_OFST_XTP: + if ( s == 1 && ma == 4) { + *((uint8_t*)dest) = vmx_vcpu_get_plat(vcpu)->xtp; + } + else { + panic("Undefined read on PIB XTP\n"); + } + break; + default: + if ( PIB_LOW_HALF(pib_off) ) { // lower half + if ( s != 8 || ma != 4 ) { + panic("Undefined IPI-LHF read!\n"); + } + else { +#ifdef IPI_DEBUG + printf("IPI-LHF read %lx\n",pib_off); +#endif + *(uint64_t *)dest = 0; // TODO for SM-VP + } + } + else { // upper half + if ( s != 1 || ma != 4 ) { + panic("Undefined PIB-UHF read!\n"); + } + else { +#ifdef IPI_DEBUG + printf("IPI-UHF read %lx\n",pib_off); +#endif + *(uint8_t *)dest = 0; // TODO for SM-VP + } + } + break; + } +} + diff --git a/xen/arch/ia64/patch/linux-2.6.11/entry.S b/xen/arch/ia64/patch/linux-2.6.11/entry.S index 19217002a5..9f860edc4f 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/entry.S +++ b/xen/arch/ia64/patch/linux-2.6.11/entry.S @@ -1,5 +1,5 @@ ---- ../../linux-2.6.11/arch/ia64/kernel/entry.S 2005-03-02 00:37:50.000000000 -0700 -+++ arch/ia64/entry.S 2005-04-29 14:54:13.000000000 -0600 +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/entry.S 2005-03-01 23:37:50.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/entry.S 2005-05-18 12:40:51.000000000 -0700 @@ -46,6 +46,7 @@ #include "minstate.h" @@ -73,19 +73,23 @@ GLOBAL_ENTRY(ia64_ret_from_clone) PT_REGS_UNWIND_INFO(0) -@@ -604,6 +626,11 @@ +@@ -604,6 +626,15 @@ */ br.call.sptk.many rp=ia64_invoke_schedule_tail } +#ifdef XEN + // new domains are cloned but not exec'ed so switch to user mode here + cmp.ne pKStk,pUStk=r0,r0 ++#ifdef CONFIG_VTI ++ br.cond.spnt ia64_leave_hypervisor ++#else // CONFIG_VTI + br.cond.spnt ia64_leave_kernel ++#endif // CONFIG_VTI +#else .ret8: adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 ;; -@@ -614,6 +641,7 @@ +@@ -614,6 +645,7 @@ ;; cmp.ne p6,p0=r2,r0 (p6) br.cond.spnt .strace_check_retval @@ -93,7 +97,7 @@ ;; // added stop bits to prevent r8 dependency END(ia64_ret_from_clone) // fall through -@@ -700,19 +728,25 @@ 
+@@ -700,19 +732,25 @@ .work_processed_syscall: adds r2=PT(LOADRS)+16,r12 adds r3=PT(AR_BSPSTORE)+16,r12 @@ -119,7 +123,7 @@ ;; // start restoring the state saved on the kernel stack (struct pt_regs): ld8 r9=[r2],PT(CR_IPSR)-PT(R9) -@@ -757,7 +791,11 @@ +@@ -757,7 +795,11 @@ ;; ld8.fill r12=[r2] // restore r12 (sp) ld8.fill r15=[r3] // restore r15 @@ -131,7 +135,7 @@ ;; (pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8 (pUStk) st1 [r14]=r17 -@@ -814,9 +852,18 @@ +@@ -814,9 +856,18 @@ (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk #endif .work_processed_kernel: @@ -150,7 +154,19 @@ adds r21=PT(PR)+16,r12 ;; -@@ -838,7 +885,9 @@ +@@ -828,17 +879,20 @@ + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + +- ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) ++ ld8.fill r16=[r3] + adds r30=PT(AR_CCV)+16,r12 + (p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; ++ adds r3=PT(AR_CSD)-PT(R16),r3 + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + (p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? ;; ld8 r29=[r2],16 // load b7 ld8 r30=[r3],16 // load ar.csd @@ -160,7 +176,7 @@ ;; ld8 r31=[r2],16 // load ar.ssd ld8.fill r8=[r3],16 -@@ -934,7 +983,11 @@ +@@ -934,7 +988,11 @@ shr.u r18=r19,16 // get byte size of existing "dirty" partition ;; mov r16=ar.bsp // get existing backing store pointer @@ -172,7 +188,7 @@ ;; ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 (pKStk) br.cond.dpnt skip_rbs_switch -@@ -1069,6 +1122,7 @@ +@@ -1069,6 +1127,7 @@ mov pr=r31,-1 // I0 rfi // B @@ -180,7 +196,7 @@ /* * On entry: * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT) -@@ -1130,6 +1184,7 @@ +@@ -1130,6 +1189,7 @@ ld8 r8=[r2] ld8 r10=[r3] br.cond.sptk.many .work_processed_syscall // re-check @@ -188,7 +204,7 @@ END(ia64_leave_kernel) -@@ -1166,6 +1221,7 @@ +@@ -1166,6 +1226,7 @@ br.ret.sptk.many rp END(ia64_invoke_schedule_tail) @@ -196,7 +212,7 @@ /* * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to * be set up by the caller. We declare 8 input registers so the system call -@@ -1264,6 +1320,7 @@ +@@ -1264,6 +1325,7 @@ mov ar.unat=r9 br.many b7 END(sys_rt_sigreturn) @@ -204,7 +220,7 @@ GLOBAL_ENTRY(ia64_prepare_handle_unaligned) .prologue -@@ -1278,6 +1335,7 @@ +@@ -1278,6 +1340,7 @@ br.cond.sptk.many rp // goes to ia64_leave_kernel END(ia64_prepare_handle_unaligned) @@ -212,7 +228,7 @@ // // unw_init_running(void (*callback)(info, arg), void *arg) // -@@ -1585,3 +1643,4 @@ +@@ -1585,3 +1648,4 @@ data8 sys_ni_syscall .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/xen/arch/ia64/patch/linux-2.6.11/head.S b/xen/arch/ia64/patch/linux-2.6.11/head.S index 7cb47a60b8..52ab758d85 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/head.S +++ b/xen/arch/ia64/patch/linux-2.6.11/head.S @@ -1,6 +1,62 @@ ---- ../../linux-2.6.11/arch/ia64/kernel/head.S 2005-03-02 00:38:13.000000000 -0700 -+++ arch/ia64/head.S 2005-04-28 10:51:19.000000000 -0600 -@@ -187,7 +187,11 @@ +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/head.S 2005-03-01 23:38:13.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/head.S 2005-05-18 12:40:50.000000000 -0700 +@@ -76,21 +76,21 @@ + * We initialize all of them to prevent inadvertently assuming + * something about the state of address translation early in boot. 
+ */ +- mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1) ++ movl r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r7=(0<<61) +- mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1) ++ movl r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r9=(1<<61) +- mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1) ++ movl r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r11=(2<<61) +- mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1) ++ movl r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r13=(3<<61) +- mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1) ++ movl r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r15=(4<<61) +- mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1) ++ movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1) + movl r17=(5<<61) +- mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) ++ movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) + movl r19=(6<<61) +- mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) ++ movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) + movl r21=(7<<61) + ;; + mov rr[r7]=r6 +@@ -129,8 +129,13 @@ + /* + * Switch into virtual mode: + */ ++#ifdef CONFIG_VTI ++ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH \ ++ |IA64_PSR_DI) ++#else // CONFIG_VTI + movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ + |IA64_PSR_DI) ++#endif // CONFIG_VTI + ;; + mov cr.ipsr=r16 + movl r17=1f +@@ -143,7 +148,11 @@ + 1: // now we are in virtual mode + + // set IVT entry point---can't access I/O ports without it ++#ifdef CONFIG_VTI ++ movl r3=vmx_ia64_ivt ++#else // CONFIG_VTI + movl r3=ia64_ivt ++#endif // CONFIG_VTI + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT +@@ -187,7 +196,11 @@ dep r18=0,r3,0,12 ;; or r18=r17,r18 @@ -12,7 +68,23 @@ ;; mov r17=rr[r2] shr.u r16=r3,IA64_GRANULE_SHIFT -@@ -227,7 +231,11 @@ +@@ -207,8 +220,15 @@ + + .load_current: + // load the "current" pointer (r13) and ar.k6 with the current task ++#ifdef CONFIG_VTI ++ mov r21=r2 // virtual address ++ ;; ++ bsw.1 ++ ;; ++#else // CONFIG_VTI + mov IA64_KR(CURRENT)=r2 // virtual address + mov IA64_KR(CURRENT_STACK)=r16 ++#endif // CONFIG_VTI + mov r13=r2 + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel threads +@@ -227,7 +247,11 @@ ;; mov ar.rsc=0x3 // place RSE in eager mode @@ -24,7 +96,7 @@ (isBP) movl r2=ia64_boot_param ;; (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader -@@ -254,7 +262,9 @@ +@@ -254,7 +278,9 @@ br.call.sptk.many b0=console_print self: hint @pause @@ -34,7 +106,7 @@ END(_start) GLOBAL_ENTRY(ia64_save_debug_regs) -@@ -850,7 +860,11 @@ +@@ -850,7 +876,11 @@ * intermediate precision so that we can produce a full 64-bit result. 
*/ GLOBAL_ENTRY(sched_clock) diff --git a/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h b/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h index 421644b0f7..9c653e982b 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h +++ b/xen/arch/ia64/patch/linux-2.6.11/hpsim_ssc.h @@ -1,10 +1,5 @@ - hpsim_ssc.h | 19 +++++++++++++++++++ - 1 files changed, 19 insertions(+) - -Index: linux-2.6.11/arch/ia64/hp/sim/hpsim_ssc.h -=================================================================== ---- linux-2.6.11.orig/arch/ia64/hp/sim/hpsim_ssc.h 2005-03-02 01:38:17.000000000 -0600 -+++ linux-2.6.11/arch/ia64/hp/sim/hpsim_ssc.h 2005-03-19 13:34:01.705520375 -0600 +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/hp/sim/hpsim_ssc.h 2005-03-01 23:38:17.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/hpsim_ssc.h 2005-05-18 12:40:19.000000000 -0700 @@ -33,4 +33,23 @@ */ extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr); diff --git a/xen/arch/ia64/patch/linux-2.6.11/interrupt.h b/xen/arch/ia64/patch/linux-2.6.11/interrupt.h index f82e60c2ff..29491bd442 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/interrupt.h +++ b/xen/arch/ia64/patch/linux-2.6.11/interrupt.h @@ -1,11 +1,6 @@ - interrupt.h | 2 ++ - 1 files changed, 2 insertions(+) - -Index: linux-2.6.11/include/linux/interrupt.h -=================================================================== ---- linux-2.6.11.orig/include/linux/interrupt.h 2005-03-02 01:38:09.000000000 -0600 -+++ linux-2.6.11/include/linux/interrupt.h 2005-03-19 13:41:00.739901125 -0600 -@@ -33,6 +33,7 @@ typedef int irqreturn_t; +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/linux/interrupt.h 2005-03-01 23:38:09.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/linux/interrupt.h 2005-05-18 12:40:50.000000000 -0700 +@@ -33,6 +33,7 @@ #define IRQ_HANDLED (1) #define IRQ_RETVAL(x) ((x) != 0) @@ -13,7 +8,7 @@ Index: linux-2.6.11/include/linux/interrupt.h struct irqaction { irqreturn_t (*handler)(int, void *, struct pt_regs *); unsigned long flags; -@@ -49,6 +50,7 @@ extern int request_irq(unsigned int, +@@ -49,6 +50,7 @@ irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long, const char *, void *); extern void free_irq(unsigned int, void *); @@ -21,3 +16,12 @@ Index: linux-2.6.11/include/linux/interrupt.h #ifdef CONFIG_GENERIC_HARDIRQS +@@ -121,7 +123,7 @@ + }; + + asmlinkage void do_softirq(void); +-extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); ++//extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); + extern void softirq_init(void); + #define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0) + extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); diff --git a/xen/arch/ia64/patch/linux-2.6.11/io.h b/xen/arch/ia64/patch/linux-2.6.11/io.h index 50b606e7cb..c935f35cf3 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/io.h +++ b/xen/arch/ia64/patch/linux-2.6.11/io.h @@ -1,16 +1,11 @@ - io.h | 4 ++++ - 1 files changed, 4 insertions(+) - -Index: linux-2.6.11/include/asm-ia64/io.h -=================================================================== ---- linux-2.6.11.orig/include/asm-ia64/io.h 2005-03-02 01:38:34.000000000 -0600 -+++ linux-2.6.11/include/asm-ia64/io.h 2005-03-19 13:42:06.541900818 -0600 +--- 
/home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/io.h 2005-03-01 23:38:34.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/io.h 2005-05-18 12:40:50.000000000 -0700 @@ -23,7 +23,11 @@ #define __SLOW_DOWN_IO do { } while (0) #define SLOW_DOWN_IO do { } while (0) +#ifdef XEN -+#define __IA64_UNCACHED_OFFSET 0xdffc000000000000UL /* region 6 */ ++#define __IA64_UNCACHED_OFFSET 0xd000000000000000UL /* region 6 */ +#else #define __IA64_UNCACHED_OFFSET 0xc000000000000000UL /* region 6 */ +#endif diff --git a/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c b/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c index ba2c3bc1e4..1daf4a3100 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c +++ b/xen/arch/ia64/patch/linux-2.6.11/irq_ia64.c @@ -1,5 +1,5 @@ ---- ../../linux-2.6.11/arch/ia64/kernel/irq_ia64.c 2005-03-02 00:38:07.000000000 -0700 -+++ arch/ia64/irq_ia64.c 2005-04-29 16:05:30.000000000 -0600 +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/irq_ia64.c 2005-03-01 23:38:07.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/irq_ia64.c 2005-05-18 12:40:51.000000000 -0700 @@ -106,6 +106,9 @@ unsigned long saved_tpr; @@ -20,3 +20,99 @@ __do_IRQ(local_vector_to_irq(vector), regs); /* +@@ -167,6 +173,95 @@ + irq_exit(); + } + ++#ifdef CONFIG_VTI ++/* ++ * That's where the IVT branches when we get an external ++ * interrupt. This branches to the correct hardware IRQ handler via ++ * function ptr. ++ */ ++void ++vmx_ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) ++{ ++ unsigned long saved_tpr; ++ int wake_dom0 = 0; ++ ++ ++#if IRQ_DEBUG ++ { ++ unsigned long bsp, sp; ++ ++ /* ++ * Note: if the interrupt happened while executing in ++ * the context switch routine (ia64_switch_to), we may ++ * get a spurious stack overflow here. This is ++ * because the register and the memory stack are not ++ * switched atomically. ++ */ ++ bsp = ia64_getreg(_IA64_REG_AR_BSP); ++ sp = ia64_getreg(_IA64_REG_AR_SP); ++ ++ if ((sp - bsp) < 1024) { ++ static unsigned char count; ++ static long last_time; ++ ++ if (jiffies - last_time > 5*HZ) ++ count = 0; ++ if (++count < 5) { ++ last_time = jiffies; ++ printk("ia64_handle_irq: DANGER: less than " ++ "1KB of free stack space!!\n" ++ "(bsp=0x%lx, sp=%lx)\n", bsp, sp); ++ } ++ } ++ } ++#endif /* IRQ_DEBUG */ ++ ++ /* ++ * Always set TPR to limit maximum interrupt nesting depth to ++ * 16 (without this, it would be ~240, which could easily lead ++ * to kernel stack overflows). ++ */ ++ irq_enter(); ++ saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); ++ ia64_srlz_d(); ++ while (vector != IA64_SPURIOUS_INT_VECTOR) { ++ if (!IS_RESCHEDULE(vector)) { ++ ia64_setreg(_IA64_REG_CR_TPR, vector); ++ ia64_srlz_d(); ++ ++ if (vector != IA64_TIMER_VECTOR) { ++ /* FIXME: Leave IRQ re-route later */ ++ vmx_vcpu_pend_interrupt(dom0->exec_domain[0],vector); ++ wake_dom0 = 1; ++ } ++ else { // FIXME: Handle Timer only now ++ __do_IRQ(local_vector_to_irq(vector), regs); ++ } ++ ++ /* ++ * Disable interrupts and send EOI: ++ */ ++ local_irq_disable(); ++ ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); ++ } ++ else { ++ printf("Oops: RESCHEDULE IPI absorbed by HV\n"); ++ } ++ ia64_eoi(); ++ vector = ia64_get_ivr(); ++ } ++ /* ++ * This must be done *after* the ia64_eoi(). 
For example, the keyboard softirq ++ * handler needs to be able to wait for further keyboard interrupts, which can't ++ * come through until ia64_eoi() has been done. ++ */ ++ irq_exit(); ++ if ( wake_dom0 && current != dom0 ) ++ domain_wake(dom0->exec_domain[0]); ++} ++#endif ++ ++ + #ifdef CONFIG_HOTPLUG_CPU + /* + * This function emulates a interrupt processing when a cpu is about to be diff --git a/xen/arch/ia64/patch/linux-2.6.11/kregs.h b/xen/arch/ia64/patch/linux-2.6.11/kregs.h index 9901600489..dab91c8b07 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/kregs.h +++ b/xen/arch/ia64/patch/linux-2.6.11/kregs.h @@ -1,18 +1,65 @@ - kregs.h | 4 ++++ - 1 files changed, 4 insertions(+) - -Index: linux-2.6.11/include/asm-ia64/kregs.h -=================================================================== ---- linux-2.6.11.orig/include/asm-ia64/kregs.h 2005-03-02 01:37:49.000000000 -0600 -+++ linux-2.6.11/include/asm-ia64/kregs.h 2005-03-19 13:44:24.362628092 -0600 -@@ -31,6 +31,10 @@ +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/kregs.h 2005-03-01 23:37:49.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/kregs.h 2005-05-18 12:40:50.000000000 -0700 +@@ -29,8 +29,20 @@ + */ + #define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel image (code & data) */ #define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */ ++#ifdef CONFIG_VTI ++#define IA64_TR_XEN_IN_DOM 6 /* itr6, dtr6: Double mapping for xen image in domain space */ ++#endif // CONFIG_VTI #define IA64_TR_PERCPU_DATA 1 /* dtr1: percpu data */ #define IA64_TR_CURRENT_STACK 2 /* dtr2: maps kernel's memory- & register-stacks */ +#ifdef XEN +#define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */ +#define IA64_TR_VHPT 4 /* dtr4: vhpt */ ++#ifdef CONFIG_VTI ++#define IA64_TR_VHPT_IN_DOM 5 /* dtr5: Double mapping for vhpt table in domain space */ ++#define IA64_TR_RR7_SWITCH_STUB 7 /* dtr7: mapping for rr7 switch stub */ ++#define IA64_TEMP_PHYSICAL 8 /* itr8, dtr8: temp mapping for guest physical memory 256M */ ++#endif // CONFIG_VTI +#endif /* Processor status register bits: */ #define IA64_PSR_BE_BIT 1 +@@ -66,6 +78,9 @@ + #define IA64_PSR_ED_BIT 43 + #define IA64_PSR_BN_BIT 44 + #define IA64_PSR_IA_BIT 45 ++#ifdef CONFIG_VTI ++#define IA64_PSR_VM_BIT 46 ++#endif // CONFIG_VTI + + /* A mask of PSR bits that we generally don't want to inherit across a clone2() or an + execve(). 
Only list flags here that need to be cleared/set for BOTH clone2() and +@@ -107,6 +122,9 @@ + #define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT) + #define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT) + #define IA64_PSR_IA (__IA64_UL(1) << IA64_PSR_IA_BIT) ++#ifdef CONFIG_VTI ++#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) ++#endif // CONFIG_VTI + + /* User mask bits: */ + #define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH) +@@ -160,4 +178,21 @@ + #define IA64_ISR_CODE_LFETCH 4 + #define IA64_ISR_CODE_PROBEF 5 + ++#ifdef CONFIG_VTI ++/* Interruption Function State */ ++#define IA64_IFS_V_BIT 63 ++#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) ++ ++/* Page Table Address */ ++#define IA64_PTA_VE_BIT 0 ++#define IA64_PTA_SIZE_BIT 2 ++#define IA64_PTA_VF_BIT 8 ++#define IA64_PTA_BASE_BIT 15 ++ ++#define IA64_PTA_VE (__IA64_UL(1) << IA64_PTA_VE_BIT) ++#define IA64_PTA_SIZE (__IA64_UL(0x3f) << IA64_PTA_SIZE_BIT) ++#define IA64_PTA_VF (__IA64_UL(1) << IA64_PTA_VF_BIT) ++#define IA64_PTA_BASE (__IA64_UL(0) - ((__IA64_UL(1) << IA64_PTA_BASE_BIT))) ++#endif // CONFIG_VTI ++ + #endif /* _ASM_IA64_kREGS_H */ diff --git a/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h b/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h index cbd6dd3795..b9acc231e9 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h +++ b/xen/arch/ia64/patch/linux-2.6.11/mca_asm.h @@ -1,10 +1,5 @@ - mca_asm.h | 11 +++++++++++ - 1 files changed, 11 insertions(+) - -Index: linux-2.6.11-xendiffs/include/asm-ia64/mca_asm.h -=================================================================== ---- linux-2.6.11-xendiffs.orig/include/asm-ia64/mca_asm.h 2005-03-02 01:38:38.000000000 -0600 -+++ linux-2.6.11-xendiffs/include/asm-ia64/mca_asm.h 2005-04-06 22:41:57.392411032 -0500 +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/mca_asm.h 2005-03-01 23:38:38.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/mca_asm.h 2005-05-18 12:40:19.000000000 -0700 @@ -26,8 +26,13 @@ * direct mapped to physical addresses. * 1. Lop off bits 61 thru 63 in the virtual address diff --git a/xen/arch/ia64/patch/linux-2.6.11/page.h b/xen/arch/ia64/patch/linux-2.6.11/page.h index cd6b281224..67f5ecdbeb 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/page.h +++ b/xen/arch/ia64/patch/linux-2.6.11/page.h @@ -1,6 +1,17 @@ ---- ../../linux-2.6.11/include/asm-ia64/page.h 2005-03-02 00:37:48.000000000 -0700 -+++ include/asm-ia64/page.h 2005-05-02 11:25:33.000000000 -0600 -@@ -95,9 +95,15 @@ +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/page.h 2005-03-01 23:37:48.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/page.h 2005-05-18 12:40:50.000000000 -0700 +@@ -32,6 +32,10 @@ + #define PAGE_ALIGN(addr) (((addr) + PAGE_SIZE - 1) & PAGE_MASK) + + #define PERCPU_PAGE_SHIFT 16 /* log2() of max. 
size of per-CPU area */ ++#ifdef CONFIG_VTI ++#define RR7_SWITCH_SHIFT 12 /* 4k enough */ ++#endif // CONFIG_VTI ++ + #define PERCPU_PAGE_SIZE (__IA64_UL_CONST(1) << PERCPU_PAGE_SHIFT) + + #define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */ +@@ -95,9 +99,15 @@ #endif #ifndef CONFIG_DISCONTIGMEM @@ -16,7 +27,7 @@ #else extern struct page *vmem_map; extern unsigned long max_low_pfn; -@@ -109,6 +115,11 @@ +@@ -109,6 +119,11 @@ #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) @@ -28,7 +39,7 @@ typedef union ia64_va { struct { unsigned long off : 61; /* intra-region offset */ -@@ -124,8 +135,23 @@ +@@ -124,8 +139,23 @@ * expressed in this way to ensure they result in a single "dep" * instruction. */ @@ -52,7 +63,7 @@ #define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;}) #define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;}) -@@ -197,7 +223,11 @@ +@@ -197,7 +227,11 @@ # define __pgprot(x) (x) #endif /* !STRICT_MM_TYPECHECKS */ diff --git a/xen/arch/ia64/patch/linux-2.6.11/pal.S b/xen/arch/ia64/patch/linux-2.6.11/pal.S index 0affc3a288..6e1fa22ca6 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/pal.S +++ b/xen/arch/ia64/patch/linux-2.6.11/pal.S @@ -1,11 +1,6 @@ - pal.S | 8 ++++++++ - 1 files changed, 8 insertions(+) - -Index: linux-2.6.11-xendiffs/arch/ia64/kernel/pal.S -=================================================================== ---- linux-2.6.11-xendiffs.orig/arch/ia64/kernel/pal.S 2005-03-02 01:38:33.000000000 -0600 -+++ linux-2.6.11-xendiffs/arch/ia64/kernel/pal.S 2005-04-06 22:43:53.817885390 -0500 -@@ -166,7 +166,11 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static) +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/pal.S 2005-03-01 23:38:33.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/pal.S 2005-05-18 12:40:19.000000000 -0700 +@@ -166,7 +166,11 @@ adds r8 = 1f-1b,r8 // calculate return address for call ;; mov loc4=ar.rsc // save RSE configuration @@ -17,7 +12,7 @@ Index: linux-2.6.11-xendiffs/arch/ia64/kernel/pal.S tpa r8=r8 // convert rp to physical ;; mov b7 = loc2 // install target to branch reg -@@ -225,7 +229,11 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked) +@@ -225,7 +229,11 @@ mov loc3 = psr // save psr ;; mov loc4=ar.rsc // save RSE configuration diff --git a/xen/arch/ia64/patch/linux-2.6.11/processor.h b/xen/arch/ia64/patch/linux-2.6.11/processor.h index 521d0dd013..308b298cad 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/processor.h +++ b/xen/arch/ia64/patch/linux-2.6.11/processor.h @@ -1,11 +1,173 @@ - processor.h | 4 ++++ - 1 files changed, 4 insertions(+) - -Index: linux-2.6.11/include/asm-ia64/processor.h -=================================================================== ---- linux-2.6.11.orig/include/asm-ia64/processor.h 2005-03-02 01:37:58.000000000 -0600 -+++ linux-2.6.11/include/asm-ia64/processor.h 2005-03-19 14:26:01.062135543 -0600 -@@ -408,12 +408,16 @@ extern void ia64_setreg_unknown_kr (void +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/processor.h 2005-03-01 23:37:58.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/processor.h 2005-05-18 12:40:50.000000000 -0700 +@@ -131,9 +131,166 @@ + __u64 ri : 2; + __u64 ed : 1; + __u64 bn : 1; ++#ifdef CONFIG_VTI ++ __u64 ia : 1; ++ __u64 vm : 1; ++ __u64 reserved5 : 17; ++#else // CONFIG_VTI 
+ __u64 reserved4 : 19; ++#endif // CONFIG_VTI + }; + ++#ifdef CONFIG_VTI ++/* vmx like above but expressed as bitfields for more efficient access: */ ++typedef union{ ++ __u64 val; ++ struct{ ++ __u64 reserved0 : 1; ++ __u64 be : 1; ++ __u64 up : 1; ++ __u64 ac : 1; ++ __u64 mfl : 1; ++ __u64 mfh : 1; ++ __u64 reserved1 : 7; ++ __u64 ic : 1; ++ __u64 i : 1; ++ __u64 pk : 1; ++ __u64 reserved2 : 1; ++ __u64 dt : 1; ++ __u64 dfl : 1; ++ __u64 dfh : 1; ++ __u64 sp : 1; ++ __u64 pp : 1; ++ __u64 di : 1; ++ __u64 si : 1; ++ __u64 db : 1; ++ __u64 lp : 1; ++ __u64 tb : 1; ++ __u64 rt : 1; ++ __u64 reserved3 : 4; ++ __u64 cpl : 2; ++ __u64 is : 1; ++ __u64 mc : 1; ++ __u64 it : 1; ++ __u64 id : 1; ++ __u64 da : 1; ++ __u64 dd : 1; ++ __u64 ss : 1; ++ __u64 ri : 2; ++ __u64 ed : 1; ++ __u64 bn : 1; ++ __u64 reserved4 : 19; ++ }; ++} IA64_PSR; ++ ++typedef union { ++ __u64 val; ++ struct { ++ __u64 code : 16; ++ __u64 vector : 8; ++ __u64 reserved1 : 8; ++ __u64 x : 1; ++ __u64 w : 1; ++ __u64 r : 1; ++ __u64 na : 1; ++ __u64 sp : 1; ++ __u64 rs : 1; ++ __u64 ir : 1; ++ __u64 ni : 1; ++ __u64 so : 1; ++ __u64 ei : 2; ++ __u64 ed : 1; ++ __u64 reserved2 : 20; ++ }; ++} ISR; ++ ++ ++typedef union { ++ __u64 val; ++ struct { ++ __u64 ve : 1; ++ __u64 reserved0 : 1; ++ __u64 size : 6; ++ __u64 vf : 1; ++ __u64 reserved1 : 6; ++ __u64 base : 49; ++ }; ++} PTA; ++ ++typedef union { ++ __u64 val; ++ struct { ++ __u64 rv : 16; ++ __u64 eid : 8; ++ __u64 id : 8; ++ __u64 ig : 32; ++ }; ++} LID; ++ ++typedef union{ ++ __u64 val; ++ struct { ++ __u64 rv : 3; ++ __u64 ir : 1; ++ __u64 eid : 8; ++ __u64 id : 8; ++ __u64 ib_base : 44; ++ }; ++} ipi_a_t; ++ ++typedef union{ ++ __u64 val; ++ struct { ++ __u64 vector : 8; ++ __u64 dm : 3; ++ __u64 ig : 53; ++ }; ++} ipi_d_t; ++ ++ ++#define IA64_ISR_CODE_MASK0 0xf ++#define IA64_UNIMPL_DADDR_FAULT 0x30 ++#define IA64_UNIMPL_IADDR_TRAP 0x10 ++#define IA64_RESERVED_REG_FAULT 0x30 ++#define IA64_REG_NAT_CONSUMPTION_FAULT 0x10 ++#define IA64_NAT_CONSUMPTION_FAULT 0x20 ++#define IA64_PRIV_OP_FAULT 0x10 ++ ++/* indirect register type */ ++enum { ++ IA64_CPUID, /* cpuid */ ++ IA64_DBR, /* dbr */ ++ IA64_IBR, /* ibr */ ++ IA64_PKR, /* pkr */ ++ IA64_PMC, /* pmc */ ++ IA64_PMD, /* pmd */ ++ IA64_RR /* rr */ ++}; ++ ++/* instruction type */ ++enum { ++ IA64_INST_TPA=1, ++ IA64_INST_TAK ++}; ++ ++/* Generate Mask ++ * Parameter: ++ * bit -- starting bit ++ * len -- how many bits ++ */ ++#define MASK(bit,len) \ ++({ \ ++ __u64 ret; \ ++ \ ++ __asm __volatile("dep %0=-1, r0, %1, %2" \ ++ : "=r" (ret): \ ++ "M" (bit), \ ++ "M" (len) ); \ ++ ret; \ ++}) ++ ++#endif // CONFIG_VTI ++ + /* + * CPU type, hardware bug flags, and per-CPU state. Frequently used + * state comes earlier: +@@ -408,12 +565,16 @@ */ /* Return TRUE if task T owns the fph partition of the CPU we're running on. 
*/ diff --git a/xen/arch/ia64/patch/linux-2.6.11/setup.c b/xen/arch/ia64/patch/linux-2.6.11/setup.c index 5d7afce326..07c9d382a4 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/setup.c +++ b/xen/arch/ia64/patch/linux-2.6.11/setup.c @@ -1,6 +1,16 @@ ---- ../../linux-2.6.11/arch/ia64/kernel/setup.c 2005-03-02 00:37:49.000000000 -0700 -+++ arch/ia64/setup.c 2005-05-02 10:04:03.000000000 -0600 -@@ -127,7 +127,16 @@ +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/setup.c 2005-03-01 23:37:49.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/setup.c 2005-05-18 12:40:50.000000000 -0700 +@@ -51,6 +51,9 @@ + #include <asm/smp.h> + #include <asm/system.h> + #include <asm/unistd.h> ++#ifdef CONFIG_VTI ++#include <asm/vmx.h> ++#endif // CONFIG_VTI + + #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) + # error "struct cpuinfo_ia64 too big!" +@@ -127,7 +130,16 @@ range_end = min(end, rsvd_region[i].start); if (range_start < range_end) @@ -17,7 +27,7 @@ /* nothing more available in this segment */ if (range_end == end) return 0; -@@ -185,7 +194,12 @@ +@@ -185,7 +197,12 @@ n++; rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); @@ -30,7 +40,7 @@ n++; #ifdef CONFIG_BLK_DEV_INITRD -@@ -299,7 +313,11 @@ +@@ -299,7 +316,11 @@ } void __init @@ -42,7 +52,7 @@ { unw_init(); -@@ -308,8 +326,14 @@ +@@ -308,8 +329,14 @@ *cmdline_p = __va(ia64_boot_param->command_line); strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); @@ -57,7 +67,7 @@ #ifdef CONFIG_IA64_GENERIC { -@@ -351,8 +375,17 @@ +@@ -351,8 +378,18 @@ # endif #endif /* CONFIG_APCI_BOOT */ @@ -71,11 +81,23 @@ +late_setup_arch (char **cmdline_p) +{ +#undef CONFIG_ACPI_BOOT ++ acpi_table_init(); +#endif /* process SAL system table: */ ia64_sal_init(efi.sal_systab); -@@ -492,12 +525,14 @@ +@@ -360,6 +397,10 @@ + cpu_physical_id(0) = hard_smp_processor_id(); + #endif + ++#ifdef CONFIG_VTI ++ identify_vmx_feature(); ++#endif // CONFIG_VTI ++ + cpu_init(); /* initialize the bootstrap CPU */ + + #ifdef CONFIG_ACPI_BOOT +@@ -492,12 +533,14 @@ { } @@ -90,7 +112,20 @@ void identify_cpu (struct cpuinfo_ia64 *c) -@@ -659,7 +694,11 @@ +@@ -551,6 +594,12 @@ + } + c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); + c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); ++ ++#ifdef CONFIG_VTI ++ /* If vmx feature is on, do necessary initialization for vmx */ ++ if (vmx_enabled) ++ vmx_init_env(); ++#endif + } + + void +@@ -659,7 +708,11 @@ | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; diff --git a/xen/arch/ia64/patch/linux-2.6.11/system.h b/xen/arch/ia64/patch/linux-2.6.11/system.h index e31d332c2b..05af88b8bf 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/system.h +++ b/xen/arch/ia64/patch/linux-2.6.11/system.h @@ -1,10 +1,17 @@ ---- ../../linux-2.6.11/include/asm-ia64/system.h 2005-03-02 00:38:07.000000000 -0700 -+++ include/asm-ia64/system.h 2005-05-02 10:18:30.000000000 -0600 -@@ -24,8 +24,15 @@ +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/include/asm-ia64/system.h 2005-03-01 23:38:07.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/include/asm-ia64/system.h 2005-05-18 12:40:50.000000000 -0700 +@@ -24,8 +24,22 @@ * 0xa000000000000000+2*PERCPU_PAGE_SIZE * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page) */ +#ifdef XEN ++#ifdef CONFIG_VTI ++#define XEN_VIRT_SPACE_LOW 
0xe800000000000000 ++#define XEN_VIRT_SPACE_HIGH 0xf800000000000000 ++/* This is address to mapping rr7 switch stub, in region 5 */ ++#define XEN_RR7_SWITCH_STUB 0xb700000000000000 ++#endif // CONFIG_VTI ++ +#define KERNEL_START 0xf000000004000000 +#define PERCPU_ADDR 0xf100000000000000-PERCPU_PAGE_SIZE +#define SHAREDINFO_ADDR 0xf100000000000000 @@ -16,7 +23,17 @@ #ifndef __ASSEMBLY__ -@@ -218,9 +225,13 @@ +@@ -205,6 +219,9 @@ + * ia64_ret_from_syscall_clear_r8. + */ + extern struct task_struct *ia64_switch_to (void *next_task); ++#ifdef CONFIG_VTI ++extern struct task_struct *vmx_ia64_switch_to (void *next_task); ++#endif // CONFIG_VTI + + struct task_struct; + +@@ -218,10 +235,32 @@ # define PERFMON_IS_SYSWIDE() (0) #endif @@ -28,5 +45,32 @@ || IS_IA32_PROCESS(ia64_task_regs(t)) || PERFMON_IS_SYSWIDE()) +#endif ++#ifdef CONFIG_VTI ++#define __switch_to(prev,next,last) do { \ ++ if (VMX_DOMAIN(prev)) \ ++ vmx_save_state(prev); \ ++ else { \ ++ if (IA64_HAS_EXTRA_STATE(prev)) \ ++ ia64_save_extra(prev); \ ++ } \ ++ if (VMX_DOMAIN(next)) \ ++ vmx_load_state(next); \ ++ else { \ ++ if (IA64_HAS_EXTRA_STATE(next)) \ ++ ia64_load_extra(next); \ ++ } \ ++ ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \ ++ (last) = vmx_ia64_switch_to((next)); \ ++} while (0) ++#else // CONFIG_VTI #define __switch_to(prev,next,last) do { \ if (IA64_HAS_EXTRA_STATE(prev)) \ + ia64_save_extra(prev); \ +@@ -230,6 +269,7 @@ + ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \ + (last) = ia64_switch_to((next)); \ + } while (0) ++#endif // CONFIG_VTI + + #ifdef CONFIG_SMP + /* diff --git a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c index 0ed114e064..942cce7ec1 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c +++ b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c @@ -1,7 +1,144 @@ ---- ../../linux-2.6.11/arch/ia64/kernel/unaligned.c 2005-03-02 00:38:25.000000000 -0700 -+++ arch/ia64/unaligned.c 2005-05-10 15:46:09.000000000 -0600 -@@ -437,7 +437,11 @@ +--- /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/../../linux-2.6.11/arch/ia64/kernel/unaligned.c 2005-03-01 23:38:25.000000000 -0800 ++++ /home/adsharma/disk2/xen-ia64/xeno-unstable-rebase.bk/xen/arch/ia64/unaligned.c 2005-05-18 12:40:50.000000000 -0700 +@@ -201,7 +201,11 @@ + + RPT(r1), RPT(r2), RPT(r3), + ++#ifdef CONFIG_VTI ++ RPT(r4), RPT(r5), RPT(r6), RPT(r7), ++#else //CONFIG_VTI + RSW(r4), RSW(r5), RSW(r6), RSW(r7), ++#endif //CONFIG_VTI + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), +@@ -291,6 +295,121 @@ + return reg; + } + ++#ifdef CONFIG_VTI ++static void ++set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, unsigned long nat) ++{ ++ struct switch_stack *sw = (struct switch_stack *) regs - 1; ++ unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end; ++ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; ++ unsigned long rnats, nat_mask; ++ unsigned long old_rsc,new_rsc; ++ unsigned long on_kbs,rnat; ++ long sof = (regs->cr_ifs) & 0x7f; ++ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); ++ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; ++ long ridx = r1 - 32; ++ ++ if (ridx >= sof) { ++ /* this should never happen, as the "rsvd register fault" has higher priority */ ++ DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof); ++ return; ++ } ++ ++ if (ridx < sor) ++ ridx = rotate_reg(sor, rrb_gr, ridx); ++ ++ old_rsc=ia64_get_rsc(); ++ new_rsc=old_rsc&(~0x3); ++ 
ia64_set_rsc(new_rsc); ++ ++ bspstore = ia64_get_bspstore(); ++ bsp =kbs + (regs->loadrs >> 19);//16+3 ++ ++ addr = ia64_rse_skip_regs(bsp, -sof + ridx); ++ nat_mask = 1UL << ia64_rse_slot_num(addr); ++ rnat_addr = ia64_rse_rnat_addr(addr); ++ ++ if(addr >= bspstore){ ++ ++ ia64_flushrs (); ++ ia64_mf (); ++ *addr = val; ++ bspstore = ia64_get_bspstore(); ++ rnat = ia64_get_rnat (); ++ if(bspstore < rnat_addr){ ++ rnat=rnat&(~nat_mask); ++ }else{ ++ *rnat_addr = (*rnat_addr)&(~nat_mask); ++ } ++ ia64_mf(); ++ ia64_loadrs(); ++ ia64_set_rnat(rnat); ++ }else{ ++ ++ rnat = ia64_get_rnat (); ++ *addr = val; ++ if(bspstore < rnat_addr){ ++ rnat=rnat&(~nat_mask); ++ }else{ ++ *rnat_addr = (*rnat_addr)&(~nat_mask); ++ } ++ ia64_set_bspstore (bspstore); ++ ia64_set_rnat(rnat); ++ } ++ ia64_set_rsc(old_rsc); ++} ++ ++ ++static void ++get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, unsigned long *nat) ++{ ++ struct switch_stack *sw = (struct switch_stack *) regs - 1; ++ unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore; ++ unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; ++ unsigned long rnats, nat_mask; ++ unsigned long on_kbs; ++ unsigned long old_rsc, new_rsc; ++ long sof = (regs->cr_ifs) & 0x7f; ++ long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); ++ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; ++ long ridx = r1 - 32; ++ ++ if (ridx >= sof) { ++ /* a read of an out-of-frame register is undefined; treat it as a bug */ ++ DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); ++ panic("wrong stack register number"); ++ } ++ ++ if (ridx < sor) ++ ridx = rotate_reg(sor, rrb_gr, ridx); ++ ++ old_rsc=ia64_get_rsc(); ++ new_rsc=old_rsc&(~(0x3)); ++ ia64_set_rsc(new_rsc); ++ ++ bspstore = ia64_get_bspstore(); ++ bsp =kbs + (regs->loadrs >> 19); //16+3; ++ ++ addr = ia64_rse_skip_regs(bsp, -sof + ridx); ++ nat_mask = 1UL << ia64_rse_slot_num(addr); ++ rnat_addr = ia64_rse_rnat_addr(addr); ++ ++ if(addr >= bspstore){ ++ ++ ia64_flushrs (); ++ ia64_mf (); ++ bspstore = ia64_get_bspstore(); ++ } ++ *val=*addr; ++ if(bspstore < rnat_addr){ ++ *nat=!!(ia64_get_rnat()&nat_mask); ++ }else{ ++ *nat = !!((*rnat_addr)&nat_mask); ++ } ++ ia64_set_rsc(old_rsc); ++} ++#else // CONFIG_VTI + static void + set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) + { +@@ -435,9 +554,14 @@ + *nat = 0; + return; } ++#endif // CONFIG_VTI +#ifdef XEN @@ -12,7 +149,19 @@ setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *) regs - 1; -@@ -522,7 +526,11 @@ +@@ -466,7 +590,11 @@ + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; ++#ifdef CONFIG_VTI ++ unat = &regs->eml_unat; ++#else //CONFIG_VTI + unat = &sw->caller_unat; ++#endif //CONFIG_VTI + } + DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n", + addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)); +@@ -522,7 +650,11 @@ */ if (regnum >= IA64_FIRST_ROTATING_FR) { ia64_sync_fph(current); @@ -24,7 +173,7 @@ } else { /* * pt_regs or switch_stack ? -@@ -581,7 +589,11 @@ +@@ -581,7 +713,11 @@ */ if (regnum >= IA64_FIRST_ROTATING_FR) { ia64_flush_fph(current); @@ -36,7 +185,7 @@ } else { /* * f0 = 0.0, f1= 1.0. 
Those registers are constant and are thus -@@ -611,7 +623,11 @@ } @@ -48,7 +197,19 @@ getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *) regs - 1; -@@ -640,7 +780,11 @@ + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; ++#ifdef CONFIG_VTI ++ unat = &regs->eml_unat; ++#else //CONFIG_VTI + unat = &sw->caller_unat; ++#endif //CONFIG_VTI + } + + DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)); +@@ -1294,6 +1438,9 @@ void ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) { @@ -58,7 +219,7 @@ struct ia64_psr *ipsr = ia64_psr(regs); mm_segment_t old_fs = get_fs(); unsigned long bundle[2]; -@@ -1502,4 +1521,5 @@ +@@ -1502,4 +1649,5 @@ si.si_imm = 0; force_sig_info(SIGBUS, &si, current); goto done; diff --git a/xen/arch/ia64/process.c b/xen/arch/ia64/process.c index dd9e58071f..3e276ae696 100644 --- a/xen/arch/ia64/process.c +++ b/xen/arch/ia64/process.c @@ -64,11 +64,16 @@ long do_iopl(domid_t domain, unsigned int new_io_pl) void schedule_tail(struct exec_domain *next) { unsigned long rr7; - printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info); - printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info); + //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info); + //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info); +#ifdef CONFIG_VTI + /* rr7 will be postponed to last point when resuming back to guest */ + vmx_load_all_rr(current); +#else // CONFIG_VTI if (rr7 = load_region_regs(current)) { printk("schedule_tail: change to rr7 not yet implemented\n"); } +#endif // CONFIG_VTI } extern TR_ENTRY *match_tr(struct exec_domain *ed, unsigned long ifa); @@ -346,8 +351,8 @@ void ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_reg // FIXME should validate mpaddr here if (d == dom0) { if (address < dom0_start || address >= dom0_start + dom0_size) { - printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, iip=%p! continuing...\n",address,iip); - printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, old iip=%p!\n",address,current->vcpu_info->arch.iip); + //printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, iip=%p! continuing...\n",address,iip); + //printk("ia64_do_page_fault: out-of-bounds dom0 mpaddr %p, old iip=%p!\n",address,current->vcpu_info->arch.iip); tdpfoo(); } } diff --git a/xen/arch/ia64/tools/README.RunVT b/xen/arch/ia64/tools/README.RunVT new file mode 100644 index 0000000000..f93e85b3f3 --- /dev/null +++ b/xen/arch/ia64/tools/README.RunVT @@ -0,0 +1,59 @@ +INSTRUCTIONS FOR RUNNING IPF/Xen on a VT-enabled Tiger4 platform + +Note: Domain0 must be an unmodified Linux kernel + +1) Perform the operations in README.xenia64 to get a flattened Xen IPF source tree + +2) Build an unmodified Linux 2.6 kernel + a) tar xvfz linux-2.6.11.tar.gz + b) cp arch/ia64/configs/tiger_defconfig .config + c) Build linux. + 1) yes "" | make oldconfig + 2) make + +3) Build the IPF VT-enabled Xen image + edit xen/arch/ia64/Rules.mk for + CONFIG_VTI ?= y to enable the VT-enabled build +4) Setup ELILO.CONF + image=xen + label=xen + initrd=vmlinux2.6.11 // unmodified Linux kernel image + read-only + append="nomca root=/dev/sda3" + +STATUS as of 4/28/05 - Features implemented for Domain0 + +0. Runs an unmodified Linux kernel as Domain0 + Validated with Linux 2.6.11 running X Window and a NIC on a UP logical processor + +1. Takes advantage of the VT-enabled processor + a. 
The processor intercepts guest privileged instructions and delivers the Opcode/Cause to the hypervisor + b. One VPD (Virtual Processor Descriptor) per virtual processor + c. Domains run in a different virtual address space from the hypervisor. Domains have one less VA bit than the hypervisor, which runs at 0xF00000... addresses protected by the processor from the Domains. + +2. vTLB and guest_VHPT + a. The vTLB extends machine TLB entries through a hypervisor-internal data structure + The vTLB caches the Domains' installed TRs and TCs, and then installs TCs for the Domains instead. + The vTLB implements collision chains + b. The processor walks the hypervisor-internal VHPT, not the domain VHPT. On a TLB miss, the vTLB is consulted first to put a hypervisor-cached entry into the VHPT without injecting a TLB miss to the domain. + +3. Region ID fixed partitioning + a. Currently hard-partitions the 24 bits of RIDs into 16 partitions using the top 4 bits. + b. The hypervisor uses the very last partition's RIDs, i.e., the 0xFxxxxx RIDs + c. Effectively supports Domain0 and 14 other DomainN + +4. The hypervisor is mapped with 2 sets of RIDs at runtime: its own RIDs and the active Domain's RIDs + a. Domain RIDs are used by the processor to access the guest_VHPT during Domain runtime + b. Hypervisor RIDs are used while the hypervisor is running + c. This implies some region register transitions on entering/exiting the hypervisor + +5. Linux-style pt_regs with minor modifications for VT and instruction emulation + a. Part of the Domain registers are saved/restored from the VPD + b. pt_regs is extended to include r4~r7 and the Domain's iipa & isr for possible instruction emulation, so there is no need to save a complete switch_stack on IVT entry + +6. Linux-style per-virtual-processor memory/RSE stacks, the same as for non-VT Domain0 + +7. Handles split I/D-cache designs + Newer IPF processors have split I/D caches. The design takes this into account when Xen recopies Domain0 to its target address for execution + + diff --git a/xen/arch/ia64/vcpu.c b/xen/arch/ia64/vcpu.c index 29b20a4df0..1831dc21f4 100644 --- a/xen/arch/ia64/vcpu.c +++ b/xen/arch/ia64/vcpu.c @@ -14,6 +14,9 @@ #include <asm/tlb.h> #include <asm/processor.h> #include <asm/delay.h> +#ifdef CONFIG_VTI +#include <asm/vmx_vcpu.h> +#endif // CONFIG_VTI typedef union { struct ia64_psr ia64_psr; @@ -523,12 +526,19 @@ void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) printf("vcpu_pend_interrupt: bad vector\n"); return; } +#ifdef CONFIG_VTI + if ( VMX_DOMAIN(vcpu) ) { + set_bit(vector,VPD_CR(vcpu,irr)); + } else +#endif // CONFIG_VTI + { if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; if (test_bit(vector,PSCBX(vcpu,irr))) { //printf("vcpu_pend_interrupt: overrun\n"); } set_bit(vector,PSCBX(vcpu,irr)); PSCB(vcpu,pending_interruption) = 1; + } } void early_tick(VCPU *vcpu) @@ -619,7 +629,8 @@ extern unsigned long privop_trace; //privop_trace=1; //TODO: Implement this printf("vcpu_get_lid: WARNING: Getting cr.lid always returns zero\n"); - *pval = 0; + //*pval = 0; + *pval = ia64_getreg(_IA64_REG_CR_LID); return IA64_NO_FAULT; } diff --git a/xen/arch/ia64/vlsapic.c b/xen/arch/ia64/vlsapic.c new file mode 100644 index 0000000000..01927d5294 --- /dev/null +++ b/xen/arch/ia64/vlsapic.c @@ -0,0 +1,504 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vlsapic.c: virtual lsapic model including ITC timer. + * Copyright (c) 2005, Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + */ + +#include <linux/sched.h> +#include <public/arch-ia64.h> +#include <asm/ia64_int.h> +#include <asm/vcpu.h> +#include <asm/regionreg.h> +#include <asm/tlb.h> +#include <asm/processor.h> +#include <asm/delay.h> +#include <asm/vmx_vcpu.h> +#include <asm/regs.h> +#include <asm/gcc_intrin.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx.h> +#include <asm/hw_irq.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/kregs.h> + +//u64 fire_itc; +//u64 fire_itc2; +//u64 fire_itm; +//u64 fire_itm2; +/* + * Update the cached last_itc. + */ +static void update_last_itc(vtime_t *vtm, uint64_t cur_itc) +{ + vtm->last_itc = cur_itc; +} + +/* + * ITC value as seen by the guest (host+offset+drift). + */ +static uint64_t now_itc(vtime_t *vtm) +{ + uint64_t guest_itc=vtm->vtm_offset+ia64_get_itc(); + + if ( vtm->vtm_local_drift ) { +// guest_itc -= vtm->vtm_local_drift; + } + if ( (long)(guest_itc - vtm->last_itc) > 0 ) { + return guest_itc; + + } + else { + /* the guest ITC went backward, e.g. after an LP switch */ + return vtm->last_itc; + } +} + +/* + * Interval time components reset. + */ +static void vtm_reset(VCPU *vcpu) +{ + uint64_t cur_itc; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + vtm->vtm_offset = 0; + vtm->vtm_local_drift = 0; + VPD_CR(vcpu, itm) = 0; + VPD_CR(vcpu, itv) = 0x10000; + cur_itc = ia64_get_itc(); + vtm->last_itc = vtm->vtm_offset + cur_itc; +} + +/* callback function when vtm_timer expires */ +static void vtm_timer_fn(unsigned long data) +{ + vtime_t *vtm; + VCPU *vcpu = (VCPU*)data; + u64 cur_itc,vitm; + + UINT64 vec; + + vec = VPD_CR(vcpu, itv) & 0xff; + vmx_vcpu_pend_interrupt(vcpu, vec); + + vtm=&(vcpu->arch.arch_vmx.vtm); + cur_itc = now_itc(vtm); + vitm =VPD_CR(vcpu, itm); + //fire_itc2 = cur_itc; + //fire_itm2 = vitm; + update_last_itc(vtm,cur_itc); // pseudo read to update vITC + vtm->timer_hooked = 0; +} + +void vtm_init(VCPU *vcpu) +{ + vtime_t *vtm; + uint64_t itc_freq; + + vtm=&(vcpu->arch.arch_vmx.vtm); + + itc_freq = local_cpu_data->itc_freq; + vtm->cfg_max_jump=itc_freq*MAX_JUMP_STEP/1000; + vtm->cfg_min_grun=itc_freq*MIN_GUEST_RUNNING_TIME/1000; + /* set up the ac_timer */ + init_ac_timer(&(vtm->vtm_timer)); + vtm->timer_hooked = 0; + vtm->vtm_timer.cpu = 0; /* Init value for SMP case */ + vtm->vtm_timer.data = (unsigned long)vcpu; + vtm->vtm_timer.function = vtm_timer_fn; + vtm_reset(vcpu); +} + +/* + * Action when the guest reads the ITC. + */ +uint64_t vtm_get_itc(VCPU *vcpu) +{ + uint64_t guest_itc, spsr; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + // FIXME: should use local_irq_disable & local_irq_enable ?? 
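+ // local_irq_save/restore preserves the caller's interrupt state, so it + // stays correct even if this path is ever entered with interrupts + // already disabled; a plain disable/enable pair would not.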
+ local_irq_save(spsr); + guest_itc = now_itc(vtm); + update_last_itc(vtm, guest_itc); + + local_irq_restore(spsr); + return guest_itc; +} + +void vtm_set_itc(VCPU *vcpu, uint64_t new_itc) +{ + uint64_t spsr; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + local_irq_save(spsr); + vtm->vtm_offset = new_itc - ia64_get_itc(); + vtm->last_itc = new_itc; + vtm_interruption_update(vcpu, vtm); + local_irq_restore(spsr); +} + +void vtm_set_itv(VCPU *vcpu) +{ + uint64_t spsr,itv; + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + local_irq_save(spsr); + itv = VPD_CR(vcpu, itv); + if ( ITV_IRQ_MASK(itv) && vtm->timer_hooked ) { + rem_ac_timer(&(vtm->vtm_timer)); + vtm->timer_hooked = 0; + } + vtm_interruption_update(vcpu, vtm); + local_irq_restore(spsr); +} + + +/* + * Update interrupt or hook the vtm ac_timer for fire + * At this point vtm_timer should be removed if itv is masked. + */ +/* Interrupt must be disabled at this point */ + +extern u64 tick_to_ns(u64 tick); +#define TIMER_SLOP (50*1000) /* ns */ /* copy from ac_timer.c */ +void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm) +{ + uint64_t cur_itc,vitm,vitv; + uint64_t expires; + long diff_now, diff_last; + uint64_t spsr; + + vitv = VPD_CR(vcpu, itv); + if ( ITV_IRQ_MASK(vitv) ) { + return; + } + + vitm =VPD_CR(vcpu, itm); + local_irq_save(spsr); + cur_itc =now_itc(vtm); + diff_last = vtm->last_itc - vitm; + diff_now = cur_itc - vitm; + update_last_itc (vtm,cur_itc); + + if ( diff_last >= 0 ) { + // interrupt already fired. + if ( vtm->timer_hooked ) { + rem_ac_timer(&(vtm->vtm_timer)); + vtm->timer_hooked = 0; + } + } + else if ( diff_now >= 0 ) { + // ITV is fired. + vmx_vcpu_pend_interrupt(vcpu, vitv&0xff); + } + /* Both last_itc & cur_itc < itm, wait for fire condition */ + else if ( vtm->timer_hooked ) { + expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP; + mod_ac_timer (&(vtm->vtm_timer), expires); + printf("mod vtm_timer\n"); +//fire_itc = cur_itc; +//fire_itm = vitm; + } + else { + vtm->vtm_timer.expires = NOW() + tick_to_ns(0-diff_now) + TIMER_SLOP; + vtm->vtm_timer.cpu = vcpu->processor; + add_ac_timer(&(vtm->vtm_timer)); + vtm->timer_hooked = 1; +//fire_itc = cur_itc; +//fire_itm = vitm; + } + local_irq_restore(spsr); +} + +/* + * Action for vtm when the domain is scheduled out. + * Remove the ac_timer for vtm. + */ +void vtm_domain_out(VCPU *vcpu) +{ + vtime_t *vtm; + uint64_t spsr; + + vtm=&(vcpu->arch.arch_vmx.vtm); + local_irq_save(spsr); + if ( vtm->timer_hooked ) { + rem_ac_timer(&(vtm->vtm_timer)); + vtm->timer_hooked = 0; + } + local_irq_restore(spsr); +} + +/* + * Action for vtm when the domain is scheduled in. + * Fire vtm IRQ or add the ac_timer for vtm. + */ +void vtm_domain_in(VCPU *vcpu) +{ + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + vtm_interruption_update(vcpu, vtm); +} + + + +/* + * Next for vLSapic + */ + +#define NMI_VECTOR 2 +#define ExtINT_VECTOR 0 + +#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.arch_vmx.in_service[i]) +/* + * LID-CR64: Keep in vpd. + * IVR-CR65: (RO) see guest_read_ivr(). + * TPR-CR66: Keep in vpd, acceleration enabled. + * EOI-CR67: see guest_write_eoi(). + * IRR0-3 - CR68-71: (RO) Keep in vpd irq_pending[] + * can move to vpd for optimization. + * ITV: in time virtualization. + * PMV: Keep in vpd initialized as 0x10000. + * CMCV: Keep in vpd initialized as 0x10000. + * LRR0-1: Keep in vpd, initialized as 0x10000. 
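+ * IRR0-3 map vectors 0-63, 64-127, 128-191 and 192-255 respectively; + * NMI is fixed at vector 2 and ExtINT at vector 0, matching the + * NMI_VECTOR/ExtINT_VECTOR definitions above.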
+ * + */ + +void vlsapic_reset(VCPU *vcpu) +{ + int i; + VPD_CR(vcpu, lid) = 0; + VPD_CR(vcpu, ivr) = 0; + VPD_CR(vcpu,tpr) = 0x10000; + VPD_CR(vcpu, eoi) = 0; + VPD_CR(vcpu, irr[0]) = 0; + VPD_CR(vcpu, irr[1]) = 0; + VPD_CR(vcpu, irr[2]) = 0; + VPD_CR(vcpu, irr[3]) = 0; + VPD_CR(vcpu, pmv) = 0x10000; + VPD_CR(vcpu, cmcv) = 0x10000; + VPD_CR(vcpu, lrr0) = 0x10000; // default reset value? + VPD_CR(vcpu, lrr1) = 0x10000; // default reset value? + for ( i=0; i<4; i++) { + VLSAPIC_INSVC(vcpu,i) = 0; + } +} + +/* + * Find highest signaled bits in 4 words (long). + * + * return 0-255: highest bits. + * -1 : Not found. + */ +static __inline__ int highest_bits(uint64_t *dat) +{ + uint64_t bits, bitnum=-1; + int i; + + /* loop for all 256 bits */ + for ( i=3; i >= 0 ; i -- ) { + bits = dat[i]; + if ( bits ) { + bitnum = ia64_fls(bits); + return i*64+bitnum; + } + } + return -1; +} + +/* + * Return 0-255 for pending irq. + * -1 when no pending. + */ +static int highest_pending_irq(VCPU *vcpu) +{ + if ( VPD_CR(vcpu, irr[0]) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR; + if ( VPD_CR(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR; + return highest_bits(&VPD_CR(vcpu, irr[0])); +} + +static int highest_inservice_irq(VCPU *vcpu) +{ + if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<NMI_VECTOR) ) return NMI_VECTOR; + if ( VLSAPIC_INSVC(vcpu, 0) & (1UL<<ExtINT_VECTOR) ) return ExtINT_VECTOR; + return highest_bits(&(VLSAPIC_INSVC(vcpu, 0))); +} + +/* + * The pending irq is higher than the inservice one. + * + */ +static int is_higher_irq(int pending, int inservice) +{ + return ( (pending >> 4) > (inservice>>4) || + ((pending != -1) && (inservice == -1)) ); +} + +static int is_higher_class(int pending, int mic) +{ + return ( (pending >> 4) > mic ); +} + +static int is_invalid_irq(int vec) +{ + return (vec == 1 || ((vec <= 14 && vec >= 3))); +} + +/* See Table 5-8 in SDM vol2 for the definition */ +static int +irq_masked(VCPU *vcpu, int h_pending, int h_inservice) +{ + uint64_t vtpr; + + vtpr = VPD_CR(vcpu, tpr); + + if ( h_pending == NMI_VECTOR && h_inservice != NMI_VECTOR ) + // Non Maskable Interrupt + return 0; + + if ( h_pending == ExtINT_VECTOR && h_inservice >= 16) + return (vtpr>>16)&1; // vtpr.mmi + + if ( !(vtpr&(1UL<<16)) && + is_higher_irq(h_pending, h_inservice) && + is_higher_class(h_pending, (vtpr>>4)&0xf) ) + return 0; + + return 1; +} + +void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) +{ + uint64_t spsr; + + if (vector & ~0xff) { + printf("vmx_vcpu_pend_interrupt: bad vector\n"); + return; + } + local_irq_save(spsr); + VPD_CR(vcpu,irr[vector>>6]) |= 1UL<<(vector&63); + local_irq_restore(spsr); +} + +/* + * If the new pending interrupt is enabled and not masked, we directly inject + * it into the guest. Otherwise, we set the VHPI if vac.a_int=1 so that when + * the interrupt becomes unmasked, it gets injected. + * RETURN: + * TRUE: Interrupt is injected. + * FALSE: Not injected but may be in VHPI when vac.a_int=1 + * + * Optimization: We defer setting the VHPI until the EOI time, if a higher + * priority interrupt is in-service. The idea is to reduce the + * number of unnecessary calls to inject_vhpi. 
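+ *
+ * Vectors are grouped 16 per priority class (class = vec >> 4): e.g. a
+ * pending 0x89 (class 8) preempts an in-service 0x71 (class 7), while a
+ * pending 0x75 (also class 7) does not.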
+ */
+int vmx_check_pending_irq(VCPU *vcpu)
+{
+    uint64_t    spsr;
+    int         h_pending, h_inservice;
+    int         injected = 0;
+    uint64_t    isr;
+    IA64_PSR    vpsr;
+
+    local_irq_save(spsr);
+    h_pending = highest_pending_irq(vcpu);
+    if ( h_pending == -1 ) goto chk_irq_exit;
+    h_inservice = highest_inservice_irq(vcpu);
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.i &&
+        !irq_masked(vcpu, h_pending, h_inservice) ) {
+        //inject_guest_irq(v);
+        isr = vpsr.val & IA64_PSR_RI;
+        if ( !vpsr.ic )
+            panic("Interrupt when IC=0\n");
+        vmx_reflect_interruption(0, isr, 0, 12);    // EXT IRQ
+        injected = 1;
+    }
+    else if ( VMX_VPD(vcpu,vac).a_int &&
+        is_higher_irq(h_pending, h_inservice) ) {
+        vmx_inject_vhpi(vcpu, h_pending);
+    }
+
+chk_irq_exit:
+    local_irq_restore(spsr);
+    return injected;
+}
+
+void guest_write_eoi(VCPU *vcpu)
+{
+    int vec;
+    uint64_t  spsr;
+
+    vec = highest_inservice_irq(vcpu);
+    if ( vec < 0 ) panic("Wrong vector to EOI\n");
+    local_irq_save(spsr);
+    VLSAPIC_INSVC(vcpu,vec>>6) &= ~(1UL <<(vec&63));
+    local_irq_restore(spsr);
+    VPD_CR(vcpu, eoi) = 0;    // clear the EOI data
+}
+
+uint64_t guest_read_vivr(VCPU *vcpu)
+{
+    int vec, next, h_inservice;
+    uint64_t  spsr;
+
+    local_irq_save(spsr);
+    vec = highest_pending_irq(vcpu);
+    h_inservice = highest_inservice_irq(vcpu);
+    if ( vec < 0 || irq_masked(vcpu, vec, h_inservice) ) {
+        local_irq_restore(spsr);
+        return IA64_SPURIOUS_INT_VECTOR;
+    }
+
+    VLSAPIC_INSVC(vcpu,vec>>6) |= (1UL <<(vec&63));
+    VPD_CR(vcpu, irr[vec>>6]) &= ~(1UL <<(vec&63));
+
+    h_inservice = highest_inservice_irq(vcpu);
+    next = highest_pending_irq(vcpu);
+    if ( VMX_VPD(vcpu,vac).a_int &&
+        (is_higher_irq(next, h_inservice) || (next == -1)) )
+        vmx_inject_vhpi(vcpu, next);
+    local_irq_restore(spsr);
+    return (uint64_t)vec;
+}
+
+/* vec must be signed: -1 means "nothing pending" (a u8 parameter would
+ * turn the -1 passed from guest_read_vivr() into 255 and the check
+ * below would never fire). The prototype must match. */
+void vmx_inject_vhpi(VCPU *vcpu, int vec)
+{
+    /* 16 vectors per priority class */
+    VMX_VPD(vcpu,vhpi) = vec / 16;
+
+    // pseudo vectors and "nothing pending" override the class value
+    if ( vec == NMI_VECTOR )        // NMI
+        VMX_VPD(vcpu,vhpi) = 32;
+    else if (vec == ExtINT_VECTOR)  // ExtINT
+        VMX_VPD(vcpu,vhpi) = 16;
+    else if (vec == -1)
+        VMX_VPD(vcpu,vhpi) = 0;     /* Nothing pending */
+
+    ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT,
+        (uint64_t) &(vcpu->arch.arch_vmx.vpd), 0, 0, 0, 0, 0, 0);
+}
+
diff --git a/xen/arch/ia64/vmmu.c b/xen/arch/ia64/vmmu.c
new file mode 100644
index 0000000000..d2b2b30153
--- /dev/null
+++ b/xen/arch/ia64/vmmu.c
@@ -0,0 +1,801 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmmu.c: virtual memory management unit components.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + */ +#include <linux/sched.h> +#include <asm/tlb.h> +#include <asm/gcc_intrin.h> +#include <asm/vcpu.h> +#include <xen/interrupt.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx.h> +#include <asm/hw_irq.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/kregs.h> + +/* + * Architecture ppn is in 4KB unit while XEN + * page may be different(1<<PAGE_SHIFT). + */ +static inline u64 arch_ppn_to_xen_ppn(u64 appn) +{ + return (appn << ARCH_PAGE_SHIFT) >> PAGE_SHIFT; +} + +static inline u64 xen_ppn_to_arch_ppn(u64 xppn) +{ + return (xppn << PAGE_SHIFT) >> ARCH_PAGE_SHIFT; +} + + +/* + * Get the machine page frame number in 16KB unit + * Input: + * d: + */ +u64 get_mfn(domid_t domid, u64 gpfn, u64 pages) +{ + struct domain *d; + u64 i, xen_gppn, xen_mppn, mpfn; + + if ( domid == DOMID_SELF ) { + d = current->domain; + } + else { + d = find_domain_by_id(domid); + } + xen_gppn = arch_ppn_to_xen_ppn(gpfn); + xen_mppn = __gpfn_to_mfn(d, xen_gppn); +/* + for (i=0; i<pages; i++) { + if ( __gpfn_to_mfn(d, gpfn+i) == INVALID_MFN ) { + return INVALID_MFN; + } + } +*/ + mpfn= xen_ppn_to_arch_ppn(xen_mppn); + mpfn = mpfn | (((1UL <<(PAGE_SHIFT-12))-1)&gpfn); + return mpfn; + +} + +/* + * The VRN bits of va stand for which rr to get. + */ +rr_t vmmu_get_rr(VCPU *vcpu, u64 va) +{ + rr_t vrr; + vmx_vcpu_get_rr(vcpu, va, &vrr.value); + return vrr; +} + + +void recycle_message(thash_cb_t *hcb, u64 para) +{ + printk("hcb=%p recycled with %lx\n",hcb,para); +} + + +/* + * Purge all guest TCs in logical processor. + * Instead of purging all LP TCs, we should only purge + * TCs that belong to this guest. + */ +void +purge_machine_tc_by_domid(domid_t domid) +{ +#ifndef PURGE_GUEST_TC_ONLY + // purge all TCs + struct ia64_pal_retval result; + u64 addr; + u32 count1,count2; + u32 stride1,stride2; + u32 i,j; + u64 psr; + + + result = ia64_pal_call_static(PAL_PTCE_INFO,0,0,0, 0); + if ( result.status != 0 ) { + panic ("PAL_PTCE_INFO failed\n"); + } + addr = result.v0; + count1 = HIGH_32BITS(result.v1); + count2 = LOW_32BITS (result.v1); + stride1 = HIGH_32BITS(result.v2); + stride2 = LOW_32BITS (result.v2); + + local_irq_save(psr); + for (i=0; i<count1; i++) { + for (j=0; j<count2; j++) { + ia64_ptce(addr); + addr += stride2; + } + addr += stride1; + } + local_irq_restore(psr); +#else + // purge all TCs belong to this guest. 
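+    /*
+     * Not implemented yet. Conceptually (a sketch only; dom_rid_first()
+     * and dom_rid_last() are hypothetical per-domain RID accessors that
+     * do not exist in this tree):
+     *
+     *     u64 rid;
+     *     for ( rid = dom_rid_first(d); rid <= dom_rid_last(d); rid++ )
+     *         machine_tlb_purge(rid, va, ps);  // over each mapped range
+     */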
+#endif +} + +static thash_cb_t *init_domain_vhpt(struct exec_domain *d) +{ + struct pfn_info *page; + void *vbase,*vcur; + vhpt_special *vs; + thash_cb_t *vhpt; + PTA pta_value; + + page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER); + if ( page == NULL ) { + panic("No enough contiguous memory for init_domain_mm\n"); + } + vbase = page_to_virt(page); + printk("Allocate domain vhpt at 0x%lx\n", (u64)vbase); + memset(vbase, 0, VCPU_TLB_SIZE); + vcur = (void*)((u64)vbase + VCPU_TLB_SIZE); + vhpt = --((thash_cb_t*)vcur); + vhpt->ht = THASH_VHPT; + vhpt->vcpu = d; + vhpt->hash_func = machine_thash; + vs = --((vhpt_special *)vcur); + + /* Setup guest pta */ + pta_value.val = 0; + pta_value.ve = 1; + pta_value.vf = 1; + pta_value.size = VCPU_TLB_SHIFT - 1; /* 2M */ + pta_value.base = ((u64)vbase) >> PTA_BASE_SHIFT; + d->arch.arch_vmx.mpta = pta_value.val; + + vhpt->vs = vs; + vhpt->vs->get_mfn = get_mfn; + vhpt->vs->tag_func = machine_ttag; + vhpt->hash = vbase; + vhpt->hash_sz = VCPU_TLB_SIZE/2; + vhpt->cch_buf = (u64)vbase + vhpt->hash_sz; + vhpt->cch_sz = (u64)vcur - (u64)vhpt->cch_buf; + vhpt->recycle_notifier = recycle_message; + thash_init(vhpt,VCPU_TLB_SHIFT-1); + return vhpt; +} + + +thash_cb_t *init_domain_tlb(struct exec_domain *d) +{ + struct pfn_info *page; + void *vbase,*vcur; + tlb_special_t *ts; + thash_cb_t *tlb; + + page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER); + if ( page == NULL ) { + panic("No enough contiguous memory for init_domain_mm\n"); + } + vbase = page_to_virt(page); + printk("Allocate domain tlb at 0x%lx\n", (u64)vbase); + memset(vbase, 0, VCPU_TLB_SIZE); + vcur = (void*)((u64)vbase + VCPU_TLB_SIZE); + tlb = --((thash_cb_t*)vcur); + tlb->ht = THASH_TLB; + tlb->vcpu = d; + ts = --((tlb_special_t *)vcur); + tlb->ts = ts; + tlb->ts->vhpt = init_domain_vhpt(d); + tlb->hash_func = machine_thash; + tlb->hash = vbase; + tlb->hash_sz = VCPU_TLB_SIZE/2; + tlb->cch_buf = (u64)vbase + tlb->hash_sz; + tlb->cch_sz = (u64)vcur - (u64)tlb->cch_buf; + tlb->recycle_notifier = recycle_message; + thash_init(tlb,VCPU_TLB_SHIFT-1); + return tlb; +} + +/* Allocate physical to machine mapping table for domN + * FIXME: Later this interface may be removed, if that table is provided + * by control panel. Dom0 has gpfn identical to mfn, which doesn't need + * this interface at all. + */ +void +alloc_pmt(struct domain *d) +{ + struct pfn_info *page; + + /* Only called once */ + ASSERT(d->arch.pmt); + + page = alloc_domheap_pages(NULL, get_order(d->max_pages)); + ASSERT(page); + + d->arch.pmt = page_to_virt(page); + memset(d->arch.pmt, 0x55, d->max_pages * 8); +} + +/* + * Insert guest TLB to machine TLB. 
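+ *
+ * The sequence below: translate the guest ppn to a machine ppn, then with
+ * psr.ic/psr.i off, save ITIR/IFA/rr, temporarily switch the region
+ * register to the machine rid (vmx_vrrtomrr), issue itc.i or itc.d, and
+ * restore the saved state.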
+ * data: In TLB format + */ +void machine_tlb_insert(struct exec_domain *d, thash_data_t *tlb) +{ + u64 saved_itir, saved_ifa, saved_rr; + u64 pages; + thash_data_t mtlb; + rr_t vrr; + unsigned int cl = tlb->cl; + + mtlb.ifa = tlb->vadr; + mtlb.itir = tlb->itir & ~ITIR_RV_MASK; + vrr = vmmu_get_rr(d,mtlb.ifa); + //vmx_vcpu_get_rr(d, mtlb.ifa, &vrr.value); + pages = PSIZE(vrr.ps) >> PAGE_SHIFT; + mtlb.page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; + mtlb.ppn = get_mfn(DOMID_SELF,tlb->ppn, pages); + if (mtlb.ppn == INVALID_MFN) + panic("Machine tlb insert with invalid mfn number.\n"); + + __asm __volatile("rsm psr.ic|psr.i;; srlz.i" ); + + saved_itir = ia64_getreg(_IA64_REG_CR_ITIR); + saved_ifa = ia64_getreg(_IA64_REG_CR_IFA); + saved_rr = ia64_get_rr(mtlb.ifa); + + ia64_setreg(_IA64_REG_CR_ITIR, mtlb.itir); + ia64_setreg(_IA64_REG_CR_IFA, mtlb.ifa); + /* Only access memory stack which is mapped by TR, + * after rr is switched. + */ + ia64_set_rr(mtlb.ifa, vmx_vrrtomrr(d, vrr.value)); + ia64_srlz_d(); + if ( cl == ISIDE_TLB ) { + ia64_itci(mtlb.page_flags); + ia64_srlz_i(); + } + else { + ia64_itcd(mtlb.page_flags); + ia64_srlz_d(); + } + ia64_set_rr(mtlb.ifa,saved_rr); + ia64_srlz_d(); + ia64_setreg(_IA64_REG_CR_IFA, saved_ifa); + ia64_setreg(_IA64_REG_CR_ITIR, saved_itir); + __asm __volatile("ssm psr.ic|psr.i;; srlz.i" ); +} + +u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps) +{ + u64 saved_pta, saved_rr0; + u64 hash_addr, tag; + unsigned long psr; + struct exec_domain *ed = current; + rr_t vrr; + + + saved_pta = ia64_getreg(_IA64_REG_CR_PTA); + saved_rr0 = ia64_get_rr(0); + vrr.value = saved_rr0; + vrr.rid = rid; + vrr.ps = ps; + + va = (va << 3) >> 3; // set VRN to 0. + // TODO: Set to enforce lazy mode + local_irq_save(psr); + ia64_setreg(_IA64_REG_CR_PTA, pta.val); + ia64_set_rr(0, vmx_vrrtomrr(ed, vrr.value)); + ia64_srlz_d(); + + hash_addr = ia64_thash(va); + ia64_setreg(_IA64_REG_CR_PTA, saved_pta); + + ia64_set_rr(0, saved_rr0); + ia64_srlz_d(); + local_irq_restore(psr); + return hash_addr; +} + +u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps) +{ + u64 saved_pta, saved_rr0; + u64 hash_addr, tag; + u64 psr; + struct exec_domain *ed = current; + rr_t vrr; + + // TODO: Set to enforce lazy mode + saved_pta = ia64_getreg(_IA64_REG_CR_PTA); + saved_rr0 = ia64_get_rr(0); + vrr.value = saved_rr0; + vrr.rid = rid; + vrr.ps = ps; + + va = (va << 3) >> 3; // set VRN to 0. + local_irq_save(psr); + ia64_setreg(_IA64_REG_CR_PTA, pta.val); + ia64_set_rr(0, vmx_vrrtomrr(ed, vrr.value)); + ia64_srlz_d(); + + tag = ia64_ttag(va); + ia64_setreg(_IA64_REG_CR_PTA, saved_pta); + + ia64_set_rr(0, saved_rr0); + ia64_srlz_d(); + local_irq_restore(psr); + return tag; +} + +/* + * Purge machine tlb. + * INPUT + * rr: guest rr. + * va: only bits 0:60 is valid + * size: bits format (1<<size) for the address range to purge. + * + */ +void machine_tlb_purge(u64 rid, u64 va, u64 ps) +{ + u64 saved_rr0; + u64 psr; + rr_t vrr; + + va = (va << 3) >> 3; // set VRN to 0. 
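+    /* ptc.l encodes the purge size in bits 7:2 of its second operand,
+     * hence the (ps << 2) passed to ia64_ptcl() below. */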
+ saved_rr0 = ia64_get_rr(0); + vrr.value = saved_rr0; + vrr.rid = rid; + vrr.ps = ps; + local_irq_save(psr); + ia64_set_rr( 0, vmx_vrrtomrr(current,vrr.value) ); + ia64_srlz_d(); + ia64_ptcl(va, ps << 2); + ia64_set_rr( 0, saved_rr0 ); + ia64_srlz_d(); + local_irq_restore(psr); +} + + +int vhpt_enabled(VCPU *vcpu, uint64_t vadr, vhpt_ref_t ref) +{ + ia64_rr vrr; + PTA vpta; + IA64_PSR vpsr; + + vpsr.val = vmx_vcpu_get_psr(vcpu); + vrr = vmx_vcpu_rr(vcpu, vadr); + vmx_vcpu_get_pta(vcpu,&vpta.val); + + if ( vrr.ve & vpta.ve ) { + switch ( ref ) { + case DATA_REF: + case NA_REF: + return vpsr.dt; + case INST_REF: + return vpsr.dt && vpsr.it && vpsr.ic; + case RSE_REF: + return vpsr.dt && vpsr.rt; + + } + } + return 0; +} + + +int unimplemented_gva(VCPU *vcpu,u64 vadr) +{ + int bit=vcpu->domain->arch.imp_va_msb; + u64 ladr =(vadr<<3)>>(3+bit); + if(!ladr||ladr==(1U<<(61-bit))-1){ + return 0; + }else{ + return 1; + } +} + + +/* + * Prefetch guest bundle code. + * INPUT: + * code: buffer pointer to hold the read data. + * num: number of dword (8byts) to read. + */ +int +fetch_code(VCPU *vcpu, u64 gip, u64 *code) +{ + u64 gpip; // guest physical IP + u64 mpa; + thash_data_t *tlb; + rr_t vrr; + u64 mfn; + + if ( !(VMX_VPD(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode + gpip = gip; + } + else { + vmx_vcpu_get_rr(vcpu, gip, &vrr.value); + tlb = vtlb_lookup_ex (vmx_vcpu_get_vtlb(vcpu), + vrr.rid, gip, ISIDE_TLB ); + if ( tlb == NULL ) panic("No entry found in ITLB\n"); + gpip = (tlb->ppn << 12) | ( gip & (PSIZE(tlb->ps)-1) ); + } + mfn = __gpfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); + if ( mfn == INVALID_MFN ) return 0; + + mpa = (gpip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT); + *code = *(u64*)__va(mpa); + return 1; +} + +IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + rr_t vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.section=THASH_TLB_TC; + data.cl=ISIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + + sections.v = THASH_SECTION_TR; + + ovl = thash_find_overlap(hcb, &data, sections); + while (ovl) { + // generate MCA. + panic("Tlb conflict!!"); + return; + } + sections.v = THASH_SECTION_TC; + thash_purge_entries(hcb, &data, sections); + thash_insert(hcb, &data, ifa); + return IA64_NO_FAULT; +} + + + + +IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + rr_t vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.section=THASH_TLB_TC; + data.cl=DSIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + sections.v = THASH_SECTION_TR; + + ovl = thash_find_overlap(hcb, &data, sections); + if (ovl) { + // generate MCA. 
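+        /* An overlap with a TR entry is a guest error; architecturally it
+         * should surface as a machine check to the guest. The panic below
+         * is a development placeholder. */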
+ panic("Tlb conflict!!"); + return; + } + sections.v = THASH_SECTION_TC; + thash_purge_entries(hcb, &data, sections); + thash_insert(hcb, &data, ifa); + return IA64_NO_FAULT; +} + +IA64FAULT insert_foreignmap(VCPU *vcpu, UINT64 pte, UINT64 ps, UINT64 va) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + rr_t vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=0; + data.ps = ps; + data.vadr=PAGEALIGN(va,ps); + data.section=THASH_TLB_FM; + data.cl=DSIDE_TLB; + vmx_vcpu_get_rr(vcpu, va, &vrr); + data.rid = vrr.rid; + sections.v = THASH_SECTION_TR|THASH_SECTION_TC|THASH_SECTION_FM; + + ovl = thash_find_overlap(hcb, &data, sections); + if (ovl) { + // generate MCA. + panic("Foreignmap Tlb conflict!!"); + return; + } + thash_insert(hcb, &data, va); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + rr_t vrr; + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.section=THASH_TLB_TR; + data.cl=ISIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + sections.v = THASH_SECTION_TR; + + ovl = thash_find_overlap(hcb, &data, sections); + if (ovl) { + // generate MCA. + panic("Tlb conflict!!"); + return; + } + sections.v=THASH_SECTION_TC; + thash_purge_entries(hcb, &data, sections); + thash_tr_insert(hcb, &data, ifa, idx); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx) +{ + + thash_data_t data, *ovl; + thash_cb_t *hcb; + search_section_t sections; + rr_t vrr; + + + hcb = vmx_vcpu_get_vtlb(vcpu); + data.page_flags=pte & ~PAGE_FLAGS_RV_MASK; + data.itir=itir; + data.vadr=PAGEALIGN(ifa,data.ps); + data.section=THASH_TLB_TR; + data.cl=DSIDE_TLB; + vmx_vcpu_get_rr(vcpu, ifa, &vrr); + data.rid = vrr.rid; + sections.v = THASH_SECTION_TR; + + ovl = thash_find_overlap(hcb, &data, sections); + while (ovl) { + // generate MCA. 
+ panic("Tlb conflict!!"); + return; + } + sections.v=THASH_SECTION_TC; + thash_purge_entries(hcb, &data, sections); + thash_tr_insert(hcb, &data, ifa, idx); + return IA64_NO_FAULT; +} + + + +IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 ps) +{ + thash_cb_t *hcb; + ia64_rr rr; + search_section_t sections; + + hcb = vmx_vcpu_get_vtlb(vcpu); + rr=vmx_vcpu_rr(vcpu,vadr); + sections.v = THASH_SECTION_TR | THASH_SECTION_TC; + thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,DSIDE_TLB); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 ps) +{ + thash_cb_t *hcb; + ia64_rr rr; + search_section_t sections; + hcb = vmx_vcpu_get_vtlb(vcpu); + rr=vmx_vcpu_rr(vcpu,vadr); + sections.v = THASH_SECTION_TR | THASH_SECTION_TC; + thash_purge_entries_ex(hcb,rr.rid,vadr,ps,sections,ISIDE_TLB); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps) +{ + thash_cb_t *hcb; + ia64_rr vrr; + search_section_t sections; + thash_data_t data, *ovl; + hcb = vmx_vcpu_get_vtlb(vcpu); + vrr=vmx_vcpu_rr(vcpu,vadr); + sections.v = THASH_SECTION_TC; + vadr = PAGEALIGN(vadr, ps); + + thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,DSIDE_TLB); + thash_purge_entries_ex(hcb,vrr.rid,vadr,ps,sections,ISIDE_TLB); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) +{ + thash_cb_t *hcb; + hcb = vmx_vcpu_get_vtlb(vcpu); + thash_purge_all(hcb); + return IA64_NO_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps) +{ + vmx_vcpu_ptc_l(vcpu, vadr, ps); + return IA64_ILLOP_FAULT; +} + +IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 ps) +{ + vmx_vcpu_ptc_l(vcpu, vadr, ps); + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval) +{ + PTA vpta; + ia64_rr vrr; + u64 vhpt_offset,tmp; + vmx_vcpu_get_pta(vcpu, &vpta.val); + vrr=vmx_vcpu_rr(vcpu, vadr); + if(vpta.vf){ + panic("THASH,Don't support long format VHPT"); + *pval = ia64_call_vsa(PAL_VPS_THASH,vadr,vrr.rrval,vpta.val,0,0,0,0); + }else{ + vhpt_offset=((vadr>>vrr.ps)<<3)&((1UL<<(vpta.size))-1); + *pval = (vadr&VRN_MASK)| + (vpta.val<<3>>(vpta.size+3)<<(vpta.size))| + vhpt_offset; + } + return IA64_NO_FAULT; +} + + +IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval) +{ + ia64_rr vrr; + PTA vpta; + vmx_vcpu_get_pta(vcpu, &vpta.val); + vrr=vmx_vcpu_rr(vcpu, vadr); + if(vpta.vf){ + panic("THASH,Don't support long format VHPT"); + *pval = ia64_call_vsa(PAL_VPS_TTAG,vadr,vrr.rrval,0,0,0,0,0); + }else{ + *pval = 1; + } + return IA64_NO_FAULT; +} + + + +IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr) +{ + thash_data_t *data; + thash_cb_t *hcb; + ia64_rr vrr; + ISR visr,pt_isr; + REGS *regs; + u64 vhpt_adr; + IA64_PSR vpsr; + hcb = vmx_vcpu_get_vtlb(vcpu); + vrr=vmx_vcpu_rr(vcpu,vadr); + regs=vcpu_regs(vcpu); + pt_isr.val=regs->cr_isr; + visr.val=0; + visr.ei=pt_isr.ei; + visr.ir=pt_isr.ir; + vpsr.val = vmx_vcpu_get_psr(vcpu); + if(vpsr.ic==0){ + visr.ni=1; + } + visr.na=1; + data = vtlb_lookup_ex(hcb, vrr.rid, vadr, DSIDE_TLB); + if(data){ + if(data->p==0){ + visr.na=1; + vmx_vcpu_set_isr(vcpu,visr.val); + page_not_present(vcpu, vadr); + return IA64_FAULT; + }else if(data->ma == VA_MATTR_NATPAGE){ + visr.na = 1; + vmx_vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + }else{ + *padr = (data->ppn<<12) | (vadr&(PSIZE(data->ps)-1)); + return IA64_NO_FAULT; + } + }else{ + if(!vhpt_enabled(vcpu, vadr, NA_REF)){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, 
visr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } + else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + else{ + vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); + vrr=vmx_vcpu_rr(vcpu,vhpt_adr); + data = vtlb_lookup_ex(hcb, vrr.rid, vhpt_adr, DSIDE_TLB); + if(data){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, visr.val); + dtlb_fault(vcpu, vadr); + return IA64_FAULT; + } + else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + else{ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, visr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + } + else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + } + } +} + +IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key) +{ + thash_data_t *data; + thash_cb_t *hcb; + ia64_rr rr; + PTA vpta; + vmx_vcpu_get_pta(vcpu, &vpta.val); + if(vpta.vf==0 || unimplemented_gva(vcpu, vadr)){ + *key=1; + return IA64_NO_FAULT; + } + hcb = vmx_vcpu_get_vtlb(vcpu); + rr=vmx_vcpu_rr(vcpu,vadr); + data = vtlb_lookup_ex(hcb, rr.rid, vadr, DSIDE_TLB); + if(!data||!data->p){ + *key=1; + }else{ + *key=data->key; + } + return IA64_NO_FAULT; +} + diff --git a/xen/arch/ia64/vmx_entry.S b/xen/arch/ia64/vmx_entry.S new file mode 100644 index 0000000000..682a69df8a --- /dev/null +++ b/xen/arch/ia64/vmx_entry.S @@ -0,0 +1,611 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_entry.S: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + */ + +#ifndef VCPU_TLB_SHIFT +#define VCPU_TLB_SHIFT 22 +#endif +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/cache.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/percpu.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> + +#include "vmx_minstate.h" + +/* + * prev_task <- vmx_ia64_switch_to(struct task_struct *next) + * With Ingo's new scheduler, interrupts are disabled when this routine gets + * called. The code starting at .map relies on this. The rest of the code + * doesn't care about the interrupt masking status. + * + * Since we allocate domain stack in xenheap, there's no need to map new + * domain's stack since all xenheap is mapped by TR. Another different task + * for vmx_ia64_switch_to is to switch to bank0 and change current pointer. + */ +GLOBAL_ENTRY(vmx_ia64_switch_to) + .prologue + alloc r16=ar.pfs,1,0,0,0 + DO_SAVE_SWITCH_STACK + .body + + bsw.0 // Switch to bank0, because bank0 r21 is current pointer + ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + movl r25=init_task + adds r26=IA64_TASK_THREAD_KSP_OFFSET,in0 + ;; + st8 [r22]=sp // save kernel stack pointer of old task + ;; + /* + * TR always mapped this task's page, we can skip doing it again. 
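+ * (Domain stacks live in the xenheap, which is pinned by a translation
+ * register, so no itr insertion is needed on a context switch.)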
+ */ + ld8 sp=[r26] // load kernel stack pointer of new task + mov r21=in0 // update "current" application register + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + ;; + bsw.1 + ;; + DO_LOAD_SWITCH_STACK + +#ifdef CONFIG_SMP + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.many rp // boogie on out in new context +END(vmx_ia64_switch_to) + +GLOBAL_ENTRY(ia64_leave_nested) + rsm psr.i + ;; + adds r21=PT(PR)+16,r12 + ;; + + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3] + adds r3=PT(AR_CSD)-PT(R16),r3 + adds r30=PT(AR_CCV)+16,r12 + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; + ldf.fill f11=[r2] +// mov r18=r13 +// mov r21=r13 + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] + ;; + ld8.fill r3=[r16] + ;; + mov r16=ar.bsp // get existing backing store pointer + ;; + mov b0=r22 + mov ar.pfs=r26 + mov cr.ifs=r30 + mov cr.ipsr=r29 + mov ar.fpsr=r20 + mov cr.iip=r28 + ;; + mov ar.rsc=r27 + mov ar.unat=r25 + mov pr=r31,-1 + rfi +END(ia64_leave_nested) + + + +GLOBAL_ENTRY(ia64_leave_hypervisor) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. 
mustn't get changed by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + rsm psr.i + ;; + alloc loc0=ar.pfs,0,1,1,0 + adds out0=16,r12 + ;; + br.call.sptk.many b0=vmx_deliver_pending_interrupt + mov ar.pfs=loc0 + adds r8=IA64_VPD_BASE_OFFSET,r13 + ;; + ld8 r8=[r8] + ;; + adds r9=VPD(VPSR),r8 + ;; + ld8 r9=[r9] + ;; + tbit.z pBN0,pBN1=r9,IA64_PSR_BN_BIT + ;; +(pBN0) add r7=VPD(VBNAT),r8; +(pBN1) add r7=VPD(VNAT),r8; + ;; + ld8 r7=[r7] + ;; + mov ar.unat=r7 +(pBN0) add r4=VPD(VBGR),r8; +(pBN1) add r4=VPD(VGR),r8; +(pBN0) add r5=VPD(VBGR)+0x8,r8; +(pBN1) add r5=VPD(VGR)+0x8,r8; + ;; + ld8.fill r16=[r4],16 + ld8.fill r17=[r5],16 + ;; + ld8.fill r18=[r4],16 + ld8.fill r19=[r5],16 + ;; + ld8.fill r20=[r4],16 + ld8.fill r21=[r5],16 + ;; + ld8.fill r22=[r4],16 + ld8.fill r23=[r5],16 + ;; + ld8.fill r24=[r4],16 + ld8.fill r25=[r5],16 + ;; + ld8.fill r26=[r4],16 + ld8.fill r27=[r5],16 + ;; + ld8.fill r28=[r4],16 + ld8.fill r29=[r5],16 + ;; + ld8.fill r30=[r4],16 + ld8.fill r31=[r5],16 + ;; + bsw.0 + ;; + mov r18=r8 //vpd + mov r19=r9 //vpsr + adds r20=PT(PR)+16,r12 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r16=PT(B6)+16,r12 + adds r17=PT(B7)+16,r12 + ;; + lfetch [r20] + mov r21=r13 // get current + ;; + ld8 r30=[r16],16 // load b6 + ld8 r31=[r17],16 // load b7 + add r20=PT(EML_UNAT)+16,r12 + ;; + ld8 r29=[r20] //load ar_unat + mov b6=r30 + mov b7=r31 + ld8 r30=[r16],16 //load ar_csd + ld8 r31=[r17],16 //load ar_ssd + ;; + mov ar.unat=r29 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + ld8.fill r8=[r16],16 //load r8 + ld8.fill r9=[r17],16 //load r9 + ;; + ld8.fill r10=[r16],PT(R1)-PT(R10) //load r10 + ld8.fill r11=[r17],PT(R12)-PT(R11) //load r11 + ;; + ld8.fill r1=[r16],16 //load r1 + ld8.fill r12=[r17],16 //load r12 + ;; + ld8.fill r13=[r16],16 //load r13 + ld8 r30=[r17],16 //load ar_fpsr + ;; + ld8.fill r15=[r16],16 //load r15 + ld8.fill r14=[r17],16 //load r14 + mov ar.fpsr=r30 + ;; + ld8.fill r2=[r16],16 //load r2 + ld8.fill r3=[r17],16 //load r3 + ;; +/* +(pEml) ld8.fill r4=[r16],16 //load r4 +(pEml) ld8.fill r5=[r17],16 //load r5 + ;; +(pEml) ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6 +(pEml) ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7 + ;; +(pNonEml) adds r16=PT(AR_CCV)-PT(R4),r16 +(pNonEml) adds r17=PT(F7)-PT(R5),r17 + ;; +*/ + ld8.fill r4=[r16],16 //load r4 + ld8.fill r5=[r17],16 //load r5 + ;; + ld8.fill r6=[r16],PT(AR_CCV)-PT(R6) //load r6 + ld8.fill r7=[r17],PT(F7)-PT(R7) //load r7 + ;; + + ld8 r30=[r16],PT(F6)-PT(AR_CCV) + rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection + ;; + srlz.i // ensure interruption collection is off + ;; + invala // invalidate ALAT + ;; + ldf.fill f6=[r16],32 + ldf.fill f7=[r17],32 + ;; + ldf.fill f8=[r16],32 + ldf.fill f9=[r17],32 + ;; + ldf.fill f10=[r16] + ldf.fill f11=[r17] + ;; + mov ar.ccv=r30 + adds r16=PT(CR_IPSR)-PT(F10),r16 + adds r17=PT(CR_IIP)-PT(F11),r17 + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat (may be garbage) + ld8 r24=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],PT(RFI_PFS)-PT(B0) // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ;; +//rbs_switch + // loadrs has already been shifted + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + mov ar.rsc=r20 + ;; 
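+    // ar.rsc now holds the "loadrs" value saved at entry; the loadrs
+    // below reloads the current register frame from the RSE backing store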
+ loadrs + ;; + mov ar.bspstore=r24 + ;; + ld8 r24=[r17] //load rfi_pfs + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + cmp.ne p6,p0=r24,r0 +(p6)br.sptk vmx_dorfirfi + ;; +vmx_dorfirfi_back: + mov ar.pfs=r27 + +//vsa_sync_write_start + movl r20=__vsa_base + ;; + ld8 r20=[r20] // read entry point + mov r25=r18 + ;; + add r16=PAL_VPS_SYNC_WRITE,r20 + movl r24=switch_rr7 // calculate return address + ;; + mov b0=r16 + br.cond.sptk b0 // call the service + ;; +// switch rr7 and rr5 +switch_rr7: + adds r24=SWITCH_MRR5_OFFSET, r21 + adds r26=SWITCH_MRR6_OFFSET, r21 + adds r16=SWITCH_MRR7_OFFSET ,r21 + movl r25=(5<<61) + movl r27=(6<<61) + movl r17=(7<<61) + ;; + ld8 r24=[r24] + ld8 r26=[r26] + ld8 r16=[r16] + ;; + mov rr[r25]=r24 + mov rr[r27]=r26 + mov rr[r17]=r16 + ;; + srlz.i + ;; + add r24=SWITCH_MPTA_OFFSET, r21 + ;; + ld8 r24=[r24] + ;; + mov cr.pta=r24 + ;; + srlz.i + ;; +// fall through +GLOBAL_ENTRY(ia64_vmm_entry) +/* + * must be at bank 0 + * parameter: + * r18:vpd + * r19:vpsr + * r20:__vsa_base + * r22:b0 + * r23:predicate + */ + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + ;; + mov pr=r23,-2 + mov b0=r29 + ;; + br.cond.sptk b0 // call pal service +END(ia64_leave_hypervisor) + +//r24 rfi_pfs +//r17 address of rfi_pfs +GLOBAL_ENTRY(vmx_dorfirfi) + mov r16=ar.ec + movl r20 = vmx_dorfirfi_back + ;; +// clean rfi_pfs + st8 [r17]=r0 + mov b0=r20 +// pfs.pec=ar.ec + dep r24 = r16, r24, 52, 6 + ;; + mov ar.pfs=r24 + ;; + br.ret.sptk b0 + ;; +END(vmx_dorfirfi) + + +#define VMX_PURGE_RR7 0 +#define VMX_INSERT_RR7 1 +/* + * in0: old rr7 + * in1: virtual address of xen image + * in2: virtual address of vhpt table + */ +GLOBAL_ENTRY(vmx_purge_double_mapping) + alloc loc1 = ar.pfs,5,9,0,0 + mov loc0 = rp + movl r8 = 1f + ;; + movl loc4 = KERNEL_TR_PAGE_SHIFT + movl loc5 = VCPU_TLB_SHIFT + mov loc6 = psr + movl loc7 = XEN_RR7_SWITCH_STUB + mov loc8 = (1<<VMX_PURGE_RR7) + ;; + srlz.i + ;; + rsm psr.i | psr.ic + ;; + srlz.i + ;; + mov ar.rsc = 0 + mov b6 = loc7 + mov rp = r8 + ;; + br.sptk b6 +1: + mov ar.rsc = 3 + mov rp = loc0 + ;; + mov psr.l = loc6 + ;; + srlz.i + ;; + br.ret.sptk rp +END(vmx_purge_double_mapping) + +/* + * in0: new rr7 + * in1: virtual address of xen image + * in2: virtual address of vhpt table + * in3: pte entry of xen image + * in4: pte entry of vhpt table + */ +GLOBAL_ENTRY(vmx_insert_double_mapping) + alloc loc1 = ar.pfs,5,9,0,0 + mov loc0 = rp + movl loc2 = IA64_TR_XEN_IN_DOM // TR number for xen image + ;; + movl loc3 = IA64_TR_VHPT_IN_DOM // TR number for vhpt table + movl r8 = 1f + movl loc4 = KERNEL_TR_PAGE_SHIFT + ;; + movl loc5 = VCPU_TLB_SHIFT + mov loc6 = psr + movl loc7 = XEN_RR7_SWITCH_STUB + ;; + srlz.i + ;; + rsm psr.i | psr.ic + mov loc8 = (1<<VMX_INSERT_RR7) + ;; + srlz.i + ;; + mov ar.rsc = 0 + mov b6 = loc7 + mov rp = r8 + ;; + br.sptk b6 +1: + mov ar.rsc = 3 + mov rp = loc0 + ;; + mov psr.l = loc6 + ;; + srlz.i + ;; + br.ret.sptk rp +END(vmx_insert_double_mapping) + + .align PAGE_SIZE +/* + * Stub to add double mapping for new domain, which shouldn't + * access any memory when active. Before reaching this point, + * both psr.i/ic is cleared and rse is set in lazy mode. 
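+ * The stub is reached through XEN_RR7_SWITCH_STUB and sits on its own
+ * page (.align PAGE_SIZE), so it stays mapped while rr7 is rewritten.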
+ *
+ * in0: new rr7
+ * in1: virtual address of xen image
+ * in2: virtual address of vhpt table
+ * in3: pte entry of xen image
+ * in4: pte entry of vhpt table
+ * loc2: TR number for xen image
+ * loc3: TR number for vhpt table
+ * loc4: page size for xen image
+ * loc5: page size of vhpt table
+ * loc7: free to use
+ * loc8: purge or insert
+ * r8: will contain old rid value
+ */
+GLOBAL_ENTRY(vmx_switch_rr7)
+    movl loc7 = (7<<61)
+    dep.z loc4 = loc4, 2, 6
+    dep.z loc5 = loc5, 2, 6
+    ;;
+    tbit.nz p6,p7=loc8, VMX_INSERT_RR7
+    mov r8 = rr[loc7]
+    ;;
+    mov rr[loc7] = in0
+(p6)mov cr.ifa = in1
+(p6)mov cr.itir = loc4
+    ;;
+    srlz.i
+    ;;
+(p6)itr.i itr[loc2] = in3
+(p7)ptr.i in1, loc4
+    ;;
+(p6)itr.d dtr[loc2] = in3
+(p7)ptr.d in1, loc4
+    ;;
+    srlz.i
+    ;;
+(p6)mov cr.ifa = in2
+(p6)mov cr.itir = loc5
+    ;;
+(p6)itr.d dtr[loc3] = in4
+(p7)ptr.d in2, loc5
+    ;;
+    srlz.i
+    ;;
+    mov rr[loc7] = r8
+    ;;
+    srlz.i
+    br.sptk rp
+END(vmx_switch_rr7)
+    .align PAGE_SIZE
diff --git a/xen/arch/ia64/vmx_init.c b/xen/arch/ia64/vmx_init.c
new file mode 100644
index 0000000000..01809cc541
--- /dev/null
+++ b/xen/arch/ia64/vmx_init.c
@@ -0,0 +1,275 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_init.c: initialization work for VT-specific domain
+ * Copyright (c) 2005, Intel Corporation.
+ *    Kun Tian (Kevin Tian) <kevin.tian@intel.com>
+ *    Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *    Fred Yang <fred.yang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
+ * Simplified design in the first step:
+ *    - One virtual environment
+ *    - Domain is bound to one LP
+ * Later to support guest SMP:
+ *    - Need interface to handle VP scheduled to different LP
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <asm/pal.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/vmx_vcpu.h>
+#include <xen/lib.h>
+#include <asm/vmmu.h>
+#include <public/arch-ia64.h>
+#include <asm/vmx_phy_mode.h>
+
+/* Global flag to identify whether Intel vmx feature is on */
+u32 vmx_enabled = 0;
+static u32 vm_order;
+static u64 buffer_size;
+static u64 vp_env_info;
+static u64 vm_buffer = 0;    /* Buffer required to bring up VMX feature */
+u64 __vsa_base = 0;    /* Run-time service base of VMX */
+
+/* Check whether the VT feature is enabled or not. */
+void
+identify_vmx_feature(void)
+{
+    pal_status_t ret;
+    u64 avail = 1, status = 1, control = 1;
+
+    vmx_enabled = 0;
+    /* Check VT-i feature */
+    ret = ia64_pal_proc_get_features(&avail, &status, &control);
+    if (ret != PAL_STATUS_SUCCESS) {
+        printk("Get proc features failed.\n");
+        goto no_vti;
+    }
+
+    /* FIXME: do we need to check the status field, to see whether
+     * PSR.vm is actually enabled? If yes, another call to
+     * ia64_pal_proc_set_features may be required then.
+     */
+    printk("avail:0x%lx, status:0x%lx, control:0x%lx, vm?0x%lx\n",
+        avail, status, control, avail & PAL_PROC_VM_BIT);
+    if (!(avail & PAL_PROC_VM_BIT)) {
+        printk("No VT feature supported.\n");
+        goto no_vti;
+    }
+
+    ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
+    if (ret != PAL_STATUS_SUCCESS) {
+        printk("Get vp environment info failed.\n");
+        goto no_vti;
+    }
+
+    /* Does Xen have the ability to decode itself? */
+    if (!(vp_env_info & VP_OPCODE))
+        printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info);
+    vm_order = get_order(buffer_size);
+    printk("vm buffer size: %ld, order: %d\n", buffer_size, vm_order);
+
+    vmx_enabled = 1;
+no_vti:
+    return;
+}
+
+/*
+ * Init the virtual environment on the current LP.
+ * __vsa_base indicates whether this is the first LP to be initialized.
+ */
+void
+vmx_init_env(void)
+{
+    u64 status, tmp_base;
+
+    if (!vm_buffer) {
+        vm_buffer = alloc_xenheap_pages(vm_order);
+        ASSERT(vm_buffer);
+        printk("vm_buffer: 0x%lx\n", vm_buffer);
+    }
+
+    status = ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+                    __pa(vm_buffer),
+                    vm_buffer,
+                    &tmp_base);
+
+    if (status != PAL_STATUS_SUCCESS) {
+        printk("ia64_pal_vp_init_env failed.\n");
+        return;
+    }
+
+    if (!__vsa_base)
+        __vsa_base = tmp_base;
+    else
+        ASSERT(tmp_base != __vsa_base);
+}
+
+typedef union {
+    u64 value;
+    struct {
+        u64 number : 8;
+        u64 revision : 8;
+        u64 model : 8;
+        u64 family : 8;
+        u64 archrev : 8;
+        u64 rv : 24;
+    };
+} cpuid3_t;
+
+/* Allocate vpd from xenheap */
+static vpd_t *alloc_vpd(void)
+{
+    int i;
+    cpuid3_t cpuid3;
+    vpd_t *vpd;
+
+    vpd = alloc_xenheap_pages(get_order(VPD_SIZE));
+    if (!vpd) {
+        printk("VPD allocation failed.\n");
+        return NULL;
+    }
+
+    printk("vpd base: 0x%lx, vpd size: %ld\n", (u64)vpd, sizeof(vpd_t));
+    memset(vpd, 0, VPD_SIZE);
+    /* CPUID init */
+    for (i = 0; i < 5; i++)
+        vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+    /* Limit the CPUID number to 5 */
+    cpuid3.value = vpd->vcpuid[3];
+    cpuid3.number = 4;    /* 5 - 1 */
+    vpd->vcpuid[3] = cpuid3.value;
+
+    vpd->vdc.d_vmsw = 1;
+    return vpd;
+}
+
+/*
+ * Create a VP on an initialized VMX environment.
+ */
+static void
+vmx_create_vp(struct exec_domain *ed)
+{
+    u64 ret;
+    vpd_t *vpd = ed->arch.arch_vmx.vpd;
+    u64 ivt_base;
+    extern char vmx_ia64_ivt;
+    /* ia64_ivt is a function pointer, so this translation is needed */
+    ivt_base = (u64) &vmx_ia64_ivt;
+    printk("ivt_base: 0x%lx\n", ivt_base);
+    ret = ia64_pal_vp_create(vpd, ivt_base, 0);
+    if (ret != PAL_STATUS_SUCCESS)
+        panic("ia64_pal_vp_create failed.\n");
+}
+
+/* Other non-context related tasks can be done in context switch */
+void
+vmx_save_state(struct exec_domain *ed)
+{
+    u64 status, psr;
+    u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt;
+
+    /* FIXME: about setting of pal_proc_vector... time consuming */
+    status = ia64_pal_vp_save(ed->arch.arch_vmx.vpd, 0);
+    if (status != PAL_STATUS_SUCCESS)
+        panic("Save vp status failed\n");
+
+    /* FIXME: Do we really need to purge the double mapping for the old ed?
+     * Since the rid is completely different between prev and next,
+     * they don't overlap and thus no MCA is possible...
*/ + dom_rr7 = vmx_vrrtomrr(ed, VMX(ed, vrr[7])); + vmx_purge_double_mapping(dom_rr7, KERNEL_START, + (u64)ed->arch.vtlb->ts->vhpt->hash); + +} + +/* Even guest is in physical mode, we still need such double mapping */ +void +vmx_load_state(struct exec_domain *ed) +{ + u64 status, psr; + u64 old_rr0, dom_rr7, rr0_xen_start, rr0_vhpt; + u64 pte_xen, pte_vhpt; + + status = ia64_pal_vp_restore(ed->arch.arch_vmx.vpd, 0); + if (status != PAL_STATUS_SUCCESS) + panic("Restore vp status failed\n"); + + dom_rr7 = vmx_vrrtomrr(ed, VMX(ed, vrr[7])); + pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL)); + pte_vhpt = pte_val(pfn_pte((__pa(ed->arch.vtlb->ts->vhpt->hash) >> PAGE_SHIFT), PAGE_KERNEL)); + vmx_insert_double_mapping(dom_rr7, KERNEL_START, + (u64)ed->arch.vtlb->ts->vhpt->hash, + pte_xen, pte_vhpt); + + /* Guest vTLB is not required to be switched explicitly, since + * anchored in exec_domain */ +} + +/* Purge old double mapping and insert new one, due to rr7 change */ +void +vmx_change_double_mapping(struct exec_domain *ed, u64 oldrr7, u64 newrr7) +{ + u64 pte_xen, pte_vhpt, vhpt_base; + + vhpt_base = (u64)ed->arch.vtlb->ts->vhpt->hash; + vmx_purge_double_mapping(oldrr7, KERNEL_START, + vhpt_base); + + pte_xen = pte_val(pfn_pte((xen_pstart >> PAGE_SHIFT), PAGE_KERNEL)); + pte_vhpt = pte_val(pfn_pte((__pa(vhpt_base) >> PAGE_SHIFT), PAGE_KERNEL)); + vmx_insert_double_mapping(newrr7, KERNEL_START, + vhpt_base, + pte_xen, pte_vhpt); +} + +/* + * Initialize VMX envirenment for guest. Only the 1st vp/exec_domain + * is registered here. + */ +void +vmx_final_setup_domain(struct domain *d) +{ + struct exec_domain *ed = d->exec_domain[0]; + vpd_t *vpd; + + /* Allocate resources for exec_domain 0 */ + //memset(&ed->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct)); + + vpd = alloc_vpd(); + ASSERT(vpd); + + ed->arch.arch_vmx.vpd = vpd; + vpd->virt_env_vaddr = vm_buffer; + + /* ed->arch.schedule_tail = arch_vmx_do_launch; */ + vmx_create_vp(ed); + + /* Set this ed to be vmx */ + ed->arch.arch_vmx.flags = 1; + + /* Other vmx specific initialization work */ +} + diff --git a/xen/arch/ia64/vmx_interrupt.c b/xen/arch/ia64/vmx_interrupt.c new file mode 100644 index 0000000000..056d8c9700 --- /dev/null +++ b/xen/arch/ia64/vmx_interrupt.c @@ -0,0 +1,388 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_interrupt.c: handle inject interruption. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + */ + + +#include <xen/types.h> +#include <asm/vmx_vcpu.h> +#include <asm/vmx_mm_def.h> +#include <asm/vmx_pal_vsa.h> +/* SDM vol2 5.5 - IVA based interruption handling */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034 +void +collect_interruption(VCPU *vcpu) +{ + u64 ipsr; + u64 vdcr; + u64 vifs; + IA64_PSR vpsr; + REGS * regs = vcpu_regs(vcpu); + vpsr.val = vmx_vcpu_get_psr(vcpu); + + if(vpsr.ic){ + extern void vmx_dorfirfi(void); + if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi) + panic("COLLECT interruption for vmx_dorfirfi\n"); + + /* Sync mpsr id/da/dd/ss/ed bits to vipsr + * since after guest do rfi, we still want these bits on in + * mpsr + */ + + ipsr = regs->cr_ipsr; + vpsr.val = vpsr.val | (ipsr & (IA64_PSR_ID | IA64_PSR_DA + | IA64_PSR_DD |IA64_PSR_SS |IA64_PSR_ED)); + vmx_vcpu_set_ipsr(vcpu, vpsr.val); + + /* Currently, for trap, we do not advance IIP to next + * instruction. That's because we assume caller already + * set up IIP correctly + */ + + vmx_vcpu_set_iip(vcpu , regs->cr_iip); + + /* set vifs.v to zero */ + vifs = VPD_CR(vcpu,ifs); + vifs &= ~IA64_IFS_V; + vmx_vcpu_set_ifs(vcpu, vifs); + + vmx_vcpu_set_iipa(vcpu, regs->cr_iipa); + } + + vdcr = VPD_CR(vcpu,dcr); + + /* Set guest psr + * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged + * be: set to the value of dcr.be + * pp: set to the value of dcr.pp + */ + vpsr.val &= INITIAL_PSR_VALUE_AT_INTERRUPTION; + vpsr.val |= ( vdcr & IA64_DCR_BE); + + /* VDCR pp bit position is different from VPSR pp bit */ + if ( vdcr & IA64_DCR_PP ) { + vpsr.val |= IA64_PSR_PP; + } else { + vpsr.val &= ~IA64_PSR_PP;; + } + + vmx_vcpu_set_psr(vcpu, vpsr.val); + +} +int +inject_guest_interruption(VCPU *vcpu, u64 vec) +{ + u64 viva; + REGS *regs; + regs=vcpu_regs(vcpu); + + collect_interruption(vcpu); + + vmx_vcpu_get_iva(vcpu,&viva); + regs->cr_iip = viva + vec; +} + + +/* + * Set vIFA & vITIR & vIHA, when vPSR.ic =1 + * Parameter: + * set_ifa: if true, set vIFA + * set_itir: if true, set vITIR + * set_iha: if true, set vIHA + */ +void +set_ifa_itir_iha (VCPU *vcpu, u64 vadr, + int set_ifa, int set_itir, int set_iha) +{ + IA64_PSR vpsr; + u64 value; + vpsr.val = vmx_vcpu_get_psr(vcpu); + /* Vol2, Table 8-1 */ + if ( vpsr.ic ) { + if ( set_ifa){ + vmx_vcpu_set_ifa(vcpu, vadr); + } + if ( set_itir) { + value = vmx_vcpu_get_itir_on_fault(vcpu, vadr); + vmx_vcpu_set_itir(vcpu, value); + } + + if ( set_iha) { + vmx_vcpu_thash(vcpu, vadr, &value); + vmx_vcpu_set_iha(vcpu, value); + } + } + + +} + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +dtlb_fault (VCPU *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu,IA64_DATA_TLB_VECTOR); +} + +/* + * Instruction TLB Fault + * @ Instruction TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +itlb_fault (VCPU *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu,IA64_INST_TLB_VECTOR); +} + + + +/* + * Data Nested TLB Fault + * @ Data Nested TLB Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +nested_dtlb (VCPU *vcpu) +{ + inject_guest_interruption(vcpu,IA64_DATA_NESTED_TLB_VECTOR); +} + +/* + * Alternate Data TLB Fault + * @ Alternate Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void +alt_dtlb (VCPU *vcpu, 
u64 vadr)
+{
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu,IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Alternate Instruction TLB Fault
+ * @ Alternate Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+alt_itlb (VCPU *vcpu, u64 vadr)
+{
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 0);
+    inject_guest_interruption(vcpu,IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ *  VHPT Translation Vector
+ */
+static void
+_vhpt_fault(VCPU *vcpu, u64 vadr)
+{
+    /* If vPSR.ic: IFA, ITIR, IHA */
+    set_ifa_itir_iha (vcpu, vadr, 1, 1, 1);
+    inject_guest_interruption(vcpu,IA64_VHPT_TRANS_VECTOR);
+}
+
+/*
+ * VHPT Instruction Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+ivhpt_fault (VCPU *vcpu, u64 vadr)
+{
+    _vhpt_fault(vcpu, vadr);
+}
+
+/*
+ * VHPT Data Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dvhpt_fault (VCPU *vcpu, u64 vadr)
+{
+    _vhpt_fault(vcpu, vadr);
+}
+
+/*
+ * Deal with:
+ *  General Exception vector
+ */
+void
+_general_exception (VCPU *vcpu)
+{
+    inject_guest_interruption(vcpu,IA64_GENEX_VECTOR);
+}
+
+/*
+ * Illegal Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_op (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+illegal_dep (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rsv_reg_field (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Privileged Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+privilege_op (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Unimplemented Data Address Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+unimpl_daddr (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+privilege_reg (VCPU *vcpu)
+{
+    _general_exception(vcpu);
+}
+
+/* Deal with
+ *  Nat consumption vector
+ * Parameter:
+ *  vadr: used only when t == DATA or INSTRUCTION; ignored for REGISTER
+ */
+static void
+_nat_consumption_fault(VCPU *vcpu, u64 vadr, miss_type t)
+{
+    /* If vPSR.ic && t == DATA/INST: IFA */
+    if ( t == DATA || t == INSTRUCTION ) {
+        /* IFA */
+        set_ifa_itir_iha (vcpu, vadr, 1, 0, 0);
+    }
+
+    inject_guest_interruption(vcpu,IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * IR Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+static void
+ir_nat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+inat_page_consumption (VCPU *vcpu, u64 vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+rnat_consumption (VCPU *vcpu)
+{
+    _nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void
+dnat_page_consumption (VCPU *vcpu, uint64_t vadr)
+{
+    _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ *  Page not present vector
+ */
+void
+page_not_present(VCPU *vcpu, u64 vadr)
+{
+    /* If
vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); +} + diff --git a/xen/arch/ia64/vmx_ivt.S b/xen/arch/ia64/vmx_ivt.S new file mode 100644 index 0000000000..9647386a8c --- /dev/null +++ b/xen/arch/ia64/vmx_ivt.S @@ -0,0 +1,978 @@ +/* + * arch/ia64/kernel/vmx_ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian <eranian@hpl.hp.com> + * David Mosberger <davidm@hpl.hp.com> + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick <asit.k.mallick@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + * Kenneth Chen <kenneth.w.chen@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * + * + * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP + * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT. + * + * 05/3/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Supporting Intel virtualization architecture + * + */ + +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * interruptions triggering this vector ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/break.h> +#include <asm/ia32.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/thread_info.h> +#include <asm/unistd.h> +#include <asm/vhpt.h> + + +#if 0 + /* + * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't + * needed for something else before enabling this... 
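+ * Each fault shifts the history in ar.k2 left by 8 bits and adds the new
+ * vector number, so ar.k2 holds the numbers of the last eight faults.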
+ */ +# define VMX_DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 +#else +# define VMX_DBG_FAULT(i) +#endif + +#include "vmx_minstate.h" + + + +#define VMX_FAULT(n) \ +vmx_fault_##n:; \ + br.sptk vmx_fault_##n; \ + ;; \ + + +#define VMX_REFLECT(n) \ + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7) br.sptk.many vmx_dispatch_reflection; \ + VMX_FAULT(n); \ + + +GLOBAL_ENTRY(vmx_panic) + br.sptk.many vmx_panic + ;; +END(vmx_panic) + + + + + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global vmx_ia64_ivt +vmx_ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(vmx_vhpt_miss) + VMX_FAULT(0) +END(vmx_vhpt_miss) + + .org vmx_ia64_ivt+0x400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(vmx_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6) br.sptk vmx_fault_1 + mov r16 = cr.ifa + ;; + thash r17 = r16 + ttag r20 = r16 + ;; +vmx_itlb_loop: + cmp.eq p6,p0 = r0, r17 +(p6) br vmx_itlb_out + ;; + adds r22 = VLE_TITAG_OFFSET, r17 + adds r23 = VLE_CCHAIN_OFFSET, r17 + ;; + ld8 r24 = [r22] + ld8 r25 = [r23] + ;; + lfetch [r25] + cmp.eq p6,p7 = r20, r24 + ;; +(p7) mov r17 = r25; +(p7) br.sptk vmx_itlb_loop + ;; + adds r23 = VLE_PGFLAGS_OFFSET, r17 + adds r24 = VLE_ITIR_OFFSET, r17 + ;; + ld8 r26 = [r23] + ld8 r25 = [r24] + ;; + mov cr.itir = r25 + ;; + itc.i r26 + ;; + srlz.i + ;; + mov r23=r31 + mov r22=b0 + adds r16=IA64_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r16] + ;; + adds r19=VPD(VPSR),r18 + movl r20=__vsa_base + ;; + ld8 r19=[r19] + ld8 r20=[r20] + ;; + br.sptk ia64_vmm_entry + ;; +vmx_itlb_out: + mov r19 = 1 + br.sptk vmx_dispatch_tlb_miss + VMX_FAULT(1); +END(vmx_itlb_miss) + + .org vmx_ia64_ivt+0x0800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(vmx_dtlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk vmx_fault_2 + mov r16 = cr.ifa + ;; + thash r17 = r16 + ttag r20 = r16 + ;; +vmx_dtlb_loop: + cmp.eq p6,p0 = r0, r17 +(p6)br vmx_dtlb_out + ;; + adds r22 = VLE_TITAG_OFFSET, r17 + adds r23 = VLE_CCHAIN_OFFSET, r17 + ;; + ld8 r24 = [r22] + ld8 r25 = [r23] + ;; + lfetch [r25] + cmp.eq p6,p7 = r20, r24 + ;; +(p7)mov r17 = r25; +(p7)br.sptk vmx_dtlb_loop + ;; + adds r23 = VLE_PGFLAGS_OFFSET, r17 + adds r24 = VLE_ITIR_OFFSET, r17 + ;; + ld8 r26 = [r23] + ld8 r25 = [r24] + ;; + mov cr.itir = r25 + ;; + itc.d r26 + ;; + srlz.d; + ;; + mov r23=r31 + mov r22=b0 + adds r16=IA64_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r16] + ;; + adds r19=VPD(VPSR),r18 + movl r20=__vsa_base + ;; + ld8 r19=[r19] + ld8 r20=[r20] + ;; + br.sptk ia64_vmm_entry + ;; +vmx_dtlb_out: + mov r19 = 2 + br.sptk vmx_dispatch_tlb_miss + VMX_FAULT(2); +END(vmx_dtlb_miss) + + .org vmx_ia64_ivt+0x0c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(vmx_alt_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p7)br.sptk vmx_fault_3 + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + shr.u r18=r16,57 // move address bit 61 to bit 4 + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + or r19=r17,r19 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi + VMX_FAULT(3); +END(vmx_alt_itlb_miss) + + + .org vmx_ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(vmx_alt_dtlb_miss) + mov r31=pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p7)br.sptk vmx_fault_4 + mov r16=cr.ifa // get address that caused the TLB miss + movl r17=PAGE_KERNEL + mov r20=cr.isr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field + tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? + shr.u r18=r16,57 // move address bit 61 to bit 4 + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) +(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field + dep r24=-1,r24,IA64_PSR_ED_BIT,1 + or r19=r19,r17 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p6) mov cr.ipsr=r24 + ;; +(p7) itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi + VMX_FAULT(4); +END(vmx_alt_dtlb_miss) + + .org vmx_ia64_ivt+0x1400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(vmx_nested_dtlb_miss) + VMX_FAULT(5) +END(vmx_nested_dtlb_miss) + + .org vmx_ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(vmx_ikey_miss) + VMX_REFLECT(6) +END(vmx_ikey_miss) + + .org vmx_ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(vmx_dkey_miss) + VMX_REFLECT(7) +END(vmx_dkey_miss) + + .org vmx_ia64_ivt+0x2000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(vmx_dirty_bit) + VMX_REFLECT(8) +END(vmx_dirty_bit) + + .org vmx_ia64_ivt+0x2400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(vmx_iaccess_bit) + VMX_REFLECT(9) +END(vmx_iaccess_bit) + + .org vmx_ia64_ivt+0x2800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(vmx_daccess_bit) + VMX_REFLECT(10) +END(vmx_daccess_bit) + + .org vmx_ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(vmx_break_fault) + mov r31=pr + mov r19=11 + br.sptk.many vmx_dispatch_break_fault +END(vmx_break_fault) + + .org vmx_ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
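+// Editor's aside, not part of the original changeset: the .org placement
+// used throughout this table follows a fixed rule -- the first 20 entries
+// are 64 bundles (0x400 bytes) apart, the remaining 48 are 16 bundles
+// (0x100 bytes) apart -- which is also what the vec2off[] table in
+// vmx_process.c below encodes. As a C sketch (ivt_entry_offset is a
+// hypothetical helper name, not a symbol in this patch):
+//
+//	static unsigned long ivt_entry_offset(unsigned int vector)
+//	{
+//	    return (vector < 20) ? vector * 0x400UL
+//	                         : 0x5000UL + (vector - 20) * 0x100UL;
+//	}
+//
+// e.g. ivt_entry_offset(12) == 0x3000 for the handler below, and
+// ivt_entry_offset(37) == 0x6100 for the Virtualization Fault entry.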
+ENTRY(vmx_interrupt) + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT +(p7) br.sptk vmx_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-IA64_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; +.mem.offset 0,0; st8.spill [r16]=r8,16 +.mem.offset 8,0; st8.spill [r17]=r9,16 + ;; +.mem.offset 0,0; st8.spill [r16]=r10,24 +.mem.offset 8,0; st8.spill [r17]=r11,24 + ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; /* avoid RAW on r16 & r17 */ + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; +.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ +.mem.offset 8,0; st8.spill [r17]=r12,16 + adds r12=-16,r1 /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; +.mem.offset 0,0; st8.spill [r16]=r13,16 +.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ + mov r13=r21 /* establish `current' */ + ;; +.mem.offset 0,0; st8.spill [r16]=r15,16 +.mem.offset 8,0; st8.spill [r17]=r14,16 + dep r14=-1,r0,60,4 + ;; +.mem.offset 0,0; st8.spill [r16]=r2,16 +.mem.offset 8,0; st8.spill [r17]=r3,16 + adds r2=IA64_PT_REGS_R16_OFFSET,r1 + ;; + mov r8=ar.ccv + movl r1=__gp /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + mov out0=cr.ivr // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + + ssm psr.ic + ;; + srlz.i + ;; + ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + mov r18=b6 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + mov r19=b7 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,32 + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds 
r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_nested + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_ia64_handle_irq + ;; +END(vmx_interrupt) + + .org vmx_ia64_ivt+0x3400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + VMX_DBG_FAULT(13) + VMX_FAULT(13) + + + .org vmx_ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + VMX_DBG_FAULT(14) + VMX_FAULT(14) + + + .org vmx_ia64_ivt+0x3c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + VMX_DBG_FAULT(15) + VMX_FAULT(15) + + + .org vmx_ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + VMX_DBG_FAULT(16) + VMX_FAULT(16) + + .org vmx_ia64_ivt+0x4400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + VMX_DBG_FAULT(17) + VMX_FAULT(17) + + .org vmx_ia64_ivt+0x4800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + VMX_DBG_FAULT(18) + VMX_FAULT(18) + + .org vmx_ia64_ivt+0x4c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + VMX_DBG_FAULT(19) + VMX_FAULT(19) + + .org vmx_ia64_ivt+0x5000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) +ENTRY(vmx_page_not_present) + VMX_REFLECT(20) +END(vmx_page_not_present) + + .org vmx_ia64_ivt+0x5100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) +ENTRY(vmx_key_permission) + VMX_REFLECT(21) +END(vmx_key_permission) + + .org vmx_ia64_ivt+0x5200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(vmx_iaccess_rights) + VMX_REFLECT(22) +END(vmx_iaccess_rights) + + .org vmx_ia64_ivt+0x5300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(vmx_daccess_rights) + VMX_REFLECT(23) +END(vmx_daccess_rights) + + .org vmx_ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(vmx_general_exception) + VMX_FAULT(24) +// VMX_REFLECT(24) +END(vmx_general_exception) + + .org vmx_ia64_ivt+0x5500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(vmx_disabled_fp_reg) + VMX_REFLECT(25) +END(vmx_disabled_fp_reg) + + .org vmx_ia64_ivt+0x5600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(vmx_nat_consumption) + VMX_REFLECT(26) +END(vmx_nat_consumption) + + .org vmx_ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(vmx_speculation_vector) + VMX_REFLECT(27) +END(vmx_speculation_vector) + + .org vmx_ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + VMX_DBG_FAULT(28) + VMX_FAULT(28) + + .org vmx_ia64_ivt+0x5900
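+// Editor's aside, not part of the original changeset: most 16-bundle
+// entries in this half of the table simply reflect the fault into the
+// guest via VMX_REFLECT(n). In C terms, the macro's test is roughly
+//
+//	if ((ipsr >> IA64_PSR_VM_BIT) & 1)
+//	    vmx_dispatch_reflection(n);  /* fault came from guest code  */
+//	else
+//	    vmx_fault_n();               /* unexpected in the VMM: spin */
+//
+// where the two targets stand for the real vmx_dispatch_reflection
+// path and the vmx_fault_##n self-loop generated by VMX_FAULT(n).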
+///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(vmx_debug_vector) + VMX_DBG_FAULT(29) + VMX_FAULT(29) +END(vmx_debug_vector) + + .org vmx_ia64_ivt+0x5a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(vmx_unaligned_access) + VMX_REFLECT(30) +END(vmx_unaligned_access) + + .org vmx_ia64_ivt+0x5b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(vmx_unsupported_data_reference) + VMX_REFLECT(31) +END(vmx_unsupported_data_reference) + + .org vmx_ia64_ivt+0x5c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(vmx_floating_point_fault) + VMX_REFLECT(32) +END(vmx_floating_point_fault) + + .org vmx_ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(vmx_floating_point_trap) + VMX_REFLECT(33) +END(vmx_floating_point_trap) + + .org vmx_ia64_ivt+0x5e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(vmx_lower_privilege_trap) + VMX_REFLECT(34) +END(vmx_lower_privilege_trap) + + .org vmx_ia64_ivt+0x5f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(vmx_taken_branch_trap) + VMX_REFLECT(35) +END(vmx_taken_branch_trap) + + .org vmx_ia64_ivt+0x6000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(vmx_single_step_trap) + VMX_REFLECT(36) +END(vmx_single_step_trap) + + .org vmx_ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault +ENTRY(vmx_virtualization_fault) + VMX_DBG_FAULT(37) + mov r31=pr + mov r19=37 + br.sptk vmx_dispatch_virtualization_fault +END(vmx_virtualization_fault) + + .org vmx_ia64_ivt+0x6200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + VMX_DBG_FAULT(38) + VMX_FAULT(38) + + .org vmx_ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + VMX_DBG_FAULT(39) + VMX_FAULT(39) + + .org vmx_ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + VMX_DBG_FAULT(40) + VMX_FAULT(40) + + .org vmx_ia64_ivt+0x6500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + VMX_DBG_FAULT(41) + VMX_FAULT(41) + + .org vmx_ia64_ivt+0x6600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + VMX_DBG_FAULT(42) + VMX_FAULT(42) + + .org vmx_ia64_ivt+0x6700 +///////////////////////////////////////////////////////////////////////////////////////// 
+// 0x6700 Entry 43 (size 16 bundles) Reserved + VMX_DBG_FAULT(43) + VMX_FAULT(43) + + .org vmx_ia64_ivt+0x6800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + VMX_DBG_FAULT(44) + VMX_FAULT(44) + + .org vmx_ia64_ivt+0x6900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(vmx_ia32_exception) + VMX_DBG_FAULT(45) + VMX_FAULT(45) +END(vmx_ia32_exception) + + .org vmx_ia64_ivt+0x6a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(vmx_ia32_intercept) + VMX_DBG_FAULT(46) + VMX_FAULT(46) +END(vmx_ia32_intercept) + + .org vmx_ia64_ivt+0x6b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +ENTRY(vmx_ia32_interrupt) + VMX_DBG_FAULT(47) + VMX_FAULT(47) +END(vmx_ia32_interrupt) + + .org vmx_ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + VMX_DBG_FAULT(48) + VMX_FAULT(48) + + .org vmx_ia64_ivt+0x6d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + VMX_DBG_FAULT(49) + VMX_FAULT(49) + + .org vmx_ia64_ivt+0x6e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + VMX_DBG_FAULT(50) + VMX_FAULT(50) + + .org vmx_ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + VMX_DBG_FAULT(51) + VMX_FAULT(51) + + .org vmx_ia64_ivt+0x7000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + VMX_DBG_FAULT(52) + VMX_FAULT(52) + + .org vmx_ia64_ivt+0x7100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + VMX_DBG_FAULT(53) + VMX_FAULT(53) + + .org vmx_ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + VMX_DBG_FAULT(54) + VMX_FAULT(54) + + .org vmx_ia64_ivt+0x7300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + VMX_DBG_FAULT(55) + VMX_FAULT(55) + + .org vmx_ia64_ivt+0x7400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + VMX_DBG_FAULT(56) + VMX_FAULT(56) + + .org vmx_ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + VMX_DBG_FAULT(57) + VMX_FAULT(57) + + .org vmx_ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + VMX_DBG_FAULT(58) + VMX_FAULT(58) + + .org vmx_ia64_ivt+0x7700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + VMX_DBG_FAULT(59) + VMX_FAULT(59) + + .org vmx_ia64_ivt+0x7800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + VMX_DBG_FAULT(60) + VMX_FAULT(60) + + .org vmx_ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + VMX_DBG_FAULT(61) + VMX_FAULT(61) + + .org vmx_ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + VMX_DBG_FAULT(62) + VMX_FAULT(62) + + .org vmx_ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + VMX_DBG_FAULT(63) + VMX_FAULT(63) + + .org vmx_ia64_ivt+0x7c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + VMX_DBG_FAULT(64) + VMX_FAULT(64) + + .org vmx_ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + VMX_DBG_FAULT(65) + VMX_FAULT(65) + + .org vmx_ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + VMX_DBG_FAULT(66) + VMX_FAULT(66) + + .org vmx_ia64_ivt+0x7f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + VMX_DBG_FAULT(67) + VMX_FAULT(67) + + .org vmx_ia64_ivt+0x8000 + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply move the following code to a more + // suitable spot... + + +ENTRY(vmx_dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + VMX_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,4,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_reflect_interruption +END(vmx_dispatch_reflection) + +ENTRY(vmx_dispatch_virtualization_fault) + cmp.eq pEml,pNonEml=r0,r0 /* force pEml =1, save r4 ~ r7 */ + ;; + VMX_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,3,0 // now it's safe (must be first in insn group!)
+ mov out0=r13 //vcpu + mov out1=r4 //cause + mov out2=r5 //opcode + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_emulate +END(vmx_dispatch_virtualization_fault) + + + +ENTRY(vmx_dispatch_tlb_miss) + VMX_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=r13 + mov out1=r15 + mov out2=cr.ifa + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_hpw_miss +END(vmx_dispatch_tlb_miss) + + +ENTRY(vmx_dispatch_break_fault) + cmp.ne pEml,pNonEml=r0,r0 /* force pNonEml =1, don't save r4 ~ r7 */ + ;; + VMX_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + adds out1=16,sp + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + ssm psr.i // restore psr.i + adds r3=16,r2 // set up second base pointer + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_ia64_handle_break +END(vmx_dispatch_break_fault) + + +ENTRY(vmx_dispatch_interrupt) + cmp.ne pEml,pNonEml=r0,r0 /* force pNonEml =1, don't save r4 ~ r7 */ + ;; + VMX_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + mov out0=cr.ivr // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + + ssm psr.ic + ;; + srlz.i + ;; + ssm psr.i + adds r3=16,r2 // set up second base pointer for SAVE_REST + ;; + VMX_SAVE_REST + movl r14=ia64_leave_hypervisor + ;; + mov rp=r14 + br.call.sptk.many b6=vmx_ia64_handle_irq +END(vmx_dispatch_interrupt) diff --git a/xen/arch/ia64/vmx_minstate.h b/xen/arch/ia64/vmx_minstate.h new file mode 100644 index 0000000000..afee6516d9 --- /dev/null +++ b/xen/arch/ia64/vmx_minstate.h @@ -0,0 +1,329 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_minstate.h: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + */ + +#include <linux/config.h> + +#include <asm/asmmacro.h> +#include <asm/fpu.h> +#include <asm/mmu_context.h> +#include <asm/offsets.h> +#include <asm/pal.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/system.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/vmx_vpd.h> +#include <asm/cache.h> +#include "entry.h" + +#define VMX_MINSTATE_START_SAVE_MIN \ + mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ + mov.m r28=ar.rnat; \ + addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ + ;; \ + lfetch.fault.excl.nt1 [r22]; \ + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + mov r23=ar.bspstore; /* save ar.bspstore */ \ + ;; \ + mov ar.bspstore=r22; /* switch to kernel RBS */ \ + ;; \ + mov r18=ar.bsp; \ + mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + + + +#define VMX_MINSTATE_END_SAVE_MIN \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + + +#define PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \ + /* begin to call pal vps sync_read and cleanup psr.pl */ \ + add r25=IA64_VPD_BASE_OFFSET, r21; \ + movl r20=__vsa_base; \ + ;; \ + ld8 r25=[r25]; /* read vpd base */ \ + ld8 r20=[r20]; /* read entry point */ \ + ;; \ + mov r6=r25; \ + add r20=PAL_VPS_SYNC_READ,r20; \ + ;; \ +{ .mii; \ + add r22=VPD(VPSR),r25; \ + mov r24=ip; \ + mov b0=r20; \ + ;; \ +}; \ +{ .mmb; \ + add r24 = 0x20, r24; \ + mov r16 = cr.ipsr; /* Temp workaround since psr.ic is off */ \ + br.cond.sptk b0; /* call the service */ \ + ;; \ +}; \ + ld8 r7=[r22]; \ + /* deposite ipsr bit cpl into vpd.vpsr, since epc will change */ \ + extr.u r30=r16, IA64_PSR_CPL0_BIT, 2; \ + ;; \ + dep r7=r30, r7, IA64_PSR_CPL0_BIT, 2; \ + ;; \ + extr.u r30=r16, IA64_PSR_BE_BIT, 5; \ + ;; \ + dep r7=r30, r7, IA64_PSR_BE_BIT, 5; \ + ;; \ + extr.u r30=r16, IA64_PSR_RI_BIT, 2; \ + ;; \ + dep r7=r30, r7, IA64_PSR_RI_BIT, 2; \ + ;; \ + st8 [r22]=r7; \ + ;; + + + +#define IA64_CURRENT_REG IA64_KR(CURRENT) /* r21 is reserved for current pointer */ +//#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=IA64_CURRENT_REG +#define VMX_MINSTATE_GET_CURRENT(reg) mov reg=r21 + +/* + * VMX_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. 
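+ *
+ * (Editor's note, not part of the original comment: the canonical caller
+ * pattern, as used by the vmx_dispatch_* handlers in vmx_ivt.S, is
+ *
+ *	VMX_SAVE_MIN_WITH_COVER_R19	// minimal state; r15 = vector from r19
+ *	alloc r14=ar.pfs,0,0,nargs,0	// first instruction in its group
+ *	ssm psr.ic
+ *	;;
+ *	srlz.i				// collection is back on
+ *	...set up out0..out(nargs-1), VMX_SAVE_REST, branch to C handler...
+ *
+ * with nargs standing for the C handler's argument count.)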
+ */ +#define VMX_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ +/* switch rr7 */ \ + movl r16=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \ + movl r17=(7<<61); \ + movl r20=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)); \ + movl r22=(6<<61); \ + movl r18=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1); \ + movl r23=(5<<61); \ + ;; \ + mov rr[r17]=r16; \ + mov rr[r22]=r20; \ + mov rr[r23]=r18; \ + ;; \ + srlz.i; \ + ;; \ + VMX_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27=ar.rsc; /* M */ \ + mov r20=r1; /* A */ \ + mov r26=ar.unat; /* M */ \ + mov r29=cr.ipsr; /* M */ \ + COVER; /* B;; (or nothing) */ \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p6) br.sptk.few vmx_panic; \ + mov r1=r16; \ +/* mov r21=r16; */ \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + ;; \ + VMX_MINSTATE_START_SAVE_MIN \ + adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ + adds r16=PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16]=r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ + mov r29=b0 \ + ;; \ + adds r16=PT(R8),r1; /* initialize first base pointer */ \ + adds r17=PT(R9),r1; /* initialize second base pointer */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r8,16; \ +.mem.offset 8,0; st8.spill [r17]=r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r10,24; \ +.mem.offset 8,0; st8.spill [r17]=r11,24; \ + ;; \ + mov r8=ar.pfs; /* I */ \ + mov r9=cr.iip; /* M */ \ + mov r10=ar.fpsr; /* M */ \ + ;; \ + st8 [r16]=r9,16; /* save cr.iip */ \ + st8 [r17]=r30,16; /* save cr.ifs */ \ + sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16]=r26,16; /* save ar.unat */ \ + st8 [r17]=r8,16; /* save ar.pfs */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r27,16; /* save ar.rsc */ \ + st8 [r17]=r28,16; /* save ar.rnat */ \ + ;; /* avoid RAW on r16 & r17 */ \ + st8 [r16]=r23,16; /* save ar.bspstore */ \ + st8 [r17]=r31,16; /* save predicates */ \ + ;; \ + st8 [r16]=r29,16; /* save b0 */ \ + st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17]=r12,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r13,16; \ +.mem.offset 8,0; st8.spill [r17]=r10,16; /* save ar.fpsr */ \ + mov r13=r21; /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r15,16; \ +.mem.offset 8,0; st8.spill [r17]=r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r2,16; \ +.mem.offset 8,0; st8.spill [r17]=r3,16; \ + adds r2=PT(F6),r1; \ + ;; \ + .mem.offset 0,0; st8.spill [r16]=r4,16; \ + .mem.offset 8,0; st8.spill [r17]=r5,16; \ + ;; \ + .mem.offset 0,0; st8.spill [r16]=r6,16; \ + .mem.offset 8,0; st8.spill [r17]=r7,16; \ + mov r20=ar.ccv; \ + ;; \ + mov r18=cr.iipa; \ + mov r4=cr.isr; \ + mov r22=ar.unat; \ + ;; \ + st8 [r16]=r18,16; \ + st8 [r17]=r4; \ + ;; \ + adds r16=PT(EML_UNAT),r1; \ + adds r17=PT(AR_CCV),r1; \ + ;; \ + st8 [r16]=r22,8; \ + st8 [r17]=r20; \ + mov r4=r24; \ + mov r5=r25; \ + ;; \ + st8 [r16]=r0; \ + EXTRA; \ + mov r9=ar.csd; \ + mov r10=ar.ssd; \ + movl r11=FPSR_DEFAULT; /* L-unit */ \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + PAL_VSA_SYNC_READ_CLEANUP_PSR_PL \ + VMX_MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder 
of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.f6 + * r3: points to &pt_regs.f7 + * r4,r5,scrach + * r6: points to vpd + * r7: vpsr + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define VMX_SAVE_REST \ + tbit.z pBN0,pBN1=r7,IA64_PSR_BN_BIT; /* guest bank0 or bank1 ? */ \ + ;; \ +(pBN0) add r4=VPD(VBGR),r6; \ +(pBN0) add r5=VPD(VBGR)+0x8,r6; \ +(pBN0) add r7=VPD(VBNAT),r6; \ + ;; \ +(pBN1) add r5=VPD(VGR)+0x8,r6; \ +(pBN1) add r4=VPD(VGR),r6; \ +(pBN1) add r7=VPD(VNAT),r6; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r16,16; \ +.mem.offset 8,0; st8.spill [r5]=r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r18,16; \ +.mem.offset 8,0; st8.spill [r5]=r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r20,16; \ +.mem.offset 8,0; st8.spill [r5]=r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r22,16; \ +.mem.offset 8,0; st8.spill [r5]=r23,16; \ + mov r19=b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r24,16; \ +.mem.offset 8,0; st8.spill [r5]=r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r26,16; \ +.mem.offset 8,0; st8.spill [r5]=r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r28,16; \ +.mem.offset 8,0; st8.spill [r5]=r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r4]=r30,16; \ +.mem.offset 8,0; st8.spill [r5]=r31,16; \ + ;; \ + mov r30=ar.unat; \ + ;; \ + st8 [r7]=r30; \ + mov ar.fpsr=r11; /* M-unit */ \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32; \ + ;; \ + stf.spill [r2]=f10; \ + stf.spill [r3]=f11; \ + ;; \ + adds r2=PT(B6)-PT(F10),r2; \ + adds r3=PT(B7)-PT(F11),r3; \ + ;; \ + st8 [r2]=r18,16; /* b6 */ \ + st8 [r3]=r19,16; /* b7 */ \ + ;; \ + st8 [r2]=r9; /* ar.csd */ \ + st8 [r3]=r10; /* ar.ssd */ \ + ;; + +#define VMX_SAVE_MIN_WITH_COVER VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs,) +#define VMX_SAVE_MIN_WITH_COVER_R19 VMX_DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) +#define VMX_SAVE_MIN VMX_DO_SAVE_MIN( , mov r30=r0, ) diff --git a/xen/arch/ia64/vmx_phy_mode.c b/xen/arch/ia64/vmx_phy_mode.c new file mode 100644 index 0000000000..938cfeb051 --- /dev/null +++ b/xen/arch/ia64/vmx_phy_mode.c @@ -0,0 +1,393 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_phy_mode.c: emulating domain physical mode. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Arun Sharma (arun.sharma@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + */ + + +#include <asm/processor.h> +#include <asm/gcc_intrin.h> +#include <asm/vmx_phy_mode.h> +#include <xen/sched.h> +#include <asm/pgtable.h> + + +int valid_mm_mode[8] = { + GUEST_PHYS, /* (it, dt, rt) -> (0, 0, 0) */ + INV_MODE, + INV_MODE, + GUEST_PHYS, /* (it, dt, rt) -> (0, 1, 1) */ + INV_MODE, + GUEST_PHYS, /* (it, dt, rt) -> (1, 0, 1) */ + INV_MODE, + GUEST_VIRT, /* (it, dt, rt) -> (1, 1, 1).*/ +}; + +/* + * Special notes: + * - Index by it/dt/rt sequence + * - Only existing mode transitions are allowed in this table + * - RSE is placed at lazy mode when emulating guest partial mode + * - If gva happens to be rr0 and rr4, only allowed case is identity + * mapping (gva=gpa), or panic! (How?) + */ +int mm_switch_table[8][8] = { + /* 2004/09/12(Kevin): Allow switch to self */ + /* + * (it,dt,rt): (0,0,0) -> (1,1,1) + * This kind of transition usually occurs in the very early + * stage of Linux boot up procedure. Another case is in efi + * and pal calls. (see "arch/ia64/kernel/head.S") + * + * (it,dt,rt): (0,0,0) -> (0,1,1) + * This kind of transition is found when OSYa exits efi boot + * service. Due to gva = gpa in this case (Same region), + * data access can be satisfied though itlb entry for physical + * emulation is hit. + */ + SW_SELF,0, 0, SW_NOP, 0, 0, 0, SW_P2V, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + /* + * (it,dt,rt): (0,1,1) -> (1,1,1) + * This kind of transition is found in OSYa. + * + * (it,dt,rt): (0,1,1) -> (0,0,0) + * This kind of transition is found in OSYa + */ + SW_NOP, 0, 0, SW_SELF,0, 0, 0, SW_P2V, + /* (1,0,0)->(1,1,1) */ + 0, 0, 0, 0, 0, 0, 0, SW_P2V, + /* + * (it,dt,rt): (1,0,1) -> (1,1,1) + * This kind of transition usually occurs when Linux returns + * from the low level TLB miss handlers. + * (see "arch/ia64/kernel/ivt.S") + */ + 0, 0, 0, 0, 0, SW_SELF,0, SW_P2V, + 0, 0, 0, 0, 0, 0, 0, 0, + /* + * (it,dt,rt): (1,1,1) -> (1,0,1) + * This kind of transition usually occurs in Linux low level + * TLB miss handler. (see "arch/ia64/kernel/ivt.S") + * + * (it,dt,rt): (1,1,1) -> (0,0,0) + * This kind of transition usually occurs in pal and efi calls, + * which requires running in physical mode. 
+ * (see "arch/ia64/kernel/head.S") + * (1,1,1)->(1,0,0) + */ + + SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF, +}; + +void +physical_mode_init(VCPU *vcpu) +{ + UINT64 psr; + struct domain * d = vcpu->domain; + + vcpu->domain->arch.emul_phy_rr0.rid = XEN_RR7_RID+((d->id)<<3); + /* FIXME */ +#if 0 + vcpu->domain->arch.emul_phy_rr0.ps = 28; /* set page size to 256M */ +#endif + vcpu->domain->arch.emul_phy_rr0.ps = EMUL_PHY_PAGE_SHIFT; /* set page size to 4k */ + vcpu->domain->arch.emul_phy_rr0.ve = 1; /* enable VHPT walker on this region */ + + vcpu->domain->arch.emul_phy_rr4.rid = XEN_RR7_RID + ((d->id)<<3) + 4; + vcpu->domain->arch.emul_phy_rr4.ps = EMUL_PHY_PAGE_SHIFT; /* set page size to 4k */ + vcpu->domain->arch.emul_phy_rr4.ve = 1; /* enable VHPT walker on this region */ + + vcpu->arch.old_rsc = 0; + vcpu->arch.mode_flags = GUEST_IN_PHY; + + psr = ia64_clear_ic(); + + ia64_set_rr((VRN0<<VRN_SHIFT), vcpu->domain->arch.emul_phy_rr0.rrval); + ia64_srlz_d(); + ia64_set_rr((VRN4<<VRN_SHIFT), vcpu->domain->arch.emul_phy_rr4.rrval); + ia64_srlz_d(); +#if 0 + /* FIXME: temp workaround to support guest physical mode */ +ia64_itr(0x1, IA64_TEMP_PHYSICAL, dom0_start, + pte_val(pfn_pte((dom0_start >> PAGE_SHIFT), PAGE_KERNEL)), + 28); +ia64_itr(0x2, IA64_TEMP_PHYSICAL, dom0_start, + pte_val(pfn_pte((dom0_start >> PAGE_SHIFT), PAGE_KERNEL)), + 28); +ia64_srlz_i(); +#endif + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + +extern u64 get_mfn(domid_t domid, u64 gpfn, u64 pages); +void +physical_itlb_miss(VCPU *vcpu, u64 vadr) +{ + u64 psr; + IA64_PSR vpsr; + u64 mppn,gppn; + vpsr.val=vmx_vcpu_get_psr(vcpu); + gppn=(vadr<<1)>>13; + mppn = get_mfn(DOMID_SELF,gppn,1); + mppn=(mppn<<12)|(vpsr.cpl<<7)|PHY_PAGE_WB; + + psr=ia64_clear_ic(); + ia64_itc(1,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + +void +physical_dtlb_miss(VCPU *vcpu, u64 vadr) +{ + u64 psr; + IA64_PSR vpsr; + u64 mppn,gppn; + vpsr.val=vmx_vcpu_get_psr(vcpu); + gppn=(vadr<<1)>>13; + mppn = get_mfn(DOMID_SELF,gppn,1); + mppn=(mppn<<12)|(vpsr.cpl<<7); + if(vadr>>63) + mppn |= PHY_PAGE_UC; + else + mppn |= PHY_PAGE_WB; + + psr=ia64_clear_ic(); + ia64_itc(2,vadr&(~0xfff),mppn,EMUL_PHY_PAGE_SHIFT); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + +void +vmx_init_all_rr(VCPU *vcpu) +{ + VMX(vcpu,vrr[VRN0]) = 0x38; + VMX(vcpu,vrr[VRN1]) = 0x38; + VMX(vcpu,vrr[VRN2]) = 0x38; + VMX(vcpu,vrr[VRN3]) = 0x38; + VMX(vcpu,vrr[VRN4]) = 0x38; + VMX(vcpu,vrr[VRN5]) = 0x38; + VMX(vcpu,vrr[VRN6]) = 0x60; + VMX(vcpu,vrr[VRN7]) = 0x60; + + VMX(vcpu,mrr5) = vmx_vrrtomrr(vcpu, 0x38); + VMX(vcpu,mrr6) = vmx_vrrtomrr(vcpu, 0x60); + VMX(vcpu,mrr7) = vmx_vrrtomrr(vcpu, 0x60); +} + +void +vmx_load_all_rr(VCPU *vcpu) +{ + unsigned long psr; + + psr = ia64_clear_ic(); + + /* WARNING: not allow co-exist of both virtual mode and physical + * mode in same region + */ + if (is_physical_mode(vcpu)) { + ia64_set_rr((VRN0 << VRN_SHIFT), + vcpu->domain->arch.emul_phy_rr0.rrval); + ia64_set_rr((VRN4 << VRN_SHIFT), + vcpu->domain->arch.emul_phy_rr4.rrval); + } else { + ia64_set_rr((VRN0 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0]))); + ia64_set_rr((VRN4 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4]))); + } + +#if 1 + /* rr567 will be postponed to last point when resuming back to guest */ + ia64_set_rr((VRN1 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN1]))); + ia64_set_rr((VRN2 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, VMX(vcpu, vrr[VRN2]))); + ia64_set_rr((VRN3 << VRN_SHIFT), + vmx_vrrtomrr(vcpu, 
VMX(vcpu, vrr[VRN3]))); +#endif + ia64_srlz_d(); + ia64_set_psr(psr); + ia64_srlz_i(); +} + +void +switch_to_physical_rid(VCPU *vcpu) +{ + UINT64 psr; + + /* Save original virtual mode rr[0] and rr[4] */ + + psr=ia64_clear_ic(); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->domain->arch.emul_phy_rr0.rrval); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->domain->arch.emul_phy_rr4.rrval); + ia64_srlz_d(); + + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + + +void +switch_to_virtual_rid(VCPU *vcpu) +{ + UINT64 psr; + ia64_rr mrr; + + psr=ia64_clear_ic(); + + mrr=vmx_vcpu_rr(vcpu,VRN0<<VRN_SHIFT); + mrr.rid = VRID_2_MRID(vcpu,mrr.rid); + mrr.ve = 1; + ia64_set_rr(VRN0<<VRN_SHIFT, mrr.rrval ); + ia64_srlz_d(); + mrr=vmx_vcpu_rr(vcpu,VRN4<<VRN_SHIFT); + mrr.rid = VRID_2_MRID(vcpu,mrr.rid); + mrr.ve = 1; + ia64_set_rr(VRN4<<VRN_SHIFT, mrr.rrval ); + ia64_srlz_d(); + ia64_set_psr(psr); + ia64_srlz_i(); + return; +} + +static int mm_switch_action(IA64_PSR opsr, IA64_PSR npsr) +{ + return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; +} + +void +switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr) +{ + int act; + REGS * regs=vcpu_regs(vcpu); + act = mm_switch_action(old_psr, new_psr); + switch (act) { + case SW_V2P: + vcpu->arch.old_rsc = regs->ar_rsc; + switch_to_physical_rid(vcpu); + /* + * Set rse to enforced lazy, to prevent active rse save/restore + * while in guest physical mode. + */ + regs->ar_rsc &= ~(IA64_RSC_MODE); + vcpu->arch.mode_flags |= GUEST_IN_PHY; + break; + case SW_P2V: + switch_to_virtual_rid(vcpu); + /* + * recover old mode which was saved when entering + * guest physical mode + */ + regs->ar_rsc = vcpu->arch.old_rsc; + vcpu->arch.mode_flags &= ~GUEST_IN_PHY; + break; + case SW_SELF: + printf("Switch to self-0x%lx!!! MM mode doesn't change...\n", + old_psr.val); + break; + case SW_NOP: + printf("No action required for mode transition: (0x%lx -> 0x%lx)\n", + old_psr.val, new_psr.val); + break; + default: + /* Sanity check */ + printf("old: %lx, new: %lx\n", old_psr.val, new_psr.val); + panic("Unexpected virtual <--> physical mode transition"); + break; + } + return; +} + + + +/* + * In physical mode, inserts (tc/tr) for regions 0 and 4 use RID[0] + * and RID[4], which are reserved for physical mode emulation. + * However, what those inserted entries actually want is the rid for + * virtual mode. So the original virtual rid needs to be restored + * before the insert. + * + * Operations which require such a switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All of the above need the actual virtual rid for the destination entry. + */ + +void +check_mm_mode_switch (VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr) +{ + + if ( (old_psr.dt != new_psr.dt ) || + (old_psr.it != new_psr.it ) || + (old_psr.rt != new_psr.rt ) + ) { + switch_mm_mode (vcpu, old_psr, new_psr); + } + + return; +} + + +/* + * In physical mode, inserts (tc/tr) for regions 0 and 4 use RID[0] + * and RID[4], which are reserved for physical mode emulation. + * However, what those inserted entries actually want is the rid for + * virtual mode. So the original virtual rid needs to be restored + * before the insert. + * + * Operations which require such a switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All of the above need the actual virtual rid for the destination entry.
+ */ + +void +prepare_if_physical_mode(VCPU *vcpu) +{ + if (is_physical_mode(vcpu)) + switch_to_virtual_rid(vcpu); + return; +} + +/* Recover always follows prepare */ +void +recover_if_physical_mode(VCPU *vcpu) +{ + if (is_physical_mode(vcpu)) + switch_to_physical_rid(vcpu); + return; +} + diff --git a/xen/arch/ia64/vmx_process.c b/xen/arch/ia64/vmx_process.c new file mode 100644 index 0000000000..99701e36ef --- /dev/null +++ b/xen/arch/ia64/vmx_process.c @@ -0,0 +1,345 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_process.c: handling VMX architecture-related VM exits + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <xen/errno.h> +#include <xen/sched.h> +#include <xen/smp.h> +#include <asm/ptrace.h> +#include <xen/delay.h> + +#include <linux/efi.h> /* FOR EFI_UNIMPLEMENTED */ +#include <asm/sal.h> /* FOR struct ia64_sal_retval */ + +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/desc.h> +//#include <asm/ldt.h> +#include <xen/irq.h> +#include <xen/event.h> +#include <asm/regionreg.h> +#include <asm/privop.h> +#include <asm/ia64_int.h> +#include <asm/hpsim_ssc.h> +#include <asm/dom_fw.h> +#include <asm/vmx_vcpu.h> +#include <asm/kregs.h> +#include <asm/vmx_mm_def.h> +/* reset all PSR fields to 0, except up,mfl,mfh,pk,dt,rt,mc,it */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034 + + +extern struct ia64_sal_retval pal_emulator_static(UINT64); +extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); +extern void rnat_consumption (VCPU *vcpu); + +IA64FAULT +vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) +{ + static int first_time = 1; + struct domain *d = (struct domain *) current->domain; + struct exec_domain *ed = (struct exec_domain *) current; + extern unsigned long running_on_sim; + unsigned long i, sal_param[8]; + +#if 0 + if (first_time) { + if (platform_is_hp_ski()) running_on_sim = 1; + else running_on_sim = 0; + first_time = 0; + } + if (iim == 0x80001 || iim == 0x80002) { //FIXME: don't hardcode constant + if (running_on_sim) do_ssc(vcpu_get_gr(current,36), regs); + else do_ssc(vcpu_get_gr(current,36), regs); + } +#endif + if (iim == d->breakimm) { + struct ia64_sal_retval x; + switch (regs->r2) { + case FW_HYPERCALL_PAL_CALL: + //printf("*** PAL hypercall: index=%d\n",regs->r28); + //FIXME: This should call a C routine + x = pal_emulator_static(VMX_VPD(ed, vgr[12])); + regs->r8 = x.status; regs->r9 = x.v0; + regs->r10 = x.v1; regs->r11 = x.v2; +#if 0 + if (regs->r8) + printk("Failed vpal emulation, with index:0x%lx\n", + VMX_VPD(ed, vgr[12])); +#endif + break;
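+ /* (Editor's note, not part of the original changeset: each
+ * FW_HYPERCALL_* case in this switch emulates one virtualized firmware
+ * entry point for the guest; iim == d->breakimm selects hypercall
+ * handling, regs->r2 carries the function index, and the arguments sit
+ * in the guest's stacked registers starting at r32.) */
+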
case FW_HYPERCALL_SAL_CALL: + for (i = 0; i < 8; i++) + vmx_vcpu_get_gr(ed, 32+i, &sal_param[i]); + x = sal_emulator(sal_param[0], sal_param[1], + sal_param[2], sal_param[3], + sal_param[4], sal_param[5], + sal_param[6], sal_param[7]); + regs->r8 = x.status; regs->r9 = x.v0; + regs->r10 = x.v1; regs->r11 = x.v2; +#if 0 + if (regs->r8) + printk("Failed vsal emulation, with index:0x%lx\n", + sal_param[0]); +#endif + break; + case FW_HYPERCALL_EFI_RESET_SYSTEM: + printf("efi.reset_system called "); + if (current->domain == dom0) { + printf("(by dom0)\n "); + (*efi.reset_system)(EFI_RESET_WARM,0,0,NULL); + } + printf("(not supported for non-0 domain)\n"); + regs->r8 = EFI_UNSUPPORTED; + break; + case FW_HYPERCALL_EFI_GET_TIME: + { + unsigned long *tv, *tc; + fooefi(); + vmx_vcpu_get_gr(ed, 32, &tv); + vmx_vcpu_get_gr(ed, 33, &tc); + printf("efi_get_time(%p,%p) called...",tv,tc); + tv = __va(translate_domain_mpaddr(tv)); + if (tc) tc = __va(translate_domain_mpaddr(tc)); + regs->r8 = (*efi.get_time)(tv,tc); + printf("and returns %lx\n",regs->r8); + } + break; + case FW_HYPERCALL_EFI_SET_TIME: + case FW_HYPERCALL_EFI_GET_WAKEUP_TIME: + case FW_HYPERCALL_EFI_SET_WAKEUP_TIME: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_SET_VIRTUAL_ADDRESS_MAP: + // FIXME: WARNING!! IF THIS EVER GETS IMPLEMENTED + // SOME OF THE OTHER EFI EMULATIONS WILL CHANGE AS + // POINTER ARGUMENTS WILL BE VIRTUAL!! + case FW_HYPERCALL_EFI_GET_VARIABLE: + // FIXME: need fixes in efi.h from 2.6.9 + case FW_HYPERCALL_EFI_GET_NEXT_VARIABLE: + case FW_HYPERCALL_EFI_SET_VARIABLE: + case FW_HYPERCALL_EFI_GET_NEXT_HIGH_MONO_COUNT: + // FIXME: need fixes in efi.h from 2.6.9 + regs->r8 = EFI_UNSUPPORTED; + break; + } +#if 0 + if (regs->r8) + printk("Failed vgfw emulation, with index:0x%lx\n", + regs->r2); +#endif + vmx_vcpu_increment_iip(current); + } else + vmx_reflect_interruption(ifa,isr,iim,11); +} + +static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800, + 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000, + 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600, + 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000, + 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00, + 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400, + 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00, + 0x7f00, +}; + + + +void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, + UINT64 vector) +{ + VCPU *vcpu = current; + REGS *regs=vcpu_regs(vcpu); + UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu); + if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){ + panic("Guest nested fault!"); + } + VPD_CR(vcpu,isr)=isr; + VPD_CR(vcpu,iipa) = regs->cr_iip; + vector=vec2off[vector]; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VPD_CR(vcpu,iim) = iim; + else { + set_ifa_itir_iha(vcpu,ifa,1,1,1); + } + inject_guest_interruption(vcpu, vector); +} + +// ONLY gets called from ia64_leave_kernel +// ONLY call with interrupts disabled?? (else might miss one?) +// NEVER successful if already reflecting a trap/fault because psr.i==0 +void vmx_deliver_pending_interrupt(struct pt_regs *regs) +{ + struct domain *d = current->domain; + struct exec_domain *ed = current; + // FIXME: Will this work properly if doing an RFI??? 
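+ /* (Editor's note, not part of the original changeset: vmx_dorfirfi is
+ * a function symbol, and an ia64 function pointer refers to a
+ * descriptor whose first word is the real entry address -- hence the
+ * *(unsigned long *) dereference below, which appears intended to skip
+ * injection when the interrupted IP is the dorfirfi stub itself.) */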
+ if (!is_idle_task(d) ) { // always comes from guest + //vcpu_poke_timer(ed); + //if (vcpu_deliverable_interrupts(ed)) { + // unsigned long isr = regs->cr_ipsr & IA64_PSR_RI; + // foodpi(); + // reflect_interruption(0,isr,0,regs,IA64_EXTINT_VECTOR); + //} + extern void vmx_dorfirfi(void); + struct pt_regs *user_regs = vcpu_regs(current); + + if (user_regs != regs) + printk("WARNING: checking pending interrupt in nested interrupt!!!\n"); + if (regs->cr_iip == *(unsigned long *)vmx_dorfirfi) + return; + vmx_check_pending_irq(ed); + } +} + +extern ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr); + +/* We came here because the H/W VHPT walker failed to find an entry */ +IA64FAULT vmx_hpw_miss(VCPU *vcpu, u64 vec, u64 vadr) +{ + IA64_PSR vpsr; + CACHE_LINE_TYPE type; + u64 vhpt_adr; + ISR misr; + ia64_rr vrr; + REGS *regs; + thash_cb_t *vtlb, *vhpt; + thash_data_t *data, me; + vtlb=vmx_vcpu_get_vtlb(vcpu); +#ifdef VTLB_DEBUG + check_vtlb_sanity(vtlb); + dump_vtlb(vtlb); +#endif + vpsr.val = vmx_vcpu_get_psr(vcpu); + regs = vcpu_regs(vcpu); + misr.val=regs->cr_isr; +/* TODO + if(vcpu->domain->id && vec == 2 && + vpsr.dt == 0 && is_gpa_io(MASK_PMA(vaddr))){ + emulate_ins(&v); + return; + } +*/ + + if((vec==1)&&(!vpsr.it)){ + physical_itlb_miss(vcpu, vadr); + return IA64_NO_FAULT; + } + if((vec==2)&&(!vpsr.dt)){ + physical_dtlb_miss(vcpu, vadr); + return IA64_NO_FAULT; + } + vrr = vmx_vcpu_rr(vcpu,vadr); + if(vec == 1) type = ISIDE_TLB; + else if(vec == 2) type = DSIDE_TLB; + else panic("wrong vec\n"); + +// prepare_if_physical_mode(vcpu); + + if((data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type))){ + if ( data->ps != vrr.ps ) { + machine_tlb_insert(vcpu, data); + } + else { + thash_insert(vtlb->ts->vhpt,data,vadr); + } + }else if(type == DSIDE_TLB){ + if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, misr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } else{ + if(misr.sp){ + //TODO lds emulation + panic("Don't support speculation load"); + }else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + } else{ + vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); + vrr=vmx_vcpu_rr(vcpu,vhpt_adr); + data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB); + if(data){ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, misr.val); + dtlb_fault(vcpu, vadr); + return IA64_FAULT; + }else{ + if(misr.sp){ + //TODO lds emulation + panic("Don't support speculation load"); + }else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + }else{ + if(vpsr.ic){ + vmx_vcpu_set_isr(vcpu, misr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + }else{ + if(misr.sp){ + //TODO lds emulation + panic("Don't support speculation load"); + }else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + } + } + }else if(type == ISIDE_TLB){ + if(!vhpt_enabled(vcpu, vadr, misr.rs?RSE_REF:DATA_REF)){ + if(!vpsr.ic){ + misr.ni=1; + } + vmx_vcpu_set_isr(vcpu, misr.val); + alt_itlb(vcpu, vadr); + return IA64_FAULT; + } else{ + vmx_vcpu_thash(vcpu, vadr, &vhpt_adr); + vrr=vmx_vcpu_rr(vcpu,vhpt_adr); + data = vtlb_lookup_ex(vtlb, vrr.rid, vhpt_adr, DSIDE_TLB); + if(data){ + if(!vpsr.ic){ + misr.ni=1; + } + vmx_vcpu_set_isr(vcpu, misr.val); + itlb_fault(vcpu, vadr); + return IA64_FAULT; + }else{ + if(!vpsr.ic){ + misr.ni=1; + } + vmx_vcpu_set_isr(vcpu, misr.val); + ivhpt_fault(vcpu, vadr); + return IA64_FAULT; + } + } + } + return IA64_NO_FAULT; +} + + diff --git a/xen/arch/ia64/vmx_utility.c b/xen/arch/ia64/vmx_utility.c new file mode 100644 index 0000000000..05239d5b3e --- /dev/null +++ b/xen/arch/ia64/vmx_utility.c @@ -0,0 +1,659 @@ +/* -*- Mode:C; c-basic-offset:4; 
tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_utility.c: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + */ + +#include <xen/types.h> +#include <asm/vmx_vcpu.h> +#include <asm/processor.h> +#include <asm/vmx_mm_def.h> + + +/* + * Return: + * 0: Not reserved indirect registers + * 1: Is reserved indirect registers + */ +int +is_reserved_indirect_register ( + int type, + int index ) +{ + switch (type) { + case IA64_CPUID: + if ( index >= 5 ) { + return 1; + } + + case IA64_DBR: + case IA64_IBR: + //bugbugbug:check with pal about the max ibr/dbr!!!! + break; + + case IA64_PMC: + //bugbugbug:check with pal about the max ibr/dbr!!!! + break; + + case IA64_PMD: + //bugbugbug:check with pal about the max ibr/dbr!!!! + break; + + case IA64_PKR: + //bugbugbug:check with pal about the max pkr!!!! + break; + + case IA64_RR: + //bugbugbug:check with pal about the max rr!!!! + break; + + default: + panic ("Unsupported instruction!"); + } + + return 0; + +} + +/* + * Return: + * Set all ignored fields in value to 0 and return + */ +u64 +indirect_reg_igfld_MASK ( + int type, + int index, + u64 value + ) +{ + u64 nvalue; + + nvalue = value; + switch ( type ) { + case IA64_CPUID: + if ( index == 2 ) { + nvalue = 0; + } + break; + + case IA64_DBR: + case IA64_IBR: + /* Refer to SDM Vol2 Table 7-1,7-2 */ + if ( index % 2 != 0) { + /* Ignore field: {61:60} */ + nvalue = value & (~MASK (60, 2)); + } + break; + case IA64_PMC: + if ( index == 0 ) { + /* Ignore field: 3:1 */ + nvalue = value & (~MASK (1, 3)); + } + break; + case IA64_PMD: + if ( index >= 4 ) { + /* Ignore field: 7:7 */ + /* bugbug: this code is correct for generic + * PMD. However, for implementation specific + * PMD, it's WRONG. need more info to judge + * what's implementation specific PMD. 
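+ * (Editor's note, not part of the original comment: MASK(offset, len),
+ * as used throughout this file, denotes a field of 'len' one-bits
+ * starting at bit 'offset'; e.g. MASK(7, 1) below is just bit 7, so
+ * 'value & ~MASK(7, 1)' clears PMD bit 7.)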
+ */ + nvalue = value & (~MASK (7, 1)); + } + break; + case IA64_PKR: + case IA64_RR: + break; + default: + panic ("Unsupported instruction!"); + } + + return nvalue; +} + +/* + * Return: + * Set all ignored fields in value to 0 and return + */ +u64 +cr_igfld_mask (int index, u64 value) +{ + u64 nvalue; + + nvalue = value; + + switch ( index ) { + case IA64_REG_CR_IVA: + /* Ignore filed: 14:0 */ + nvalue = value & (~MASK (0, 15)); + break; + + case IA64_REG_CR_IHA: + /* Ignore filed: 1:0 */ + nvalue = value & (~MASK (0, 2)); + break; + + case IA64_REG_CR_LID: + /* Ignore filed: 63:32 */ + nvalue = value & (~MASK (32, 32)); + break; + + case IA64_REG_CR_TPR: + /* Ignore filed: 63:17,3:0 */ + nvalue = value & (~MASK (17, 47)); + nvalue = nvalue & (~MASK (0, 4)); + break; + + case IA64_REG_CR_EOI: + /* Ignore filed: 63:0 */ + nvalue = 0; + break; + + case IA64_REG_CR_ITV: + case IA64_REG_CR_PMV: + case IA64_REG_CR_CMCV: + case IA64_REG_CR_LRR0: + case IA64_REG_CR_LRR1: + /* Ignore filed: 63:17,12:12 */ + nvalue = value & (~MASK (17, 47)); + nvalue = nvalue & (~MASK (12, 1)); + break; + } + + return nvalue; +} + + +/* + * Return: + * 1: PSR reserved fields are not zero + * 0: PSR reserved fields are all zero + */ +int +check_psr_rsv_fields (u64 value) +{ + /* PSR reserved fields: 0, 12~6, 16, 31~28, 63~46 + * These reserved fields shall all be zero + * Otherwise we will panic + */ + + if ( value & MASK (0, 1) || + value & MASK (6, 7) || + value & MASK (16, 1) || + value & MASK (28, 4) || + value & MASK (46, 18) + ) { + return 1; + } + + return 0; +} + + + +/* + * Return: + * 1: CR reserved fields are not zero + * 0: CR reserved fields are all zero + */ +int +check_cr_rsv_fields (int index, u64 value) +{ + switch (index) { + case IA64_REG_CR_DCR: + if ( (value & MASK ( 3, 5 )) || + (value & MASK (15, 49))) { + return 1; + } + return 0; + + case IA64_REG_CR_ITM: + case IA64_REG_CR_IVA: + case IA64_REG_CR_IIP: + case IA64_REG_CR_IFA: + case IA64_REG_CR_IIPA: + case IA64_REG_CR_IIM: + case IA64_REG_CR_IHA: + case IA64_REG_CR_EOI: + return 0; + + case IA64_REG_CR_PTA: + if ( (value & MASK ( 1, 1 )) || + (value & MASK (9, 6))) { + return 1; + } + return 0; + + case IA64_REG_CR_IPSR: + return check_psr_rsv_fields (value); + + + case IA64_REG_CR_ISR: + if ( (value & MASK ( 24, 8 )) || + (value & MASK (44, 20))) { + return 1; + } + return 0; + + case IA64_REG_CR_ITIR: + if ( (value & MASK ( 0, 2 )) || + (value & MASK (32, 32))) { + return 1; + } + return 0; + + case IA64_REG_CR_IFS: + if ( (value & MASK ( 38, 25 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_LID: + if ( (value & MASK ( 0, 16 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_IVR: + if ( (value & MASK ( 8, 56 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_TPR: + if ( (value & MASK ( 8, 8 ))) { + return 1; + } + return 0; + + case IA64_REG_CR_IRR0: + if ( (value & MASK ( 1, 1 )) || + (value & MASK (3, 13))) { + return 1; + } + return 0; + + case IA64_REG_CR_ITV: + case IA64_REG_CR_PMV: + case IA64_REG_CR_CMCV: + if ( (value & MASK ( 8, 4 )) || + (value & MASK (13, 3))) { + return 1; + } + return 0; + + case IA64_REG_CR_LRR0: + case IA64_REG_CR_LRR1: + if ( (value & MASK ( 11, 1 )) || + (value & MASK (14, 1))) { + return 1; + } + return 0; + } + + + panic ("Unsupported CR"); +} + + + +/* + * Return: + * 0: Indirect Reg reserved fields are not zero + * 1: Indirect Reg reserved fields are all zero + */ +int +check_indirect_reg_rsv_fields ( int type, int index, u64 value ) +{ + + switch ( type ) { + case IA64_CPUID: + if 
+
+
+
+/*
+ * Return:
+ *  0: Indirect Reg reserved fields are not zero
+ *  1: Indirect Reg reserved fields are all zero
+ */
+int
+check_indirect_reg_rsv_fields ( int type, int index, u64 value )
+{
+
+    switch ( type ) {
+    case IA64_CPUID:
+        if ( index == 3 ) {
+            if ( value & MASK (40, 24) ) {
+                return 0;
+            }
+        } else if ( index == 4 ) {
+            if ( value & MASK (2, 62) ) {
+                return 0;
+            }
+        }
+        break;
+
+    case IA64_DBR:
+    case IA64_IBR:
+    case IA64_PMC:
+    case IA64_PMD:
+        break;
+
+    case IA64_PKR:
+        if ( value & MASK (4, 4) ||
+             value & MASK (32, 32) ) {
+            return 0;
+        }
+        break;
+
+    case IA64_RR:
+        if ( value & MASK (1, 1) ||
+             value & MASK (32, 32) ) {
+            return 0;
+        }
+        break;
+
+    default:
+        panic ("Unsupported instruction!");
+    }
+
+    return 1;
+}
+
+
+
+
+/* Return
+ *  Same format as isr_t
+ *  Only ei/ni bits are valid, all other bits are zero
+ */
+u64
+set_isr_ei_ni (VCPU *vcpu)
+{
+
+    IA64_PSR vpsr,ipsr;
+    ISR visr;
+    REGS *regs;
+
+    regs=vcpu_regs(vcpu);
+
+    visr.val = 0;
+
+    vpsr.val = vmx_vcpu_get_psr (vcpu);
+
+    if ( !vpsr.ic ) {
+        /* Interruption state collection is disabled: set ISR.ni */
+        visr.ni = 1;
+    }
+    ipsr.val = regs->cr_ipsr;
+
+    visr.ei = ipsr.ri;
+    return visr.val;
+}
+
+
+/* Set up ISR.na/code{3:0} for no-access instructions
+ * Refer to SDM Table 5-1
+ * Parameter:
+ *  op: the no-access instruction (IA64_INST_TPA or IA64_INST_TAK)
+ *      that determines ISR.code{3:0} and ISR.na
+ * Return:
+ *  Same format as ISR. All fields are zero, except na/code{3:0}
+ */
+u64
+set_isr_for_na_inst(VCPU *vcpu, int op)
+{
+    ISR visr;
+    visr.val = 0;
+    switch (op) {
+    case IA64_INST_TPA:
+        visr.na = 1;
+        visr.code = 0;
+        break;
+    case IA64_INST_TAK:
+        visr.na = 1;
+        visr.code = 3;
+        break;
+    }
+    return visr.val;
+}
+
+
+
+/*
+ * Set up ISR for a register NaT consumption fault
+ * Parameters:
+ *  inst:  if non-zero, the no-access instruction code passed on to
+ *         set_isr_for_na_inst();
+ *  read:  if 1, indicates this is a read access;
+ *  write: if 1, indicates this is a write access;
+ */
+void
+set_rnat_consumption_isr (VCPU *vcpu,int inst,int read,int write)
+{
+    ISR visr;
+    u64 value;
+    /* Need to set up ISR: code, ei, ni, na, r/w */
+    visr.val = 0;
+
+    /* ISR.code{7:4} = 1,
+     * Set up ISR.code{3:0}, ISR.na
+     */
+    visr.code = (1 << 4);
+    if (inst) {
+        value = set_isr_for_na_inst (vcpu,inst);
+        visr.val = visr.val | value;
+    }
+
+    /* Set up ISR.r/w */
+    visr.r = read;
+    visr.w = write;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu,visr.val);
+}
+
+
+
+/*
+ * Set up ISR for a break fault
+ */
+void set_break_isr (VCPU *vcpu)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need to set up ISR: ei, ni */
+
+    visr.val = 0;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr(vcpu, visr.val);
+}
+
+
+
+
+
+
+/*
+ * Set up ISR for a Privileged Operation fault
+ */
+void set_privileged_operation_isr (VCPU *vcpu,int inst)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need to set up ISR: code, ei, ni, na */
+
+    visr.val = 0;
+
+    /* Set up na, code{3:0} for no-access instruction */
+    value = set_isr_for_na_inst (vcpu, inst);
+    visr.val = visr.val | value;
+
+
+    /* ISR.code{7:4} = 1 */
+    visr.code = (1 << 4) | visr.code;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+
+/*
+ * Set up ISR for a Privileged Register fault
+ */
+void set_privileged_reg_isr (VCPU *vcpu, int inst)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need to set up ISR: code, ei, ni */
+
+    visr.val = 0;
+
+    /* ISR.code{7:4} = 2 */
+    visr.code = 2 << 4;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
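All of these ISR builders follow one pattern: start from a zeroed ISR image, OR in the ei/ni bits computed by set_isr_ei_ni(), then poke the fault-class fields through a union of bitfields. The type below is a cut-down illustration of that union pattern only; the authoritative layout lives in the ia64 headers, and the field widths here are an assumption based on the architectural ISR description, so do not treat them as the patch's definition:

    #include <stdint.h>

    /* Illustrative stand-in for the patch's ISR type (assumed layout). */
    typedef union {
        uint64_t val;
        struct {
            uint64_t code     : 16;  /* {7:4} = fault class, {3:0} = subcode */
            uint64_t vector   : 8;
            uint64_t rsv      : 8;
            uint64_t x        : 1;   /* execute access */
            uint64_t w        : 1;   /* write access */
            uint64_t r        : 1;   /* read access */
            uint64_t na       : 1;   /* non-access instruction */
            uint64_t sp       : 1;
            uint64_t rs       : 1;
            uint64_t ir       : 1;
            uint64_t ni       : 1;   /* nested interruption (psr.ic was 0) */
            uint64_t so       : 1;
            uint64_t ei       : 2;   /* excepting instruction slot */
            uint64_t ed       : 1;
            uint64_t reserved : 20;
        };
    } demo_isr_t;

    /* The OR-merge idiom: ei/ni arrive pre-positioned in a raw value,
     * everything else is then set field by field. */
    uint64_t demo_build_isr(uint64_t ei_ni_bits)
    {
        demo_isr_t isr;
        isr.val  = ei_ni_bits;       /* only ei/ni are populated here */
        isr.code = 1 << 4;           /* e.g. Privileged Operation class */
        isr.na   = 1;
        return isr.val;
    }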
+
+
+
+
+
+/*
+ * Set up ISR for a Reserved Register/Field fault
+ */
+void set_rsv_reg_field_isr (VCPU *vcpu)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need to set up ISR: code, ei, ni */
+
+    visr.val = 0;
+
+    /* ISR.code{7:4} = 3 (Reserved Register/Field fault) */
+    visr.code = (3 << 4) | visr.code;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+
+/*
+ * Set up ISR for an Illegal Operation fault
+ */
+void set_illegal_op_isr (VCPU *vcpu)
+{
+    ISR visr;
+    u64 value;
+
+    /* Need to set up ISR: ei, ni */
+
+    visr.val = 0;
+
+    /* Set up ei/ni */
+    value = set_isr_ei_ni (vcpu);
+    visr.val = visr.val | value;
+
+    vmx_vcpu_set_isr (vcpu, visr.val);
+}
+
+
+void set_isr_reg_nat_consumption(VCPU *vcpu, u64 flag, u64 non_access)
+{
+    ISR isr;
+
+    isr.val = set_isr_ei_ni(vcpu);
+    isr.code = IA64_REG_NAT_CONSUMPTION_FAULT | flag;
+    isr.na = non_access;
+    isr.r = 1;
+    isr.w = 0;
+    vmx_vcpu_set_isr(vcpu, isr.val);
+    return;
+}
+
+void set_isr_for_priv_fault(VCPU *vcpu, u64 non_access)
+{
+    ISR isr;
+
+    isr.val = set_isr_ei_ni(vcpu);
+    isr.code = IA64_PRIV_OP_FAULT;
+    isr.na = non_access;
+    vmx_vcpu_set_isr(vcpu, isr.val);
+
+    return;
+}
+
+
+IA64FAULT check_target_register(VCPU *vcpu, u64 reg_index)
+{
+    u64 sof;
+    REGS *regs;
+    regs=vcpu_regs(vcpu);
+    sof = regs->cr_ifs & 0x7f;
+    if(reg_index >= sof + 32)
+        return IA64_FAULT;
+    return IA64_NO_FAULT;
+}
+
+
+int is_reserved_rr_register(VCPU* vcpu, int reg_index)
+{
+    return (reg_index >= 8);
+}
+
+#define  ITIR_RSV_MASK  (0x3UL | (((1UL<<32)-1) << 32))
+int is_reserved_itir_field(VCPU* vcpu, u64 itir)
+{
+    if ( itir & ITIR_RSV_MASK ) {
+        return 1;
+    }
+    return 0;
+}
+
+int is_reserved_rr_field(VCPU* vcpu, u64 reg_value)
+{
+    ia64_rr rr;
+    rr.rrval = reg_value;
+
+    if(rr.reserved0 != 0 || rr.reserved1 != 0){
+        return 1;
+    }
+    if(rr.ps < 12 || rr.ps > 28){
+        // page too big or small.
+        return 1;
+    }
+    if(rr.ps > 15 && rr.ps % 2 != 0){
+        // unsupported page size.
+        return 1;
+    }
+    return 0;
+}
+
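is_reserved_rr_field() is the whole page-size policy for guest region registers in one place: rr.ps is the log2 page size, anything outside 4KB (ps=12) through 256MB (ps=28) is rejected, and above 32KB (ps=15) only even exponents are implemented. A standalone restatement of just that rule — the cutoffs are taken from the function above, not independently from the SDM:

    #include <stdio.h>

    static int demo_valid_rr_ps(unsigned ps)
    {
        if (ps < 12 || ps > 28)     /* page too small or too big */
            return 0;
        if (ps > 15 && (ps & 1))    /* 64KB and up must use an even ps */
            return 0;
        return 1;
    }

    int main(void)
    {
        /* Accepted: 12,13,14,15,16,18,20,22,24,26,28 */
        for (unsigned ps = 10; ps <= 30; ps++)
            printf("ps=%2u (%10lu bytes): %s\n", ps, 1UL << ps,
                   demo_valid_rr_ps(ps) ? "ok" : "reserved");
        return 0;
    }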
diff --git a/xen/arch/ia64/vmx_vcpu.c b/xen/arch/ia64/vmx_vcpu.c
new file mode 100644
index 0000000000..05c211d428
--- /dev/null
+++ b/xen/arch/ia64/vmx_vcpu.c
@@ -0,0 +1,436 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vcpu.c: handling all virtual cpu related things.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Fred Yang (fred.yang@intel.com)
+ * Arun Sharma (arun.sharma@intel.com)
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+
+#include <linux/sched.h>
+#include <public/arch-ia64.h>
+#include <asm/ia64_int.h>
+#include <asm/vmx_vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/tlb.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/vmx_mm_def.h>
+#include <asm/vmx.h>
+
+//u64  fire_itc;
+//u64  fire_itc2;
+//u64  fire_itm;
+//u64  fire_itm2;
+
+/*
+ * Copyright (c) 2005 Intel Corporation.
+ *    Anthony Xu (anthony.xu@intel.com)
+ *    Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * (GPLv2 terms as stated in the file header above.)
+ */
+
+/**************************************************************************
+ VCPU general register access routines
+**************************************************************************/
+#include <asm/hw_irq.h>
+#include <asm/vmx_pal_vsa.h>
+#include <asm/kregs.h>
+
+//unsigned long last_guest_rsm = 0x0;
+struct guest_psr_bundle{
+    unsigned long ip;
+    unsigned long psr;
+};
+
+struct guest_psr_bundle guest_psr_buf[100];
+unsigned long guest_psr_index = 0;
+
+void
+vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value)
+{
+
+    UINT64 mask;
+    REGS *regs;
+    IA64_PSR old_psr, new_psr;
+    old_psr.val=vmx_vcpu_get_psr(vcpu);
+
+    regs=vcpu_regs(vcpu);
+    /* We only support guests with:
+     *  vpsr.pk = 0
+     *  vpsr.is = 0
+     * Otherwise panic
+     */
+    if ( value & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM )) {
+        panic ("Setting unsupported guest psr!");
+    }
+
+    /*
+     * For the IA64_PSR bits id/da/dd/ss/ed/ia:
+     * since these bits become 0 after each successfully executed
+     * instruction, keep them cleared in the virtual psr.
+     */
+    VMX_VPD(vcpu,vpsr) = value &
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    new_psr.val=vmx_vcpu_get_psr(vcpu);
+    {
+        struct xen_regs *regs = vcpu_regs(vcpu);
+        guest_psr_buf[guest_psr_index].ip = regs->cr_iip;
+        guest_psr_buf[guest_psr_index].psr = new_psr.val;
+        if (++guest_psr_index >= 100)
+            guest_psr_index = 0;
+    }
+#if 0
+    if (old_psr.i != new_psr.i) {
+        if (old_psr.i)
+            last_guest_rsm = vcpu_regs(vcpu)->cr_iip;
+        else
+            last_guest_rsm = 0;
+    }
+#endif
+
+    /*
+     * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr),
+     * except for the following bits:
+     *  ic/i/dt/si/rt/mc/it/bn/vm
+     */
+    mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+            IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+            IA64_PSR_VM;
+
+    regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) );
+
+    check_mm_mode_switch(vcpu, old_psr, new_psr);
+    return;
+}
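The final statement of vmx_vcpu_set_psr() is a two-way bit merge: every bit named in mask keeps its current machine value, every other bit is taken from the guest-supplied image. Isolated as a sketch (the function name is illustrative):

    #include <stdint.h>

    /* (host & keep) | (guest & ~keep): 'keep' selects, bit by bit, whether
     * the hypervisor-owned value survives or the guest's new value wins. */
    static uint64_t merge_psr(uint64_t host_ipsr, uint64_t guest_val,
                              uint64_t keep)
    {
        return (host_ipsr & keep) | (guest_val & ~keep);
    }

With keep = ic|i|dt|si|rt|mc|it|bn|vm, the virtualization-sensitive bits stay under hypervisor control while the rest of the guest's psr image flows straight into cr_ipsr.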
+
+/* Adjust the bundle slot in both xen_regs and the vpd: vpsr.ri must be
+ * kept in sync with ipsr.ri, which was set up on entry.
+ *
+ * Also clear the bits that are defined to become 0 after an instruction
+ * is successfully emulated.
+ */
+IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu)
+{
+    // TODO: trap_bounce?? Eddie
+    REGS *regs = vcpu_regs(vcpu);
+    IA64_PSR vpsr;
+    IA64_PSR *ipsr = (IA64_PSR *)&regs->cr_ipsr;
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if (vpsr.ri == 2) {
+        vpsr.ri = 0;
+        regs->cr_iip += 16;
+    } else {
+        vpsr.ri++;
+    }
+
+    ipsr->ri = vpsr.ri;
+    vpsr.val &=
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    VMX_VPD(vcpu, vpsr) = vpsr.val;
+
+    ipsr->val &=
+            (~ (IA64_PSR_ID |IA64_PSR_DA | IA64_PSR_DD |
+                IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA
+            ));
+
+    return (IA64_NO_FAULT);
+}
+
+
+IA64FAULT vmx_vcpu_cover(VCPU *vcpu)
+{
+    REGS *regs = vcpu_regs(vcpu);
+    IA64_PSR vpsr;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+
+    if(!vpsr.ic)
+        VPD_CR(vcpu,ifs) = regs->cr_ifs;
+    regs->cr_ifs = IA64_IFS_V;
+    return (IA64_NO_FAULT);
+}
+
+
+thash_cb_t *
+vmx_vcpu_get_vtlb(VCPU *vcpu)
+{
+    return vcpu->arch.vtlb;
+}
+
+
+struct virutal_platform_def *
+vmx_vcpu_get_plat(VCPU *vcpu)
+{
+    return &(vcpu->arch.arch_vmx.vmx_platform);
+}
+
+
+ia64_rr vmx_vcpu_rr(VCPU *vcpu,UINT64 vadr)
+{
+    return (ia64_rr)VMX(vcpu,vrr[vadr>>61]);
+}
+
+
+IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+    extern void set_one_rr(UINT64, UINT64);
+    ia64_rr oldrr,newrr;
+    thash_cb_t *hcb;
+    oldrr=vmx_vcpu_rr(vcpu,reg);
+    newrr.rrval=val;
+#if 1
+    if(oldrr.ps!=newrr.ps){
+        hcb = vmx_vcpu_get_vtlb(vcpu);
+        thash_purge_all(hcb);
+    }
+#endif
+    VMX(vcpu,vrr[reg>>61]) = val;
+    switch((u64)(reg>>61)) {
+    case VRN5:
+        VMX(vcpu,mrr5)=vmx_vrrtomrr(vcpu,val);
+        break;
+    case VRN6:
+        VMX(vcpu,mrr6)=vmx_vrrtomrr(vcpu,val);
+        break;
+    case VRN7:
+        VMX(vcpu,mrr7)=vmx_vrrtomrr(vcpu,val);
+        /* Change double mapping for this domain */
+        vmx_change_double_mapping(vcpu,
+                      vmx_vrrtomrr(vcpu,oldrr.rrval),
+                      vmx_vrrtomrr(vcpu,newrr.rrval));
+        break;
+    default:
+        ia64_set_rr(reg,vmx_vrrtomrr(vcpu,val));
+        break;
+    }
+
+    return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+ VCPU protection key register access routines
+**************************************************************************/
+
+IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+    UINT64 val = (UINT64)ia64_get_pkr(reg);
+    *pval = val;
+    return (IA64_NO_FAULT);
+}
+
+IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val)
+{
+    ia64_set_pkr(reg,val);
+    return (IA64_NO_FAULT);
+}
+
+#if 0
+int tlb_debug=0;
+check_entry(u64 va, u64 ps, char *str)
+{
+    va &= ~ (PSIZE(ps)-1);
+    if ( va == 0x2000000002908000UL ||
+         va == 0x600000000000C000UL ) {
+        stop();
+    }
+    if (tlb_debug) printf("%s at %lx %lx\n", str, va, 1UL<<ps);
+}
+#endif
+
+
+u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa)
+{
+    ia64_rr rr,rr1;
+    rr=vmx_vcpu_rr(vcpu,ifa);
+    rr1.rrval=0;
+    rr1.ps=rr.ps;
+    rr1.rid=rr.rid;
+    return (rr1.rrval);
+}
+
+
+
+
+IA64FAULT vmx_vcpu_rfi(VCPU *vcpu)
+{
+    // TODO: Only allowed for current vcpu
+    UINT64 ifs, psr;
+    REGS *regs = vcpu_regs(vcpu);
+    psr = VPD_CR(vcpu,ipsr);
+    vmx_vcpu_set_psr(vcpu,psr);
+    ifs=VPD_CR(vcpu,ifs);
+    if((ifs>>63)&&(ifs<<1)){
+        ifs=(regs->cr_ifs)&0x7f;
+        regs->rfi_pfs = (ifs<<7)|ifs;
+        regs->cr_ifs = VPD_CR(vcpu,ifs);
+    }
+    regs->cr_iip = VPD_CR(vcpu,iip);
+    return (IA64_NO_FAULT);
+}
+
+
+UINT64
+vmx_vcpu_get_psr(VCPU *vcpu)
+{
+    return VMX_VPD(vcpu,vpsr);
+}
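The bgr accessors that follow handle ia64's banked general registers: r16-r31 exist in two copies and psr.bn selects which copy the register names currently address, so the VPD shadows the inactive bank along with a NaT bit per register. A toy model of the selection logic, with invented names standing in for the vgr/vbgr/vnat/vbnat VPD fields:

    #include <stdint.h>

    struct demo_banked_file {
        uint64_t gr[16], bgr[16];   /* bank-1 and bank-0 copies of r16..r31 */
        uint64_t nat, bnat;         /* one NaT bit per register, per bank */
        int bn;                     /* current bank, as in psr.bn */
    };

    /* Read r16..r31: pick the copy selected by bn, report its NaT bit. */
    static uint64_t demo_read_r(const struct demo_banked_file *f,
                                unsigned reg, int *nat)
    {
        unsigned i = reg - 16;      /* caller guarantees 16 <= reg < 32 */
        *nat = (int)(((f->bn ? f->nat : f->bnat) >> i) & 1);
        return f->bn ? f->gr[i] : f->bgr[i];
    }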
+
+
+IA64FAULT
+vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val)
+{
+    IA64_PSR vpsr;
+
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.bn ) {
+        *val=VMX_VPD(vcpu,vgr[reg-16]);
+        // Check NAT bit
+        if ( VMX_VPD(vcpu,vnat) & (1UL<<(reg-16)) ) {
+            // TODO
+            //panic ("NAT consumption fault\n");
+            return IA64_FAULT;
+        }
+
+    }
+    else {
+        *val=VMX_VPD(vcpu,vbgr[reg-16]);
+        // Bank-0 NaT bits live in vbnat, indexed like vbgr
+        if ( VMX_VPD(vcpu,vbnat) & (1UL<<(reg-16)) ) {
+            //panic ("NAT consumption fault\n");
+            return IA64_FAULT;
+        }
+
+    }
+    return IA64_NO_FAULT;
+}
+
+IA64FAULT
+vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val,int nat)
+{
+    IA64_PSR vpsr;
+    vpsr.val = vmx_vcpu_get_psr(vcpu);
+    if ( vpsr.bn ) {
+        VMX_VPD(vcpu,vgr[reg-16]) = val;
+        if(nat){
+            VMX_VPD(vcpu,vnat) |= ( 1UL<<(reg-16) );
+        }else{
+            VMX_VPD(vcpu,vnat) &= ~( 1UL<<(reg-16) );
+        }
+    }
+    else {
+        VMX_VPD(vcpu,vbgr[reg-16]) = val;
+        if(nat){
+            VMX_VPD(vcpu,vbnat) |= ( 1UL<<(reg-16) );
+        }else{
+            VMX_VPD(vcpu,vbnat) &= ~( 1UL<<(reg-16) );
+        }
+    }
+    return IA64_NO_FAULT;
+}
+
+
+
+IA64FAULT
+vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 * val)
+{
+    REGS *regs=vcpu_regs(vcpu);
+    u64 nat;
+    //TODO, Eddie
+    if (!regs) return 0;
+    if (reg >= 16 && reg < 32) {
+        return vmx_vcpu_get_bgr(vcpu,reg,val);
+    }
+    getreg(reg,val,&nat,regs);    // FIXME: handle NATs later
+    if(nat){
+        return IA64_FAULT;
+    }
+    return IA64_NO_FAULT;
+}
+
+// returns:
+//   IA64_ILLOP_FAULT if the register would cause an Illegal Operation fault
+//   IA64_NO_FAULT otherwise
+
+IA64FAULT
+vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int nat)
+{
+    REGS *regs = vcpu_regs(vcpu);
+    long sof;
+    //TODO Eddie
+
+    if (!regs) return IA64_ILLOP_FAULT;
+    sof = (regs->cr_ifs) & 0x7f;    // read cr_ifs only after the NULL check
+    if (reg >= sof + 32) return IA64_ILLOP_FAULT;
+    if ( reg >= 16 && reg < 32 ) {
+        return vmx_vcpu_set_bgr(vcpu,reg, value, nat);
+    }
+    setreg(reg,value,nat,regs);
+    return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+    UINT64 vpsr;
+    vpsr = vmx_vcpu_get_psr(vcpu);
+    vpsr &= (~imm24);
+    vmx_vcpu_set_psr(vcpu, vpsr);
+    return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24)
+{
+    UINT64 vpsr;
+    vpsr = vmx_vcpu_get_psr(vcpu);
+    vpsr |= imm24;
+    vmx_vcpu_set_psr(vcpu, vpsr);
+    return IA64_NO_FAULT;
+}
+
+
+IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val)
+{
+    vmx_vcpu_set_psr(vcpu, val);
+    return IA64_NO_FAULT;
+}
+
+
diff --git a/xen/arch/ia64/vmx_virt.c b/xen/arch/ia64/vmx_virt.c
new file mode 100644
index 0000000000..29b1164196
--- /dev/null
+++ b/xen/arch/ia64/vmx_virt.c
@@ -0,0 +1,1501 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_virt.c:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * + * Fred yang (fred.yang@intel.com) + * Shaofan Li (Susue Li) <susie.li@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + */ + + + +#include <asm/privop.h> +#include <asm/vmx_vcpu.h> +#include <asm/processor.h> +#include <asm/delay.h> // Debug only +#include <asm/vmmu.h> +#include <asm/vmx_mm_def.h> +#include <asm/smp.h> + +#include <asm/virt_event.h> +extern UINT64 privop_trace; + +void +ia64_priv_decoder(IA64_SLOT_TYPE slot_type, INST64 inst, UINT64 * cause) +{ + *cause=0; + switch (slot_type) { + case M: + if (inst.generic.major==0){ + if(inst.M28.x3==0){ + if(inst.M44.x4==6){ + *cause=EVENT_SSM; + }else if(inst.M44.x4==7){ + *cause=EVENT_RSM; + }else if(inst.M30.x4==8&&inst.M30.x2==2){ + *cause=EVENT_MOV_TO_AR_IMM; + } + } + } + else if(inst.generic.major==1){ + if(inst.M28.x3==0){ + if(inst.M32.x6==0x2c){ + *cause=EVENT_MOV_TO_CR; + }else if(inst.M33.x6==0x24){ + *cause=EVENT_MOV_FROM_CR; + }else if(inst.M35.x6==0x2d){ + *cause=EVENT_MOV_TO_PSR; + }else if(inst.M36.x6==0x25){ + *cause=EVENT_MOV_FROM_PSR; + }else if(inst.M29.x6==0x2A){ + *cause=EVENT_MOV_TO_AR; + }else if(inst.M31.x6==0x22){ + *cause=EVENT_MOV_FROM_AR; + }else if(inst.M45.x6==0x09){ + *cause=EVENT_PTC_L; + }else if(inst.M45.x6==0x0A){ + *cause=EVENT_PTC_G; + }else if(inst.M45.x6==0x0B){ + *cause=EVENT_PTC_GA; + }else if(inst.M45.x6==0x0C){ + *cause=EVENT_PTR_D; + }else if(inst.M45.x6==0x0D){ + *cause=EVENT_PTR_I; + }else if(inst.M46.x6==0x1A){ + *cause=EVENT_THASH; + }else if(inst.M46.x6==0x1B){ + *cause=EVENT_TTAG; + }else if(inst.M46.x6==0x1E){ + *cause=EVENT_TPA; + }else if(inst.M46.x6==0x1F){ + *cause=EVENT_TAK; + }else if(inst.M47.x6==0x34){ + *cause=EVENT_PTC_E; + }else if(inst.M41.x6==0x2E){ + *cause=EVENT_ITC_D; + }else if(inst.M41.x6==0x2F){ + *cause=EVENT_ITC_I; + }else if(inst.M42.x6==0x00){ + *cause=EVENT_MOV_TO_RR; + }else if(inst.M42.x6==0x01){ + *cause=EVENT_MOV_TO_DBR; + }else if(inst.M42.x6==0x02){ + *cause=EVENT_MOV_TO_IBR; + }else if(inst.M42.x6==0x03){ + *cause=EVENT_MOV_TO_PKR; + }else if(inst.M42.x6==0x04){ + *cause=EVENT_MOV_TO_PMC; + }else if(inst.M42.x6==0x05){ + *cause=EVENT_MOV_TO_PMD; + }else if(inst.M42.x6==0x0E){ + *cause=EVENT_ITR_D; + }else if(inst.M42.x6==0x0F){ + *cause=EVENT_ITR_I; + }else if(inst.M43.x6==0x10){ + *cause=EVENT_MOV_FROM_RR; + }else if(inst.M43.x6==0x11){ + *cause=EVENT_MOV_FROM_DBR; + }else if(inst.M43.x6==0x12){ + *cause=EVENT_MOV_FROM_IBR; + }else if(inst.M43.x6==0x13){ + *cause=EVENT_MOV_FROM_PKR; + }else if(inst.M43.x6==0x14){ + *cause=EVENT_MOV_FROM_PMC; +/* + }else if(inst.M43.x6==0x15){ + *cause=EVENT_MOV_FROM_PMD; +*/ + }else if(inst.M43.x6==0x17){ + *cause=EVENT_MOV_FROM_CPUID; + } + } + } + break; + case B: + if(inst.generic.major==0){ + if(inst.B8.x6==0x02){ + *cause=EVENT_COVER; + }else if(inst.B8.x6==0x08){ + *cause=EVENT_RFI; + }else if(inst.B8.x6==0x0c){ + *cause=EVENT_BSW_0; + }else if(inst.B8.x6==0x0d){ + *cause=EVENT_BSW_1; + } + } + } +} + +IA64FAULT vmx_emul_rsm(VCPU *vcpu, INST64 inst) +{ + UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; + return vmx_vcpu_reset_psr_sm(vcpu,imm24); +} + +IA64FAULT vmx_emul_ssm(VCPU *vcpu, INST64 inst) +{ + UINT64 imm24 = (inst.M44.i<<23)|(inst.M44.i2<<21)|inst.M44.imm; + return vmx_vcpu_set_psr_sm(vcpu,imm24); +} + +unsigned long last_guest_psr = 0x0; +IA64FAULT vmx_emul_mov_from_psr(VCPU *vcpu, INST64 inst) +{ + UINT64 tgt = inst.M33.r1; + UINT64 val; + IA64FAULT fault; + +/* + if ((fault = vmx_vcpu_get_psr(vcpu,&val)) == IA64_NO_FAULT) + return vmx_vcpu_set_gr(vcpu, tgt, val); + 
else return fault; + */ + val = vmx_vcpu_get_psr(vcpu); + val = (val & MASK(0, 32)) | (val & MASK(35, 2)); + last_guest_psr = val; + return vmx_vcpu_set_gr(vcpu, tgt, val, 0); +} + +/** + * @todo Check for reserved bits and return IA64_RSVDREG_FAULT. + */ +IA64FAULT vmx_emul_mov_to_psr(VCPU *vcpu, INST64 inst) +{ + UINT64 val; + IA64FAULT fault; + if(vmx_vcpu_get_gr(vcpu, inst.M35.r2, &val) != IA64_NO_FAULT) + panic(" get_psr nat bit fault\n"); + + val = (val & MASK(0, 32)) | (VMX_VPD(vcpu, vpsr) & MASK(32, 32)); +#if 0 + if (last_mov_from_psr && (last_guest_psr != (val & MASK(0,32)))) + while(1); + else + last_mov_from_psr = 0; +#endif + return vmx_vcpu_set_psr_l(vcpu,val); +} + + +/************************************************************************** +Privileged operation emulation routines +**************************************************************************/ + +IA64FAULT vmx_emul_rfi(VCPU *vcpu, INST64 inst) +{ + IA64_PSR vpsr; + REGS *regs; +#ifdef CHECK_FAULT + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + regs=vcpu_regs(vcpu); + vpsr.val=regs->cr_ipsr; + if ( vpsr.is == 1 ) { + panic ("We do not support IA32 instruction yet"); + } + + return vmx_vcpu_rfi(vcpu); +} + +IA64FAULT vmx_emul_bsw0(VCPU *vcpu, INST64 inst) +{ +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + return vmx_vcpu_bsw0(vcpu); +} + +IA64FAULT vmx_emul_bsw1(VCPU *vcpu, INST64 inst) +{ +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + return vmx_vcpu_bsw1(vcpu); +} + +IA64FAULT vmx_emul_cover(VCPU *vcpu, INST64 inst) +{ + return vmx_vcpu_cover(vcpu); +} + +IA64FAULT vmx_emul_ptc_l(VCPU *vcpu, INST64 inst) +{ + u64 r2,r3; + ISR isr; + IA64_PSR vpsr; + + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&r2)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } +#ifdef VMAL_NO_FAULT_CHECK + if (unimplemented_gva(vcpu,r3) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + return vmx_vcpu_ptc_l(vcpu,r3,bits(r2,2,7)); +} + +IA64FAULT vmx_emul_ptc_e(VCPU *vcpu, INST64 inst) +{ + u64 r3; + ISR isr; + IA64_PSR vpsr; + + vpsr.val=vmx_vcpu_get_psr(vcpu); +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + if(vmx_vcpu_get_gr(vcpu,inst.M47.r3,&r3)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } 
+ return vmx_vcpu_ptc_e(vcpu,r3); +} + +IA64FAULT vmx_emul_ptc_g(VCPU *vcpu, INST64 inst) +{ + return vmx_emul_ptc_l(vcpu, inst); +} + +IA64FAULT vmx_emul_ptc_ga(VCPU *vcpu, INST64 inst) +{ + return vmx_emul_ptc_l(vcpu, inst); +} + +IA64FAULT ptr_fault_check(VCPU *vcpu, INST64 inst, u64 *pr2, u64 *pr3) +{ + ISR isr; + IA64FAULT ret1, ret2; + +#ifdef VMAL_NO_FAULT_CHECK + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r3,pr3); + ret2 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pr2); +#ifdef VMAL_NO_FAULT_CHECK + if ( ret1 != IA64_NO_FAULT || ret2 != IA64_NO_FAULT ) { + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; + } + if (unimplemented_gva(vcpu,r3) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + return IA64_NO_FAULT; +} + +IA64FAULT vmx_emul_ptr_d(VCPU *vcpu, INST64 inst) +{ + u64 r2,r3; + if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT ) + return IA64_FAULT; + return vmx_vcpu_ptr_d(vcpu,r3,bits(r2,2,7)); +} + +IA64FAULT vmx_emul_ptr_i(VCPU *vcpu, INST64 inst) +{ + u64 r2,r3; + if ( ptr_fault_check(vcpu, inst, &r2, &r3 ) == IA64_FAULT ) + return IA64_FAULT; + return vmx_vcpu_ptr_i(vcpu,r3,bits(r2,2,7)); +} + + +IA64FAULT vmx_emul_thash(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; + IA64_PSR vpsr; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(unimplemented_gva(vcpu, r3)){ + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_thash(vcpu, r3, &r1); + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + + +IA64FAULT vmx_emul_ttag(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; + IA64_PSR vpsr; + #ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(unimplemented_gva(vcpu, r3)){ + vmx_vcpu_set_gr(vcpu, inst.M46.r1, 0, 1); + return IA64_NO_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_ttag(vcpu, r3, &r1); + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + + +IA64FAULT vmx_emul_tpa(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(vpsr.cpl!=0){ + visr.val=0; + vcpu_set_isr(vcpu, visr.val); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,1); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if (unimplemented_gva(vcpu,r3) ) { + // inject 
unimplemented_data_address_fault + visr.val = set_isr_ei_ni(vcpu); + visr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + // FAULT_UNIMPLEMENTED_DATA_ADDRESS. + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + + if(vmx_vcpu_tpa(vcpu, r3, &r1)){ + return IA64_FAULT; + } + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + +IA64FAULT vmx_emul_tak(VCPU *vcpu, INST64 inst) +{ + u64 r1,r3; + ISR visr; + IA64_PSR vpsr; + int fault=IA64_NO_FAULT; +#ifdef CHECK_FAULT + visr.val=0; + if(check_target_register(vcpu, inst.M46.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(vpsr.cpl!=0){ + vcpu_set_isr(vcpu, visr.val); + return IA64_FAULT; + } +#endif + if(vmx_vcpu_get_gr(vcpu, inst.M46.r3, &r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,1); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif + } + if(vmx_vcpu_tak(vcpu, r3, &r1)){ + return IA64_FAULT; + } + vmx_vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + + +/************************************ + * Insert translation register/cache +************************************/ + +IA64FAULT vmx_emul_itr_d(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte, slot; + ISR isr; + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.ic ) { + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } +#ifdef VMAL_NO_FAULT_CHECK + if(is_reserved_rr_register(vcpu, slot)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + if (vmx_vcpu_get_itir(vcpu,&itir)){ + return(IA64_FAULT); + } + if (vmx_vcpu_get_ifa(vcpu,&ifa)){ + return(IA64_FAULT); + } +#ifdef VMAL_NO_FAULT_CHECK + if (is_reserved_itir_field(vcpu, itir)) { + // TODO + return IA64_FAULT; + } + if (unimplemented_gva(vcpu,ifa) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + return (vmx_vcpu_itr_d(vcpu,pte,itir,ifa,slot)); +} + +IA64FAULT vmx_emul_itr_i(VCPU *vcpu, INST64 inst) +{ + UINT64 fault, itir, ifa, pte, slot; + ISR isr; + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.ic ) { + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + if(vmx_vcpu_get_gr(vcpu,inst.M45.r3,&slot)||vmx_vcpu_get_gr(vcpu,inst.M45.r2,&pte)){ +#ifdef VMAL_NO_FAULT_CHECK + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif // VMAL_NO_FAULT_CHECK + } +#ifdef VMAL_NO_FAULT_CHECK + if(is_reserved_rr_register(vcpu, slot)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + if (vmx_vcpu_get_itir(vcpu,&itir)){ + return(IA64_FAULT); + } + if 
(vmx_vcpu_get_ifa(vcpu,&ifa)){ + return(IA64_FAULT); + } +#ifdef VMAL_NO_FAULT_CHECK + if (is_reserved_itir_field(vcpu, itir)) { + // TODO + return IA64_FAULT; + } + if (unimplemented_gva(vcpu,ifa) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + return (vmx_vcpu_itr_i(vcpu,pte,itir,ifa,slot)); +} + +IA64FAULT itc_fault_check(VCPU *vcpu, INST64 inst, u64 *itir, u64 *ifa,u64 *pte) +{ + UINT64 fault; + ISR isr; + IA64_PSR vpsr; + IA64FAULT ret1; + + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.ic ) { + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + +#ifdef VMAL_NO_FAULT_CHECK + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + ret1 = vmx_vcpu_get_gr(vcpu,inst.M45.r2,pte); +#ifdef VMAL_NO_FAULT_CHECK + if( ret1 != IA64_NO_FAULT ){ + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + + if (vmx_vcpu_get_itir(vcpu,itir)){ + return(IA64_FAULT); + } + if (vmx_vcpu_get_ifa(vcpu,ifa)){ + return(IA64_FAULT); + } +#ifdef VMAL_NO_FAULT_CHECK + if (unimplemented_gva(vcpu,ifa) ) { + isr.val = set_isr_ei_ni(vcpu); + isr.code = IA64_RESERVED_REG_FAULT; + vcpu_set_isr(vcpu, isr.val); + unimpl_daddr(vcpu); + return IA64_FAULT; + } +#endif // VMAL_NO_FAULT_CHECK + return IA64_NO_FAULT; +} + +IA64FAULT vmx_emul_itc_d(VCPU *vcpu, INST64 inst) +{ + UINT64 itir, ifa, pte; + + if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) { + return IA64_FAULT; + } + + return (vmx_vcpu_itc_d(vcpu,pte,itir,ifa)); +} + +IA64FAULT vmx_emul_itc_i(VCPU *vcpu, INST64 inst) +{ + UINT64 itir, ifa, pte; + + if ( itc_fault_check(vcpu, inst, &itir, &ifa, &pte) == IA64_FAULT ) { + return IA64_FAULT; + } + + return (vmx_vcpu_itc_i(vcpu,pte,itir,ifa)); + +} + +/************************************* + * Moves to semi-privileged registers +*************************************/ + +IA64FAULT vmx_emul_mov_to_ar_imm(VCPU *vcpu, INST64 inst) +{ + // I27 and M30 are identical for these fields + if(inst.M30.ar3!=44){ + panic("Can't support ar register other than itc"); + } +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + UINT64 imm; + if(inst.M30.s){ + imm = -inst.M30.imm; + }else{ + imm = inst.M30.imm; + } + return (vmx_vcpu_set_itc(vcpu, imm)); +} + +IA64FAULT vmx_emul_mov_to_ar_reg(VCPU *vcpu, INST64 inst) +{ + // I26 and M29 are identical for these fields + u64 r2; + if(inst.M29.ar3!=44){ + panic("Can't support ar register other than itc"); + } + if(vmx_vcpu_get_gr(vcpu,inst.M29.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + return (vmx_vcpu_set_itc(vcpu, r2)); +} + + +IA64FAULT vmx_emul_mov_from_ar_reg(VCPU *vcpu, INST64 inst) +{ + // I27 and M30 are identical for these fields + 
if(inst.M31.ar3!=44){ + panic("Can't support ar register other than itc"); + } +#ifdef CHECK_FAULT + if(check_target_register(vcpu,inst.M31.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.si&& vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + u64 r1; + vmx_vcpu_get_itc(vcpu,&r1); + vmx_vcpu_set_gr(vcpu,inst.M31.r1,r1,0); + return IA64_NO_FAULT; +} + + +/******************************** + * Moves to privileged registers +********************************/ + +IA64FAULT vmx_emul_mov_to_pkr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_pkr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_rr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_rr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_dbr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_dbr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_ibr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_ibr(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_pmc(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + 
rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_pmc(vcpu,r3,r2)); +} + +IA64FAULT vmx_emul_mov_to_pmd(VCPU *vcpu, INST64 inst) +{ + u64 r3,r2; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M42.r3,&r3)||vmx_vcpu_get_gr(vcpu,inst.M42.r2,&r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } + return (vmx_vcpu_set_pmd(vcpu,r3,r2)); +} + + +/********************************** + * Moves from privileged registers + **********************************/ + +IA64FAULT vmx_emul_mov_from_rr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_rr_register(vcpu,r3>>VRN_SHIFT)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + } +#endif //CHECK_FAULT + vmx_vcpu_get_rr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_pkr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_pkr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_dbr(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_dbr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_ibr(VCPU *vcpu, INST64 inst) +{ + u64 
r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_ibr(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_pmc(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if (vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } + +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_pmc(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_from_cpuid(VCPU *vcpu, INST64 inst) +{ + u64 r3,r1; +#ifdef CHECK_FAULT + if(check_target_register(vcpu, inst.M43.r1)){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu,inst.M43.r3,&r3)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if(is_reserved_indirect_register(vcpu,r3)){ + set_rsv_reg_field_isr(vcpu); + rsv_reg_field(vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + vmx_vcpu_get_cpuid(vcpu,r3,&r1); + return vmx_vcpu_set_gr(vcpu, inst.M43.r1, r1,0); +} + +IA64FAULT vmx_emul_mov_to_cr(VCPU *vcpu, INST64 inst) +{ + u64 r2,cr3; +#ifdef CHECK_FAULT + IA64_PSR vpsr; + vpsr.val=vmx_vcpu_get_psr(vcpu); + if(is_reserved_cr(inst.M32.cr3)||(vpsr.ic&&is_interruption_control_cr(inst.M32.cr3))){ + set_illegal_op_isr(vcpu); + illegal_op(vcpu); + return IA64_FAULT; + } + if ( vpsr.cpl != 0) { + /* Inject Privileged Operation fault into guest */ + set_privileged_operation_isr (vcpu, 0); + privilege_op (vcpu); + return IA64_FAULT; + } +#endif // CHECK_FAULT + if(vmx_vcpu_get_gr(vcpu, inst.M32.r2, &r2)){ +#ifdef CHECK_FAULT + set_isr_reg_nat_consumption(vcpu,0,0); + rnat_comsumption(vcpu); + return IA64_FAULT; +#endif //CHECK_FAULT + } +#ifdef CHECK_FAULT + if ( check_cr_rsv_fields (inst.M32.cr3, r2)) { + /* Inject Reserved Register/Field fault + * into guest */ + set_rsv_reg_field_isr (vcpu,0); + rsv_reg_field (vcpu); + return IA64_FAULT; + } +#endif //CHECK_FAULT + extern u64 cr_igfld_mask(int index, u64 value); + r2 = cr_igfld_mask(inst.M32.cr3,r2); + VMX_VPD(vcpu, vcr[inst.M32.cr3]) = r2; + switch (inst.M32.cr3) { + case 0: return vmx_vcpu_set_dcr(vcpu,r2); + case 1: return vmx_vcpu_set_itm(vcpu,r2); + case 
2: return vmx_vcpu_set_iva(vcpu,r2);
+    case 8: return vmx_vcpu_set_pta(vcpu,r2);
+    case 16:return vmx_vcpu_set_ipsr(vcpu,r2);
+    case 17:return vmx_vcpu_set_isr(vcpu,r2);
+    case 19:return vmx_vcpu_set_iip(vcpu,r2);
+    case 20:return vmx_vcpu_set_ifa(vcpu,r2);
+    case 21:return vmx_vcpu_set_itir(vcpu,r2);
+    case 22:return vmx_vcpu_set_iipa(vcpu,r2);
+    case 23:return vmx_vcpu_set_ifs(vcpu,r2);
+    case 24:return vmx_vcpu_set_iim(vcpu,r2);
+    case 25:return vmx_vcpu_set_iha(vcpu,r2);
+    case 64:return vmx_vcpu_set_lid(vcpu,r2);
+    case 65:return IA64_NO_FAULT;
+    case 66:return vmx_vcpu_set_tpr(vcpu,r2);
+    case 67:return vmx_vcpu_set_eoi(vcpu,r2);
+    case 68:return IA64_NO_FAULT;
+    case 69:return IA64_NO_FAULT;
+    case 70:return IA64_NO_FAULT;
+    case 71:return IA64_NO_FAULT;
+    case 72:return vmx_vcpu_set_itv(vcpu,r2);
+    case 73:return vmx_vcpu_set_pmv(vcpu,r2);
+    case 74:return vmx_vcpu_set_cmcv(vcpu,r2);
+    case 80:return vmx_vcpu_set_lrr0(vcpu,r2);
+    case 81:return vmx_vcpu_set_lrr1(vcpu,r2);
+    default: return IA64_NO_FAULT;
+    }
+}
+
+
+#define cr_get(cr) \
+    ((fault=vmx_vcpu_get_##cr(vcpu,&val))==IA64_NO_FAULT)?\
+        vmx_vcpu_set_gr(vcpu, tgt, val,0):fault
+
+
+IA64FAULT vmx_emul_mov_from_cr(VCPU *vcpu, INST64 inst)
+{
+    UINT64 tgt = inst.M33.r1;
+    UINT64 val;
+    IA64FAULT fault;
+#ifdef  CHECK_FAULT
+    IA64_PSR vpsr;
+    vpsr.val=vmx_vcpu_get_psr(vcpu);
+    if(is_reserved_cr(inst.M33.cr3)||is_read_only_cr(inst.M33.cr3)||
+        (vpsr.ic&&is_interruption_control_cr(inst.M33.cr3))){
+        set_illegal_op_isr(vcpu);
+        illegal_op(vcpu);
+        return IA64_FAULT;
+    }
+    if ( vpsr.cpl != 0) {
+        /* Inject Privileged Operation fault into guest */
+        set_privileged_operation_isr (vcpu, 0);
+        privilege_op (vcpu);
+        return IA64_FAULT;
+    }
+#endif // CHECK_FAULT
+
+//    from_cr_cnt[inst.M33.cr3]++;
+    switch (inst.M33.cr3) {
+    case 0: return cr_get(dcr);
+    case 1: return cr_get(itm);
+    case 2: return cr_get(iva);
+    case 8: return cr_get(pta);
+    case 16:return cr_get(ipsr);
+    case 17:return cr_get(isr);
+    case 19:return cr_get(iip);
+    case 20:return cr_get(ifa);
+    case 21:return cr_get(itir);
+    case 22:return cr_get(iipa);
+    case 23:return cr_get(ifs);
+    case 24:return cr_get(iim);
+    case 25:return cr_get(iha);
+    case 64:val = ia64_getreg(_IA64_REG_CR_LID);
+        return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+//    case 64:return cr_get(lid);
+    case 65:
+        vmx_vcpu_get_ivr(vcpu,&val);
+        return vmx_vcpu_set_gr(vcpu,tgt,val,0);
+    case 66:return cr_get(tpr);
+    case 67:return vmx_vcpu_set_gr(vcpu,tgt,0L,0);
+    case 68:return cr_get(irr0);
+    case 69:return cr_get(irr1);
+    case 70:return cr_get(irr2);
+    case 71:return cr_get(irr3);
+    case 72:return cr_get(itv);
+    case 73:return cr_get(pmv);
+    case 74:return cr_get(cmcv);
+    case 80:return cr_get(lrr0);
+    case 81:return cr_get(lrr1);
+    default:
+        panic("Read reserved cr register");
+    }
+}
+
+
+
+
+//#define  BYPASS_VMAL_OPCODE
+extern IA64_SLOT_TYPE  slot_types[0x20][3];
+IA64_BUNDLE __vmx_get_domain_bundle(u64 iip)
+{
+    IA64_BUNDLE bundle;
+
+    fetch_code( current,iip, &bundle.i64[0]);
+    fetch_code( current,iip+8, &bundle.i64[1]);
+    return bundle;
+}
+
+/** Emulate a privileged operation.
+ * + * + * @param vcpu virtual cpu + * @cause the reason cause virtualization fault + * @opcode the instruction code which cause virtualization fault + */ + +void +vmx_emulate(VCPU *vcpu, UINT64 cause, UINT64 opcode) +{ + IA64_BUNDLE bundle; + int slot; + IA64_SLOT_TYPE slot_type; + IA64FAULT status; + INST64 inst; + REGS * regs; + UINT64 iip; + regs = vcpu_regs(vcpu); + iip = regs->cr_iip; + IA64_PSR vpsr; +/* + if (privop_trace) { + static long i = 400; + //if (i > 0) printf("privop @%p\n",iip); + if (i > 0) printf("priv_handle_op: @%p, itc=%lx, itm=%lx\n", + iip,ia64_get_itc(),ia64_get_itm()); + i--; + } +*/ +#ifdef VTLB_DEBUG + check_vtlb_sanity(vmx_vcpu_get_vtlb(vcpu)); + dump_vtlb(vmx_vcpu_get_vtlb(vcpu)); +#endif +#if 0 +if ( (cause == 0xff && opcode == 0x1e000000000) || cause == 0 ) { + printf ("VMAL decode error: cause - %lx; op - %lx\n", + cause, opcode ); + return; +} +#endif +#ifdef BYPASS_VMAL_OPCODE + // make a local copy of the bundle containing the privop + bundle = __vmx_get_domain_bundle(iip); + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) inst.inst = bundle.slot0; + else if (slot == 1) + inst.inst = bundle.slot1a + (bundle.slot1b<<23); + else if (slot == 2) inst.inst = bundle.slot2; + else printf("priv_handle_op: illegal slot: %d\n", slot); + slot_type = slot_types[bundle.template][slot]; + ia64_priv_decoder(slot_type, inst, &cause); + if(cause==0){ + printf("This instruction at 0x%lx slot %d can't be virtualized", iip, slot); + panic("123456\n"); + } +#else + inst.inst=opcode; +#endif /* BYPASS_VMAL_OPCODE */ + + /* + * Switch to actual virtual rid in rr0 and rr4, + * which is required by some tlb related instructions. + */ + prepare_if_physical_mode(vcpu); + + switch(cause) { + case EVENT_RSM: + status=vmx_emul_rsm(vcpu, inst); + break; + case EVENT_SSM: + status=vmx_emul_ssm(vcpu, inst); + break; + case EVENT_MOV_TO_PSR: + status=vmx_emul_mov_to_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_PSR: + status=vmx_emul_mov_from_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_CR: + status=vmx_emul_mov_from_cr(vcpu, inst); + break; + case EVENT_MOV_TO_CR: + status=vmx_emul_mov_to_cr(vcpu, inst); + break; + case EVENT_BSW_0: + status=vmx_emul_bsw0(vcpu, inst); + break; + case EVENT_BSW_1: + status=vmx_emul_bsw1(vcpu, inst); + break; + case EVENT_COVER: + status=vmx_emul_cover(vcpu, inst); + break; + case EVENT_RFI: + status=vmx_emul_rfi(vcpu, inst); + break; + case EVENT_ITR_D: + status=vmx_emul_itr_d(vcpu, inst); + break; + case EVENT_ITR_I: + status=vmx_emul_itr_i(vcpu, inst); + break; + case EVENT_PTR_D: + status=vmx_emul_ptr_d(vcpu, inst); + break; + case EVENT_PTR_I: + status=vmx_emul_ptr_i(vcpu, inst); + break; + case EVENT_ITC_D: + status=vmx_emul_itc_d(vcpu, inst); + break; + case EVENT_ITC_I: + status=vmx_emul_itc_i(vcpu, inst); + break; + case EVENT_PTC_L: + status=vmx_emul_ptc_l(vcpu, inst); + break; + case EVENT_PTC_G: + status=vmx_emul_ptc_g(vcpu, inst); + break; + case EVENT_PTC_GA: + status=vmx_emul_ptc_ga(vcpu, inst); + break; + case EVENT_PTC_E: + status=vmx_emul_ptc_e(vcpu, inst); + break; + case EVENT_MOV_TO_RR: + status=vmx_emul_mov_to_rr(vcpu, inst); + break; + case EVENT_MOV_FROM_RR: + status=vmx_emul_mov_from_rr(vcpu, inst); + break; + case EVENT_THASH: + status=vmx_emul_thash(vcpu, inst); + break; + case EVENT_TTAG: + status=vmx_emul_ttag(vcpu, inst); + break; + case EVENT_TPA: + status=vmx_emul_tpa(vcpu, inst); + break; + case EVENT_TAK: + status=vmx_emul_tak(vcpu, inst); + break; + case EVENT_MOV_TO_AR_IMM: + 
status=vmx_emul_mov_to_ar_imm(vcpu, inst); + break; + case EVENT_MOV_TO_AR: + status=vmx_emul_mov_to_ar_reg(vcpu, inst); + break; + case EVENT_MOV_FROM_AR: + status=vmx_emul_mov_from_ar_reg(vcpu, inst); + break; + case EVENT_MOV_TO_DBR: + status=vmx_emul_mov_to_dbr(vcpu, inst); + break; + case EVENT_MOV_TO_IBR: + status=vmx_emul_mov_to_ibr(vcpu, inst); + break; + case EVENT_MOV_TO_PMC: + status=vmx_emul_mov_to_pmc(vcpu, inst); + break; + case EVENT_MOV_TO_PMD: + status=vmx_emul_mov_to_pmd(vcpu, inst); + break; + case EVENT_MOV_TO_PKR: + status=vmx_emul_mov_to_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_DBR: + status=vmx_emul_mov_from_dbr(vcpu, inst); + break; + case EVENT_MOV_FROM_IBR: + status=vmx_emul_mov_from_ibr(vcpu, inst); + break; + case EVENT_MOV_FROM_PMC: + status=vmx_emul_mov_from_pmc(vcpu, inst); + break; + case EVENT_MOV_FROM_PKR: + status=vmx_emul_mov_from_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_CPUID: + status=vmx_emul_mov_from_cpuid(vcpu, inst); + break; + case EVENT_VMSW: + printf ("Unimplemented instruction %d\n", cause); + status=IA64_FAULT; + break; + default: + printf("unknown cause %d:\n", cause); + /* For unknown cause, let hardware to re-execute */ + status=IA64_RETRY; +// panic("unknown cause in virtualization intercept"); + }; + +#if 0 + if (status == IA64_FAULT) + panic("Emulation failed with cause %d:\n", cause); +#endif + + if ( status == IA64_NO_FAULT && cause !=EVENT_RFI ) { + vmx_vcpu_increment_iip(vcpu); + } + + recover_if_physical_mode(vcpu); +//TODO set_irq_check(v); + return; + +} + diff --git a/xen/arch/ia64/vmx_vsa.S b/xen/arch/ia64/vmx_vsa.S new file mode 100644 index 0000000000..5ceea44fb6 --- /dev/null +++ b/xen/arch/ia64/vmx_vsa.S @@ -0,0 +1,84 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_vsa.c: Call PAL virtualization services. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Arun Sharma <arun.sharma@intel.com> + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + */ + +#include <asm/asmmacro.h> + + + .text + +/* + * extern UINT64 ia64_call_vsa(UINT64 proc,UINT64 arg1, UINT64 arg2, + * UINT64 arg3, UINT64 arg4, UINT64 arg5, + * UINT64 arg6, UINT64 arg7); + * + * XXX: The currently defined services use only 4 args at the max. The + * rest are not consumed. 
+ */ +GLOBAL_ENTRY(ia64_call_vsa) + .regstk 4,4,0,0 + +rpsave = loc0 +pfssave = loc1 +psrsave = loc2 +entry = loc3 +hostret = r24 + + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + movl entry=@gprel(__vsa_base) +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + add entry=entry,gp + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + ld8 entry=[entry] // read entry point + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.d + mov b6=entry + br.cond.sptk b6 // call the service +2: + // Architectural sequence for enabling interrupts if necessary +(p7) ssm psr.ic + ;; +(p7) srlz.d + ;; +(p6) ssm psr.i + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp + +END(ia64_call_vsa) + diff --git a/xen/arch/ia64/vtlb.c b/xen/arch/ia64/vtlb.c new file mode 100644 index 0000000000..6cbb4478b7 --- /dev/null +++ b/xen/arch/ia64/vtlb.c @@ -0,0 +1,1004 @@ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vtlb.c: guest virtual tlb handling module. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * XiaoYan Feng (Fleming Feng) (Fleming.feng@intel.com) + */ + +#include <linux/sched.h> +#include <asm/tlb.h> +#include <asm/vmx_mm_def.h> +#include <asm/gcc_intrin.h> +#include <xen/interrupt.h> +#include <asm/vcpu.h> +#define MAX_CCH_LENGTH 40 + + +static void cch_mem_init(thash_cb_t *hcb) +{ + thash_cch_mem_t *p, *q; + + hcb->cch_freelist = p = hcb->cch_buf; + + for ( q=p+1; (u64)(q + 1) <= (u64)hcb->cch_buf + hcb->cch_sz; + p++, q++ ) { + p->next = q; + } + p->next = NULL; +} + +static thash_data_t *cch_alloc(thash_cb_t *hcb) +{ + thash_cch_mem_t *p; + + if ( (p = hcb->cch_freelist) != NULL ) { + hcb->cch_freelist = p->next; + } + return &(p->data); +} + +static void cch_free(thash_cb_t *hcb, thash_data_t *cch) +{ + thash_cch_mem_t *p = (thash_cch_mem_t*)cch; + + p->next = hcb->cch_freelist; + hcb->cch_freelist = p; +} + +/* + * Check to see if the address rid:va is translated by the TLB + */ +static int __is_translated(thash_data_t *tlb, u64 rid, u64 va, CACHE_LINE_TYPE cl) +{ + u64 size1,sa1,ea1; + + if ( tlb->rid != rid || tlb->cl != cl ) + return 0; + size1 = PSIZE(tlb->ps); + sa1 = tlb->vadr & ~(size1-1); // mask the low address bits + ea1 = sa1 + size1; + + if ( va >= sa1 && (va < ea1 || ea1 == 0) ) + return 1; + else + return 0; +} + +/* + * Only for TLB format. 
+ */ +static int +__is_tlb_overlap(thash_cb_t *hcb,thash_data_t *entry,int rid, char cl, u64 sva, u64 eva) +{ + uint64_t size1,size2,sa1,ea1,ea2; + + if ( entry->invalid || entry->rid != rid || entry->cl != cl ) { + return 0; + } + size1=PSIZE(entry->ps); + sa1 = entry->vadr & ~(size1-1); // mask the low address bits + ea1 = sa1 + size1; + if ( (sva >= ea1 && ea1 != 0) || (eva <= sa1 && eva != 0) ) + return 0; + else + return 1; + +} + +static void __rem_tr (thash_cb_t *hcb, thash_data_t *tr) +{ + if ( hcb->remove_notifier ) { + (hcb->remove_notifier)(hcb,tr); + } + tr->invalid = 1; +} + +static inline void __set_tr (thash_data_t *tr, thash_data_t *data, int idx) +{ + *tr = *data; + tr->tr_idx = idx; +} + + +static void __init_tr(thash_cb_t *hcb) +{ + int i; + thash_data_t *tr; + + for ( i=0, tr = &ITR(hcb,0); i<NITRS; i++ ) { + tr[i].invalid = 1; + } + for ( i=0, tr = &DTR(hcb,0); i<NDTRS; i++ ) { + tr[i].invalid = 1; + } +} + +/* + * Replace TR entry. + */ +static void rep_tr(thash_cb_t *hcb,thash_data_t *insert, int idx) +{ + thash_data_t *tr; + + if ( insert->cl == ISIDE_TLB ) { + tr = &ITR(hcb,idx); + } + else { + tr = &DTR(hcb,idx); + } + if ( !INVALID_TLB(tr) ) { + __rem_tr(hcb, tr); + } + __set_tr (tr, insert, idx); +} + +/* + * Remove TR entry. + */ +static void rem_tr(thash_cb_t *hcb,CACHE_LINE_TYPE cl, int idx) +{ + thash_data_t *tr; + + if ( cl == ISIDE_TLB ) { + tr = &ITR(hcb,idx); + } + else { + tr = &DTR(hcb,idx); + } + if ( !INVALID_TLB(tr) ) { + __rem_tr(hcb, tr); + } +} + +/* + * Delete a thash entry from the collision chain. + * prev: the previous entry. + * rem: the removed entry. + */ +static void __rem_chain(thash_cb_t *hcb/*, thash_data_t *prev*/, thash_data_t *rem) +{ + //prev->next = rem->next; + if ( hcb->remove_notifier ) { + (hcb->remove_notifier)(hcb,rem); + } + cch_free (hcb, rem); +} + +/* + * Delete a thash entry at the head of a collision chain. + */ +static void __rem_hash_head(thash_cb_t *hcb, thash_data_t *hash) +{ + thash_data_t *next=hash->next; + + if ( hcb->remove_notifier ) { + (hcb->remove_notifier)(hcb,hash); + } + if ( next != NULL ) { + *hash = *next; + cch_free (hcb, next); + } + else { + INVALIDATE_HASH(hcb, hash); + } +} + +thash_data_t *__vtr_lookup(thash_cb_t *hcb, + u64 rid, u64 va, + CACHE_LINE_TYPE cl) +{ + thash_data_t *tr; + int num,i; + + if ( cl == ISIDE_TLB ) { + tr = &ITR(hcb,0); + num = NITRS; + } + else { + tr = &DTR(hcb,0); + num = NDTRS; + } + for ( i=0; i<num; i++ ) { + if ( !INVALID_ENTRY(hcb,&tr[i]) && + __is_translated(&tr[i], rid, va, cl) ) + return &tr[i]; + } + return NULL; +} + + +/* + * Find an overlap VHPT entry within the current collision chain + * based on internal priv info. + */ +static inline thash_data_t* _vhpt_next_overlap_in_chain(thash_cb_t *hcb) +{ + thash_data_t *cch; + thash_internal_t *priv = &hcb->priv; + + + for (cch=priv->cur_cch; cch; cch = cch->next) { + if ( priv->tag == cch->etag ) { + return cch; + } + } + return NULL; +} + +/* + * Find an overlap TLB/VHPT entry within the current collision chain + * based on internal priv info. + */ +static thash_data_t *_vtlb_next_overlap_in_chain(thash_cb_t *hcb) +{ + thash_data_t *cch; + thash_internal_t *priv = &hcb->priv; + + /* Find overlap TLB entry */ + for (cch=priv->cur_cch; cch; cch = cch->next) { + if ( ((1UL<<cch->section) & priv->s_sect.v) && + __is_tlb_overlap(hcb, cch, priv->rid, priv->cl, + priv->_curva, priv->_eva) ) { + return cch; + } + } + return NULL; +} + +/* + * Get the machine format of VHPT entry.
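+ * (i.e. convert a guest TLB-format entry into an entry the hardware VHPT + * walker can consume, translating the guest ppn to a machine frame + * through the vs->get_mfn hook).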
+ * PARAS: + * 1: tlb: the tlb format hash entry being converted to VHPT. + * 2: va: the guest virtual address that must be covered by + * the translated machine VHPT. + * 3: vhpt: the machine format VHPT entry converted from tlb. + * NOTES: + * 1: In case the machine address space is discontiguous, + * "tlb" may need to be covered by several machine VHPT entries. + * va is used to choose one of them. + * 2: Foreign map is supported in this API. + * RETURN: + * 0: failure, 1: success. + * + */ +int __tlb_to_vhpt(thash_cb_t *hcb, + thash_data_t *tlb, u64 va, + thash_data_t *vhpt) +{ + u64 pages,mfn; + rr_t vrr; + + ASSERT ( hcb->ht == THASH_VHPT ); + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + pages = PSIZE(vrr.ps) >> PAGE_SHIFT; + mfn = (hcb->vs->get_mfn)(DOMID_SELF,tlb->ppn, pages); + if ( mfn == INVALID_MFN ) return 0; + + // TODO: handle the discontiguous machine address space issue. + vhpt->etag = (hcb->vs->tag_func)( hcb->pta, + tlb->vadr, tlb->rid, tlb->ps); + //vhpt->ti = 0; + vhpt->itir = tlb->itir & ~ITIR_RV_MASK; + vhpt->page_flags = tlb->page_flags & ~PAGE_FLAGS_RV_MASK; + vhpt->ppn = mfn; + vhpt->next = 0; + return 1; +} + + +/* + * Insert an entry into the hash table. + * NOTES: + * 1: A TLB entry may be TR, TC or Foreign Map. For a TR entry, + * itr[]/dtr[] need to be updated too. + * 2: Inserting into the collision chain may trigger recycling if + * the buffer for the collision chain is empty. + * 3: The new entry is inserted at the head of the collision chain, + * i.e. the hash table slot itself. + * 4: The buffer holding the entry is allocated internally + * from cch_buf or just in the hash table. + * 5: Return the entry in hash table or collision chain. + * 6: The input parameter, entry, should be in TLB format, + * i.e. it has va, rid, ps... + * 7: This API is invoked by emulating ITC/ITR and tlb_miss. + * + */ + +void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx) +{ + if ( hcb->ht != THASH_TLB || entry->section != THASH_TLB_TR ) { + panic("wrong parameter\n"); + } + entry->vadr = PAGEALIGN(entry->vadr,entry->ps); + entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); + rep_tr(hcb, entry, idx); + return ; +} + +thash_data_t *__alloc_chain(thash_cb_t *hcb,thash_data_t *entry) +{ + thash_data_t *cch; + + cch = cch_alloc(hcb); + if(cch == NULL){ + // recycle + if ( hcb->recycle_notifier ) { + hcb->recycle_notifier(hcb,(u64)entry); + } + thash_purge_all(hcb); + cch = cch_alloc(hcb); + } + return cch; +} + +/* + * Insert an entry into the hash TLB or VHPT. + * NOTES: + * 1: When inserting a VHPT entry, "va" must be an address covered + * by the inserted machine VHPT entry. + * 2: The format of the entry is always TLB. + * 3: The caller needs to make sure the new entry will not overlap + * with any existing entry. + */ +static void vtlb_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +{ + thash_data_t *hash_table, *cch; + rr_t vrr; + + hash_table = (hcb->hash_func)(hcb->pta, + va, entry->rid, entry->ps); + if( INVALID_ENTRY(hcb, hash_table) ) { + *hash_table = *entry; + hash_table->next = 0; + } + else { + // TODO: Add collision chain length limitation.
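+ // Collision-chain insertion sketch: the incoming entry always takes + // the in-table head slot; the previous head is copied into a freshly + // allocated chain node (cch) and linked in right behind it.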
+ cch = __alloc_chain(hcb,entry); + + *cch = *hash_table; + *hash_table = *entry; + hash_table->next = cch; + } + thash_insert (hcb->ts->vhpt, entry, va); + return ; +} + +static void vhpt_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +{ + thash_data_t *hash_table, *cch; + rr_t vrr; + + hash_table = (hcb->hash_func)(hcb->pta, + va, entry->rid, entry->ps); + if( INVALID_ENTRY(hcb, hash_table) ) { + if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) { + panic("Can't convert to machine VHPT entry\n"); + } + hash_table->next = 0; + } + else { + // TODO: Add collision chain length limitation. + cch = __alloc_chain(hcb,entry); + + *cch = *hash_table; + if ( !__tlb_to_vhpt(hcb, entry, va, hash_table) ) { + panic("Can't convert to machine VHPT entry\n"); + } + hash_table->next = cch; + } + return /*hash_table*/; +} + +void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va) +{ + thash_data_t *hash_table; + rr_t vrr; + + vrr = (hcb->get_rr_fn)(hcb->vcpu,entry->vadr); + if ( entry->ps != vrr.ps && entry->section==THASH_TLB_TC) { + panic("Multiple page sizes are not supported yet\n"); + } + entry->vadr = PAGEALIGN(entry->vadr,entry->ps); + entry->ppn = PAGEALIGN(entry->ppn, entry->ps-12); + (hcb->ins_hash)(hcb, entry, va); + +} + +static void rem_thash(thash_cb_t *hcb, thash_data_t *entry) +{ + thash_data_t *hash_table, *p, *q; + thash_internal_t *priv = &hcb->priv; + int idx; + + hash_table = priv->hash_base; + if ( hash_table == entry ) { + __rem_hash_head (hcb, entry); + return ; + } + // remove from collision chain + p = hash_table; + for ( q=p->next; q; q = p->next ) { + if ( q == entry ) { + p->next = q->next; + __rem_chain(hcb, entry); + return ; + } + p = q; + } + panic("Entry does not exist or bad removal sequence\n"); +} + +static void rem_vtlb(thash_cb_t *hcb, thash_data_t *entry) +{ + thash_data_t *hash_table, *p, *q; + thash_internal_t *priv = &hcb->priv; + int idx; + + if ( entry->section == THASH_TLB_TR ) { + return rem_tr(hcb, entry->cl, entry->tr_idx); + } + rem_thash(hcb, entry); +} + +int cch_depth=0; +/* + * Purge the collision chain starting from cch. + * NOTE: + * For those un-purgable entries (FM), this function will return + * the head of the remaining collision chain. + */ +static thash_data_t *thash_rem_cch(thash_cb_t *hcb, thash_data_t *cch) +{ + thash_data_t *next; + + if ( ++cch_depth > MAX_CCH_LENGTH ) { + printf ("cch length > MAX_CCH_LENGTH, exceeds the expected length\n"); + } + if ( cch -> next ) { + next = thash_rem_cch(hcb, cch->next); + } + else { + next = NULL; + } + if ( PURGABLE_ENTRY(hcb, cch) ) { + __rem_chain(hcb, cch); + return next; + } + else { + cch->next = next; + return cch; + } +} + +/* + * Purge one hash line (including the entry in the hash table). + * Can only be called by thash_purge_all. + * Input: + * hash: The head of collision chain (hash table) + * + */ +static void thash_rem_line(thash_cb_t *hcb, thash_data_t *hash) +{ + if ( INVALID_ENTRY(hcb, hash) ) return; + + if ( hash->next ) { + cch_depth = 0; + hash->next = thash_rem_cch(hcb, hash->next); + } + // Then the hash table entry itself. + if ( PURGABLE_ENTRY(hcb, hash) ) { + __rem_hash_head(hcb, hash); + } +} + + +/* + * Find an overlap entry in the hash table and its collision chain. + * Refer to SDM2 4.1.1.4 for the overlap definition. + * PARAS: + * 1: in: TLB format entry, rid:ps must be the same as vrr[]. + * va & ps identify the address space for overlap lookup + * 2: section can be a combination of TR, TC and FM (THASH_SECTION_XX). + * 3: cl means I side or D side. + * RETURNS: + * NULL to indicate the end of findings.
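+ * Non-NULL: the first overlapping entry; callers walk the rest + * through (hcb->next_overlap)(hcb).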
+ * NOTES: + * + */ +thash_data_t *thash_find_overlap(thash_cb_t *hcb, + thash_data_t *in, search_section_t s_sect) +{ + return (hcb->find_overlap)(hcb, in->vadr, + in->ps, in->rid, in->cl, s_sect); +} + +static thash_data_t *vtlb_find_overlap(thash_cb_t *hcb, + u64 va, u64 ps, int rid, char cl, search_section_t s_sect) +{ + thash_data_t *hash_table; + thash_internal_t *priv = &hcb->priv; + u64 tag; + rr_t vrr; + + priv->_curva = PAGEALIGN(va,ps); + priv->_eva = priv->_curva + PSIZE(ps); + priv->rid = rid; + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + priv->ps = vrr.ps; + hash_table = (hcb->hash_func)(hcb->pta, + priv->_curva, rid, priv->ps); + + priv->s_sect = s_sect; + priv->cl = cl; + priv->_tr_idx = 0; + priv->hash_base = hash_table; + priv->cur_cch = hash_table; + return (hcb->next_overlap)(hcb); +} + +static thash_data_t *vhpt_find_overlap(thash_cb_t *hcb, + u64 va, u64 ps, int rid, char cl, search_section_t s_sect) +{ + thash_data_t *hash_table; + thash_internal_t *priv = &hcb->priv; + u64 tag; + rr_t vrr; + + priv->_curva = PAGEALIGN(va,ps); + priv->_eva = priv->_curva + PSIZE(ps); + priv->rid = rid; + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + priv->ps = vrr.ps; + hash_table = (hcb->hash_func)( hcb->pta, + priv->_curva, rid, priv->ps); + tag = (hcb->vs->tag_func)( hcb->pta, + priv->_curva, rid, priv->ps); + + priv->tag = tag; + priv->hash_base = hash_table; + priv->cur_cch = hash_table; + return (hcb->next_overlap)(hcb); +} + + +static thash_data_t *vtr_find_next_overlap(thash_cb_t *hcb) +{ + thash_data_t *tr; + thash_internal_t *priv = &hcb->priv; + int num; + + if ( priv->cl == ISIDE_TLB ) { + num = NITRS; + tr = &ITR(hcb,0); + } + else { + num = NDTRS; + tr = &DTR(hcb,0); + } + for (; priv->_tr_idx < num; priv->_tr_idx ++ ) { + if ( __is_tlb_overlap(hcb, &tr[priv->_tr_idx], + priv->rid, priv->cl, + priv->_curva, priv->_eva) ) { + return &tr[priv->_tr_idx++]; + } + } + return NULL; +} + +/* + * Similar to vtlb_find_overlap, but returns the next overlapping entry + * instead of starting a new search. + * NOTES: + * Intermediate position information is stored in hcb->priv.
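+ * + * Typical caller pattern (a sketch of how the find/next pair is used, + * mirroring thash_purge_entries_ex below): + * + * ovl = (hcb->find_overlap)(hcb, va, ps, rid, cl, s_sect); + * while ( ovl != NULL ) { + * ... act on ovl ... + * ovl = (hcb->next_overlap)(hcb); + * }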
+ */ +static thash_data_t *vtlb_next_overlap(thash_cb_t *hcb) +{ + thash_data_t *ovl; + thash_internal_t *priv = &hcb->priv; + u64 addr,rr_psize; + rr_t vrr; + + if ( priv->s_sect.tr ) { + ovl = vtr_find_next_overlap (hcb); + if ( ovl ) return ovl; + priv->s_sect.tr = 0; + } + if ( priv->s_sect.v == 0 ) return NULL; + vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva); + rr_psize = PSIZE(vrr.ps); + + while ( priv->_curva < priv->_eva ) { + if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { + ovl = _vtlb_next_overlap_in_chain(hcb); + if ( ovl ) { + priv->cur_cch = ovl->next; + return ovl; + } + } + priv->_curva += rr_psize; + priv->hash_base = (hcb->hash_func)( hcb->pta, + priv->_curva, priv->rid, priv->ps); + priv->cur_cch = priv->hash_base; + } + return NULL; +} + +static thash_data_t *vhpt_next_overlap(thash_cb_t *hcb) +{ + thash_data_t *ovl; + thash_internal_t *priv = &hcb->priv; + u64 addr,rr_psize; + rr_t vrr; + + vrr = (hcb->get_rr_fn)(hcb->vcpu,priv->_curva); + rr_psize = PSIZE(vrr.ps); + + while ( priv->_curva < priv->_eva ) { + if ( !INVALID_ENTRY(hcb, priv->hash_base) ) { + ovl = _vhpt_next_overlap_in_chain(hcb); + if ( ovl ) { + priv->cur_cch = ovl->next; + return ovl; + } + } + priv->_curva += rr_psize; + priv->hash_base = (hcb->hash_func)( hcb->pta, + priv->_curva, priv->rid, priv->ps); + priv->tag = (hcb->vs->tag_func)( hcb->pta, + priv->_curva, priv->rid, priv->ps); + priv->cur_cch = priv->hash_base; + } + return NULL; +} + + +/* + * Find and purge overlap entries in the hash table and its collision chain. + * PARAS: + * 1: in: TLB format entry, rid:ps must be the same as vrr[]. + * rid, va & ps identify the address space for the purge + * 2: section can be a combination of TR, TC and FM (THASH_SECTION_XX). + * 3: cl means I side or D side. + * NOTES: + * + */ +void thash_purge_entries(thash_cb_t *hcb, + thash_data_t *in, search_section_t p_sect) +{ + return thash_purge_entries_ex(hcb, in->rid, in->vadr, + in->ps, p_sect, in->cl); +} + +void thash_purge_entries_ex(thash_cb_t *hcb, + u64 rid, u64 va, u64 ps, + search_section_t p_sect, + CACHE_LINE_TYPE cl) +{ + thash_data_t *ovl; + + ovl = (hcb->find_overlap)(hcb, va, ps, rid, cl, p_sect); + while ( ovl != NULL ) { + (hcb->rem_hash)(hcb, ovl); + ovl = (hcb->next_overlap)(hcb); + }; +} + + +/* + * Purge all TC or VHPT entries, including those in the hash table. + * + */ + +// TODO: add sections. +void thash_purge_all(thash_cb_t *hcb) +{ + thash_data_t *hash_table; + +#ifdef VTLB_DEBUG + extern u64 sanity_check; + static u64 statistics_before_purge_all=0; + if ( statistics_before_purge_all ) { + sanity_check = 1; + check_vtlb_sanity(hcb); + } +#endif + + hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); + + for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { + thash_rem_line(hcb, hash_table); + } +} + + +/* + * Lookup the hash table and its collision chain to find an entry + * covering this address rid:va. + * + * INPUT: + * in: TLB format for both VHPT & TLB.
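+ * + * Lookup order: the fixed TR arrays are searched first (via + * __vtr_lookup), then the hash line and its collision chain. E.g.: + * + * entry = vtlb_lookup_ex(hcb, rid, va, ISIDE_TLB);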
+ */ +thash_data_t *vtlb_lookup(thash_cb_t *hcb, + thash_data_t *in) +{ + return vtlb_lookup_ex(hcb, in->rid, in->vadr, in->cl); +} + +thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb, + u64 rid, u64 va, + CACHE_LINE_TYPE cl) +{ + thash_data_t *hash_table, *cch; + u64 tag; + rr_t vrr; + + ASSERT ( hcb->ht == THASH_VTLB ); + + cch = __vtr_lookup(hcb, rid, va, cl);; + if ( cch ) return cch; + + vrr = (hcb->get_rr_fn)(hcb->vcpu,va); + hash_table = (hcb->hash_func)( hcb->pta,va, rid, vrr.ps); + + if ( INVALID_ENTRY(hcb, hash_table ) ) + return NULL; + + + for (cch=hash_table; cch; cch = cch->next) { + if ( __is_translated(cch, rid, va, cl) ) + return cch; + } + return NULL; +} + + +/* + * Notifier when TLB is deleted from hash table and its collision chain. + * NOTES: + * The typical situation is that TLB remove needs to inform + * VHPT to remove too. + * PARAS: + * 1: hcb is TLB object. + * 2: The format of entry is always in TLB. + * + */ +void tlb_remove_notifier(thash_cb_t *hcb, thash_data_t *entry) +{ + thash_cb_t *vhpt; + search_section_t s_sect; + + s_sect.v = 0; + thash_purge_entries(hcb->ts->vhpt, entry, s_sect); + machine_tlb_purge(entry->rid, entry->vadr, entry->ps); +} + +/* + * Initialize internal control data before service. + */ +void thash_init(thash_cb_t *hcb, u64 sz) +{ + thash_data_t *hash_table; + + cch_mem_init (hcb); + hcb->magic = THASH_CB_MAGIC; + hcb->pta.val = hcb->hash; + hcb->pta.vf = 1; + hcb->pta.ve = 1; + hcb->pta.size = sz; + hcb->get_rr_fn = vmmu_get_rr; + ASSERT ( hcb->hash_sz % sizeof(thash_data_t) == 0 ); + if ( hcb->ht == THASH_TLB ) { + hcb->remove_notifier = tlb_remove_notifier; + hcb->find_overlap = vtlb_find_overlap; + hcb->next_overlap = vtlb_next_overlap; + hcb->rem_hash = rem_vtlb; + hcb->ins_hash = vtlb_insert; + __init_tr(hcb); + } + else { + hcb->remove_notifier = NULL; + hcb->find_overlap = vhpt_find_overlap; + hcb->next_overlap = vhpt_next_overlap; + hcb->rem_hash = rem_thash; + hcb->ins_hash = vhpt_insert; + } + hash_table = (thash_data_t*)((u64)hcb->hash + hcb->hash_sz); + + for (--hash_table;(u64)hash_table >= (u64)hcb->hash;hash_table--) { + INVALIDATE_HASH(hcb,hash_table); + } +} + + +#ifdef VTLB_DEBUG +static u64 cch_length_statistics[MAX_CCH_LENGTH+1]; +u64 sanity_check=0; +u64 vtlb_chain_sanity(thash_cb_t *vtlb, thash_cb_t *vhpt, thash_data_t *hash) +{ + thash_data_t *cch; + thash_data_t *ovl; + search_section_t s_sect; + u64 num=0; + + s_sect.v = 0; + for (cch=hash; cch; cch=cch->next) { + ovl = thash_find_overlap(vhpt, cch, s_sect); + while ( ovl != NULL ) { + ovl->checked = 1; + ovl = (vhpt->next_overlap)(vhpt); + }; + num ++; + } + if ( num >= MAX_CCH_LENGTH ) { + cch_length_statistics[MAX_CCH_LENGTH] ++; + } + else { + cch_length_statistics[num] ++; + } + return num; +} + +void check_vtlb_sanity(thash_cb_t *vtlb) +{ +// struct pfn_info *page; + u64 hash_num, i, psr; + static u64 check_ok_num, check_fail_num,check_invalid; +// void *vb1, *vb2; + thash_data_t *hash, *cch; + thash_data_t *ovl; + search_section_t s_sect; + thash_cb_t *vhpt = vtlb->ts->vhpt; + u64 invalid_ratio; + + if ( sanity_check == 0 ) return; + sanity_check --; + s_sect.v = 0; +// page = alloc_domheap_pages (NULL, VCPU_TLB_ORDER); +// if ( page == NULL ) { +// panic("No enough contiguous memory for init_domain_mm\n"); +// }; +// vb1 = page_to_virt(page); +// printf("Allocated page=%lp vbase=%lp\n", page, vb1); +// vb2 = vb1 + vtlb->hash_sz; + hash_num = vhpt->hash_sz / sizeof(thash_data_t); +// printf("vb2=%lp, size=%lx hash_num=%lx\n", vb2, vhpt->hash_sz, hash_num); + 
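+ /* + * Sanity-check strategy (three passes): clear the 'checked' flag on + * every VHPT entry, walk every VTLB chain (and the TR arrays) marking + * the VHPT entries they map, then report any VHPT entry left unmarked. + */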
printf("vtlb=%lp, hash=%lp size=0x%lx; vhpt=%lp, hash=%lp size=0x%lx\n", + vtlb, vtlb->hash,vtlb->hash_sz, + vhpt, vhpt->hash, vhpt->hash_sz); + //memcpy(vb1, vtlb->hash, vtlb->hash_sz); + //memcpy(vb2, vhpt->hash, vhpt->hash_sz); + for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) { + cch_length_statistics[i] = 0; + } + + local_irq_save(psr); + + hash = vhpt->hash; + for (i=0; i < hash_num; i++) { + if ( !INVALID_ENTRY(vhpt, hash) ) { + for ( cch= hash; cch; cch=cch->next) { + cch->checked = 0; + } + } + hash ++; + } + printf("Done vhpt clear checked flag, hash_num=0x%lx\n", hash_num); + check_invalid = 0; + check_ok_num=0; + hash = vtlb->hash; + for ( i=0; i< hash_num; i++ ) { + if ( !INVALID_ENTRY(vtlb, hash) ) { + check_ok_num += vtlb_chain_sanity(vtlb, vhpt, hash); + } + else { + check_invalid++; + } + hash ++; + } + printf("Done vtlb entry check, hash=%lp\n", hash); + printf("check_ok_num = 0x%lx check_invalid=0x%lx\n", check_ok_num,check_invalid); + invalid_ratio = 1000*check_invalid / hash_num; + printf("%02ld.%01ld%% entries are invalid\n", + invalid_ratio/10, invalid_ratio % 10 ); + for (i=0; i<NDTRS; i++) { + ovl = thash_find_overlap(vhpt, &vtlb->ts->dtr[i], s_sect); + while ( ovl != NULL ) { + ovl->checked = 1; + ovl = (vhpt->next_overlap)(vhpt); + }; + } + printf("Done dTR\n"); + for (i=0; i<NITRS; i++) { + ovl = thash_find_overlap(vhpt, &vtlb->ts->itr[i], s_sect); + while ( ovl != NULL ) { + ovl->checked = 1; + ovl = (vhpt->next_overlap)(vhpt); + }; + } + printf("Done iTR\n"); + check_fail_num = 0; + check_invalid = 0; + check_ok_num=0; + hash = vhpt->hash; + for (i=0; i < hash_num; i++) { + if ( !INVALID_ENTRY(vhpt, hash) ) { + for ( cch= hash; cch; cch=cch->next) { + if ( !cch->checked ) { + printf ("!!!Hash=%lp cch=%lp not within vtlb\n", hash, cch); + check_fail_num ++; + } + else { + check_ok_num++; + } + } + } + else { + check_invalid ++; + } + hash ++; + } + local_irq_restore(psr); + printf("check_ok_num=0x%lx check_fail_num=0x%lx check_invalid=0x%lx\n", + check_ok_num, check_fail_num, check_invalid); + //memcpy(vtlb->hash, vb1, vtlb->hash_sz); + //memcpy(vhpt->hash, vb2, vhpt->hash_sz); + printf("The statistics of collision chain length is listed\n"); + for ( i=0; i < sizeof(cch_length_statistics)/sizeof(cch_length_statistics[0]); i++ ) { + printf("CCH length=%02ld, chain number=%ld\n", i, cch_length_statistics[i]); + } +// free_domheap_pages(page, VCPU_TLB_ORDER); + printf("Done check_vtlb\n"); +} + +void dump_vtlb(thash_cb_t *vtlb) +{ + static u64 dump_vtlb=0; + thash_data_t *hash, *cch, *tr; + u64 hash_num,i; + + if ( dump_vtlb == 0 ) return; + dump_vtlb --; + hash_num = vtlb->hash_sz / sizeof(thash_data_t); + hash = vtlb->hash; + + printf("Dump vTC\n"); + for ( i = 0; i < hash_num; i++ ) { + if ( !INVALID_ENTRY(vtlb, hash) ) { + printf("VTLB at hash=%lp\n", hash); + for (cch=hash; cch; cch=cch->next) { + printf("Entry %lp va=%lx ps=%lx rid=%lx\n", + cch, cch->vadr, cch->ps, cch->rid); + } + } + hash ++; + } + printf("Dump vDTR\n"); + for (i=0; i<NDTRS; i++) { + tr = &DTR(vtlb,i); + printf("Entry %lp va=%lx ps=%lx rid=%lx\n", + tr, tr->vadr, tr->ps, tr->rid); + } + printf("Dump vITR\n"); + for (i=0; i<NITRS; i++) { + tr = &ITR(vtlb,i); + printf("Entry %lp va=%lx ps=%lx rid=%lx\n", + tr, tr->vadr, tr->ps, tr->rid); + } + printf("End of vTLB dump\n"); +} +#endif diff --git a/xen/arch/ia64/xenmem.c b/xen/arch/ia64/xenmem.c index c3f255fd51..a009037b90 100644 --- a/xen/arch/ia64/xenmem.c +++ b/xen/arch/ia64/xenmem.c @@ -27,15 +27,47 
@@ static unsigned long num_dma_physpages; /* * Set up the page tables. */ +#ifdef CONFIG_VTI +unsigned long *mpt_table; +unsigned long mpt_table_size; +#endif void paging_init (void) { struct pfn_info *pg; + +#ifdef CONFIG_VTI + unsigned int mpt_order; + /* Create the machine to physical mapping table + * NOTE: similar to the frame table, later we may need a virtually + * mapped mpt table if a large hole exists. Also MAX_ORDER needs + * to be changed in common code, which only supports 16M so far + */ + mpt_table_size = max_page * sizeof(unsigned long); + mpt_order = get_order(mpt_table_size); + ASSERT(mpt_order <= MAX_ORDER); + if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL) + panic("Not enough memory to bootstrap Xen.\n"); + + printk("machine to physical table: 0x%lx\n", (u64)mpt_table); + memset(mpt_table, 0x55, mpt_table_size); + + /* Any more setup here? On a VMX enabled platform, + * there's no need to keep a guest linear pg table, + * nor a read only mpt table. The MAP cache is not used + * in this stage, and later it will be in region 5. + * IO remap is in region 6 with identity mapping. + */ + /* HV_tlb_init(); */ + +#else // CONFIG_VTI + /* Allocate and map the machine-to-phys table */ if ((pg = alloc_domheap_pages(NULL, 10)) == NULL) panic("Not enough memory to bootstrap Xen.\n"); memset(page_to_virt(pg), 0x55, 16UL << 20); +#endif // CONFIG_VTI /* Other mapping setup */ diff --git a/xen/arch/ia64/xenmisc.c b/xen/arch/ia64/xenmisc.c index 2f5562c46a..73792debcc 100644 --- a/xen/arch/ia64/xenmisc.c +++ b/xen/arch/ia64/xenmisc.c @@ -95,6 +95,10 @@ int reprogram_ac_timer(s_time_t timeout) { struct exec_domain *ed = current; +#ifdef CONFIG_VTI + if (VMX_DOMAIN(ed)) + return 1; +#endif // CONFIG_VTI local_cpu_data->itm_next = timeout; if (is_idle_task(ed->domain)) vcpu_safe_set_itm(timeout); else vcpu_set_next_timer(current); @@ -238,7 +242,22 @@ void context_switch(struct exec_domain *prev, struct exec_domain *next) //if (prev->domain->id == 1 && next->domain->id == 0) cs10foo(); //if (prev->domain->id == 0 && next->domain->id == 1) cs01foo(); //printk("@@sw %d->%d\n",prev->domain->id,next->domain->id); +#ifdef CONFIG_VTI + unsigned long psr; + /* Interrupts are enabled after the next task is chosen, + * so we have to disable them across the stack switch.
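+ * The bracketing is: vtm_domain_out(prev) before switch_to(), then + * vtm_domain_in(current) and local_irq_restore(psr) once the new + * stack is live.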
+ */ + local_irq_save(psr); + vtm_domain_out(prev); + /* Housekeeping for prev domain */ +#endif // CONFIG_VTI + + switch_to(prev,next,prev); +#ifdef CONFIG_VTI + /* Post-setup for new domain */ + vtm_domain_in(current); + local_irq_restore(psr); +#endif // CONFIG_VTI // leave this debug for now: it acts as a heartbeat when more than // one domain is active { @@ -251,8 +270,14 @@ if (!i--) { printk("+",id); cnt[id] = 100; } clear_bit(EDF_RUNNING, &prev->flags); //if (!is_idle_task(next->domain) ) //send_guest_virq(next, VIRQ_TIMER); +#ifdef CONFIG_VTI + if (VMX_DOMAIN(current)) + vmx_load_all_rr(current); + return; +#else // CONFIG_VTI load_region_regs(current); if (vcpu_timer_expired(current)) vcpu_pend_timer(current); +#endif // CONFIG_VTI } void continue_running(struct exec_domain *same) diff --git a/xen/arch/ia64/xensetup.c b/xen/arch/ia64/xensetup.c index ec864e1ef2..b01cf0a369 100644 --- a/xen/arch/ia64/xensetup.c +++ b/xen/arch/ia64/xensetup.c @@ -169,6 +169,11 @@ void start_kernel(void) printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n", xen_pstart, xenheap_phys_end); +#ifdef CONFIG_VTI + /* If we want to enable the vhpt for all regions, related initialization + * for the HV TLB must be done before the first TLB miss + */ +#endif // CONFIG_VTI /* Find next hole */ firsthole_start = 0; efi_memmap_walk(xen_find_first_hole, &firsthole_start); @@ -198,7 +203,13 @@ void start_kernel(void) efi_memmap_walk(find_max_pfn, &max_page); printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); - heap_start = memguard_init(&_end); +#ifdef CONFIG_VTI + /* Only up to 64G of physical memory is supported so far */ + if (max_page > (0x1000000000UL / PAGE_SIZE)) + panic("Memory larger than 64G is not supported\n"); +#endif // CONFIG_VTI + + heap_start = memguard_init(ia64_imva(&_end)); printf("Before heap_start: 0x%lx\n", heap_start); heap_start = __va(init_boot_allocator(__pa(heap_start))); printf("After heap_start: 0x%lx\n", heap_start); @@ -235,6 +246,9 @@ printk("About to call scheduler_init()\n"); local_irq_disable(); printk("About to call xen_time_init()\n"); xen_time_init(); +#ifdef CONFIG_VTI + init_xen_time(); /* initialise the time */ +#endif // CONFIG_VTI printk("About to call ac_timer_init()\n"); ac_timer_init(); // init_xen_time(); ??? @@ -274,6 +288,8 @@ printk("About to call init_idle_task()\n"); * above our heap. The second module, if present, is an initrd ramdisk. */ printk("About to call construct_dom0()\n"); + dom0_memory_start = __va(ia64_boot_param->initrd_start); + dom0_memory_end = ia64_boot_param->initrd_size; if ( construct_dom0(dom0, dom0_memory_start, dom0_memory_end, 0, 0, diff --git a/xen/arch/ia64/xentime.c b/xen/arch/ia64/xentime.c index a3790b4577..22aa437ebe 100644 --- a/xen/arch/ia64/xentime.c +++ b/xen/arch/ia64/xentime.c @@ -29,6 +29,7 @@ #ifdef XEN #include <linux/jiffies.h> // not included by xen/sched.h #endif +#include <xen/softirq.h> #define TIME_KEEPER_ID 0 extern unsigned long wall_jiffies; @@ -37,10 +38,39 @@ static s_time_t stime_irq; /* System time at last 'time update' */ unsigned long domain0_ready = 0; +#ifndef CONFIG_VTI static inline u64 get_time_delta(void) { return ia64_get_itc(); } +#else // CONFIG_VTI +static s_time_t stime_irq = 0x0; /* System time at last 'time update' */ +unsigned long itc_scale; +unsigned long itc_at_irq; +static unsigned long wc_sec, wc_usec; /* UTC time at last 'time update'.
*/ +//static rwlock_t time_lock = RW_LOCK_UNLOCKED; +static irqreturn_t vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs); + +static inline u64 get_time_delta(void) +{ + s64 delta_itc; + u64 delta, cur_itc; + + cur_itc = ia64_get_itc(); + + delta_itc = (s64)(cur_itc - itc_at_irq); + if ( unlikely(delta_itc < 0) ) delta_itc = 0; + delta = ((u64)delta_itc) * itc_scale; + delta = delta >> 32; + + return delta; +} + +u64 tick_to_ns(u64 tick) +{ + return (tick * itc_scale) >> 32; +} +#endif // CONFIG_VTI s_time_t get_s_time(void) { @@ -74,9 +104,32 @@ void update_dom_time(struct exec_domain *ed) /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) { +#ifdef CONFIG_VTI + s64 delta; + long _usecs = (long)usecs; + + write_lock_irq(&xtime_lock); + + delta = (s64)(stime_irq - system_time_base); + + _usecs += (long)(delta/1000); + while ( _usecs >= 1000000 ) + { + _usecs -= 1000000; + secs++; + } + + wc_sec = secs; + wc_usec = _usecs; + + write_unlock_irq(&xtime_lock); + + update_dom_time(current->domain); +#else // FIXME: Should this be do_settimeofday (from linux)??? printf("do_settime: called, not implemented, stopping\n"); dummy(); +#endif } irqreturn_t @@ -200,7 +253,11 @@ xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) } static struct irqaction xen_timer_irqaction = { +#ifdef CONFIG_VTI + .handler = vmx_timer_interrupt, +#else // CONFIG_VTI .handler = xen_timer_interrupt, +#endif // CONFIG_VTI #ifndef XEN .flags = SA_INTERRUPT, #endif @@ -213,3 +270,111 @@ xen_time_init (void) register_percpu_irq(IA64_TIMER_VECTOR, &xen_timer_irqaction); ia64_init_itm(); } + + +#ifdef CONFIG_VTI + +/* Late init function (after all CPUs are booted). */ +int __init init_xen_time() +{ + struct timespec tm; + + itc_scale = 1000000000UL << 32; + itc_scale /= local_cpu_data->itc_freq; + + /* System time ticks from zero. */ + stime_irq = (s_time_t)0; + itc_at_irq = ia64_get_itc(); + + /* Wallclock time starts as the initial RTC time. */ + efi_gettimeofday(&tm); + wc_sec = tm.tv_sec; + wc_usec = tm.tv_nsec/1000; + + + printk("Time init:\n"); + printk(".... System Time: %ldns\n", NOW()); + printk(".... scale: %16lX\n", itc_scale); + printk(".... Wall Clock: %lds %ldus\n", wc_sec, wc_usec); + + return 0; +} + +static irqreturn_t +vmx_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + unsigned long new_itm; + struct exec_domain *ed = current; + + + new_itm = local_cpu_data->itm_next; + + if (!time_after(ia64_get_itc(), new_itm)) + return IRQ_NONE; /* not yet time for a tick */ + + while (1) { +#ifdef CONFIG_SMP + /* + * For UP, this is done in do_timer(). Weird, but + * fixing that would require updates to all + * platforms. + */ + update_process_times(user_mode(ed, regs)); +#endif + new_itm += local_cpu_data->itm_delta; + + if (smp_processor_id() == TIME_KEEPER_ID) { + /* + * Here we are in the timer irq handler. We have irqs locally + * disabled, but we don't know if the timer_bh is running on + * another CPU. We need to avoid an SMP race by acquiring the + * xtime_lock. + */ + local_cpu_data->itm_next = new_itm; + + write_lock_irq(&xtime_lock); + /* Update jiffies counter. */ + (*(unsigned long *)&jiffies_64)++; + + /* Update wall time. */ + wc_usec += 1000000/HZ; + if ( wc_usec >= 1000000 ) + { + wc_usec -= 1000000; + wc_sec++; + } + + /* Update system time (nanoseconds since boot).
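+ * stime_irq advances one tick period per loop iteration; between ticks, + * get_time_delta() above interpolates using itc_scale, a 32.32 + * fixed-point ns-per-cycle factor. E.g. with a 400MHz ITC, itc_scale = + * (10^9 << 32) / (4 * 10^8) = 2.5 * 2^32, so 400 ITC cycles scale to + * (400 * itc_scale) >> 32 = 1000ns.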
*/ + stime_irq += MILLISECS(1000/HZ); + itc_at_irq = ia64_get_itc(); + + write_unlock_irq(&xtime_lock); + + } else + local_cpu_data->itm_next = new_itm; + + if (time_after(new_itm, ia64_get_itc())) + break; + } + + do { + /* + * If we're too close to the next clock tick for + * comfort, we increase the safety margin by + * intentionally dropping the next tick(s). We do NOT + * update itm.next because that would force us to call + * do_timer() which in turn would let our clock run + * too fast (with the potentially devastating effect + * of losing monotony of time). + */ + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) + new_itm += local_cpu_data->itm_delta; + ia64_set_itm(new_itm); + /* double check, in case we got hit by a (slow) PMI: */ + } while (time_after_eq(ia64_get_itc(), new_itm)); + raise_softirq(AC_TIMER_SOFTIRQ); + + return IRQ_HANDLED; +} +#endif // CONFIG_VTI + diff --git a/xen/common/elf.c b/xen/common/elf.c index 98f9565e2d..42c88b0d96 100644 --- a/xen/common/elf.c +++ b/xen/common/elf.c @@ -88,9 +88,12 @@ int parseelfimage(struct domain_setup_info *dsi) if ( guestinfo == NULL ) { printk("Not a Xen-ELF image: '__xen_guest' section not found.\n"); + dsi->xen_elf_image = 0; #if FORCE_XENELF_IMAGE return -EINVAL; #endif + } else { + dsi->xen_elf_image = 1; } for ( h = 0; h < ehdr->e_phnum; h++ ) diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h index 9b656e493b..4c7371cb3e 100644 --- a/xen/include/asm-ia64/config.h +++ b/xen/include/asm-ia64/config.h @@ -1,3 +1,6 @@ +#ifndef _IA64_CONFIG_H_ +#define _IA64_CONFIG_H_ + // control flags for turning on/off features under test #undef CLONE_DOMAIN0 //#define CLONE_DOMAIN0 1 @@ -40,6 +43,8 @@ typedef int pid_t; extern unsigned long xenheap_phys_end; extern unsigned long xen_pstart; extern unsigned long xenheap_size; +extern unsigned long dom0_start; +extern unsigned long dom0_size; // from linux/include/linux/mm.h extern struct page *mem_map; @@ -198,7 +203,11 @@ void sort_main_extable(void); #define NO_UART_CONFIG_OK // see drivers/char/console.c +#ifndef CONFIG_VTI #define OPT_CONSOLE_STR "com1" +#else // CONFIG_VTI +#define OPT_CONSOLE_STR "com2" +#endif // CONFIG_VTI #define __attribute_used__ __attribute__ ((unused)) @@ -312,3 +321,5 @@ extern unsigned int watchdog_on; #else # define __attribute_used__ __attribute__((__unused__)) #endif + +#endif /* _IA64_CONFIG_H_ */ diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h index 7b6758078e..25d580431e 100644 --- a/xen/include/asm-ia64/domain.h +++ b/xen/include/asm-ia64/domain.h @@ -2,6 +2,11 @@ #define __ASM_DOMAIN_H__ #include <linux/thread_info.h> +#ifdef CONFIG_VTI +#include <asm/vmx_vpd.h> +#include <asm/vmmu.h> +#include <asm/regionreg.h> +#endif // CONFIG_VTI extern void arch_do_createdomain(struct exec_domain *); @@ -10,6 +15,14 @@ extern int arch_final_setup_guestos( extern void domain_relinquish_resources(struct domain *); +#ifdef CONFIG_VTI +struct trap_bounce { + // TO add, FIXME Eddie +}; + +#define PMT_SIZE (32L*1024*1024) // 32M for PMT +#endif // CONFIG_VTI + struct arch_domain { struct mm_struct *active_mm; struct mm_struct *mm; @@ -18,6 +31,12 @@ struct arch_domain { int ending_rid; /* one beyond highest RID assigned to domain */ int rid_bits; /* number of virtual rid bits (default: 18) */ int breakimm; +#ifdef CONFIG_VTI + int imp_va_msb; + ia64_rr emul_phy_rr0; + ia64_rr emul_phy_rr4; + u64 *pmt; /* physical to machine table */ +#endif //CONFIG_VTI u64 xen_vastart; u64 xen_vaend; u64 
shared_info_va; @@ -57,6 +76,16 @@ struct arch_exec_domain { void *regs; /* temporary until find a better way to do privops */ struct mm_struct *active_mm; struct thread_struct _thread; // this must be last +#ifdef CONFIG_VTI + void (*schedule_tail) (struct exec_domain *); + struct trap_bounce trap_bounce; + thash_cb_t *vtlb; + //for phycial emulation + unsigned long old_rsc; + int mode_flags; + + struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */ +#endif // CONFIG_VTI }; #define active_mm arch.active_mm diff --git a/xen/include/asm-ia64/gcc_intrin.h b/xen/include/asm-ia64/gcc_intrin.h new file mode 100644 index 0000000000..a87d8588f7 --- /dev/null +++ b/xen/include/asm-ia64/gcc_intrin.h @@ -0,0 +1,657 @@ +#ifndef _ASM_IA64_GCC_INTRIN_H +#define _ASM_IA64_GCC_INTRIN_H +/* + * + * Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com> + * Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com> + */ + +#include <linux/compiler.h> + +/* define this macro to get some asm stmts included in 'c' files */ +#define ASM_SUPPORTED + +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define ia64_barrier() asm volatile ("":::"memory") + +#define ia64_stop() asm volatile (";;"::) + +#define ia64_invala_gr(regnum) asm volatile ("invala.e r%0" :: "i"(regnum)) + +#define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) + +extern void ia64_bad_param_for_setreg (void); +extern void ia64_bad_param_for_getreg (void); + +register unsigned long ia64_r13 asm ("r13") __attribute_used__; + +#define ia64_setreg(regnum, val) \ +({ \ + switch (regnum) { \ + case _IA64_REG_PSR_L: \ + asm volatile ("mov psr.l=%0" :: "r"(val) : "memory"); \ + break; \ + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ + asm volatile ("mov ar%0=%1" :: \ + "i" (regnum - _IA64_REG_AR_KR0), \ + "r"(val): "memory"); \ + break; \ + case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1: \ + asm volatile ("mov cr%0=%1" :: \ + "i" (regnum - _IA64_REG_CR_DCR), \ + "r"(val): "memory" ); \ + break; \ + case _IA64_REG_SP: \ + asm volatile ("mov r12=%0" :: \ + "r"(val): "memory"); \ + break; \ + case _IA64_REG_GP: \ + asm volatile ("mov gp=%0" :: "r"(val) : "memory"); \ + break; \ + default: \ + ia64_bad_param_for_setreg(); \ + break; \ + } \ +}) + +#define ia64_getreg(regnum) \ +({ \ + __u64 ia64_intri_res; \ + \ + switch (regnum) { \ + case _IA64_REG_GP: \ + asm volatile ("mov %0=gp" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_IP: \ + asm volatile ("mov %0=ip" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_PSR: \ + asm volatile ("mov %0=psr" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_TP: /* for current() */ \ + ia64_intri_res = ia64_r13; \ + break; \ + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ + asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res) \ + : "i"(regnum - _IA64_REG_AR_KR0)); \ + break; \ + case _IA64_REG_CR_DCR ... 
_IA64_REG_CR_LRR1: \ + asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res) \ + : "i" (regnum - _IA64_REG_CR_DCR)); \ + break; \ + case _IA64_REG_SP: \ + asm volatile ("mov %0=sp" : "=r" (ia64_intri_res)); \ + break; \ + default: \ + ia64_bad_param_for_getreg(); \ + break; \ + } \ + ia64_intri_res; \ +}) + +#define ia64_hint_pause 0 + +#define ia64_hint(mode) \ +({ \ + switch (mode) { \ + case ia64_hint_pause: \ + asm volatile ("hint @pause" ::: "memory"); \ + break; \ + } \ +}) + + +/* Integer values for mux1 instruction */ +#define ia64_mux1_brcst 0 +#define ia64_mux1_mix 8 +#define ia64_mux1_shuf 9 +#define ia64_mux1_alt 10 +#define ia64_mux1_rev 11 + +#define ia64_mux1(x, mode) \ +({ \ + __u64 ia64_intri_res; \ + \ + switch (mode) { \ + case ia64_mux1_brcst: \ + asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_mix: \ + asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_shuf: \ + asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_alt: \ + asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_rev: \ + asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + } \ + ia64_intri_res; \ +}) + +#define ia64_popcnt(x) \ +({ \ + __u64 ia64_intri_res; \ + asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ + \ + ia64_intri_res; \ +}) + +#define ia64_getf_exp(x) \ +({ \ + long ia64_intri_res; \ + \ + asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x)); \ + \ + ia64_intri_res; \ +}) + +#define ia64_shrp(a, b, count) \ +({ \ + __u64 ia64_intri_res; \ + asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count)); \ + ia64_intri_res; \ +}) + +#define ia64_ldfs(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfs %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldfd(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldfe(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldf8(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldf_fill(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_stfs(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stfd(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stfe(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stf8(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stf_spill(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_fetchadd4_acq(p, inc) \ +({ \ + \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd4.acq %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd4_rel(p, inc) \ +({ \ + __u64 ia64_intri_res; \ + 
asm volatile ("fetchadd4.rel %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd8_acq(p, inc) \ +({ \ + \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd8.acq %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd8_rel(p, inc) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd8.rel %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_xchg1(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg1 %0=[%1],%2" \ + : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg2(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg4(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg8(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg1_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg1_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg2_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg2_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + \ + asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg4_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg4_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg8_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg8_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + \ + asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_mf() asm volatile ("mf" ::: "memory") +#define ia64_mfa() asm volatile ("mf.a" ::: "memory") + +#ifdef CONFIG_VTI +/* 
+ * Flushrs instruction stream. + */ +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") + +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") + +#define ia64_get_rsc() \ +({ \ + unsigned long val; \ + asm volatile ("mov %0=ar.rsc;;" : "=r"(val) :: "memory"); \ + val; \ +}) + +#define ia64_set_rsc(val) \ + asm volatile ("mov ar.rsc=%0;;" :: "r"(val) : "memory") + +#define ia64_get_bspstore() \ +({ \ + unsigned long val; \ + asm volatile ("mov %0=ar.bspstore;;" : "=r"(val) :: "memory"); \ + val; \ +}) + +#define ia64_set_bspstore(val) \ + asm volatile ("mov ar.bspstore=%0;;" :: "r"(val) : "memory") + +#define ia64_get_rnat() \ +({ \ + unsigned long val; \ + asm volatile ("mov %0=ar.rnat;" : "=r"(val) :: "memory"); \ + val; \ +}) + +#define ia64_set_rnat(val) \ + asm volatile ("mov ar.rnat=%0;;" :: "r"(val) : "memory") + +#define ia64_ttag(addr) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + +#define ia64_get_dcr() \ +({ \ + __u64 result; \ + asm volatile ("mov %0=cr.dcr" : "=r"(result) : ); \ + result; \ +}) + +#define ia64_set_dcr(val) \ +({ \ + asm volatile ("mov cr.dcr=%0" :: "r"(val) ); \ +}) + +#endif // CONFIG_VTI + + +#define ia64_invala() asm volatile ("invala" ::: "memory") + +#define ia64_thash(addr) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + +#define ia64_srlz_i() asm volatile (";; srlz.i ;;" ::: "memory") +#define ia64_srlz_d() asm volatile (";; srlz.d" ::: "memory"); + +#ifdef HAVE_SERIALIZE_DIRECTIVE +# define ia64_dv_serialize_data() asm volatile (".serialize.data"); +# define ia64_dv_serialize_instruction() asm volatile (".serialize.instruction"); +#else +# define ia64_dv_serialize_data() +# define ia64_dv_serialize_instruction() +#endif + +#define ia64_nop(x) asm volatile ("nop %0"::"i"(x)); + +#define ia64_itci(addr) asm volatile ("itc.i %0;;" :: "r"(addr) : "memory") + +#define ia64_itcd(addr) asm volatile ("itc.d %0;;" :: "r"(addr) : "memory") + + +#define ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1" \ + :: "r"(trnum), "r"(addr) : "memory") + +#define ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1" \ + :: "r"(trnum), "r"(addr) : "memory") + +#define ia64_tpa(addr) \ +({ \ + __u64 ia64_pa; \ + asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory"); \ + ia64_pa; \ +}) + +#define __ia64_set_dbr(index, val) \ + asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_ibr(index, val) \ + asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pkr(index, val) \ + asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pmc(index, val) \ + asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pmd(index, val) \ + asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_rr(index, val) \ + asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory"); + +#define ia64_get_cpuid(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index)); \ + ia64_intri_res; \ +}) + +#define __ia64_get_dbr(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_ibr(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index)); 
\ + ia64_intri_res; \ +}) + +#define ia64_get_pkr(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_pmc(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + + +#define ia64_get_pmd(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_rr(index) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index)); \ + ia64_intri_res; \ +}) + +#define ia64_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") + + +#define ia64_sync_i() asm volatile (";; sync.i" ::: "memory") + +#define ia64_ssm(mask) asm volatile ("ssm %0":: "i"((mask)) : "memory") +#define ia64_rsm(mask) asm volatile ("rsm %0":: "i"((mask)) : "memory") +#define ia64_sum(mask) asm volatile ("sum %0":: "i"((mask)) : "memory") +#define ia64_rum(mask) asm volatile ("rum %0":: "i"((mask)) : "memory") + +#define ia64_ptce(addr) asm volatile ("ptc.e %0" :: "r"(addr)) + +#define ia64_ptcga(addr, size) \ +do { \ + asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory"); \ + ia64_dv_serialize_data(); \ +} while (0) + +#define ia64_ptcl(addr, size) \ +do { \ + asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory"); \ + ia64_dv_serialize_data(); \ +} while (0) + +#define ia64_ptri(addr, size) \ + asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory") + +#define ia64_ptrd(addr, size) \ + asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") + +/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ + +#define ia64_lfhint_none 0 +#define ia64_lfhint_nt1 1 +#define ia64_lfhint_nt2 2 +#define ia64_lfhint_nta 3 + +#define ia64_lfetch(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.nt1 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.nt2 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.nta [%0]" : : "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_excl(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.excl [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.excl.nta [%0]" :: "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_fault(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.fault [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.fault.nta [%0]" : : "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_fault_excl(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.fault.excl [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm 
volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y)); \ + break; \ + } \ +}) + +#define ia64_intrin_local_irq_restore(x) \ +do { \ + asm volatile (";; cmp.ne p6,p7=%0,r0;;" \ + "(p6) ssm psr.i;" \ + "(p7) rsm psr.i;;" \ + "(p6) srlz.d" \ + :: "r"((x)) : "p6", "p7", "memory"); \ +} while (0) + +#endif /* _ASM_IA64_GCC_INTRIN_H */ diff --git a/xen/include/asm-ia64/ia64_int.h b/xen/include/asm-ia64/ia64_int.h index 76485135cb..2e4f61b6b1 100644 --- a/xen/include/asm-ia64/ia64_int.h +++ b/xen/include/asm-ia64/ia64_int.h @@ -36,6 +36,10 @@ #define IA64_NO_FAULT 0x0000 #define IA64_RFI_IN_PROGRESS 0x0001 #define IA64_RETRY 0x0002 +#ifdef CONFIG_VTI +#define IA64_FAULT 0x0001 +#define IA64_INJ_FAULT 0x0005 +#endif //CONFIG_VTI #define IA64_FORCED_IFA 0x0004 #define IA64_ILLOP_FAULT (IA64_GENEX_VECTOR | 0x00) #define IA64_PRIVOP_FAULT (IA64_GENEX_VECTOR | 0x10) diff --git a/xen/include/asm-ia64/ia64regs.h b/xen/include/asm-ia64/ia64regs.h new file mode 100644 index 0000000000..d30d54dacc --- /dev/null +++ b/xen/include/asm-ia64/ia64regs.h @@ -0,0 +1,129 @@ +/* + * Copyright (C) 2002,2003 Intel Corp. + * Jun Nakajima <jun.nakajima@intel.com> + * Suresh Siddha <suresh.b.siddha@intel.com> + */ + +#ifndef _ASM_IA64_IA64REGS_H +#define _ASM_IA64_IA64REGS_H + +/* + * Register Names for getreg() and setreg(). + * + * The "magic" numbers happen to match the values used by the Intel compiler's + * getreg()/setreg() intrinsics. + */ + +/* Special Registers */ + +#define _IA64_REG_IP 1016 /* getreg only */ +#define _IA64_REG_PSR 1019 +#define _IA64_REG_PSR_L 1019 + +/* General Integer Registers */ + +#define _IA64_REG_GP 1025 /* R1 */ +#define _IA64_REG_R8 1032 /* R8 */ +#define _IA64_REG_R9 1033 /* R9 */ +#define _IA64_REG_SP 1036 /* R12 */ +#define _IA64_REG_TP 1037 /* R13 */ + +/* Application Registers */ + +#define _IA64_REG_AR_KR0 3072 +#define _IA64_REG_AR_KR1 3073 +#define _IA64_REG_AR_KR2 3074 +#define _IA64_REG_AR_KR3 3075 +#define _IA64_REG_AR_KR4 3076 +#define _IA64_REG_AR_KR5 3077 +#define _IA64_REG_AR_KR6 3078 +#define _IA64_REG_AR_KR7 3079 +#define _IA64_REG_AR_RSC 3088 +#define _IA64_REG_AR_BSP 3089 +#define _IA64_REG_AR_BSPSTORE 3090 +#define _IA64_REG_AR_RNAT 3091 +#define _IA64_REG_AR_FCR 3093 +#define _IA64_REG_AR_EFLAG 3096 +#define _IA64_REG_AR_CSD 3097 +#define _IA64_REG_AR_SSD 3098 +#define _IA64_REG_AR_CFLAG 3099 +#define _IA64_REG_AR_FSR 3100 +#define _IA64_REG_AR_FIR 3101 +#define _IA64_REG_AR_FDR 3102 +#define _IA64_REG_AR_CCV 3104 +#define _IA64_REG_AR_UNAT 3108 +#define _IA64_REG_AR_FPSR 3112 +#define _IA64_REG_AR_ITC 3116 +#define _IA64_REG_AR_PFS 3136 +#define _IA64_REG_AR_LC 3137 +#define _IA64_REG_AR_EC 3138 + +/* Control Registers */ + +#define _IA64_REG_CR_DCR 4096 +#define _IA64_REG_CR_ITM 4097 +#define _IA64_REG_CR_IVA 4098 +#define _IA64_REG_CR_PTA 4104 +#define _IA64_REG_CR_IPSR 4112 +#define _IA64_REG_CR_ISR 4113 +#define _IA64_REG_CR_IIP 4115 +#define _IA64_REG_CR_IFA 4116 +#define _IA64_REG_CR_ITIR 4117 +#define _IA64_REG_CR_IIPA 4118 +#define _IA64_REG_CR_IFS 4119 +#define _IA64_REG_CR_IIM 4120 +#define _IA64_REG_CR_IHA 4121 +#define _IA64_REG_CR_LID 4160 +#define _IA64_REG_CR_IVR 4161 /* getreg only */ +#define _IA64_REG_CR_TPR 4162 +#define _IA64_REG_CR_EOI 4163 +#define _IA64_REG_CR_IRR0 4164 /* getreg only */ +#define _IA64_REG_CR_IRR1 4165 /* getreg only */ +#define _IA64_REG_CR_IRR2 4166 /* getreg only */ +#define _IA64_REG_CR_IRR3 4167 /* getreg only */ +#define _IA64_REG_CR_ITV 4168 +#define _IA64_REG_CR_PMV 4169 +#define _IA64_REG_CR_CMCV 4170 +#define 
_IA64_REG_CR_LRR0 4176 +#define _IA64_REG_CR_LRR1 4177 + +#ifdef CONFIG_VTI +#define IA64_REG_CR_DCR 0 +#define IA64_REG_CR_ITM 1 +#define IA64_REG_CR_IVA 2 +#define IA64_REG_CR_PTA 8 +#define IA64_REG_CR_IPSR 16 +#define IA64_REG_CR_ISR 17 +#define IA64_REG_CR_IIP 19 +#define IA64_REG_CR_IFA 20 +#define IA64_REG_CR_ITIR 21 +#define IA64_REG_CR_IIPA 22 +#define IA64_REG_CR_IFS 23 +#define IA64_REG_CR_IIM 24 +#define IA64_REG_CR_IHA 25 +#define IA64_REG_CR_LID 64 +#define IA64_REG_CR_IVR 65 +#define IA64_REG_CR_TPR 66 +#define IA64_REG_CR_EOI 67 +#define IA64_REG_CR_IRR0 68 +#define IA64_REG_CR_IRR1 69 +#define IA64_REG_CR_IRR2 70 +#define IA64_REG_CR_IRR3 71 +#define IA64_REG_CR_ITV 72 +#define IA64_REG_CR_PMV 73 +#define IA64_REG_CR_CMCV 74 +#define IA64_REG_CR_LRR0 80 +#define IA64_REG_CR_LRR1 81 +#endif // CONFIG_VTI + +/* Indirect Registers for getindreg() and setindreg() */ + +#define _IA64_REG_INDR_CPUID 9000 /* getindreg only */ +#define _IA64_REG_INDR_DBR 9001 +#define _IA64_REG_INDR_IBR 9002 +#define _IA64_REG_INDR_PKR 9003 +#define _IA64_REG_INDR_PMC 9004 +#define _IA64_REG_INDR_PMD 9005 +#define _IA64_REG_INDR_RR 9006 + +#endif /* _ASM_IA64_IA64REGS_H */ diff --git a/xen/include/asm-ia64/mm.h b/xen/include/asm-ia64/mm.h index 22d3d33917..56df1706f9 100644 --- a/xen/include/asm-ia64/mm.h +++ b/xen/include/asm-ia64/mm.h @@ -359,4 +359,22 @@ extern unsigned long num_physpages; extern unsigned long totalram_pages; extern int nr_swap_pages; +#ifdef CONFIG_VTI +extern unsigned long *mpt_table; +#undef machine_to_phys_mapping +#define machine_to_phys_mapping mpt_table + +/* If the pmt table is provided by the control panel later, we need __get_user + * here. However if it's allocated by the HV, we should access it directly + */ +#define phys_to_machine_mapping(d, gpfn) \ + ((d) == dom0 ? gpfn : (d)->arch.pmt[(gpfn)]) + +#define __mfn_to_gpfn(_d, mfn) \ + machine_to_phys_mapping[(mfn)] + +#define __gpfn_to_mfn(_d, gpfn) \ + phys_to_machine_mapping((_d), (gpfn)) +#endif // CONFIG_VTI + #endif /* __ASM_IA64_MM_H__ */ diff --git a/xen/include/asm-ia64/mmu_context.h b/xen/include/asm-ia64/mmu_context.h index a08b5fd100..4f51c65756 100644 --- a/xen/include/asm-ia64/mmu_context.h +++ b/xen/include/asm-ia64/mmu_context.h @@ -2,7 +2,11 @@ #define __ASM_MMU_CONTEXT_H //dummy file to resolve non-arch-indep include #ifdef XEN +#ifndef CONFIG_VTI #define IA64_REGION_ID_KERNEL 0 +#else // CONFIG_VTI +#define IA64_REGION_ID_KERNEL 0x1e0000 /* Starts with all 1s in the highest 4 bits */ +#endif // CONFIG_VTI #define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61)) #ifndef __ASSEMBLY__ diff --git a/xen/include/asm-ia64/pal.h b/xen/include/asm-ia64/pal.h new file mode 100644 index 0000000000..55612c1846 --- /dev/null +++ b/xen/include/asm-ia64/pal.h @@ -0,0 +1,1567 @@ +#ifndef _ASM_IA64_PAL_H +#define _ASM_IA64_PAL_H + +/* + * Processor Abstraction Layer definitions. + * + * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0 + * chapter 11 IA-64 Processor Abstraction Layer + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> + * Copyright (C) 1999 Srinivasa Prasad Thirumalachar <sprasad@sprasad.engr.sgi.com> + * + * 99/10/01 davidm Make sure we pass zero for reserved parameters. + * 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6.
+ * 00/03/23	cfleck	Modified processor min-state save area to match updated PAL & SAL info
+ * 00/05/24	eranian	Updated to latest PAL spec, fix structures bugs, added
+ * 00/05/25	eranian	Support for stack calls, and static physical calls
+ * 00/06/18	eranian	Support for stacked physical calls
+ */
+
+/*
+ * Note that some of these calls use a static-register only calling
+ * convention which has nothing to do with the regular calling
+ * convention.
+ */
+#define PAL_CACHE_FLUSH		1	/* flush i/d cache */
+#define PAL_CACHE_INFO		2	/* get detailed i/d cache info */
+#define PAL_CACHE_INIT		3	/* initialize i/d cache */
+#define PAL_CACHE_SUMMARY	4	/* get summary of cache hierarchy */
+#define PAL_MEM_ATTRIB		5	/* list supported memory attributes */
+#define PAL_PTCE_INFO		6	/* purge TLB info */
+#define PAL_VM_INFO		7	/* return supported virtual memory features */
+#define PAL_VM_SUMMARY		8	/* return summary on supported vm features */
+#define PAL_BUS_GET_FEATURES	9	/* return processor bus interface features settings */
+#define PAL_BUS_SET_FEATURES	10	/* set processor bus features */
+#define PAL_DEBUG_INFO		11	/* get number of debug registers */
+#define PAL_FIXED_ADDR		12	/* get fixed component of processor's directed address */
+#define PAL_FREQ_BASE		13	/* base frequency of the platform */
+#define PAL_FREQ_RATIOS		14	/* ratio of processor, bus and ITC frequency */
+#define PAL_PERF_MON_INFO	15	/* return performance monitor info */
+#define PAL_PLATFORM_ADDR	16	/* set processor interrupt block and IO port space addr */
+#define PAL_PROC_GET_FEATURES	17	/* get configurable processor features & settings */
+#define PAL_PROC_SET_FEATURES	18	/* enable/disable configurable processor features */
+#define PAL_RSE_INFO		19	/* return rse information */
+#define PAL_VERSION		20	/* return version of PAL code */
+#define PAL_MC_CLEAR_LOG	21	/* clear all processor log info */
+#define PAL_MC_DRAIN		22	/* drain operations which could result in an MCA */
+#define PAL_MC_EXPECTED		23	/* set/reset expected MCA indicator */
+#define PAL_MC_DYNAMIC_STATE	24	/* get processor dynamic state */
+#define PAL_MC_ERROR_INFO	25	/* get processor MCA info and static state */
+#define PAL_MC_RESUME		26	/* Return to interrupted process */
+#define PAL_MC_REGISTER_MEM	27	/* Register memory for PAL to use during MCAs and inits */
+#define PAL_HALT		28	/* enter the low power HALT state */
+#define PAL_HALT_LIGHT		29	/* enter the low power light halt state */
+#define PAL_COPY_INFO		30	/* returns info needed to relocate PAL */
+#define PAL_CACHE_LINE_INIT	31	/* init tags & data of cache line */
+#define PAL_PMI_ENTRYPOINT	32	/* register PMI memory entry points with the processor */
+#define PAL_ENTER_IA_32_ENV	33	/* enter IA-32 system environment */
+#define PAL_VM_PAGE_SIZE	34	/* return vm TC and page walker page sizes */
+
+#define PAL_MEM_FOR_TEST	37	/* get amount of memory needed for late processor test */
+#define PAL_CACHE_PROT_INFO	38	/* get i/d cache protection info */
+#define PAL_REGISTER_INFO	39	/* return AR and CR register information */
+#define PAL_SHUTDOWN		40	/* enter processor shutdown state */
+#define PAL_PREFETCH_VISIBILITY	41	/* Make Processor Prefetches Visible */
+
+#define PAL_COPY_PAL		256	/* relocate PAL procedures and PAL PMI */
+#define PAL_HALT_INFO		257	/* return the low power capabilities of processor */
+#define PAL_TEST_PROC		258	/* perform late processor self-test */
+#define PAL_CACHE_READ		259	/* read tag & data of cacheline for diagnostic testing */
+#define PAL_CACHE_WRITE		260	/* write tag & data of cacheline for diagnostic testing */
+#define PAL_VM_TR_READ		261	/* read contents of translation register */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/fpu.h>
+
+/*
+ * Data types needed to pass information into PAL procedures and
+ * interpret information returned by them.
+ */
+
+/* Return status from the PAL procedure */
+typedef s64				pal_status_t;
+
+#define PAL_STATUS_SUCCESS		0	/* No error */
+#define PAL_STATUS_UNIMPLEMENTED	(-1)	/* Unimplemented procedure */
+#define PAL_STATUS_EINVAL		(-2)	/* Invalid argument */
+#define PAL_STATUS_ERROR		(-3)	/* Error */
+#define PAL_STATUS_CACHE_INIT_FAIL	(-4)	/* Could not initialize the
+						 * specified level and type of
+						 * cache without side effects
+						 * and "restrict" was 1
+						 */
+
+/* Processor cache level in the hierarchy */
+typedef u64				pal_cache_level_t;
+#define PAL_CACHE_LEVEL_L0		0	/* L0 */
+#define PAL_CACHE_LEVEL_L1		1	/* L1 */
+#define PAL_CACHE_LEVEL_L2		2	/* L2 */
+
+
+/* Processor cache type at a particular level in the hierarchy */
+
+typedef u64				pal_cache_type_t;
+#define PAL_CACHE_TYPE_INSTRUCTION	1	/* Instruction cache */
+#define PAL_CACHE_TYPE_DATA		2	/* Data or unified cache */
+#define PAL_CACHE_TYPE_INSTRUCTION_DATA	3	/* Both Data & Instruction */
+
+
+#define PAL_CACHE_FLUSH_INVALIDATE	1	/* Invalidate clean lines */
+#define PAL_CACHE_FLUSH_CHK_INTRS	2	/* check for interrupts/mc while flushing */
+
+/* Processor cache line size in bytes */
+typedef int				pal_cache_line_size_t;
+
+/* Processor cache line state */
+typedef u64				pal_cache_line_state_t;
+#define PAL_CACHE_LINE_STATE_INVALID	0	/* Invalid */
+#define PAL_CACHE_LINE_STATE_SHARED	1	/* Shared */
+#define PAL_CACHE_LINE_STATE_EXCLUSIVE	2	/* Exclusive */
+#define PAL_CACHE_LINE_STATE_MODIFIED	3	/* Modified */
+
+typedef struct pal_freq_ratio {
+	u64 den : 32, num : 32;	/* numerator & denominator */
+} itc_ratio, proc_ratio;
+
+typedef union pal_cache_config_info_1_s {
+	struct {
+		u64		u		: 1,	/* 0 Unified cache ? */
+				at		: 2,	/* 2-1 Cache mem attr */
+				reserved	: 5,	/* 7-3 Reserved */
+				associativity	: 8,	/* 16-8 Associativity */
+				line_size	: 8,	/* 23-17 Line size */
+				stride		: 8,	/* 31-24 Stride */
+				store_latency	: 8,	/* 39-32 Store latency */
+				load_latency	: 8,	/* 47-40 Load latency */
+				store_hints	: 8,	/* 55-48 Store hints */
+				load_hints	: 8;	/* 63-56 Load hints */
+	} pcci1_bits;
+	u64			pcci1_data;
+} pal_cache_config_info_1_t;
+
+typedef union pal_cache_config_info_2_s {
+	struct {
+		u64		cache_size	: 32,	/* cache size in bytes */
+
+
+				alias_boundary	: 8,	/* 39-32 aliased addr
+							 * separation for max
+							 * performance.
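+							 * (An illustrative
+							 * reading, assuming the
+							 * power-of-two encoding
+							 * from the PAL spec:
+							 * buffers kept
+							 * 1UL << pcci_alias_boundary
+							 * bytes apart will not
+							 * alias.)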
+ */ + tag_ls_bit : 8, /* 47-40 LSb of addr*/ + tag_ms_bit : 8, /* 55-48 MSb of addr*/ + reserved : 8; /* 63-56 Reserved */ + } pcci2_bits; + u64 pcci2_data; +} pal_cache_config_info_2_t; + + +typedef struct pal_cache_config_info_s { + pal_status_t pcci_status; + pal_cache_config_info_1_t pcci_info_1; + pal_cache_config_info_2_t pcci_info_2; + u64 pcci_reserved; +} pal_cache_config_info_t; + +#define pcci_ld_hints pcci_info_1.pcci1_bits.load_hints +#define pcci_st_hints pcci_info_1.pcci1_bits.store_hints +#define pcci_ld_latency pcci_info_1.pcci1_bits.load_latency +#define pcci_st_latency pcci_info_1.pcci1_bits.store_latency +#define pcci_stride pcci_info_1.pcci1_bits.stride +#define pcci_line_size pcci_info_1.pcci1_bits.line_size +#define pcci_assoc pcci_info_1.pcci1_bits.associativity +#define pcci_cache_attr pcci_info_1.pcci1_bits.at +#define pcci_unified pcci_info_1.pcci1_bits.u +#define pcci_tag_msb pcci_info_2.pcci2_bits.tag_ms_bit +#define pcci_tag_lsb pcci_info_2.pcci2_bits.tag_ls_bit +#define pcci_alias_boundary pcci_info_2.pcci2_bits.alias_boundary +#define pcci_cache_size pcci_info_2.pcci2_bits.cache_size + + + +/* Possible values for cache attributes */ + +#define PAL_CACHE_ATTR_WT 0 /* Write through cache */ +#define PAL_CACHE_ATTR_WB 1 /* Write back cache */ +#define PAL_CACHE_ATTR_WT_OR_WB 2 /* Either write thru or write + * back depending on TLB + * memory attributes + */ + + +/* Possible values for cache hints */ + +#define PAL_CACHE_HINT_TEMP_1 0 /* Temporal level 1 */ +#define PAL_CACHE_HINT_NTEMP_1 1 /* Non-temporal level 1 */ +#define PAL_CACHE_HINT_NTEMP_ALL 3 /* Non-temporal all levels */ + +/* Processor cache protection information */ +typedef union pal_cache_protection_element_u { + u32 pcpi_data; + struct { + u32 data_bits : 8, /* # data bits covered by + * each unit of protection + */ + + tagprot_lsb : 6, /* Least -do- */ + tagprot_msb : 6, /* Most Sig. tag address + * bit that this + * protection covers. + */ + prot_bits : 6, /* # of protection bits */ + method : 4, /* Protection method */ + t_d : 2; /* Indicates which part + * of the cache this + * protection encoding + * applies. 
+						 */
+	} pcp_info;
+} pal_cache_protection_element_t;
+
+#define pcpi_cache_prot_part	pcp_info.t_d
+#define pcpi_prot_method	pcp_info.method
+#define pcpi_prot_bits		pcp_info.prot_bits
+#define pcpi_tagprot_msb	pcp_info.tagprot_msb
+#define pcpi_tagprot_lsb	pcp_info.tagprot_lsb
+#define pcpi_data_bits		pcp_info.data_bits
+
+/* Processor cache part encodings */
+#define PAL_CACHE_PROT_PART_DATA	0	/* Data protection */
+#define PAL_CACHE_PROT_PART_TAG		1	/* Tag protection */
+#define PAL_CACHE_PROT_PART_TAG_DATA	2	/* Tag+data protection (tag is
+						 * more significant)
+						 */
+#define PAL_CACHE_PROT_PART_DATA_TAG	3	/* Data+tag protection (data is
+						 * more significant)
+						 */
+#define PAL_CACHE_PROT_PART_MAX		6
+
+
+typedef struct pal_cache_protection_info_s {
+	pal_status_t			pcpi_status;
+	pal_cache_protection_element_t	pcp_info[PAL_CACHE_PROT_PART_MAX];
+} pal_cache_protection_info_t;
+
+
+/* Processor cache protection method encodings */
+#define PAL_CACHE_PROT_METHOD_NONE		0	/* No protection */
+#define PAL_CACHE_PROT_METHOD_ODD_PARITY	1	/* Odd parity */
+#define PAL_CACHE_PROT_METHOD_EVEN_PARITY	2	/* Even parity */
+#define PAL_CACHE_PROT_METHOD_ECC		3	/* ECC protection */
+
+
+/* Processor cache line identification in the hierarchy */
+typedef union pal_cache_line_id_u {
+	u64			pclid_data;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * hierarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				reserved	: 32;	/* 63-32 is reserved */
+	} pclid_info_read;
+	struct {
+		u64		cache_type	: 8,	/* 7-0 cache type */
+				level		: 8,	/* 15-8 level of the
+							 * cache in the
+							 * hierarchy.
+							 */
+				way		: 8,	/* 23-16 way in the set
+							 */
+				part		: 8,	/* 31-24 part of the
+							 * cache
+							 */
+				mesi		: 8,	/* 39-32 cache line
+							 * state
+							 */
+				start		: 8,	/* 47-40 lsb of data to
+							 * invert
+							 */
+				length		: 8,	/* 55-48 #bits to
+							 * invert
+							 */
+				trigger		: 8;	/* 63-56 Trigger error
+							 * by doing a load
+							 * after the write
+							 */
+
+	} pclid_info_write;
+} pal_cache_line_id_u_t;
+
+#define pclid_read_part		pclid_info_read.part
+#define pclid_read_way		pclid_info_read.way
+#define pclid_read_level	pclid_info_read.level
+#define pclid_read_cache_type	pclid_info_read.cache_type
+
+#define pclid_write_trigger	pclid_info_write.trigger
+#define pclid_write_length	pclid_info_write.length
+#define pclid_write_start	pclid_info_write.start
+#define pclid_write_mesi	pclid_info_write.mesi
+#define pclid_write_part	pclid_info_write.part
+#define pclid_write_way		pclid_info_write.way
+#define pclid_write_level	pclid_info_write.level
+#define pclid_write_cache_type	pclid_info_write.cache_type
+
+/* Processor cache line part encodings */
+#define PAL_CACHE_LINE_ID_PART_DATA		0	/* Data */
+#define PAL_CACHE_LINE_ID_PART_TAG		1	/* Tag */
+#define PAL_CACHE_LINE_ID_PART_DATA_PROT	2	/* Data protection */
+#define PAL_CACHE_LINE_ID_PART_TAG_PROT		3	/* Tag protection */
+#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT	4	/* Data+tag
+							 * protection
+							 */
+typedef struct pal_cache_line_info_s {
+	pal_status_t		pcli_status;		/* Return status of the read cache line
+							 * info call.
+							 */
+	u64			pcli_data;		/* 64-bit data, tag, protection bits ..
*/ + u64 pcli_data_len; /* data length in bits */ + pal_cache_line_state_t pcli_cache_line_state; /* mesi state */ + +} pal_cache_line_info_t; + + +/* Machine Check related crap */ + +/* Pending event status bits */ +typedef u64 pal_mc_pending_events_t; + +#define PAL_MC_PENDING_MCA (1 << 0) +#define PAL_MC_PENDING_INIT (1 << 1) + +/* Error information type */ +typedef u64 pal_mc_info_index_t; + +#define PAL_MC_INFO_PROCESSOR 0 /* Processor */ +#define PAL_MC_INFO_CACHE_CHECK 1 /* Cache check */ +#define PAL_MC_INFO_TLB_CHECK 2 /* Tlb check */ +#define PAL_MC_INFO_BUS_CHECK 3 /* Bus check */ +#define PAL_MC_INFO_REQ_ADDR 4 /* Requestor address */ +#define PAL_MC_INFO_RESP_ADDR 5 /* Responder address */ +#define PAL_MC_INFO_TARGET_ADDR 6 /* Target address */ +#define PAL_MC_INFO_IMPL_DEP 7 /* Implementation + * dependent + */ + + +typedef struct pal_process_state_info_s { + u64 reserved1 : 2, + rz : 1, /* PAL_CHECK processor + * rendezvous + * successful. + */ + + ra : 1, /* PAL_CHECK attempted + * a rendezvous. + */ + me : 1, /* Distinct multiple + * errors occurred + */ + + mn : 1, /* Min. state save + * area has been + * registered with PAL + */ + + sy : 1, /* Storage integrity + * synched + */ + + + co : 1, /* Continuable */ + ci : 1, /* MC isolated */ + us : 1, /* Uncontained storage + * damage. + */ + + + hd : 1, /* Non-essential hw + * lost (no loss of + * functionality) + * causing the + * processor to run in + * degraded mode. + */ + + tl : 1, /* 1 => MC occurred + * after an instr was + * executed but before + * the trap that + * resulted from instr + * execution was + * generated. + * (Trap Lost ) + */ + mi : 1, /* More information available + * call PAL_MC_ERROR_INFO + */ + pi : 1, /* Precise instruction pointer */ + pm : 1, /* Precise min-state save area */ + + dy : 1, /* Processor dynamic + * state valid + */ + + + in : 1, /* 0 = MC, 1 = INIT */ + rs : 1, /* RSE valid */ + cm : 1, /* MC corrected */ + ex : 1, /* MC is expected */ + cr : 1, /* Control regs valid*/ + pc : 1, /* Perf cntrs valid */ + dr : 1, /* Debug regs valid */ + tr : 1, /* Translation regs + * valid + */ + rr : 1, /* Region regs valid */ + ar : 1, /* App regs valid */ + br : 1, /* Branch regs valid */ + pr : 1, /* Predicate registers + * valid + */ + + fp : 1, /* fp registers valid*/ + b1 : 1, /* Preserved bank one + * general registers + * are valid + */ + b0 : 1, /* Preserved bank zero + * general registers + * are valid + */ + gr : 1, /* General registers + * are valid + * (excl. banked regs) + */ + dsize : 16, /* size of dynamic + * state returned + * by the processor + */ + + reserved2 : 11, + cc : 1, /* Cache check */ + tc : 1, /* TLB check */ + bc : 1, /* Bus check */ + rc : 1, /* Register file check */ + uc : 1; /* Uarch check */ + +} pal_processor_state_info_t; + +typedef struct pal_cache_check_info_s { + u64 op : 4, /* Type of cache + * operation that + * caused the machine + * check. 
+ */ + level : 2, /* Cache level */ + reserved1 : 2, + dl : 1, /* Failure in data part + * of cache line + */ + tl : 1, /* Failure in tag part + * of cache line + */ + dc : 1, /* Failure in dcache */ + ic : 1, /* Failure in icache */ + mesi : 3, /* Cache line state */ + mv : 1, /* mesi valid */ + way : 5, /* Way in which the + * error occurred + */ + wiv : 1, /* Way field valid */ + reserved2 : 10, + + index : 20, /* Cache line index */ + reserved3 : 2, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_cache_check_info_t; + +typedef struct pal_tlb_check_info_s { + + u64 tr_slot : 8, /* Slot# of TR where + * error occurred + */ + trv : 1, /* tr_slot field is valid */ + reserved1 : 1, + level : 2, /* TLB level where failure occurred */ + reserved2 : 4, + dtr : 1, /* Fail in data TR */ + itr : 1, /* Fail in inst TR */ + dtc : 1, /* Fail in data TC */ + itc : 1, /* Fail in inst. TC */ + op : 4, /* Cache operation */ + reserved3 : 30, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_tlb_check_info_t; + +typedef struct pal_bus_check_info_s { + u64 size : 5, /* Xaction size */ + ib : 1, /* Internal bus error */ + eb : 1, /* External bus error */ + cc : 1, /* Error occurred + * during cache-cache + * transfer. + */ + type : 8, /* Bus xaction type*/ + sev : 5, /* Bus error severity*/ + hier : 2, /* Bus hierarchy level */ + reserved1 : 1, + bsi : 8, /* Bus error status + * info + */ + reserved2 : 22, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_bus_check_info_t; + +typedef struct pal_reg_file_check_info_s { + u64 id : 4, /* Register file identifier */ + op : 4, /* Type of register + * operation that + * caused the machine + * check. + */ + reg_num : 7, /* Register number */ + rnv : 1, /* reg_num valid */ + reserved2 : 38, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + reserved3 : 3, + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_reg_file_check_info_t; + +typedef struct pal_uarch_check_info_s { + u64 sid : 5, /* Structure identification */ + level : 3, /* Level of failure */ + array_id : 4, /* Array identification */ + op : 4, /* Type of + * operation that + * caused the machine + * check. 
+ */ + way : 6, /* Way of structure */ + wv : 1, /* way valid */ + xv : 1, /* index valid */ + reserved1 : 8, + index : 8, /* Index or set of the uarch + * structure that failed. + */ + reserved2 : 24, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_uarch_check_info_t; + +typedef union pal_mc_error_info_u { + u64 pmei_data; + pal_processor_state_info_t pme_processor; + pal_cache_check_info_t pme_cache; + pal_tlb_check_info_t pme_tlb; + pal_bus_check_info_t pme_bus; + pal_reg_file_check_info_t pme_reg_file; + pal_uarch_check_info_t pme_uarch; +} pal_mc_error_info_t; + +#define pmci_proc_unknown_check pme_processor.uc +#define pmci_proc_bus_check pme_processor.bc +#define pmci_proc_tlb_check pme_processor.tc +#define pmci_proc_cache_check pme_processor.cc +#define pmci_proc_dynamic_state_size pme_processor.dsize +#define pmci_proc_gpr_valid pme_processor.gr +#define pmci_proc_preserved_bank0_gpr_valid pme_processor.b0 +#define pmci_proc_preserved_bank1_gpr_valid pme_processor.b1 +#define pmci_proc_fp_valid pme_processor.fp +#define pmci_proc_predicate_regs_valid pme_processor.pr +#define pmci_proc_branch_regs_valid pme_processor.br +#define pmci_proc_app_regs_valid pme_processor.ar +#define pmci_proc_region_regs_valid pme_processor.rr +#define pmci_proc_translation_regs_valid pme_processor.tr +#define pmci_proc_debug_regs_valid pme_processor.dr +#define pmci_proc_perf_counters_valid pme_processor.pc +#define pmci_proc_control_regs_valid pme_processor.cr +#define pmci_proc_machine_check_expected pme_processor.ex +#define pmci_proc_machine_check_corrected pme_processor.cm +#define pmci_proc_rse_valid pme_processor.rs +#define pmci_proc_machine_check_or_init pme_processor.in +#define pmci_proc_dynamic_state_valid pme_processor.dy +#define pmci_proc_operation pme_processor.op +#define pmci_proc_trap_lost pme_processor.tl +#define pmci_proc_hardware_damage pme_processor.hd +#define pmci_proc_uncontained_storage_damage pme_processor.us +#define pmci_proc_machine_check_isolated pme_processor.ci +#define pmci_proc_continuable pme_processor.co +#define pmci_proc_storage_intergrity_synced pme_processor.sy +#define pmci_proc_min_state_save_area_regd pme_processor.mn +#define pmci_proc_distinct_multiple_errors pme_processor.me +#define pmci_proc_pal_attempted_rendezvous pme_processor.ra +#define pmci_proc_pal_rendezvous_complete pme_processor.rz + + +#define pmci_cache_level pme_cache.level +#define pmci_cache_line_state pme_cache.mesi +#define pmci_cache_line_state_valid pme_cache.mv +#define pmci_cache_line_index pme_cache.index +#define pmci_cache_instr_cache_fail pme_cache.ic +#define pmci_cache_data_cache_fail pme_cache.dc +#define pmci_cache_line_tag_fail pme_cache.tl +#define pmci_cache_line_data_fail pme_cache.dl +#define pmci_cache_operation pme_cache.op +#define pmci_cache_way_valid pme_cache.wv +#define pmci_cache_target_address_valid pme_cache.tv +#define pmci_cache_way pme_cache.way +#define pmci_cache_mc pme_cache.mc + +#define pmci_tlb_instr_translation_cache_fail pme_tlb.itc +#define pmci_tlb_data_translation_cache_fail pme_tlb.dtc +#define pmci_tlb_instr_translation_reg_fail 
pme_tlb.itr +#define pmci_tlb_data_translation_reg_fail pme_tlb.dtr +#define pmci_tlb_translation_reg_slot pme_tlb.tr_slot +#define pmci_tlb_mc pme_tlb.mc + +#define pmci_bus_status_info pme_bus.bsi +#define pmci_bus_req_address_valid pme_bus.rq +#define pmci_bus_resp_address_valid pme_bus.rp +#define pmci_bus_target_address_valid pme_bus.tv +#define pmci_bus_error_severity pme_bus.sev +#define pmci_bus_transaction_type pme_bus.type +#define pmci_bus_cache_cache_transfer pme_bus.cc +#define pmci_bus_transaction_size pme_bus.size +#define pmci_bus_internal_error pme_bus.ib +#define pmci_bus_external_error pme_bus.eb +#define pmci_bus_mc pme_bus.mc + +/* + * NOTE: this min_state_save area struct only includes the 1KB + * architectural state save area. The other 3 KB is scratch space + * for PAL. + */ + +typedef struct pal_min_state_area_s { + u64 pmsa_nat_bits; /* nat bits for saved GRs */ + u64 pmsa_gr[15]; /* GR1 - GR15 */ + u64 pmsa_bank0_gr[16]; /* GR16 - GR31 */ + u64 pmsa_bank1_gr[16]; /* GR16 - GR31 */ + u64 pmsa_pr; /* predicate registers */ + u64 pmsa_br0; /* branch register 0 */ + u64 pmsa_rsc; /* ar.rsc */ + u64 pmsa_iip; /* cr.iip */ + u64 pmsa_ipsr; /* cr.ipsr */ + u64 pmsa_ifs; /* cr.ifs */ + u64 pmsa_xip; /* previous iip */ + u64 pmsa_xpsr; /* previous psr */ + u64 pmsa_xfs; /* previous ifs */ + u64 pmsa_br1; /* branch register 1 */ + u64 pmsa_reserved[70]; /* pal_min_state_area should total to 1KB */ +} pal_min_state_area_t; + + +struct ia64_pal_retval { + /* + * A zero status value indicates call completed without error. + * A negative status value indicates reason of call failure. + * A positive status value indicates success but an + * informational value should be printed (e.g., "reboot for + * change to take effect"). + */ + s64 status; + u64 v0; + u64 v1; + u64 v2; +}; + +/* + * Note: Currently unused PAL arguments are generally labeled + * "reserved" so the value specified in the PAL documentation + * (generally 0) MUST be passed. Reserved parameters are not optional + * parameters. 
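+ *
+ * Static calls place the procedure index in r28 and up to three
+ * arguments in r29-r31; status and return values come back in r8-r11,
+ * which is what struct ia64_pal_retval above mirrors.  A minimal
+ * sketch of a call whose arguments are all reserved, using the
+ * PAL_CALL wrapper defined below:
+ *
+ *	struct ia64_pal_retval iprv;
+ *	PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0);
+ *	if (iprv.status != PAL_STATUS_SUCCESS)
+ *		printk("PAL_MC_DRAIN: error %ld\n", iprv.status);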
+ */ +extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); +extern void ia64_save_scratch_fpregs (struct ia64_fpreg *); +extern void ia64_load_scratch_fpregs (struct ia64_fpreg *); + +#define PAL_CALL(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3, 0); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3, 1); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_static(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +typedef int (*ia64_pal_handler) (u64, ...); +extern ia64_pal_handler ia64_pal; +extern void ia64_pal_handler_init (void *); + +extern ia64_pal_handler ia64_pal; + +extern pal_cache_config_info_t l0d_cache_config_info; +extern pal_cache_config_info_t l0i_cache_config_info; +extern pal_cache_config_info_t l1_cache_config_info; +extern pal_cache_config_info_t l2_cache_config_info; + +extern pal_cache_protection_info_t l0d_cache_protection_info; +extern pal_cache_protection_info_t l0i_cache_protection_info; +extern pal_cache_protection_info_t l1_cache_protection_info; +extern pal_cache_protection_info_t l2_cache_protection_info; + +extern pal_cache_config_info_t pal_cache_config_info_get(pal_cache_level_t, + pal_cache_type_t); + +extern pal_cache_protection_info_t pal_cache_protection_info_get(pal_cache_level_t, + pal_cache_type_t); + + +extern void pal_error(int); + + +/* Useful wrappers for the current list of pal procedures */ + +typedef union pal_bus_features_u { + u64 pal_bus_features_val; + struct { + u64 pbf_reserved1 : 29; + u64 pbf_req_bus_parking : 1; + u64 pbf_bus_lock_mask : 1; + u64 pbf_enable_half_xfer_rate : 1; + u64 pbf_reserved2 : 22; + u64 pbf_disable_xaction_queueing : 1; + u64 pbf_disable_resp_err_check : 1; + u64 pbf_disable_berr_check : 1; + u64 pbf_disable_bus_req_internal_err_signal : 1; + u64 pbf_disable_bus_req_berr_signal : 1; + u64 pbf_disable_bus_init_event_check : 1; + u64 pbf_disable_bus_init_event_signal : 1; + u64 pbf_disable_bus_addr_err_check : 1; + u64 pbf_disable_bus_addr_err_signal : 1; + u64 pbf_disable_bus_data_err_check : 1; + } pal_bus_features_s; +} pal_bus_features_u_t; + +extern void pal_bus_features_print (u64); + +/* Provide information about configurable processor bus features */ +static inline s64 +ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail, + pal_bus_features_u_t *features_status, + pal_bus_features_u_t *features_control) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0); + if (features_avail) + 
features_avail->pal_bus_features_val = iprv.v0;
+	if (features_status)
+		features_status->pal_bus_features_val = iprv.v1;
+	if (features_control)
+		features_control->pal_bus_features_val = iprv.v2;
+	return iprv.status;
+}
+
+/* Enables/disables specific processor bus features */
+static inline s64
+ia64_pal_bus_set_features (pal_bus_features_u_t feature_select)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0);
+	return iprv.status;
+}
+
+/* Get detailed cache information */
+static inline s64
+ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		conf->pcci_status            = iprv.status;
+		conf->pcci_info_1.pcci1_data = iprv.v0;
+		conf->pcci_info_2.pcci2_data = iprv.v1;
+		conf->pcci_reserved          = iprv.v2;
+	}
+	return iprv.status;
+
+}
+
+/* Get detailed cache protection information */
+static inline s64
+ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0);
+
+	if (iprv.status == 0) {
+		prot->pcpi_status           = iprv.status;
+		prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff;
+		prot->pcp_info[1].pcpi_data = iprv.v0 >> 32;
+		prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff;
+		prot->pcp_info[3].pcpi_data = iprv.v1 >> 32;
+		prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff;
+		prot->pcp_info[5].pcpi_data = iprv.v2 >> 32;
+	}
+	return iprv.status;
+}
+
+/*
+ * Flush the processor instruction or data caches.  *PROGRESS must be
+ * initialized to zero before calling this for the first time.
+ */
+static inline s64
+ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_IC_OFF(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress);
+	if (vector)
+		*vector = iprv.v0;
+	*progress = iprv.v1;
+	return iprv.status;
+}
+
+
+/* Initialize the processor controlled caches */
+static inline s64
+ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest);
+	return iprv.status;
+}
+
+/* Initialize the tags and data of a data or unified cache line of
+ * processor controlled cache to known values without the availability
+ * of backing memory.
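+ *
+ * A sketch of use, assuming a 64-byte line size obtained beforehand
+ * from ia64_pal_cache_config_info() above:
+ *
+ *	u64 addr;
+ *	for (addr = base; addr < base + len; addr += 64)
+ *		if (ia64_pal_cache_line_init(addr, 0) != PAL_STATUS_SUCCESS)
+ *			break;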
+ */
+static inline s64
+ia64_pal_cache_line_init (u64 physical_addr, u64 data_value)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0);
+	return iprv.status;
+}
+
+
+/* Read the data and tag of a processor controlled cache line for diags */
+static inline s64
+ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_READ, line_id.pclid_data, physical_addr, 0);
+	return iprv.status;
+}
+
+/* Return summary information about the hierarchy of caches controlled by the processor */
+static inline s64
+ia64_pal_cache_summary (u64 *cache_levels, u64 *unique_caches)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0);
+	if (cache_levels)
+		*cache_levels = iprv.v0;
+	if (unique_caches)
+		*unique_caches = iprv.v1;
+	return iprv.status;
+}
+
+/* Write the data and tag of a processor-controlled cache line for diags */
+static inline s64
+ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_CACHE_WRITE, line_id.pclid_data, physical_addr, data);
+	return iprv.status;
+}
+
+
+/* Return the parameters needed to copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics,
+		    u64 *buffer_size, u64 *buffer_align)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics);
+	if (buffer_size)
+		*buffer_size = iprv.v0;
+	if (buffer_align)
+		*buffer_align = iprv.v1;
+	return iprv.status;
+}
+
+/* Copy relocatable PAL procedures from ROM to memory */
+static inline s64
+ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor);
+	if (pal_proc_offset)
+		*pal_proc_offset = iprv.v0;
+	return iprv.status;
+}
+
+/* Return the number of instruction and data debug register pairs */
+static inline s64
+ia64_pal_debug_info (u64 *inst_regs, u64 *data_regs)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0);
+	if (inst_regs)
+		*inst_regs = iprv.v0;
+	if (data_regs)
+		*data_regs = iprv.v1;
+
+	return iprv.status;
+}
+
+#ifdef TBD
+/* Switch from IA64-system environment to IA-32 system environment */
+static inline s64
+ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3);
+	return iprv.status;
+}
+#endif
+
+/* Get unique geographical address of this processor on its bus */
+static inline s64
+ia64_pal_fixed_addr (u64 *global_unique_addr)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0);
+	if (global_unique_addr)
+		*global_unique_addr = iprv.v0;
+	return iprv.status;
+}
+
+/* Get base frequency of the platform if generated by the processor */
+static inline s64
+ia64_pal_freq_base (u64 *platform_base_freq)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0);
+	if (platform_base_freq)
+		*platform_base_freq = iprv.v0;
+	return iprv.status;
+}
+
+/*
+ * Get the ratios for processor frequency, bus frequency and interval timer to
+ * the base frequency of the platform
+ */
+static inline s64
+ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio,
+		      struct pal_freq_ratio *itc_ratio)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0);
+	if (proc_ratio)
+		*(u64 *)proc_ratio = iprv.v0;
+	if (bus_ratio)
+		*(u64 *)bus_ratio = iprv.v1;
+	if (itc_ratio)
+		*(u64 *)itc_ratio = iprv.v2;
+	return iprv.status;
+}
+
+/* Make the processor enter HALT or one of the implementation dependent low
+ * power states where prefetching and execution are suspended and cache and
+ * TLB coherency is not maintained.
+ */
+static inline s64
+ia64_pal_halt (u64 halt_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0);
+	return iprv.status;
+}
+
+typedef union pal_power_mgmt_info_u {
+	u64			ppmi_data;
+	struct {
+	       u64		exit_latency		: 16,
+				entry_latency		: 16,
+				power_consumption	: 28,
+				im			: 1,
+				co			: 1,
+				reserved		: 2;
+	} pal_power_mgmt_info_s;
+} pal_power_mgmt_info_u_t;
+
+/* Return information about processor's optional power management capabilities. */
+static inline s64
+ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0);
+	return iprv.status;
+}
+
+/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are
+ * suspended, but cache and TLB coherency is maintained.
+ */
+static inline s64
+ia64_pal_halt_light (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Clear all the processor error logging registers and reset the indicator that allows
+ * the error logging registers to be written. This procedure also checks the pending
+ * machine check bit and pending INIT bit and reports their states.
+ */
+static inline s64
+ia64_pal_mc_clear_log (u64 *pending_vector)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0);
+	if (pending_vector)
+		*pending_vector = iprv.v0;
+	return iprv.status;
+}
+
+/* Ensure that all outstanding transactions in a processor are completed or that any
+ * MCA due to these outstanding transactions is taken.
+ */
+static inline s64
+ia64_pal_mc_drain (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Return the machine check dynamic processor state */
+static inline s64
+ia64_pal_mc_dynamic_state (u64 offset, u64 *size, u64 *pds)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, offset, 0, 0);
+	if (size)
+		*size = iprv.v0;
+	if (pds)
+		*pds = iprv.v1;
+	return iprv.status;
+}
+
+/* Return processor machine check information */
+static inline s64
+ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0);
+	if (size)
+		*size = iprv.v0;
+	if (error_info)
+		*error_info = iprv.v1;
+	return iprv.status;
+}
+
+/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK will not
+ * attempt to correct any expected machine checks.
+ */
+static inline s64
+ia64_pal_mc_expected (u64 expected, u64 *previous)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0);
+	if (previous)
+		*previous = iprv.v0;
+	return iprv.status;
+}
+
+/* Register a platform dependent location with PAL to which it can save
+ * minimal processor state in the event of a machine check or initialization
+ * event.
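+ *
+ * A minimal sketch (size and alignment per the PAL spec's min-state
+ * save area requirements; the physical address is taken with __pa()):
+ *
+ *	static char pal_ms_area[4096] __attribute__((aligned(4096)));
+ *	ia64_pal_mc_register_mem(__pa(pal_ms_area));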
+ */ +static inline s64 +ia64_pal_mc_register_mem (u64 physical_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, 0, 0); + return iprv.status; +} + +/* Restore minimal architectural processor state, set CMC interrupt if necessary + * and resume execution + */ +static inline s64 +ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0); + return iprv.status; +} + +/* Return the memory attributes implemented by the processor */ +static inline s64 +ia64_pal_mem_attrib (u64 *mem_attrib) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0); + if (mem_attrib) + *mem_attrib = iprv.v0 & 0xff; + return iprv.status; +} + +/* Return the amount of memory needed for second phase of processor + * self-test and the required alignment of memory. + */ +static inline s64 +ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0); + if (bytes_needed) + *bytes_needed = iprv.v0; + if (alignment) + *alignment = iprv.v1; + return iprv.status; +} + +typedef union pal_perf_mon_info_u { + u64 ppmi_data; + struct { + u64 generic : 8, + width : 8, + cycles : 8, + retired : 8, + reserved : 32; + } pal_perf_mon_info_s; +} pal_perf_mon_info_u_t; + +/* Return the performance monitor information about what can be counted + * and how to configure the monitors to count the desired events. + */ +static inline s64 +ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0); + if (pm_info) + pm_info->ppmi_data = iprv.v0; + return iprv.status; +} + +/* Specifies the physical address of the processor interrupt block + * and I/O port space. + */ +static inline s64 +ia64_pal_platform_addr (u64 type, u64 physical_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0); + return iprv.status; +} + +/* Set the SAL PMI entrypoint in memory */ +static inline s64 +ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0); + return iprv.status; +} + +struct pal_features_s; +/* Provide information about configurable processor features */ +static inline s64 +ia64_pal_proc_get_features (u64 *features_avail, + u64 *features_status, + u64 *features_control) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, 0, 0); + if (iprv.status == 0) { + *features_avail = iprv.v0; + *features_status = iprv.v1; + *features_control = iprv.v2; + } + return iprv.status; +} + +/* Enable/disable processor dependent features */ +static inline s64 +ia64_pal_proc_set_features (u64 feature_select) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0); + return iprv.status; +} + +/* + * Put everything in a struct so we avoid the global offset table whenever + * possible. 
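+ *
+ * The base/count/stride values below drive the architected two-level
+ * loop that purges the entire TC.  A sketch, issuing ptc.e through
+ * the ia64_ptce() intrinsic:
+ *
+ *	u64 addr = ptce.base;
+ *	for (i = 0; i < ptce.count[0]; ++i) {
+ *		for (j = 0; j < ptce.count[1]; ++j) {
+ *			ia64_ptce(addr);
+ *			addr += ptce.stride[1];
+ *		}
+ *		addr += ptce.stride[0];
+ *	}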
+ */
+typedef struct ia64_ptce_info_s {
+	u64		base;
+	u32		count[2];
+	u32		stride[2];
+} ia64_ptce_info_t;
+
+/* Return the information required for the architected loop used to purge
+ * (initialize) the entire TC
+ */
+static inline s64
+ia64_get_ptce (ia64_ptce_info_t *ptce)
+{
+	struct ia64_pal_retval iprv;
+
+	if (!ptce)
+		return -1;
+
+	PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0);
+	if (iprv.status == 0) {
+		ptce->base = iprv.v0;
+		ptce->count[0] = iprv.v1 >> 32;
+		ptce->count[1] = iprv.v1 & 0xffffffff;
+		ptce->stride[0] = iprv.v2 >> 32;
+		ptce->stride[1] = iprv.v2 & 0xffffffff;
+	}
+	return iprv.status;
+}
+
+/* Return info about implemented application and control registers. */
+static inline s64
+ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0);
+	if (reg_info_1)
+		*reg_info_1 = iprv.v0;
+	if (reg_info_2)
+		*reg_info_2 = iprv.v1;
+	return iprv.status;
+}
+
+typedef union pal_hints_u {
+	u64			ph_data;
+	struct {
+	       u64		si		: 1,
+				li		: 1,
+				reserved	: 62;
+	} pal_hints_s;
+} pal_hints_u_t;
+
+/* Return information about the register stack and RSE for this processor
+ * implementation.
+ */
+static inline s64
+ia64_pal_rse_info (u64 *num_phys_stacked, pal_hints_u_t *hints)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0);
+	if (num_phys_stacked)
+		*num_phys_stacked = iprv.v0;
+	if (hints)
+		hints->ph_data = iprv.v1;
+	return iprv.status;
+}
+
+/* Cause the processor to enter SHUTDOWN state, where prefetching and execution are
+ * suspended, while cache and TLB coherency is maintained.
+ * This is usually called in IA-32 mode.
+ */
+static inline s64
+ia64_pal_shutdown (void)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0);
+	return iprv.status;
+}
+
+/* Perform the second phase of processor self-test. */
+static inline s64
+ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes);
+	if (self_test_state)
+		*self_test_state = iprv.v0;
+	return iprv.status;
+}
+
+typedef union pal_version_u {
+	u64	pal_version_val;
+	struct {
+		u64	pv_pal_b_rev	:	8;
+		u64	pv_pal_b_model	:	8;
+		u64	pv_reserved1	:	8;
+		u64	pv_pal_vendor	:	8;
+		u64	pv_pal_a_rev	:	8;
+		u64	pv_pal_a_model	:	8;
+		u64	pv_reserved2	:	16;
+	} pal_version_s;
+} pal_version_u_t;
+
+
+/* Return PAL version information */
+static inline s64
+ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0);
+	if (pal_min_version)
+		pal_min_version->pal_version_val = iprv.v0;
+
+	if (pal_cur_version)
+		pal_cur_version->pal_version_val = iprv.v1;
+
+	return iprv.status;
+}
+
+typedef union pal_tc_info_u {
+	u64			pti_val;
+	struct {
+	       u64		num_sets	:	8,
+				associativity	:	8,
+				num_entries	:	16,
+				pf		:	1,
+				unified		:	1,
+				reduce_tr	:	1,
+				reserved	:	29;
+	} pal_tc_info_s;
+} pal_tc_info_u_t;
+
+#define tc_reduce_tr		pal_tc_info_s.reduce_tr
+#define tc_unified		pal_tc_info_s.unified
+#define tc_pf			pal_tc_info_s.pf
+#define tc_num_entries		pal_tc_info_s.num_entries
+#define tc_associativity	pal_tc_info_s.associativity
+#define tc_num_sets		pal_tc_info_s.num_sets
+
+
+/* Return information about the virtual memory characteristics of the processor
+ * implementation.
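+ *
+ * e.g. a sketch that reads the geometry of one translation cache
+ * (tc_level/tc_type encodings as defined by the PAL spec):
+ *
+ *	pal_tc_info_u_t tc;
+ *	u64 pages, entries;
+ *	if (ia64_pal_vm_info(tc_level, tc_type, &tc, &pages) == 0)
+ *		entries = tc.pal_tc_info_s.num_entries;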
+ */ +static inline s64 +ia64_pal_vm_info (u64 tc_level, u64 tc_type, pal_tc_info_u_t *tc_info, u64 *tc_pages) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0); + if (tc_info) + tc_info->pti_val = iprv.v0; + if (tc_pages) + *tc_pages = iprv.v1; + return iprv.status; +} + +/* Get page size information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 +ia64_pal_vm_page_size (u64 *tr_pages, u64 *vw_pages) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0); + if (tr_pages) + *tr_pages = iprv.v0; + if (vw_pages) + *vw_pages = iprv.v1; + return iprv.status; +} + +typedef union pal_vm_info_1_u { + u64 pvi1_val; + struct { + u64 vw : 1, + phys_add_size : 7, + key_size : 8, + max_pkr : 8, + hash_tag_id : 8, + max_dtr_entry : 8, + max_itr_entry : 8, + max_unique_tcs : 8, + num_tc_levels : 8; + } pal_vm_info_1_s; +} pal_vm_info_1_u_t; + +typedef union pal_vm_info_2_u { + u64 pvi2_val; + struct { + u64 impl_va_msb : 8, + rid_size : 8, + reserved : 48; + } pal_vm_info_2_s; +} pal_vm_info_2_u_t; + +/* Get summary information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 +ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0); + if (vm_info_1) + vm_info_1->pvi1_val = iprv.v0; + if (vm_info_2) + vm_info_2->pvi2_val = iprv.v1; + return iprv.status; +} + +typedef union pal_itr_valid_u { + u64 piv_val; + struct { + u64 access_rights_valid : 1, + priv_level_valid : 1, + dirty_bit_valid : 1, + mem_attr_valid : 1, + reserved : 60; + } pal_tr_valid_s; +} pal_tr_valid_u_t; + +/* Read a translation register */ +static inline s64 +ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer)); + if (tr_valid) + tr_valid->piv_val = iprv.v0; + return iprv.status; +} + +/* + * PAL_PREFETCH_VISIBILITY transaction types + */ +#define PAL_VISIBILITY_VIRTUAL 0 +#define PAL_VISIBILITY_PHYSICAL 1 + +/* + * PAL_PREFETCH_VISIBILITY return codes + */ +#define PAL_VISIBILITY_OK 1 +#define PAL_VISIBILITY_OK_REMOTE_NEEDED 0 +#define PAL_VISIBILITY_INVAL_ARG -2 +#define PAL_VISIBILITY_ERROR -3 + +static inline s64 +ia64_pal_prefetch_visibility (s64 trans_type) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0); + return iprv.status; +} + +#ifdef CONFIG_VTI +#include <asm/vmx_pal.h> +#endif // CONFIG_VTI +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_PAL_H */ diff --git a/xen/include/asm-ia64/privop.h b/xen/include/asm-ia64/privop.h index 1dc1f8e855..4cad79ee3a 100644 --- a/xen/include/asm-ia64/privop.h +++ b/xen/include/asm-ia64/privop.h @@ -2,7 +2,11 @@ #define _XEN_IA64_PRIVOP_H #include <asm/ia64_int.h> +#ifdef CONFIG_VTI +#include <asm/vmx_vcpu.h> +#else //CONFIG_VTI #include <asm/vcpu.h> +#endif //CONFIG_VTI typedef unsigned long IA64_INST; @@ -129,6 +133,13 @@ typedef union U_INST64_M46 { struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, x3:3, un1:1, major:4; }; } INST64_M46; +#ifdef CONFIG_VTI +typedef union U_INST64_M47 { + IA64_INST inst; + struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; +} INST64_M47; +#endif // CONFIG_VTI + typedef union U_INST64 { IA64_INST inst; struct { unsigned long :37, major:4; } generic; @@ -154,6 +165,9 @@ 
typedef union U_INST64 {
     INST64_M44 M44;	// set/reset system mask
     INST64_M45 M45;	// translation purge
     INST64_M46 M46;	// translation access (tpa,tak)
+#ifdef CONFIG_VTI
+    INST64_M47 M47;	// purge translation entry
+#endif // CONFIG_VTI
 } INST64;
 
 #define MASK_41 ((UINT64)0x1ffffffffff)
diff --git a/xen/include/asm-ia64/ptrace.h b/xen/include/asm-ia64/ptrace.h
new file mode 100644
index 0000000000..d4487878f8
--- /dev/null
+++ b/xen/include/asm-ia64/ptrace.h
@@ -0,0 +1,341 @@
+#ifndef _ASM_IA64_PTRACE_H
+#define _ASM_IA64_PTRACE_H
+
+/*
+ * Copyright (C) 1998-2004 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ *	Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2003 Intel Co
+ *	Suresh Siddha <suresh.b.siddha@intel.com>
+ *	Fenghua Yu <fenghua.yu@intel.com>
+ *	Arun Sharma <arun.sharma@intel.com>
+ *
+ * 12/07/98	S. Eranian	added pt_regs & switch_stack
+ * 12/21/98	D. Mosberger	updated to match latest code
+ *  6/17/99	D. Mosberger	added second unat member to "struct switch_stack"
+ *
+ */
+/*
+ * When a user process is blocked, its state looks as follows:
+ *
+ *            +----------------------+ -------	IA64_STK_OFFSET
+ *            |                      |   ^
+ *            | struct pt_regs       |   |
+ *            |                      |   |
+ *            +----------------------+   |
+ *            |                      |   |
+ *            |    memory stack      |   |
+ *            |  (growing downwards) |   |
+ *            //.....................//  |
+ *                                       |
+ *            //.....................//  |
+ *            |                      |   |
+ *            +----------------------+   |
+ *            | struct switch_stack  |   |
+ *            |                      |   |
+ *            +----------------------+   |
+ *            |                      |   |
+ *            //.....................//  |
+ *                                       |
+ *            //.....................//  |
+ *            |                      |   |
+ *            |  register stack      |   |
+ *            | (growing upwards)    |   |
+ *            |                      |   |
+ *            +----------------------+   | ---	IA64_RBS_OFFSET
+ *            |  struct thread_info  |   |  ^
+ *            +----------------------+   |  |
+ *            |                      |   |  |
+ *            |  struct task_struct  |   |  |
+ * current -> |                      |   |  |
+ *            +----------------------+ -------
+ *
+ * Note that ar.ec is not saved explicitly in pt_regs or switch_stack.
+ * This is because ar.ec is saved as part of ar.pfs.
+ */
+
+#include <linux/config.h>
+
+#include <asm/fpu.h>
+#include <asm/offsets.h>
+
+/*
+ * Base-2 logarithm of number of pages to allocate per task structure
+ * (including register backing store and memory stack):
+ */
+#if defined(CONFIG_IA64_PAGE_SIZE_4KB)
+# define KERNEL_STACK_SIZE_ORDER	3
+#elif defined(CONFIG_IA64_PAGE_SIZE_8KB)
+# define KERNEL_STACK_SIZE_ORDER	2
+#elif defined(CONFIG_IA64_PAGE_SIZE_16KB)
+# define KERNEL_STACK_SIZE_ORDER	1
+#else
+# define KERNEL_STACK_SIZE_ORDER	0
+#endif
+
+#define IA64_RBS_OFFSET		((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 15) & ~15)
+#define IA64_STK_OFFSET		((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE)
+
+#define KERNEL_STACK_SIZE	IA64_STK_OFFSET
+
+#ifndef __ASSEMBLY__
+
+#include <asm/current.h>
+#include <asm/page.h>
+
+/*
+ * This struct defines the way the registers are saved on system
+ * calls.
+ *
+ * We don't save all floating point registers because the kernel
+ * is compiled to use only a very small subset, so the others are
+ * untouched.
+ *
+ * THIS STRUCTURE MUST BE A MULTIPLE OF 16 BYTES IN SIZE
+ * (because the memory stack pointer MUST ALWAYS be aligned this way)
+ *
+ */
+#ifdef CONFIG_VTI
+#include "vmx_ptrace.h"
+#else //CONFIG_VTI
+struct pt_regs {
+	/* The following registers are saved by SAVE_MIN: */
+	unsigned long b6;		/* scratch */
+	unsigned long b7;		/* scratch */
+
+	unsigned long ar_csd;		/* used by cmp8xchg16 (scratch) */
+	unsigned long ar_ssd;		/* reserved for future use (scratch) */
+
+	unsigned long r8;		/* scratch (return value register 0) */
+	unsigned long r9;		/* scratch (return value register 1) */
+	unsigned long r10;		/* scratch (return value register 2) */
+	unsigned long r11;		/* scratch (return value register 3) */
+
+	unsigned long cr_ipsr;		/* interrupted task's psr */
+	unsigned long cr_iip;		/* interrupted task's instruction pointer */
+	/*
+	 * interrupted task's function state; if bit 63 is cleared, it
+	 * contains syscall's ar.pfs.pfm:
+	 */
+	unsigned long cr_ifs;
+
+	unsigned long ar_unat;		/* interrupted task's NaT register (preserved) */
+	unsigned long ar_pfs;		/* prev function state */
+	unsigned long ar_rsc;		/* RSE configuration */
+	/* The following two are valid only if cr_ipsr.cpl > 0: */
+	unsigned long ar_rnat;		/* RSE NaT */
+	unsigned long ar_bspstore;	/* RSE bspstore */
+
+	unsigned long pr;		/* 64 predicate registers (1 bit each) */
+	unsigned long b0;		/* return pointer (bp) */
+	unsigned long loadrs;		/* size of dirty partition << 16 */
+
+	unsigned long r1;		/* the gp pointer */
+	unsigned long r12;		/* interrupted task's memory stack pointer */
+	unsigned long r13;		/* thread pointer */
+
+	unsigned long ar_fpsr;		/* floating point status (preserved) */
+	unsigned long r15;		/* scratch */
+
+	/* The remaining registers are NOT saved for system calls. */
+
+	unsigned long r14;		/* scratch */
+	unsigned long r2;		/* scratch */
+	unsigned long r3;		/* scratch */
+
+	/* The following registers are saved by SAVE_REST: */
+	unsigned long r16;		/* scratch */
+	unsigned long r17;		/* scratch */
+	unsigned long r18;		/* scratch */
+	unsigned long r19;		/* scratch */
+	unsigned long r20;		/* scratch */
+	unsigned long r21;		/* scratch */
+	unsigned long r22;		/* scratch */
+	unsigned long r23;		/* scratch */
+	unsigned long r24;		/* scratch */
+	unsigned long r25;		/* scratch */
+	unsigned long r26;		/* scratch */
+	unsigned long r27;		/* scratch */
+	unsigned long r28;		/* scratch */
+	unsigned long r29;		/* scratch */
+	unsigned long r30;		/* scratch */
+	unsigned long r31;		/* scratch */
+
+	unsigned long ar_ccv;		/* compare/exchange value (scratch) */
+
+	/*
+	 * Floating point registers that the kernel considers scratch:
+	 */
+	struct ia64_fpreg f6;		/* scratch */
+	struct ia64_fpreg f7;		/* scratch */
+	struct ia64_fpreg f8;		/* scratch */
+	struct ia64_fpreg f9;		/* scratch */
+	struct ia64_fpreg f10;		/* scratch */
+	struct ia64_fpreg f11;		/* scratch */
+};
+#endif // CONFIG_VTI
+
+/*
+ * This structure contains the additional registers that need to be
+ * preserved across a context switch.  This generally consists of
+ * "preserved" registers.
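+ *
+ * (Both this layout and pt_regs above are ABI.  A compile-time check
+ * such as
+ *	BUILD_BUG_ON(sizeof(struct pt_regs) % 16 != 0);
+ * is one way to catch accidental padding, assuming a BUILD_BUG_ON-style
+ * macro is available.)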
+ */ +struct switch_stack { + unsigned long caller_unat; /* user NaT collection register (preserved) */ + unsigned long ar_fpsr; /* floating-point status register */ + + struct ia64_fpreg f2; /* preserved */ + struct ia64_fpreg f3; /* preserved */ + struct ia64_fpreg f4; /* preserved */ + struct ia64_fpreg f5; /* preserved */ + + struct ia64_fpreg f12; /* scratch, but untouched by kernel */ + struct ia64_fpreg f13; /* scratch, but untouched by kernel */ + struct ia64_fpreg f14; /* scratch, but untouched by kernel */ + struct ia64_fpreg f15; /* scratch, but untouched by kernel */ + struct ia64_fpreg f16; /* preserved */ + struct ia64_fpreg f17; /* preserved */ + struct ia64_fpreg f18; /* preserved */ + struct ia64_fpreg f19; /* preserved */ + struct ia64_fpreg f20; /* preserved */ + struct ia64_fpreg f21; /* preserved */ + struct ia64_fpreg f22; /* preserved */ + struct ia64_fpreg f23; /* preserved */ + struct ia64_fpreg f24; /* preserved */ + struct ia64_fpreg f25; /* preserved */ + struct ia64_fpreg f26; /* preserved */ + struct ia64_fpreg f27; /* preserved */ + struct ia64_fpreg f28; /* preserved */ + struct ia64_fpreg f29; /* preserved */ + struct ia64_fpreg f30; /* preserved */ + struct ia64_fpreg f31; /* preserved */ + + unsigned long r4; /* preserved */ + unsigned long r5; /* preserved */ + unsigned long r6; /* preserved */ + unsigned long r7; /* preserved */ + + unsigned long b0; /* so we can force a direct return in copy_thread */ + unsigned long b1; + unsigned long b2; + unsigned long b3; + unsigned long b4; + unsigned long b5; + + unsigned long ar_pfs; /* previous function state */ + unsigned long ar_lc; /* loop counter (preserved) */ + unsigned long ar_unat; /* NaT bits for r4-r7 */ + unsigned long ar_rnat; /* RSE NaT collection register */ + unsigned long ar_bspstore; /* RSE dirty base (preserved) */ + unsigned long pr; /* 64 predicate registers (1 bit each) */ +}; + +#ifdef __KERNEL__ +/* + * We use the ia64_psr(regs)->ri to determine which of the three + * instructions in bundle (16 bytes) took the sample. Generate + * the canonical representation by adding to instruction pointer. + */ +# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri) +/* Conserve space in histogram by encoding slot bits in address + * bits 2 and 3 rather than bits 0 and 1. + */ +#define profile_pc(regs) \ +({ \ + unsigned long __ip = instruction_pointer(regs); \ + (__ip & ~3UL) + ((__ip & 3UL) << 2); \ +}) + + /* given a pointer to a task_struct, return the user's pt_regs */ +# define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) +# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) +# define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) +# define user_stack(task,regs) ((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs)) +# define fsys_mode(task,regs) \ + ({ \ + struct task_struct *_task = (task); \ + struct pt_regs *_regs = (regs); \ + !user_mode(_regs) && user_stack(_task, _regs); \ + }) + + /* + * System call handlers that, upon successful completion, need to return a negative value + * should call force_successful_syscall_return() right before returning. On architectures + * where the syscall convention provides for a separate error flag (e.g., alpha, ia64, + * ppc{,64}, sparc{,64}, possibly others), this macro can be used to ensure that the error + * flag will not get set. 
On architectures which do not support a separate error flag,
+ * the macro is a no-op and the spurious error condition needs to be filtered out by some
+ * other means (e.g., in user-level, by passing an extra argument to the syscall handler,
+ * or something along those lines).
+ *
+ * On ia64, we can clear the user's pt_regs->r8 to force a successful syscall.
+ */
+# define force_successful_syscall_return()	(ia64_task_regs(current)->r8 = 0)
+
+  struct task_struct;			/* forward decl */
+  struct unw_frame_info;		/* forward decl */
+
+  extern void show_regs (struct pt_regs *);
+  extern void ia64_do_show_stack (struct unw_frame_info *, void *);
+  extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *,
+					      unsigned long *);
+  extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long,
+			 unsigned long, long *);
+  extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long,
+			 unsigned long, long);
+  extern void ia64_flush_fph (struct task_struct *);
+  extern void ia64_sync_fph (struct task_struct *);
+  extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *,
+				  unsigned long, unsigned long);
+
+  /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */
+  extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat);
+  /* put nat bits for scratch registers such that scratch register rN is a NaT iff bit N==1 */
+  extern unsigned long ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat);
+
+  extern void ia64_increment_ip (struct pt_regs *pt);
+  extern void ia64_decrement_ip (struct pt_regs *pt);
+
+#endif /* __KERNEL__ */
+
+/* pt_all_user_regs is used for PTRACE_GETREGS and PTRACE_SETREGS */
+struct pt_all_user_regs {
+	unsigned long nat;
+	unsigned long cr_iip;
+	unsigned long cfm;
+	unsigned long cr_ipsr;
+	unsigned long pr;
+
+	unsigned long gr[32];
+	unsigned long br[8];
+	unsigned long ar[128];
+	struct ia64_fpreg fr[128];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/* indices to application-registers array in pt_all_user_regs */
+#define PT_AUR_RSC	16
+#define PT_AUR_BSP	17
+#define PT_AUR_BSPSTORE	18
+#define PT_AUR_RNAT	19
+#define PT_AUR_CCV	32
+#define PT_AUR_UNAT	36
+#define PT_AUR_FPSR	40
+#define PT_AUR_PFS	64
+#define PT_AUR_LC	65
+#define PT_AUR_EC	66
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the numbers assigned in <linux/ptrace.h>.
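+ *
+ * A debugger fetches or stores the whole frame in one shot with the
+ * requests defined below, e.g. (a sketch; the pt_all_user_regs pointer
+ * travels in the ptrace data argument):
+ *
+ *	struct pt_all_user_regs pur;
+ *	ptrace(PTRACE_GETREGS, pid, 0, &pur);
+ *	unsigned long ip = pur.cr_iip;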
+ */
+#define PTRACE_SINGLEBLOCK	12	/* resume execution until next branch */
+#define PTRACE_OLD_GETSIGINFO	13	/* (replaced by PTRACE_GETSIGINFO in <linux/ptrace.h>) */
+#define PTRACE_OLD_SETSIGINFO	14	/* (replaced by PTRACE_SETSIGINFO in <linux/ptrace.h>) */
+#define PTRACE_GETREGS		18	/* get all registers (pt_all_user_regs) in one shot */
+#define PTRACE_SETREGS		19	/* set all registers (pt_all_user_regs) in one shot */
+
+#define PTRACE_OLDSETOPTIONS	21
+
+#endif /* _ASM_IA64_PTRACE_H */
diff --git a/xen/include/asm-ia64/regionreg.h b/xen/include/asm-ia64/regionreg.h
index 19641d15df..aa590dfbf9 100644
--- a/xen/include/asm-ia64/regionreg.h
+++ b/xen/include/asm-ia64/regionreg.h
@@ -1,4 +1,12 @@
+#ifndef _REGIONREG_H_
+#define _REGIONREG_H_
+#ifdef CONFIG_VTI
+#define XEN_DEFAULT_RID     0xf00000
+#define DOMAIN_RID_SHIFT    20
+#define DOMAIN_RID_MASK     (~((1U << DOMAIN_RID_SHIFT) - 1))
+#else //CONFIG_VTI
 #define XEN_DEFAULT_RID		7
+#endif // CONFIG_VTI
 #define	IA64_MIN_IMPL_RID_MSB	17
 #define _REGION_ID(x)   ({ia64_rr _v; _v.rrval = (long) (x); _v.rid;})
 #define _REGION_PAGE_SIZE(x)    ({ia64_rr _v; _v.rrval = (long) (x); _v.ps;})
@@ -8,10 +16,10 @@
 typedef union ia64_rr {
 	struct {
 		unsigned long ve        : 1;  /* enable hw walker */
-		unsigned long           : 1;  /* reserved */
+		unsigned long reserved0 : 1;  /* reserved */
 		unsigned long ps        : 6;  /* log page size */
 		unsigned long rid       : 24; /* region id */
-		unsigned long           : 32; /* reserved */
+		unsigned long reserved1 : 32; /* reserved */
 	};
 	unsigned long rrval;
 } ia64_rr;
@@ -31,3 +39,4 @@
 #define RR_RID(arg)	(((arg) & 0x0000000000ffffff) << 8)
 #define RR_RID_MASK	0x00000000ffffff00L
+#endif /* !_REGIONREG_H_ */
diff --git a/xen/include/asm-ia64/regs.h b/xen/include/asm-ia64/regs.h
index f3f803d24c..3f7b97d996 100644
--- a/xen/include/asm-ia64/regs.h
+++ b/xen/include/asm-ia64/regs.h
@@ -1,2 +1,3 @@
 #include <asm/ptrace.h>
 #define cpu_user_regs pt_regs
+#define xen_regs pt_regs
diff --git a/xen/include/asm-ia64/serial.h b/xen/include/asm-ia64/serial.h
index 1ef5776032..faeaf526e8 100644
--- a/xen/include/asm-ia64/serial.h
+++ b/xen/include/asm-ia64/serial.h
@@ -6,20 +6,22 @@
 #include <xen/serial.h>
 #include <asm/hpsim_ssc.h>
 
-#if 1
+#ifndef CONFIG_VTI
 #define arch_serial_putc(_uart, _c) \
     ( platform_is_hp_ski() ? (ia64_ssc(c,0,0,0,SSC_PUTCHAR), 1) : \
       ( longs_peak_putc(c), 1 ))
-#else
+
+#define OPT_COM1_STR "115200"
+#define OPT_COM2_STR ""
+#else // CONFIG_VTI
 #define arch_serial_putc(_uart, _c) \
     ( platform_is_hp_ski() ? (ia64_ssc(c,0,0,0,SSC_PUTCHAR), 1) : \
       ( (inb((_uart)->io_base + LSR) & LSR_THRE) ?
\ (outb((_c), (_uart)->io_base + THR), 1) : 0 )) -#endif - -#define OPT_COM1_STR "115200" -#define OPT_COM2_STR "" +#define OPT_COM1_STR "" +#define OPT_COM2_STR "57600,8n1" +#endif // CONFIG_VTI unsigned char irq_serial_getc(int handle); diff --git a/xen/include/asm-ia64/tlb.h b/xen/include/asm-ia64/tlb.h index 84c08e8bd9..7947bf3dcc 100644 --- a/xen/include/asm-ia64/tlb.h +++ b/xen/include/asm-ia64/tlb.h @@ -34,4 +34,18 @@ typedef struct { unsigned long vadr; unsigned long rid; } TR_ENTRY; + +#ifdef CONFIG_VTI +typedef union { + unsigned long value; + struct { + uint64_t ve : 1; + uint64_t rv1 : 1; + uint64_t ps : 6; + uint64_t rid : 24; + uint64_t rv2 : 32; + }; +} rr_t; +#endif // CONFIG_VTI + #endif diff --git a/xen/include/asm-ia64/virt_event.h b/xen/include/asm-ia64/virt_event.h new file mode 100644 index 0000000000..d0b66afd7e --- /dev/null +++ b/xen/include/asm-ia64/virt_event.h @@ -0,0 +1,114 @@ +#ifndef __VIRT_EVENT_H__ +#define __VIRT_EVENT_H__ + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * virt_event.h: + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ *
+ * Shaofan Li (Susie Li) (susie.li@intel.com)
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+#define EVENT_MOV_TO_AR          1
+#define EVENT_MOV_TO_AR_IMM      2
+#define EVENT_MOV_FROM_AR        3
+#define EVENT_MOV_TO_CR          4
+#define EVENT_MOV_FROM_CR        5
+#define EVENT_MOV_TO_PSR         6
+#define EVENT_MOV_FROM_PSR       7
+#define EVENT_ITC_D              8
+#define EVENT_ITC_I              9
+#define EVENT_MOV_TO_RR         10
+#define EVENT_MOV_TO_DBR        11
+#define EVENT_MOV_TO_IBR        12
+#define EVENT_MOV_TO_PKR        13
+#define EVENT_MOV_TO_PMC        14
+#define EVENT_MOV_TO_PMD        15
+#define EVENT_ITR_D             16
+#define EVENT_ITR_I             17
+#define EVENT_MOV_FROM_RR       18
+#define EVENT_MOV_FROM_DBR      19
+#define EVENT_MOV_FROM_IBR      20
+#define EVENT_MOV_FROM_PKR      21
+#define EVENT_MOV_FROM_PMC      22
+#define EVENT_MOV_FROM_CPUID    23
+#define EVENT_SSM               24
+#define EVENT_RSM               25
+#define EVENT_PTC_L             26
+#define EVENT_PTC_G             27
+#define EVENT_PTC_GA            28
+#define EVENT_PTR_D             29
+#define EVENT_PTR_I             30
+#define EVENT_THASH             31
+#define EVENT_TTAG              32
+#define EVENT_TPA               33
+#define EVENT_TAK               34
+#define EVENT_PTC_E             35
+#define EVENT_COVER             36
+#define EVENT_RFI               37
+#define EVENT_BSW_0             38
+#define EVENT_BSW_1             39
+#define EVENT_VMSW              40
+
+#if 0
+/* VMAL 1.0 */
+#define EVENT_MOV_TO_AR          1
+#define EVENT_MOV_TO_AR_IMM      2
+#define EVENT_MOV_FROM_AR        3
+#define EVENT_MOV_TO_CR          4
+#define EVENT_MOV_FROM_CR        5
+#define EVENT_MOV_TO_PSR         6
+#define EVENT_MOV_FROM_PSR       7
+#define EVENT_ITC_D              8
+#define EVENT_ITC_I              9
+#define EVENT_MOV_TO_RR         10
+#define EVENT_MOV_TO_DBR        11
+#define EVENT_MOV_TO_IBR        12
+#define EVENT_MOV_TO_PKR        13
+#define EVENT_MOV_TO_PMC        14
+#define EVENT_MOV_TO_PMD        15
+#define EVENT_ITR_D             16
+#define EVENT_ITR_I             17
+#define EVENT_MOV_FROM_RR       18
+#define EVENT_MOV_FROM_DBR      19
+#define EVENT_MOV_FROM_IBR      20
+#define EVENT_MOV_FROM_PKR      21
+#define EVENT_MOV_FROM_PMC      22
+#define EVENT_MOV_FROM_PMD      23
+#define EVENT_MOV_FROM_CPUID    24
+#define EVENT_SSM               25
+#define EVENT_RSM               26
+#define EVENT_PTC_L             27
+#define EVENT_PTC_G             28
+#define EVENT_PTC_GA            29
+#define EVENT_PTR_D             30
+#define EVENT_PTR_I             31
+#define EVENT_THASH             32
+#define EVENT_TTAG              33
+#define EVENT_TPA               34
+#define EVENT_TAK               35
+#define EVENT_PTC_E             36
+#define EVENT_COVER             37
+#define EVENT_RFI               38
+#define EVENT_BSW_0             39
+#define EVENT_BSW_1             40
+#define EVENT_VMSW              41
+
+
+#endif /* VMAL 1.0, superseded by the VMAL 2.0 numbering above */
+#endif /* __VIRT_EVENT_H__ */
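The decoder maps each trapping privileged guest instruction to one of the EVENT_* codes above, and a dispatcher then selects a handler from them. A minimal sketch of a table-driven dispatch is below; the handler table, vcpu type and init step are hypothetical stand-ins, not part of this changeset:

    #include <stddef.h>

    #define EVENT_MAX 41                  /* one past EVENT_VMSW (40) */

    typedef void (*event_handler_t)(void *vcpu, unsigned long bundle);

    static event_handler_t handlers[EVENT_MAX];   /* wired up at VMX init */

    /* Dispatch one decoded virtualization event; out-of-range or unwired
     * events would be reflected back to the guest as a fault (not shown). */
    static void dispatch_virt_event(unsigned int event, void *vcpu,
                                    unsigned long bundle)
    {
        if (event < EVENT_MAX && handlers[event] != NULL)
            handlers[event](vcpu, bundle);
    }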
diff --git a/xen/include/asm-ia64/vmmu.h b/xen/include/asm-ia64/vmmu.h
new file mode 100644
index 0000000000..717a7e809b
--- /dev/null
+++ b/xen/include/asm-ia64/vmmu.h
@@ -0,0 +1,344 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmmu.h: virtual memory management unit related APIs and data structures.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#ifndef XEN_TLBthash_H
+#define XEN_TLBthash_H
+
+#include "xen/config.h"
+#include "xen/types.h"
+#include "public/xen.h"
+#include "asm/tlb.h"
+
+#define THASH_TLB_TR        0
+#define THASH_TLB_TC        1
+#define THASH_TLB_FM        2   // foreign map
+
+#define THASH_SECTION_TR    (1<<0)
+#define THASH_SECTION_TC    (1<<1)
+#define THASH_SECTION_FM    (1<<2)
+
+/*
+ * The next bit definitions must match THASH_TLB_XX above.
+ */
+typedef union search_section {
+    struct {
+        u32 tr  : 1;
+        u32 tc  : 1;
+        u32 fm  : 1;
+        u32 rsv : 29;
+    };
+    u32 v;
+} search_section_t;
+
+#define MAX_CCN_DEPTH       4   // collision chain depth
+#define VCPU_TLB_SHIFT      (22)
+#define VCPU_TLB_SIZE       (1UL<<VCPU_TLB_SHIFT)
+#define VCPU_TLB_ORDER      (VCPU_TLB_SHIFT - PAGE_SHIFT)
+#define PTA_BASE_SHIFT      (15)
+
+#ifndef __ASSEMBLY__
+#define HIGH_32BITS(x)  bits(x,32,63)
+#define LOW_32BITS(x)   bits(x,0,31)
+
+typedef enum {
+    ISIDE_TLB=0,
+    DSIDE_TLB=1
+} CACHE_LINE_TYPE;
+
+typedef struct thash_data {
+    union {
+        struct {
+            u64 p    :  1; // 0
+            u64 rv1  :  1; // 1
+            u64 ma   :  3; // 2-4
+            u64 a    :  1; // 5
+            u64 d    :  1; // 6
+            u64 pl   :  2; // 7-8
+            u64 ar   :  3; // 9-11
+            u64 ppn  : 38; // 12-49
+            u64 rv2  :  2; // 50-51
+            u64 ed   :  1; // 52
+            u64 ig1  : 11; // 53-63
+        };
+        struct {
+            u64 __rv1  : 12;
+            // sizeof(domid_t) must be less than 38!!! Refer to its definition.
+            u64 fm_dom : 38;    // 12-49 foreign map domain ID
+            u64 __rv2  :  3;    // 50-52
+            // next extension to ig1, only for TLB instance
+            u64 section : 2;    // 53-54 TR, TC or FM (THASH_TLB_XX)
+            CACHE_LINE_TYPE cl : 1; // 55 I side or D side cache line
+            u64 nomap   : 1;    // 56 entry can't be inserted into the machine TLB
+            u64 __ig1   : 5;    // 57-61
+            u64 checked : 1;    // 62 for VTLB/VHPT sanity check
+            u64 invalid : 1;    // 63 invalid entry
+        };
+        u64 page_flags;
+    };                          // same for VHPT and TLB
+
+    union {
+        struct {
+            u64 rv3  :  2; // 0-1
+            u64 ps   :  6; // 2-7
+            u64 key  : 24; // 8-31
+            u64 rv4  : 32; // 32-63
+        };
+        struct {
+            u64 __rv3 : 32; // 0-31
+            // next extension to rv4
+            u64 rid   : 24; // 32-55
+            u64 __rv4 :  8; // 56-63
+        };
+        u64 itir;
+    };
+    union {
+        struct {                // For TLB
+            u64 ig2 : 12; // 0-11
+            u64 vpn : 49; // 12-60
+            u64 vrn :  3; // 61-63
+        };
+        u64 vadr;
+        u64 ifa;
+        struct {                // For VHPT
+            u64 tag : 63; // 0-62
+            u64 ti  :  1; // 63, invalid entry for VHPT
+        };
+        u64 etag;               // extended tag for VHPT
+    };
+    union {
+        struct thash_data *next;
+        u64 tr_idx;
+    };
+} thash_data_t;
+
+#define INVALID_VHPT(hdata)     ((hdata)->ti)
+#define INVALID_TLB(hdata)      ((hdata)->invalid)
+#define INVALID_ENTRY(hcb, hdata) \
+        ((hcb)->ht==THASH_TLB ? INVALID_TLB(hdata) : INVALID_VHPT(hdata))
+
+typedef enum {
+    THASH_TLB=0,
+    THASH_VHPT
+} THASH_TYPE;
+
+struct thash_cb;
+typedef union thash_cch_mem {
+    thash_data_t    data;
+    union thash_cch_mem *next;
+} thash_cch_mem_t;
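The thash_cch_mem_t union above overlays a free-list link on an entry that is not in use, so collision-chain nodes can be handed out from one preallocated buffer without a separate allocator. A standalone sketch of that pattern follows; all names here are local stand-ins, not the patch's own API:

    #include <stddef.h>

    typedef struct node { long payload[8]; } node_t;   /* stand-in for thash_data_t */

    typedef union cch_mem {
        node_t data;
        union cch_mem *next;
    } cch_mem_t;

    /* Thread a free list through a raw buffer of 'n' nodes; returns the head. */
    static cch_mem_t *cch_init(cch_mem_t *buf, size_t n)
    {
        size_t i;
        for (i = 0; i + 1 < n; i++)
            buf[i].next = &buf[i + 1];
        buf[n - 1].next = NULL;
        return buf;
    }

    static node_t *cch_alloc(cch_mem_t **head)
    {
        cch_mem_t *p = *head;
        if (p == NULL)
            return NULL;            /* exhausted: caller must recycle entries */
        *head = p->next;
        return &p->data;
    }

    static void cch_free(cch_mem_t **head, node_t *n)
    {
        cch_mem_t *p = (cch_mem_t *)n;
        p->next = *head;
        *head = p;
    }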
+/*
+ * Used to calculate the hash index of a thash_data_t.
+ */
+typedef u64 *(THASH_FN)(PTA pta, u64 va, u64 rid, u64 ps);
+typedef u64 *(TTAG_FN)(PTA pta, u64 va, u64 rid, u64 ps);
+typedef u64 *(GET_MFN_FN)(domid_t d, u64 gpfn, u64 pages);
+typedef void *(REM_NOTIFIER_FN)(struct thash_cb *hcb, thash_data_t *entry);
+typedef void (RECYCLE_FN)(struct thash_cb *hc, u64 para);
+typedef rr_t (GET_RR_FN)(struct exec_domain *vcpu, u64 reg);
+typedef thash_data_t *(FIND_OVERLAP_FN)(struct thash_cb *hcb,
+        u64 va, u64 ps, int rid, char cl, search_section_t s_sect);
+typedef thash_data_t *(FIND_NEXT_OVL_FN)(struct thash_cb *hcb);
+typedef void (REM_THASH_FN)(struct thash_cb *hcb, thash_data_t *entry);
+typedef void (INS_THASH_FN)(struct thash_cb *hcb, thash_data_t *entry, u64 va);
+
+typedef struct tlb_special {
+    thash_data_t     itr[NITRS];
+    thash_data_t     dtr[NDTRS];
+    struct thash_cb  *vhpt;
+} tlb_special_t;
+
+typedef struct vhpt_cb {
+    //u64     pta;    // pta value.
+    GET_MFN_FN      *get_mfn;
+    TTAG_FN         *tag_func;
+} vhpt_special;
+
+typedef struct thash_internal {
+    thash_data_t *hash_base;
+    thash_data_t *cur_cch;      // head of overlap search
+    int     rid;
+    int     ps;
+    union {
+        u64  tag;               // for VHPT
+        struct {                // for TLB
+            char    _tr_idx;    // -1 means the TR search is done
+            char    cl;
+            search_section_t s_sect;    // search section combinations
+        };
+    };
+    u64     _curva;             // current address to search
+    u64     _eva;
+} thash_internal_t;
+
+#define THASH_CB_MAGIC      0x55aa00aa55aa55aaUL
+typedef struct thash_cb {
+    /* THASH base information */
+    THASH_TYPE  ht;             // For TLB or VHPT
+    u64         magic;
+    thash_data_t *hash;         // hash table pointer, aligned at thash_sz
+    u64     hash_sz;            // size of the above data
+    void    *cch_buf;           // base address of the collision chain
+    u64     cch_sz;             // size of the above data
+    THASH_FN    *hash_func;
+    GET_RR_FN   *get_rr_fn;
+    RECYCLE_FN  *recycle_notifier;
+    thash_cch_mem_t *cch_freelist;
+    struct exec_domain *vcpu;
+    PTA     pta;
+    /* VTLB/VHPT common information */
+    FIND_OVERLAP_FN *find_overlap;
+    FIND_NEXT_OVL_FN *next_overlap;
+    REM_THASH_FN    *rem_hash;  // remove hash entry
+    INS_THASH_FN    *ins_hash;  // insert hash entry
+    REM_NOTIFIER_FN *remove_notifier;
+    /* private information */
+    thash_internal_t  priv;
+    union {
+        tlb_special_t  *ts;
+        vhpt_special   *vs;
+    };
+    // Internal position information, buffer and storage etc. TBD
+} thash_cb_t;
+
+#define ITR(hcb,id)             ((hcb)->ts->itr[id])
+#define DTR(hcb,id)             ((hcb)->ts->dtr[id])
+#define INVALIDATE_HASH(hcb,hash)   {   \
+        INVALID_ENTRY(hcb, hash) = 1;   \
+        hash->next = NULL; }
+
+#define PURGABLE_ENTRY(hcb,en)  \
+        ((hcb)->ht == THASH_VHPT || (en)->section == THASH_TLB_TC)
+
+
+/*
+ * Initialize the internal control data before use.
+ */
+extern void thash_init(thash_cb_t *hcb, u64 sz);
+
+/*
+ * Insert an entry into the hash table.
+ * NOTES:
+ *      1: A TLB entry may be a TR, TC or foreign map. For a TR entry,
+ *         itr[]/dtr[] need to be updated too.
+ *      2: Inserting into the collision chain may trigger recycling if
+ *         the buffer for the collision chain is empty.
+ *      3: The new entry is inserted at the head of the hash bucket
+ *         (i.e. the head of the collision chain).
+ *      4: The entry ends up in the hash table or its collision chain.
+ *
+ */
+extern void thash_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va);
+extern void thash_tr_insert(thash_cb_t *hcb, thash_data_t *entry, u64 va, int idx);
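An illustrative caller, not part of the patch: how an itc.d emulation might fill a TC entry and hand it to thash_insert(), assuming the declarations above are in scope and that the guest's operands arrive as (pte, itir, ifa). Note the union layout makes the assignment order matter: page_flags is written first, then the section/cl bits that share the same word are overridden.

    /* Illustrative only. */
    static void example_itc_d(thash_cb_t *vtlb, u64 pte, u64 itir, u64 ifa)
    {
        thash_data_t entry;

        entry.page_flags = pte;             /* p, ma, a, d, pl, ar, ppn ... */
        entry.itir       = itir;            /* ps and key                   */
        entry.vadr       = ifa;             /* faulting virtual address     */
        entry.section    = THASH_TLB_TC;    /* a translation-cache entry    */
        entry.cl         = DSIDE_TLB;       /* data-side entry              */

        thash_insert(vtlb, &entry, ifa);
    }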
+/*
+ * Forcibly delete a found entry, whether it is a TR, TC or foreign-map
+ * entry of the TLB.
+ * NOTES:
+ *      1: A TLB entry may be a TR, TC or foreign map. For a TR entry,
+ *         itr[]/dtr[] need to be updated too.
+ *      2: This API must be called after thash_find_overlap() or
+ *         thash_find_next_overlap().
+ *      3: Both functions return void.
+ *
+ */
+extern void thash_remove(thash_cb_t *hcb, thash_data_t *entry);
+extern void thash_tr_remove(thash_cb_t *hcb, thash_data_t *entry/*, int idx*/);
+
+/*
+ * Find an overlapping entry in the hash table and its collision chain.
+ * Refer to SDM2 4.1.1.4 for the definition of overlap.
+ * PARAMS:
+ *      1: in: a TLB-format entry; rid and ps must match vrr[].
+ *         va & ps identify the address space for the overlap lookup.
+ *      2: section can be any combination of TR, TC and FM (THASH_SECTION_XX).
+ *      3: cl means I side or D side.
+ * RETURNS:
+ *      NULL to indicate the end of findings.
+ * NOTES:
+ *
+ */
+extern thash_data_t *thash_find_overlap(thash_cb_t *hcb,
+        thash_data_t *in, search_section_t s_sect);
+extern thash_data_t *thash_find_overlap_ex(thash_cb_t *hcb,
+        u64 va, u64 ps, int rid, char cl, search_section_t s_sect);
+
+
+/*
+ * Similar to thash_find_overlap(), but finds the next entry.
+ * NOTES:
+ *      Intermediate position information is stored in hcb->priv.
+ */
+extern thash_data_t *thash_find_next_overlap(thash_cb_t *hcb);
+
+/*
+ * Find and purge overlapping entries in the hash table and its
+ * collision chain.
+ * PARAMS:
+ *      1: in: a TLB-format entry; rid and ps must match vrr[].
+ *         rid, va & ps identify the address space for the purge.
+ *      2: section can be any combination of TR, TC and FM (THASH_SECTION_XX).
+ *      3: cl means I side or D side.
+ * NOTES:
+ *
+ */
+extern void thash_purge_entries(thash_cb_t *hcb,
+        thash_data_t *in, search_section_t p_sect);
+extern void thash_purge_entries_ex(thash_cb_t *hcb,
+        u64 rid, u64 va, u64 sz,
+        search_section_t p_sect,
+        CACHE_LINE_TYPE cl);
+extern thash_cb_t *init_domain_tlb(struct exec_domain *d);
+
+/*
+ * Purge all TCs or VHPT entries, including those in the hash table.
+ *
+ */
+extern void thash_purge_all(thash_cb_t *hcb);
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va.
+ *
+ */
+extern thash_data_t *vtlb_lookup(thash_cb_t *hcb,
+        thash_data_t *in);
+extern thash_data_t *vtlb_lookup_ex(thash_cb_t *hcb,
+        u64 rid, u64 va, CACHE_LINE_TYPE cl);
+
+
+#define ITIR_RV_MASK        (((1UL<<32)-1)<<32 | 0x3)
+#define PAGE_FLAGS_RV_MASK  (0x2 | (0x3UL<<50)|(((1UL<<11)-1)<<53))
+extern u64 machine_ttag(PTA pta, u64 va, u64 rid, u64 ps);
+extern u64 machine_thash(PTA pta, u64 va, u64 rid, u64 ps);
+extern void purge_machine_tc_by_domid(domid_t domid);
+extern void machine_tlb_insert(struct exec_domain *d, thash_data_t *tlb);
+extern rr_t vmmu_get_rr(struct exec_domain *vcpu, u64 va);
+
+#define VTLB_DEBUG
+#ifdef VTLB_DEBUG
+extern void check_vtlb_sanity(thash_cb_t *vtlb);
+extern void dump_vtlb(thash_cb_t *vtlb);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* XEN_TLBthash_H */
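Pieced together from the declarations above, a data-TLB miss would be served roughly as follows. This is a hedged sketch of the control flow only; fault injection and the machine-TLB fill are left to the caller:

    /* Sketch of a data-TLB miss path built on the vmmu.h API above. */
    static thash_data_t *example_dtlb_miss(thash_cb_t *vtlb, u64 rid, u64 vadr)
    {
        thash_data_t *hit;

        /* Walk the hash bucket and its collision chain for a covering entry. */
        hit = vtlb_lookup_ex(vtlb, rid, vadr, DSIDE_TLB);
        if (hit == NULL)
            return NULL;    /* no guest mapping: reflect the fault to the guest */

        /* A hit can then be propagated into the machine TLB by the caller,
         * e.g. via machine_tlb_insert() on the owning vcpu. */
        return hit;
    }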
diff --git a/xen/include/asm-ia64/vmx.h b/xen/include/asm-ia64/vmx.h
new file mode 100644
index 0000000000..82ce400dee
--- /dev/null
+++ b/xen/include/asm-ia64/vmx.h
@@ -0,0 +1,35 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx.h: prototypes for the general VMX-related interface
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+
+#ifndef _ASM_IA64_VT_H
+#define _ASM_IA64_VT_H
+
+extern void identify_vmx_feature(void);
+extern unsigned int vmx_enabled;
+extern void vmx_init_env(void);
+extern void vmx_final_setup_domain(struct domain *d);
+extern void vmx_save_state(struct exec_domain *ed);
+extern void vmx_load_state(struct exec_domain *ed);
+extern void vmx_insert_double_mapping(u64, u64, u64, u64, u64);
+extern void vmx_purge_double_mapping(u64, u64, u64);
+extern void vmx_change_double_mapping(struct exec_domain *ed, u64 oldrr7, u64 newrr7);
+
+#endif /* _ASM_IA64_VT_H */
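The intended call order of these hooks, as suggested by their names, is probe, then environment setup, then per-domain setup. A sketch of that sequence (not the actual boot code in this changeset):

    /* Hypothetical boot-time sequence using the vmx.h interface above. */
    void example_vmx_bringup(struct domain *d)
    {
        identify_vmx_feature();     /* probe PAL for VT-i support        */

        if (!vmx_enabled)
            return;                 /* stay paravirtualized-only         */

        vmx_init_env();             /* set up the PAL virt. environment  */
        vmx_final_setup_domain(d);  /* VPD, vTLB, vtimer for this domain */
    }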
diff --git a/xen/include/asm-ia64/vmx_mm_def.h b/xen/include/asm-ia64/vmx_mm_def.h
new file mode 100644
index 0000000000..3ea642d898
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_mm_def.h
@@ -0,0 +1,176 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_mm_def.h:
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ */
+#ifndef _MM_DEF_H_
+#define _MM_DEF_H_
+
+
+/* VHPT size 4M */
+//#define VHPT_SIZE_PS    22
+//#define VHPT_SIZE   (1 << VHPT_SIZE_PS)
+#define ARCH_PAGE_SHIFT     12
+#define ARCH_PAGE_SIZE      PSIZE(ARCH_PAGE_SHIFT)
+#define INVALID_MFN         (-1)
+
+#define MAX_PHYS_ADDR_BITS  50
+#define PMASK(size)         (~((size) - 1))
+#define PSIZE(size)         (1UL<<(size))
+//#define PAGE_SIZE_4K      PSIZE(12)
+#define POFFSET(vaddr, ps)  ((vaddr) & (PSIZE(ps) - 1))
+#define PPN_2_PA(ppn)       ((ppn)<<12)
+#define CLEARLSB(ppn, nbits)    ((((uint64_t)ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps)   ((va) & ~(PSIZE(ps)-1))
+
+#define TLB_AR_R        0
+#define TLB_AR_RX       1
+#define TLB_AR_RW       2
+#define TLB_AR_RWX      3
+#define TLB_AR_R_RW     4
+#define TLB_AR_RX_RWX   5
+#define TLB_AR_RWX_RW   6
+#define TLB_AR_XP       7
+
+#define IA64_ISR_CODE_MASK0             0xf
+#define IA64_UNIMPL_DADDR_FAULT         0x30
+#define IA64_UNIMPL_IADDR_TRAP          0x10
+#define IA64_RESERVED_REG_FAULT         0x30
+#define IA64_REG_NAT_CONSUMPTION_FAULT  0x10
+#define IA64_NAT_CONSUMPTION_FAULT      0x20
+#define IA64_PRIV_OP_FAULT              0x10
+
+#define DEFER_NONE      0
+#define DEFER_ALWAYS    0x1
+#define DEFER_DM        0x100   /* bit 8 */
+#define DEFER_DP        0x200   /* bit 9 */
+#define DEFER_DK        0x400   /* bit 10 */
+#define DEFER_DX        0x800   /* bit 11 */
+#define DEFER_DR        0x1000  /* bit 12 */
+#define DEFER_DA        0x2000  /* bit 13 */
+#define DEFER_DD        0x4000  /* bit 14 */
+
+#define ACCESS_RIGHT(a) ((a) & (ACCESS_FETCHADD - 1))
+
+#define ACCESS_READ     0x1
+#define ACCESS_WRITE    0x2
+#define ACCESS_EXECUTE  0x4
+#define ACCESS_XP0      0x8
+#define ACCESS_XP1      0x10
+#define ACCESS_XP2      0x20
+#define ACCESS_FETCHADD 0x40
+#define ACCESS_XCHG     0x80
+#define ACCESS_CMPXCHG  0x100
+
+#define ACCESS_SIZE_1   0x10000
+#define ACCESS_SIZE_2   0x20000
+#define ACCESS_SIZE_4   0x40000
+#define ACCESS_SIZE_8   0x80000
+#define ACCESS_SIZE_10  0x100000
+#define ACCESS_SIZE_16  0x200000
+
+#define STLB_TC         0
+#define STLB_TR         1
+
+#define VMM_RR_MASK     0xfffff
+#define VMM_RR_SHIFT    20
+
+#define IA64_RR_SHIFT   61
+
+#define PHYS_PAGE_SHIFT PPN_SHIFT
+
+#define STLB_SZ_SHIFT   8       // 256
+#define STLB_SIZE       (1UL<<STLB_SZ_SHIFT)
+#define STLB_PPS_SHIFT  12
+#define STLB_PPS        (1UL<<STLB_PPS_SHIFT)
+#define GUEST_TRNUM     8
+
+/* Virtual address memory attribute encoding */
+#define VA_MATTR_WB         0x0
+#define VA_MATTR_UC         0x4
+#define VA_MATTR_UCE        0x5
+#define VA_MATTR_WC         0x6
+#define VA_MATTR_NATPAGE    0x7
+
+#define VRN_MASK            0xe000000000000000L
+#define PTA_BASE_MASK       0x3fffffffffffL
+#define PTA_BASE_SHIFT      15
+#define VHPT_OFFSET_MASK    0x7fff
+
+#define BITS_SHIFT_256MB    28
+#define SIZE_256MB          (1UL<<BITS_SHIFT_256MB)
+#define TLB_GR_RV_BITS      ((1UL<<1) | (3UL<<50))
+#define HPA_MAPPING_ATTRIBUTE   0x61    //ED:0;AR:0;PL:0;D:1;A:1;P:1
+#define VPN_2_VRN(vpn)      ((vpn << PPN_SHIFT) >> IA64_VRN_SHIFT)
+
+typedef enum { INSTRUCTION, DATA, REGISTER } miss_type;
+
+//typedef enum { MVHPT, STLB } vtlb_loc_type_t;
+typedef enum { DATA_REF, NA_REF, INST_REF, RSE_REF } vhpt_ref_t;
+
+typedef enum {
+    PIB_MMIO=0,
+    VGA_BUFF,
+    CHIPSET_IO,
+    LOW_MMIO,
+    LEGACY_IO,
+    IO_SAPIC,
+    NOT_IO
+} mmio_type_t;
+
+typedef struct mmio_list {
+    mmio_type_t iot;
+    u64     start;      // start address of this memory IO block
+    u64     end;        // end address (inclusive)
+} mmio_list_t;
+
+static __inline__ uint64_t
+bits_v(uint64_t v, uint32_t bs, uint32_t be)
+{
+    uint64_t result;
+
+    __asm __volatile("shl %0=%1, %2;; shr.u %0=%0, %3;;"
+        : "=r" (result) : "r"(v), "r"(63-be), "r" (bs+63-be) );
+    return result;
+}
+
+#define bits(val, bs, be)                                               \
+({                                                                      \
+        u64        ret;                                                 \
+                                                                        \
+        __asm __volatile("extr.u %0=%1, %2, %3"
\ + : "=r" (ret): "r"(val), \ + "M" ((bs)), \ + "M" ((be) - (bs) + 1) ); \ + ret; \ +}) + +/* + * clear bits (pos, len) from v. + * + */ +#define clearbits(v, pos, len) \ +({ \ + u64 ret; \ + \ + __asm __volatile("dep.z %0=%1, %2, %3" \ + : "=r" (ret): "r"(v), \ + "M" ((pos)), \ + "M" ((len))); \ + ret; \ + }) + +#endif diff --git a/xen/include/asm-ia64/vmx_pal.h b/xen/include/asm-ia64/vmx_pal.h new file mode 100644 index 0000000000..de1c7ccd4a --- /dev/null +++ b/xen/include/asm-ia64/vmx_pal.h @@ -0,0 +1,120 @@ +#ifndef _ASM_IA64_VT_PAL_H +#define _ASM_IA64_VT_PAL_H + +/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */ +/* + * vmx_pal.h: VT-I specific PAL (Processor Abstraction Layer) definitions + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Fred Yang (fred.yang@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + */ + +#include <xen/types.h> +/* PAL PROCEDURE FOR VIRTUALIZATION */ +#define PAL_VP_CREATE 265 +/* Stacked Virt. Initializes a new VPD for the operation of + * a new virtual processor in the virtual environment. +*/ +#define PAL_VP_ENV_INFO 266 +/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ +#define PAL_VP_EXIT_ENV 267 +/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ +#define PAL_VP_INIT_ENV 268 +/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ +#define PAL_VP_REGISTER 269 +/*Stacked Virt. Register a different host IVT for the virtual processor.*/ +#define PAL_VP_RESUME 270 +/* Renamed from PAL_VP_RESUME */ +#define PAL_VP_RESTORE 270 +/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ +#define PAL_VP_SUSPEND 271 +/* Renamed from PAL_VP_SUSPEND */ +#define PAL_VP_SAVE 271 +/* Stacked Virt. Suspends operation for the specified virtual processor on + * the logical processor. + */ +#define PAL_VP_TERMINATE 272 +/* Stacked Virt. 
Terminates operation for the specified virtual processor. */
+
+static inline s64
+ia64_pal_vp_env_info(u64 *buffer_size, u64 *vp_env_info)
+{
+    struct ia64_pal_retval iprv;
+    PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+    *buffer_size = iprv.v0;
+    *vp_env_info = iprv.v1;
+    return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_exit_env(u64 iva)
+{
+    struct ia64_pal_retval iprv;
+    PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+    return iprv.status;
+}
+
+/* config_options in pal_vp_init_env */
+#define VP_INITIALIZE   1UL
+#define VP_FR_PMC       (1UL << 1)
+#define VP_OPCODE       (1UL << 8)
+#define VP_CAUSE        (1UL << 9)
+/* init vp env with initializing vm_buffer */
+#define VP_INIT_ENV_INITALIZE   (VP_INITIALIZE | VP_FR_PMC | VP_OPCODE | VP_CAUSE)
+/* init vp env without initializing vm_buffer */
+#define VP_INIT_ENV     (VP_FR_PMC | VP_OPCODE | VP_CAUSE)
+
+static inline s64
+ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
+                     u64 vbase_addr, u64 *vsa_base)
+{
+    struct ia64_pal_retval iprv;
+    PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
+                 vbase_addr);
+    *vsa_base = iprv.v0;
+    return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
+{
+    struct ia64_pal_retval iprv;
+    PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+                 (u64)opt_handler);
+    return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
+{
+    struct ia64_pal_retval iprv;
+    PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+    return iprv.status;
+}
+
+static inline s64
+ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
+{
+    struct ia64_pal_retval iprv;
+    PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+    return iprv.status;
+}
+
+#define PAL_PROC_VM_BIT     (1UL << 40)
+#define PAL_PROC_VMSW_BIT   (1UL << 54)
+#endif /* _ASM_IA64_VT_PAL_H */
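The wrappers above imply a fixed bring-up order: query the environment first, then initialize it with a buffer of the reported size. A hedged sketch of that sequence; buffer allocation and the virtual-to-physical conversion are left to the caller, since only the PAL call ordering is the point here:

    /* Sketch: caller must have allocated at least buffer_size bytes,
     * reachable at vbase (virtual) / pbase (physical). */
    static s64 example_init_vp_env(u64 vbase, u64 pbase, u64 *vsa_base)
    {
        u64 buffer_size, vp_env_info;
        s64 status;

        status = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
        if (status != 0)
            return status;      /* VT-i environment not available */

        return ia64_pal_vp_init_env(VP_INIT_ENV_INITALIZE,
                                    pbase, vbase, vsa_base);
    }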
diff --git a/xen/include/asm-ia64/vmx_pal_vsa.h b/xen/include/asm-ia64/vmx_pal_vsa.h
new file mode 100644
index 0000000000..72ad1e6ca7
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_pal_vsa.h
@@ -0,0 +1,44 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ */
+
+
+
+#ifndef _PAL_VSA_H_
+#define _PAL_VSA_H_
+
+/* PAL virtualization services */
+
+#ifndef __ASSEMBLY__
+extern UINT64 ia64_call_vsa(UINT64 proc, UINT64 arg1, UINT64 arg2,
+                            UINT64 arg3, UINT64 arg4, UINT64 arg5,
+                            UINT64 arg6, UINT64 arg7);
+extern UINT64 __vsa_base;
+#endif /* __ASSEMBLY__ */
+
+#define PAL_VPS_RESUME_NORMAL           0x0000
+#define PAL_VPS_RESUME_HANDLER          0x0400
+#define PAL_VPS_SYNC_READ               0x0800
+#define PAL_VPS_SYNC_WRITE              0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
+#define PAL_VPS_THASH                   0x1400
+#define PAL_VPS_TTAG                    0x1800
+
+#endif /* _PAL_VSA_H_ */
+
diff --git a/xen/include/asm-ia64/vmx_phy_mode.h b/xen/include/asm-ia64/vmx_phy_mode.h
new file mode 100644
index 0000000000..20c669e69c
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_phy_mode.h
@@ -0,0 +1,126 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_phy_mode.h:
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#ifndef _PHY_MODE_H_
+#define _PHY_MODE_H_
+
+/*
+ * Guest physical mode is emulated by the GVMM, which actually runs
+ * in virtual mode.
+ *
+ * Of all combinations of (it,dt,rt), only three need to be taken into
+ * account:
+ * (0,0,0): some firmware and kernel start-up code executes in this mode;
+ * (1,1,1): most kernel C code executes in this mode;
+ * (1,0,1): some low-level TLB miss handler code executes in this mode;
+ * So far, no other combination has been observed.
+ *
+ * All physical addresses fall into two categories:
+ * 0x0xxxxxxxxxxxxxxx, which is cacheable, and 0x8xxxxxxxxxxxxxxx, which
+ * is uncacheable. These two kinds of addresses reside in regions 0 and 4
+ * of the virtual address space. Therefore, we load two different region
+ * IDs (A, B) into RR0 and RR4, respectively, when the guest enters
+ * physical mode. These two RIDs are totally different from the RIDs used
+ * in virtual mode, so the aliasing between physical and virtual addresses
+ * can be disambiguated by the different RIDs.
+ *
+ * RID A and B are stolen from the cpu ulm region id. In Linux, each
+ * process is allocated 8 RIDs:
+ *          mmu_context << 3 + 0
+ *          mmu_context << 3 + 1
+ *          mmu_context << 3 + 2
+ *          mmu_context << 3 + 3
+ *          mmu_context << 3 + 4
+ *          mmu_context << 3 + 5
+ *          mmu_context << 3 + 6
+ *          mmu_context << 3 + 7
+ * Because all processes share regions 5~7, the last three are left
+ * untouched. So we steal "mmu_context << 3 + 5" and "mmu_context << 3 + 6"
+ * from the ulm and use them as RID A and RID B.
+ *
+ * When the guest runs in (1,0,1) mode, the instructions being fetched
+ * reside in regions 5~7, not in region 0 or 4. So instructions can be
+ * accessed in virtual mode without interfering with physical data access.
+ *
+ * When dt != rt, it is rare to perform a "load/store" and an "RSE"
+ * operation at the same time. No need to consider such a case; we
+ * treat (0,1) as (0,0).
+ *
+ */
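Restating the RID-stealing scheme from the comment above as code — two region IDs per mmu_context back the guest's physical-mode view of regions 0 and 4 (an illustration only, not this header's API):

    /* Illustrative RID derivation for physical-mode emulation: two RIDs
     * are carved out of the 8-RID block that the Linux mmu_context
     * allocator hands to each context. */
    static inline unsigned long phy_rid_a(unsigned long mmu_context)
    {
        return (mmu_context << 3) + 5;   /* backs region 0 (cacheable)   */
    }

    static inline unsigned long phy_rid_b(unsigned long mmu_context)
    {
        return (mmu_context << 3) + 6;   /* backs region 4 (uncacheable) */
    }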
+
+
+#include <asm/vmx_vcpu.h>
+#include <asm/regionreg.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pgtable.h>
+/* Due to change of ia64_set_rr interface */
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifdef PHY_16M  /* 16M: large granule for test */
+#define EMUL_PHY_PAGE_SHIFT 24
+#else           /* 4K: emulated physical page granule */
+#define EMUL_PHY_PAGE_SHIFT 12
+#endif
+#define IA64_RSC_MODE   0x0000000000000003
+#define XEN_RR7_RID     (0xf00010)
+#define GUEST_IN_PHY    0x1
+extern int valid_mm_mode[];
+extern int mm_switch_table[][8];
+extern void physical_mode_init(VCPU *);
+extern void switch_to_physical_rid(VCPU *);
+extern void switch_to_virtual_rid(VCPU *vcpu);
+extern void switch_mm_mode(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
+extern void stlb_phys_lookup(VCPU *vcpu, UINT64 paddr, UINT64 type);
+extern void check_mm_mode_switch(VCPU *vcpu, IA64_PSR old_psr, IA64_PSR new_psr);
+extern void prepare_if_physical_mode(VCPU *vcpu);
+extern void recover_if_physical_mode(VCPU *vcpu);
+extern void vmx_init_all_rr(VCPU *vcpu);
+extern void vmx_load_all_rr(VCPU *vcpu);
+/*
+ * No sanity check here, since all psr changes have been
+ * checked in switch_mm_mode().
+ */
+#define is_physical_mode(v) \
+    ((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v) \
+    (!is_physical_mode(v))
+
+#define MODE_IND(psr)   \
+    (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#define SW_BAD      0   /* Bad mode transition */
+#define SW_V2P      1   /* Physical mode emulation is activated */
+#define SW_P2V      2   /* Exit physical mode emulation */
+#define SW_SELF     3   /* No mode transition */
+#define SW_NOP      4   /* Mode transition, but without action required */
+
+#define INV_MODE    0   /* Invalid mode */
+#define GUEST_VIRT  1   /* Guest in virtual mode */
+#define GUEST_PHYS  2   /* Guest in physical mode, requiring emulation */
+
+
+
+#endif /* _PHY_MODE_H_ */
+
+
+
diff --git a/xen/include/asm-ia64/vmx_platform.h b/xen/include/asm-ia64/vmx_platform.h
new file mode 100644
index 0000000000..bf59e61fec
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_platform.h
@@ -0,0 +1,37 @@
+/*
+ * vmx_platform.h: VMX platform support
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+#ifndef __ASM_IA64_VMX_PLATFORM_H__
+#define __ASM_IA64_VMX_PLATFORM_H__
+
+#include <public/xen.h>
+
+
+struct mmio_list;
+typedef struct virutal_platform_def {
+    //unsigned long       *real_mode_data; /* E820, etc.
*/ + //unsigned long shared_page_va; + //struct vmx_virpit_t vmx_pit; + //struct vmx_handler_t vmx_handler; + //struct mi_per_cpu_info mpci; /* MMIO */ + unsigned long pib_base; + unsigned char xtp; + struct mmio_list *mmio; +} vir_plat_t; + +#endif diff --git a/xen/include/asm-ia64/vmx_ptrace.h b/xen/include/asm-ia64/vmx_ptrace.h new file mode 100644 index 0000000000..4065c097f4 --- /dev/null +++ b/xen/include/asm-ia64/vmx_ptrace.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Stephane Eranian <eranian@hpl.hp.com> + * Copyright (C) 2003 Intel Co + * Suresh Siddha <suresh.b.siddha@intel.com> + * Fenghua Yu <fenghua.yu@intel.com> + * Arun Sharma <arun.sharma@intel.com> + * + * 12/07/98 S. Eranian added pt_regs & switch_stack + * 12/21/98 D. Mosberger updated to match latest code + * 6/17/99 D. Mosberger added second unat member to "struct switch_stack" + * 4/28/05 Anthony Xu ported to Xen + * + */ + +struct pt_regs { + /* The following registers are saved by SAVE_MIN: */ + unsigned long b6; /* scratch */ + unsigned long b7; /* scratch */ + + unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ + unsigned long ar_ssd; /* reserved for future use (scratch) */ + + unsigned long r8; /* scratch (return value register 0) */ + unsigned long r9; /* scratch (return value register 1) */ + unsigned long r10; /* scratch (return value register 2) */ + unsigned long r11; /* scratch (return value register 3) */ + + unsigned long cr_ipsr; /* interrupted task's psr */ + unsigned long cr_iip; /* interrupted task's instruction pointer */ + unsigned long cr_ifs; /* interrupted task's function state */ + + unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ + unsigned long ar_pfs; /* prev function state */ + unsigned long ar_rsc; /* RSE configuration */ + /* The following two are valid only if cr_ipsr.cpl > 0: */ + unsigned long ar_rnat; /* RSE NaT */ + unsigned long ar_bspstore; /* RSE bspstore */ + + unsigned long pr; /* 64 predicate registers (1 bit each) */ + unsigned long b0; /* return pointer (bp) */ + unsigned long loadrs; /* size of dirty partition << 16 */ + + unsigned long r1; /* the gp pointer */ + unsigned long r12; /* interrupted task's memory stack pointer */ + unsigned long r13; /* thread pointer */ + + unsigned long ar_fpsr; /* floating point status (preserved) */ + unsigned long r15; /* scratch */ + + /* The remaining registers are NOT saved for system calls. 
*/
+	unsigned long r14;		/* scratch */
+	unsigned long r2;		/* scratch */
+	unsigned long r3;		/* scratch */
+	unsigned long r4;		/* preserved */
+	unsigned long r5;		/* preserved */
+	unsigned long r6;		/* preserved */
+	unsigned long r7;		/* preserved */
+	unsigned long cr_iipa;		/* for emulation */
+	unsigned long cr_isr;		/* for emulation */
+	unsigned long eml_unat;		/* used for emulating instructions */
+	unsigned long rfi_pfs;		/* used for emulating rfi */
+
+	/* The following registers are saved by SAVE_REST: */
+	unsigned long r16;		/* scratch */
+	unsigned long r17;		/* scratch */
+	unsigned long r18;		/* scratch */
+	unsigned long r19;		/* scratch */
+	unsigned long r20;		/* scratch */
+	unsigned long r21;		/* scratch */
+	unsigned long r22;		/* scratch */
+	unsigned long r23;		/* scratch */
+	unsigned long r24;		/* scratch */
+	unsigned long r25;		/* scratch */
+	unsigned long r26;		/* scratch */
+	unsigned long r27;		/* scratch */
+	unsigned long r28;		/* scratch */
+	unsigned long r29;		/* scratch */
+	unsigned long r30;		/* scratch */
+	unsigned long r31;		/* scratch */
+
+	unsigned long ar_ccv;		/* compare/exchange value (scratch) */
+
+	/*
+	 * Floating point registers that the kernel considers scratch:
+	 */
+	struct ia64_fpreg f6;		/* scratch */
+	struct ia64_fpreg f7;		/* scratch */
+	struct ia64_fpreg f8;		/* scratch */
+	struct ia64_fpreg f9;		/* scratch */
+	struct ia64_fpreg f10;		/* scratch */
+	struct ia64_fpreg f11;		/* scratch */
+};
+
+
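Both ia64_task_regs() earlier in this patch and vcpu_regs() in the next header rely on the same layout trick: the pt_regs frame sits at the very top of the per-task stack block, so it is found by pure pointer arithmetic. A standalone, runnable illustration (the IA64_STK_OFFSET value is illustrative, not the real constant):

    #include <stdio.h>

    #define IA64_STK_OFFSET (1UL << 14)      /* illustrative: 16KB stack block */

    struct fake_regs { unsigned long r[32]; };

    /* pt_regs lives in the last sizeof(struct fake_regs) bytes of the
     * stack block that begins at 'task'. */
    static struct fake_regs *task_regs(void *task)
    {
        return ((struct fake_regs *)((char *)task + IA64_STK_OFFSET)) - 1;
    }

    int main(void)
    {
        static char stack_block[IA64_STK_OFFSET];
        struct fake_regs *regs = task_regs(stack_block);

        printf("block %p..%p, regs at %p (last %zu bytes)\n",
               (void *)stack_block, (void *)(stack_block + IA64_STK_OFFSET),
               (void *)regs, sizeof(*regs));
        return 0;
    }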
diff --git a/xen/include/asm-ia64/vmx_vcpu.h b/xen/include/asm-ia64/vmx_vcpu.h
new file mode 100644
index 0000000000..59966d1c7c
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_vcpu.h
@@ -0,0 +1,598 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vcpu.h:
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#ifndef _XEN_IA64_VMX_VCPU_H
+#define _XEN_IA64_VMX_VCPU_H
+
+
+#include <xen/sched.h>
+#include <asm/ia64_int.h>
+#include <asm/vmx_vpd.h>
+#include <asm/ptrace.h>
+#include <asm/regs.h>
+#include <asm/regionreg.h>
+#include <asm/types.h>
+#include <asm/vcpu.h>
+
+#define VRN_SHIFT   61
+#define VRN0        0x0UL
+#define VRN1        0x1UL
+#define VRN2        0x2UL
+#define VRN3        0x3UL
+#define VRN4        0x4UL
+#define VRN5        0x5UL
+#define VRN6        0x6UL
+#define VRN7        0x7UL
+
+// this def for vcpu_regs won't work if kernel stack is present
+#define vcpu_regs(vcpu) (((struct pt_regs *) ((char *) (vcpu) + IA64_STK_OFFSET)) - 1)
+#define VMX_VPD(x,y)    ((x)->arch.arch_vmx.vpd->y)
+
+#define VMX(x,y)        ((x)->arch.arch_vmx.y)
+
+#define VPD_CR(x,y)     (((cr_t*)VMX_VPD(x,vcr))->y)
+
+#define VMM_RR_SHIFT    20
+#define VMM_RR_MASK     ((1UL<<VMM_RR_SHIFT)-1)
+#define VRID_2_MRID(vcpu,rid)   (((rid) & VMM_RR_MASK) | \
+                ((vcpu->domain->id) << VMM_RR_SHIFT))
+extern u64 indirect_reg_igfld_MASK(int type, int index, u64 value);
+extern u64 cr_igfld_mask(int index, u64 value);
+extern int check_indirect_reg_rsv_fields(int type, int index, u64 value);
+extern u64 set_isr_ei_ni(VCPU *vcpu);
+extern u64 set_isr_for_na_inst(VCPU *vcpu, int op);
+
+
+/* next all for CONFIG_VTI APIs definition */
+extern void vmx_vcpu_set_psr(VCPU *vcpu, unsigned long value);
+extern UINT64 vmx_vcpu_sync_mpsr(UINT64 mipsr, UINT64 value);
+extern void vmx_vcpu_set_psr_sync_mpsr(VCPU *vcpu, UINT64 value);
+extern IA64FAULT vmx_vcpu_cover(VCPU *vcpu);
+extern thash_cb_t *vmx_vcpu_get_vtlb(VCPU *vcpu);
+extern thash_cb_t *vmx_vcpu_get_vhpt(VCPU *vcpu);
+ia64_rr vmx_vcpu_rr(VCPU *vcpu, UINT64 vadr);
+extern IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UINT64 reg, UINT64 val);
+extern IA64FAULT vmx_vcpu_get_pkr(VCPU *vcpu, UINT64 reg, UINT64 *pval);
+IA64FAULT vmx_vcpu_set_pkr(VCPU *vcpu, UINT64 reg, UINT64 val);
+extern IA64FAULT vmx_vcpu_itc_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa);
+extern IA64FAULT vmx_vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa);
+extern IA64FAULT vmx_vcpu_itr_i(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx);
+extern IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa, UINT64 idx);
+extern IA64FAULT vmx_vcpu_ptr_d(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptr_i(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptc_e(VCPU *vcpu, UINT64 vadr);
+extern IA64FAULT vmx_vcpu_ptc_g(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu, UINT64 vadr, UINT64 ps);
+extern IA64FAULT vmx_vcpu_thash(VCPU *vcpu, UINT64 vadr, UINT64 *pval);
+extern u64 vmx_vcpu_get_itir_on_fault(VCPU *vcpu, u64 ifa);
+extern IA64FAULT vmx_vcpu_ttag(VCPU *vcpu, UINT64 vadr, UINT64 *pval);
+extern IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr);
+extern IA64FAULT vmx_vcpu_tak(VCPU *vcpu, UINT64 vadr, UINT64 *key);
+extern IA64FAULT vmx_vcpu_rfi(VCPU *vcpu);
+extern UINT64 vmx_vcpu_get_psr(VCPU *vcpu);
+extern IA64FAULT vmx_vcpu_get_bgr(VCPU *vcpu, unsigned int reg, UINT64 *val);
+extern IA64FAULT vmx_vcpu_set_bgr(VCPU *vcpu, unsigned int reg, u64 val, int nat);
+extern IA64FAULT vmx_vcpu_get_gr(VCPU *vcpu, unsigned reg, UINT64 *val);
+extern IA64FAULT vmx_vcpu_set_gr(VCPU *vcpu, unsigned reg, u64 value, int
nat); +extern IA64FAULT vmx_vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24); +extern IA64FAULT vmx_vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24); +extern IA64FAULT vmx_vcpu_set_psr_l(VCPU *vcpu, UINT64 val); +extern void vtm_init(VCPU *vcpu); +extern uint64_t vtm_get_itc(VCPU *vcpu); +extern void vtm_set_itc(VCPU *vcpu, uint64_t new_itc); +extern void vtm_set_itv(VCPU *vcpu); +extern void vtm_interruption_update(VCPU *vcpu, vtime_t* vtm); +extern void vtm_domain_out(VCPU *vcpu); +extern void vtm_domain_in(VCPU *vcpu); +extern void vlsapic_reset(VCPU *vcpu); +extern int vmx_check_pending_irq(VCPU *vcpu); +extern void guest_write_eoi(VCPU *vcpu); +extern uint64_t guest_read_vivr(VCPU *vcpu); +extern void vmx_inject_vhpi(VCPU *vcpu, u8 vec); +extern void vmx_vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector); +extern struct virutal_platform_def *vmx_vcpu_get_plat(VCPU *vcpu); +extern void memread_p(VCPU *vcpu, void *src, void *dest, size_t s); +extern void memread_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s); +extern void memwrite_v(VCPU *vcpu, thash_data_t *vtlb, void *src, void *dest, size_t s); +extern void memwrite_p(VCPU *vcpu, void *src, void *dest, size_t s); + + +/************************************************************************** + VCPU control register access routines +**************************************************************************/ + +static inline +IA64FAULT vmx_vcpu_get_dcr(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,dcr); + return (IA64_NO_FAULT); +} + +static inline +IA64FAULT vmx_vcpu_get_itm(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,itm); + return (IA64_NO_FAULT); +} + +static inline +IA64FAULT vmx_vcpu_get_iva(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,iva); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_pta(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,pta); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_ipsr(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,ipsr); + return (IA64_NO_FAULT); +} + +static inline +IA64FAULT vmx_vcpu_get_isr(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,isr); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_iip(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,iip); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_ifa(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,ifa); + return (IA64_NO_FAULT); +} + +static inline +IA64FAULT vmx_vcpu_get_itir(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,itir); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_iipa(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,iipa); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_ifs(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,ifs); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_iim(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,iim); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_iha(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,iha); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_lid(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,lid); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_ivr(VCPU *vcpu, UINT64 *pval) +{ + *pval = guest_read_vivr(vcpu); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_tpr(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,tpr); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_eoi(VCPU *vcpu, UINT64 *pval) +{ + *pval 
= 0L; // reads of eoi always return 0 + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_irr0(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,irr[0]); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_irr1(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,irr[1]); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_irr2(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,irr[2]); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_irr3(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,irr[3]); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_itv(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,itv); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_pmv(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,pmv); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_cmcv(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,cmcv); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_lrr0(VCPU *vcpu, UINT64 *pval) +{ + *pval = VPD_CR(vcpu,lrr0); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_lrr1(VCPU *vcpu, UINT64 *pval) +{ *pval = VPD_CR(vcpu,lrr1); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT +vmx_vcpu_set_dcr(VCPU *vcpu, u64 val) +{ + u64 mdcr, mask; + VPD_CR(vcpu,dcr)=val; + /* All vDCR bits will go to mDCR, except for be/pp bit */ + mdcr = ia64_get_dcr(); + mask = IA64_DCR_BE | IA64_DCR_PP; + mdcr = ( mdcr & mask ) | ( val & (~mask) ); + ia64_set_dcr( mdcr); + + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_itm(VCPU *vcpu, u64 val) +{ + vtime_t *vtm; + + vtm=&(vcpu->arch.arch_vmx.vtm); + VPD_CR(vcpu,itm)=val; + vtm_interruption_update(vcpu, vtm); + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_iva(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,iva)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_pta(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,pta)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_ipsr(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,ipsr)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_isr(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,isr)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_iip(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,iip)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_ifa(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,ifa)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_itir(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,itir)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_iipa(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,iipa)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_ifs(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,ifs)=val; + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_iim(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,iim)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_iha(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,iha)=val; + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_lid(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,lid)=val; + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_tpr(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,tpr)=val; + //TODO + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_eoi(VCPU *vcpu, u64 val) +{ + guest_write_eoi(vcpu); + return IA64_NO_FAULT; +} + +static inline +IA64FAULT +vmx_vcpu_set_itv(VCPU *vcpu, u64 
val) +{ + + VPD_CR(vcpu,itv)=val; + vtm_set_itv(vcpu); + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_pmv(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,pmv)=val; + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_cmcv(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,cmcv)=val; + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_lrr0(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,lrr0)=val; + return IA64_NO_FAULT; +} +static inline +IA64FAULT +vmx_vcpu_set_lrr1(VCPU *vcpu, u64 val) +{ + VPD_CR(vcpu,lrr1)=val; + return IA64_NO_FAULT; +} + + + + +/************************************************************************** + VCPU privileged application register access routines +**************************************************************************/ +static inline +IA64FAULT vmx_vcpu_set_itc(VCPU *vcpu, UINT64 val) +{ + vtm_set_itc(vcpu, val); + return IA64_NO_FAULT; +} +static inline +IA64FAULT vmx_vcpu_get_itc(VCPU *vcpu,UINT64 *val) +{ + *val = vtm_get_itc(vcpu); + return IA64_NO_FAULT; +} +static inline +IA64FAULT vmx_vcpu_get_rr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + *pval = VMX(vcpu,vrr[reg>>61]); + return (IA64_NO_FAULT); +} +/************************************************************************** + VCPU debug breakpoint register access routines +**************************************************************************/ + +static inline +IA64FAULT vmx_vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // TODO: unimplemented DBRs return a reserved register fault + // TODO: Should set Logical CPU state, not just physical + if(reg > 4){ + panic("there are only five cpuid registers"); + } + *pval=VMX_VPD(vcpu,vcpuid[reg]); + return (IA64_NO_FAULT); +} + + +static inline +IA64FAULT vmx_vcpu_set_dbr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: unimplemented DBRs return a reserved register fault + // TODO: Should set Logical CPU state, not just physical + ia64_set_dbr(reg,val); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_set_ibr(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: unimplemented IBRs return a reserved register fault + // TODO: Should set Logical CPU state, not just physical + ia64_set_ibr(reg,val); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_dbr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // TODO: unimplemented DBRs return a reserved register fault + UINT64 val = ia64_get_dbr(reg); + *pval = val; + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_ibr(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // TODO: unimplemented IBRs return a reserved register fault + UINT64 val = ia64_get_ibr(reg); + *pval = val; + return (IA64_NO_FAULT); +} + +/************************************************************************** + VCPU performance monitor register access routines +**************************************************************************/ +static inline +IA64FAULT vmx_vcpu_set_pmc(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: Should set Logical CPU state, not just physical + // NOTE: Writes to unimplemented PMC registers are discarded + ia64_set_pmc(reg,val); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_set_pmd(VCPU *vcpu, UINT64 reg, UINT64 val) +{ + // TODO: Should set Logical CPU state, not just physical + // NOTE: Writes to unimplemented PMD registers are discarded + ia64_set_pmd(reg,val); + return (IA64_NO_FAULT); +} +static inline +IA64FAULT vmx_vcpu_get_pmc(VCPU *vcpu, UINT64 reg, UINT64 *pval) +{ + // NOTE: Reads from unimplemented PMC registers return 
zero
+    UINT64 val = (UINT64)ia64_get_pmc(reg);
+    *pval = val;
+    return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_get_pmd(VCPU *vcpu, UINT64 reg, UINT64 *pval)
+{
+    // NOTE: Reads from unimplemented PMD registers return zero
+    UINT64 val = (UINT64)ia64_get_pmd(reg);
+    *pval = val;
+    return (IA64_NO_FAULT);
+}
+
+/**************************************************************************
+ VCPU banked general register access routines
+**************************************************************************/
+static inline
+IA64FAULT vmx_vcpu_bsw0(VCPU *vcpu)
+{
+
+    VMX_VPD(vcpu,vpsr) &= ~IA64_PSR_BN;
+    return (IA64_NO_FAULT);
+}
+static inline
+IA64FAULT vmx_vcpu_bsw1(VCPU *vcpu)
+{
+
+    VMX_VPD(vcpu,vpsr) |= IA64_PSR_BN;
+    return (IA64_NO_FAULT);
+}
+
+#define redistribute_rid(rid) (((rid) & ~0xffff) | (((rid) << 8) & 0xff00) | (((rid) >> 8) & 0xff))
+static inline unsigned long
+vmx_vrrtomrr(VCPU *vcpu, unsigned long val)
+{
+    ia64_rr rr;
+    u64 rid;
+
+    rr.rrval = val;
+    rid = (((u64)vcpu->domain->id) << DOMAIN_RID_SHIFT) + rr.rid;
+    rr.rid = redistribute_rid(rid);
+    rr.ve = 1;
+    return rr.rrval;
+}
+#endif /* _XEN_IA64_VMX_VCPU_H */
diff --git a/xen/include/asm-ia64/vmx_vpd.h b/xen/include/asm-ia64/vmx_vpd.h
new file mode 100644
index 0000000000..fea1cc21ea
--- /dev/null
+++ b/xen/include/asm-ia64/vmx_vpd.h
@@ -0,0 +1,193 @@
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vmx_vpd.h: virtual processor descriptor (VPD) layout and related interfaces
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ * + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + */ + +#ifndef _VPD_H_ +#define _VPD_H_ + +#ifndef __ASSEMBLY__ + +#include <asm/vtm.h> +#include <asm/vmx_platform.h> + +#define VPD_SHIFT 17 /* 128K requirement */ +#define VPD_SIZE (1 << VPD_SHIFT) +typedef union { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +} vac_t; + +typedef union { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +} vdc_t; + +typedef struct { + unsigned long dcr; // CR0 + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; // CR8 + unsigned long rsv2[7]; + unsigned long ipsr; // CR16 + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; // CR24 + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; // CR64 + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; // CR72 + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; // CR80 + unsigned long lrr1; + unsigned long rsv6[46]; +} cr_t; + +typedef struct vpd { + vac_t vac; + vdc_t vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + unsigned long vgr[16]; + unsigned long vbgr[16]; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + unsigned long vcr[128]; + unsigned long reserved5[128]; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +} vpd_t; + +void vmx_enter_scheduler(void); + +//FIXME: Map for LID to exec_domain, Eddie +#define MAX_NUM_LPS (1UL<<16) +extern struct exec_domain *lid_edt[MAX_NUM_LPS]; + +struct arch_vmx_struct { +// struct virutal_platform_def vmx_platform; + vpd_t *vpd; + vtime_t vtm; + unsigned long vrr[8]; + unsigned long mrr5; + unsigned long mrr6; + unsigned long mrr7; + unsigned long mpta; + unsigned long rfi_pfs; + unsigned long rfi_iip; + unsigned long rfi_ipsr; + unsigned long rfi_ifs; + unsigned long in_service[4]; // vLsapic inservice IRQ bits + struct virutal_platform_def vmx_platform; + unsigned long flags; +}; + +#define vmx_schedule_tail(next) \ + (next)->thread.arch_vmx.arch_vmx_schedule_tail((next)) + +#define VMX_DOMAIN(d) d->arch.arch_vmx.flags + +#define ARCH_VMX_VMCS_LOADED 0 /* VMCS has been loaded and active */ +#define ARCH_VMX_VMCS_LAUNCH 1 /* Needs VMCS launch */ +#define ARCH_VMX_VMCS_RESUME 2 /* Needs VMCS resume */ +#define ARCH_VMX_IO_WAIT 3 /* Waiting for I/O completion */ + + +#define VMX_DEBUG 1 +#if VMX_DEBUG +#define DBG_LEVEL_0 (1 << 0) +#define DBG_LEVEL_1 (1 << 1) +#define DBG_LEVEL_2 (1 << 2) +#define DBG_LEVEL_3 (1 << 3) +#define DBG_LEVEL_IO (1 << 4) +#define DBG_LEVEL_VMMU (1 << 5) + +extern unsigned int opt_vmx_debug_level; +#define VMX_DBG_LOG(level, _f, _a...) \ + if ((level) & opt_vmx_debug_level) \ + printk("[VMX]" _f "\n", ## _a ) +#else +#define VMX_DBG_LOG(level, _f, _a...) 
+#endif
+
+#define __vmx_bug(regs) \
+    do { \
+        printk("__vmx_bug at %s:%d\n", __FILE__, __LINE__); \
+        show_registers(regs); \
+        domain_crash(); \
+    } while (0)
+
+#endif // __ASSEMBLY__
+
+
+// VPD field byte offsets (kept in sync with vpd_t above)
+#define VPD_VAC_START_OFFSET        0
+#define VPD_VDC_START_OFFSET        8
+#define VPD_VHPI_START_OFFSET       256
+#define VPD_VGR_START_OFFSET        1024
+#define VPD_VBGR_START_OFFSET       1152
+#define VPD_VNAT_START_OFFSET       1280
+#define VPD_VBNAT_START_OFFSET      1288
+#define VPD_VCPUID_START_OFFSET     1296
+#define VPD_VPSR_START_OFFSET       1424
+#define VPD_VPR_START_OFFSET        1432
+#define VPD_VRSE_CFLE_START_OFFSET  1440
+#define VPD_VCR_START_OFFSET        2048
+#define VPD_VRR_START_OFFSET        3072
+#define VPD_VMM_VAIL_START_OFFSET   31744
+
+
+#endif /* _VPD_H_ */
diff --git a/xen/include/asm-ia64/vtm.h b/xen/include/asm-ia64/vtm.h
new file mode 100644
index 0000000000..5e2adeb1e1
--- /dev/null
+++ b/xen/include/asm-ia64/vtm.h
@@ -0,0 +1,68 @@
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
+/*
+ * vtm.h: virtual timer header file.
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ */
+
+#ifndef _VTM_H_
+#define _VTM_H_
+
+#include <xen/ac_timer.h>
+#include <xen/types.h>
+
+#define MAX_JUMP_STEP           (5000)  /* 500ms, max jump step */
+#define MIN_GUEST_RUNNING_TIME  (0)     /* 10ms for the guest OS to run */
+#define ITV_VECTOR_MASK         (0xff)
+
+typedef struct vtime {
+    long        vtm_offset;     // guest ITC = host ITC + vtm_offset
+    uint64_t    vtm_local_drift;
+    uint64_t    last_itc;
+    /*
+     * Local drift (temporary) after guest suspension.
+     * In case of a long ITC jump after suspension,
+     *     guest ITC = host ITC + vtm_offset - vtm_local_drift,
+     * so that the elapsed time seen via the guest ITC is limited to
+     * cfg_max_jump, which keeps all kinds of device drivers happy.
+     */
+
+    // the following all use ITC ticks as the unit
+    uint64_t    cfg_max_jump;   // max jump within one suspension
+    uint64_t    cfg_min_grun;   // min guest running time since last jump
+//  uint64_t    latest_read_itc;    // latest guest read ITC
+    struct ac_timer vtm_timer;
+    int         timer_hooked;   // vtm_timer is hooked
+//  int         triggered;
+
+
+    uint64_t    guest_running_time; // guest running time since last switch
+    //uint64_t  vtm_last_suspending_time;
+    //uint64_t  switch_in_time;
+    //uint64_t  switch_out_time;
+    //uint64_t  itc_freq;
+
+} vtime_t;
+
+#define ITV_VECTOR(itv)     (itv&0xff)
+#define ITV_IRQ_MASK(itv)   (itv&(1<<16))
+
+// NOTE: 'triggered' is commented out in struct vtime above, so this
+// macro cannot be used until that field is restored.
+#define VTM_FIRED(vtm)      ((vtm)->triggered)
+
+extern void vtm_init();
+#endif /* _VTM_H_ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 6f64e0c057..2e1f519c71 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -159,6 +159,8 @@ struct domain_setup_info
     unsigned int load_symtab;
     unsigned long symtab_addr;
     unsigned long symtab_len;
+    /* Indicates whether this is a Xen-specific ELF image */
+    unsigned int xen_elf_image;
 };
 
 #include <asm/uaccess.h> /* for KERNEL_DS */
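
The redistribute_rid() macro introduced in the vmx_vcpu.h hunk above byte-swaps the low 16 bits of the region ID after the domain ID has been folded in, so that per-domain RIDs, which differ only in their low bits, spread out instead of clustering in the TLB/VHPT hash. Below is a minimal standalone sketch of that transformation; the demo values and the DOMAIN_RID_SHIFT stand-in are assumptions for illustration, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Byte swap of the low 16 bits, copied from the vmx_vcpu.h hunk above. */
#define redistribute_rid(rid) \
    (((rid) & ~0xffffUL) | (((rid) << 8) & 0xff00UL) | (((rid) >> 8) & 0xffUL))

int main(void)
{
    uint64_t domain_rid_shift = 20;   /* stand-in for DOMAIN_RID_SHIFT */
    uint64_t domain_id = 3;           /* hypothetical domain */
    uint64_t guest_rid = 0x0102;      /* RID the guest wrote into a vRR */

    /* Fold in the domain ID, then redistribute, as vmx_vrrtomrr() does. */
    uint64_t machine_rid = (domain_id << domain_rid_shift) + guest_rid;
    printf("machine RID %#lx -> %#lx\n",
           (unsigned long)machine_rid,
           (unsigned long)redistribute_rid(machine_rid));
    /* Prints: machine RID 0x300102 -> 0x300201 */
    return 0;
}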
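
The VPD_*_START_OFFSET constants in vmx_vpd.h above are byte offsets consumed from assembly, so they must track the C layout of vpd_t exactly. As a hedged illustration of how that correspondence can be checked, the sketch below replays a prefix of the structure and verifies several offsets with offsetof(); it assumes an LP64 target (unsigned long is 8 bytes, as on ia64) and C11 _Static_assert, and the replica struct exists only for this check.

#include <stddef.h>

/* Replica of the leading fields of vpd_t; vac_t/vdc_t are 8-byte unions,
 * so plain unsigned longs stand in for them here. */
typedef struct vpd_demo {
    unsigned long vac, vdc;
    unsigned long virt_env_vaddr;
    unsigned long reserved1[29];
    unsigned long vhpi;
    unsigned long reserved2[95];
    unsigned long vgr[16];
    unsigned long vbgr[16];
    unsigned long vnat;
    unsigned long vbnat;
    unsigned long vcpuid[5];
    unsigned long reserved3[11];
    unsigned long vpsr;
    unsigned long vpr;
    unsigned long reserved4[76];
    unsigned long vcr[128];
} vpd_demo_t;

/* Each assertion mirrors one VPD_*_START_OFFSET constant from the header. */
_Static_assert(offsetof(vpd_demo_t, vhpi) ==  256, "VPD_VHPI_START_OFFSET");
_Static_assert(offsetof(vpd_demo_t, vgr)  == 1024, "VPD_VGR_START_OFFSET");
_Static_assert(offsetof(vpd_demo_t, vbgr) == 1152, "VPD_VBGR_START_OFFSET");
_Static_assert(offsetof(vpd_demo_t, vnat) == 1280, "VPD_VNAT_START_OFFSET");
_Static_assert(offsetof(vpd_demo_t, vpsr) == 1424, "VPD_VPSR_START_OFFSET");
_Static_assert(offsetof(vpd_demo_t, vcr)  == 2048, "VPD_VCR_START_OFFSET");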
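
The comment block in struct vtime above fixes the guest time formula: the guest's ITC reads as host ITC + vtm_offset, and vtm_local_drift is subtracted so that no more than cfg_max_jump ticks ever appear to elapse at once (for example across a long suspension). The sketch below is one plausible reading of that clamping policy under those definitions; it is not the patch's implementation, and read_host_itc() and the struct name are hypothetical.

#include <stdint.h>

struct vtm_demo {                 /* mirrors the relevant vtime fields */
    long     vtm_offset;
    uint64_t vtm_local_drift;
    uint64_t last_itc;
    uint64_t cfg_max_jump;
};

extern uint64_t read_host_itc(void);   /* stand-in for reading ar.itc */

static uint64_t guest_itc_read(struct vtm_demo *vtm)
{
    /* guest ITC = host ITC + vtm_offset - vtm_local_drift */
    uint64_t guest = read_host_itc() + vtm->vtm_offset - vtm->vtm_local_drift;

    /* If more than cfg_max_jump ticks passed since the last observed
     * value, absorb the excess into the local drift so the guest only
     * ever sees a bounded jump. */
    if (guest - vtm->last_itc > vtm->cfg_max_jump) {
        vtm->vtm_local_drift += (guest - vtm->last_itc) - vtm->cfg_max_jump;
        guest = vtm->last_itc + vtm->cfg_max_jump;
    }
    vtm->last_itc = guest;
    return guest;
}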