diff options
81 files changed, 1292 insertions, 1937 deletions
@@ -5,6 +5,7 @@ dc2f08429f17e6614fd2f1ab88cc09ca0a850f32 RELEASE-2.0.2 fb875591fd72e15c31879c0e9034d99b80225595 RELEASE-2.0.4 1a522944f76540ea9d73fcc1b0d13d0f670183f0 RELEASE-2.0.5 487b2ee37d1cecb5f3e7a546b05ad097a0226f2f beta1 +1f84d0497a5901b9f8d1a051b87871d140b7e23f ia64-stable 3d330e41f41ce1bc118c02346e18949ad5d67f6b latest-semistable 30c521db4c71960b0cf1d9c9e1b658e77b535a3e latest-stable 9afec5bc14aeb197ef37ea54a57eacd427463fc3 semistable @@ -1264,7 +1264,6 @@ 41a61536SZbR6cj1ukWTb0DYU-vz9w xen/common/multicall.c 3ddb79bdD4SLmmdMD7yLW5HcUWucXw xen/common/page_alloc.c 3e54c38dkHAev597bPr71-hGzTdocg xen/common/perfc.c -3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c 40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c 41ebbfe9oF1BF3cH5v7yE3eOL9uPbA xen/common/sched_sedf.c 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c @@ -1452,7 +1451,6 @@ 41262590CyJy4vd42dnqzsn8-eeGvw xen/include/xen/grant_table.h 3ddb79c0GurNF9tDWqQbAwJFH8ugfA xen/include/xen/init.h 428084e41zemtCAtYLcD9bUzwE35SA xen/include/xen/inttypes.h -3ddb79c1nzaWu8NoF4xCCMSFJR4MlA xen/include/xen/ioport.h 3ddb79c2qAxCOABlkKtD8Txohe-qEw xen/include/xen/irq.h 3ddb79c2b3qe-6Ann09FqZBF4IrJaQ xen/include/xen/irq_cpustat.h 3e4540ccPHqIIv2pvnQ1gV8LUnoHIg xen/include/xen/kernel.h @@ -1462,7 +1460,6 @@ 3ddb79c1gs2VbLbQlw0dcDUXYIepDA xen/include/xen/mm.h 3ddb79c1ieLZfGSFwfvvSQ2NK1BMSg xen/include/xen/multiboot.h 41a61536ii6j2lJ2rXwMOLaG1CHPvw xen/include/xen/multicall.h -3ddb79c2Fg44_PBPVxHSC0gTOMq4Ow xen/include/xen/pci.h 3ddb79c0MOVXq8qZDQRGb6z64_xAwg xen/include/xen/pci_ids.h 3e54c38dlSCVdyVM4PKcrSfzLLxWUQ xen/include/xen/perfc.h 3e54c38de9SUSYSAwxDf_DwkpAnQFA xen/include/xen/perfc_defn.h diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 379ce87a72..3cdc0126d1 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -80,6 +80,7 @@ rn@wyvis.camb.intel-research.net rn@wyvis.research.intel-research.net rneugeba@wyvis.research 
rneugeba@wyvis.research.intel-research.net +rusty@rustcorp.com.au ryanh@us.ibm.com sd386@font.cl.cam.ac.uk shand@spidean.research.intel-research.net diff --git a/docs/src/user.tex b/docs/src/user.tex index 0df10120bc..8ce88ae7fe 100644 --- a/docs/src/user.tex +++ b/docs/src/user.tex @@ -484,7 +484,7 @@ distribution. The entry should look something like the following: {\small \begin{verbatim} title Xen 2.0 / XenLinux 2.6.9 - kernel /boot/xen.gz dom0_mem=131072 + kernel /boot/xen.gz dom0_mem=128M module /boot/vmlinuz-2.6.9-xen0 root=/dev/sda4 ro console=tty0 \end{verbatim} } @@ -524,7 +524,7 @@ have problems. \subsection{Serial Console (optional)} -%% kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1 +%% kernel /boot/xen.gz dom0_mem=128M com1=115200,8n1 %% module /boot/vmlinuz-2.6.9-xen0 root=/dev/sda4 ro @@ -534,9 +534,9 @@ with: \begin{quote} {\small \begin{verbatim} - kernel /boot/xen.gz dom0_mem=131072 com1=115200,8n1 + kernel /boot/xen.gz dom0_mem=128M com1=115200,8n1 \end{verbatim}} -\end{quote} +\end{quote} This configures Xen to output on COM1 at 115,200 baud, 8 data bits, 1 stop bit and no parity. Modify these parameters for your set up. @@ -1680,12 +1680,6 @@ should be appended to Xen's command line, either manually or by editing \path{grub.conf}. \begin{description} -\item [ignorebiostables ] - Disable parsing of BIOS-supplied tables. This may help with some - chipsets that aren't fully supported by Xen. If you specify this - option then ACPI tables are also ignored, and SMP support is - disabled. - \item [noreboot ] Don't reboot the machine automatically on errors. This is useful to catch debug output if you aren't catching console messages @@ -1695,10 +1689,6 @@ editing \path{grub.conf}. Disable SMP support. This option is implied by `ignorebiostables'. -\item [noacpi ] - Disable ACPI tables, which confuse Xen on some chipsets. - This option is implied by `ignorebiostables'. - \item [watchdog ] Enable NMI watchdog which can report certain failures. 
@@ -1756,8 +1746,18 @@ editing \path{grub.conf}. `nmi=dom0': Inform DOM0 of the NMI. \\ `nmi=ignore': Ignore the NMI. +\item [mem=xxx ] + Set the physical RAM address limit. Any RAM appearing beyond this + physical address in the memory map will be ignored. This parameter + may be specified with a B, K, M or G suffix, representing bytes, + kilobytes, megabytes and gigabytes respectively. The + default unit, if no suffix is specified, is bytes. + \item [dom0\_mem=xxx ] - Set the amount of memory (in kB) to be allocated to domain0. + Set the amount of memory to be allocated to domain0. This parameter + may be specified with a B, K, M or G suffix, representing bytes, + kilobytes, megabytes and gigabytes respectively. The + default unit, if no suffix is specified, is kilobytes. \item [tbuf\_size=xxx ] Set the size of the per-cpu trace buffers, in pages @@ -1769,16 +1769,29 @@ editing \path{grub.conf}. Select the CPU scheduler Xen should use. The current possibilities are `bvt' (default), `atropos' and `rrobin'. For more information see Section~\ref{s:sched}. - -\item [physdev\_dom0\_hide=(xx:xx.x)(yy:yy.y)\ldots ] -Hide selected PCI devices from domain 0 (for instance, to stop it -taking ownership of them so that they can be driven by another -domain). Device IDs should be given in hex format. Bridge devices do -not need to be hidden --- they are hidden implicitly, since guest OSes -do not need to configure them. \end{description} +In addition, the following platform-specific options may be specified +on the Xen command line. Since domain 0 shares responsibility for +booting the platform, Xen will automatically propagate these options +to its command line. +These options are taken from Linux's command-line syntax with +unchanged semantics. + +\begin{description} +\item [acpi=off,force,strict,ht,noirq,\ldots ] + Modify how Xen (and domain 0) parses the BIOS ACPI tables. 
+ +\item [acpi\_skip\_timer\_override ] + Instruct Xen (and domain 0) to ignore timer-interrupt override + instructions specified by the BIOS ACPI tables. + +\item [noapic ] + Instruct Xen (and domain 0) to ignore any IOAPICs that are present in + the system, and instead continue to use the legacy PIC. + +\end{description} \section{XenLinux Boot Options} diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c index 72b9206b75..a53fdd31d4 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c @@ -1571,12 +1571,21 @@ void __init setup_arch(char **cmdline_p) op.u.set_iopl.iopl = current->thread.io_pl = 1; HYPERVISOR_physdev_op(&op); - /* - * Parse the ACPI tables for possible boot-time SMP configuration. - */ - acpi_boot_table_init(); - acpi_boot_init(); +#ifdef CONFIG_ACPI_BOOT + if ( !(xen_start_info.flags & SIF_INITDOMAIN) ) + { + printk(KERN_INFO "Not running in dom0: Disabling ACPI\n"); + acpi_disabled = 1; + acpi_ht = 0; + } +#endif + /* + * Parse the ACPI tables for possible boot-time SMP configuration. + */ + acpi_boot_table_init(); + acpi_boot_init(); + #ifdef CONFIG_X86_LOCAL_APIC if (smp_found_config) get_smp_config(); diff --git a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c index 3e9f82f1d9..c1ca5c5c0e 100644 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/smpboot.c @@ -54,6 +54,9 @@ #include <asm/desc.h> #include <asm/arch_hooks.h> +#ifndef CONFIG_X86_IO_APIC +#define Dprintk(args...) 
+#endif #include <mach_wakecpu.h> #include <smpboot_hooks.h> @@ -493,19 +496,7 @@ static void __init start_secondary(void *unused) local_irq_enable(); wmb(); - if (0) { - char *msg2 = "delay2\n"; - int timeout; - for (timeout = 0; timeout < 50000; timeout++) { - udelay(1000); - if (timeout == 2000) { - (void)HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg2), msg2); - timeout = 0; - } - } - } cpu_idle(); - return 0; } /* @@ -1107,15 +1098,18 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_sibling_map[0]); cpu_set(0, cpu_sibling_map[0]); +#ifdef CONFIG_X86_IO_APIC /* * If we couldn't find an SMP configuration at boot time, * get out of here now! */ - if (!smp_found_config /* && !acpi_lapic) */) { + if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); smpboot_clear_io_apic_irqs(); #if 0 phys_cpu_present_map = physid_mask_of_physid(0); +#endif +#ifdef CONFIG_X86_LOCAL_APIC if (APIC_init_uniprocessor()) printk(KERN_NOTICE "Local APIC not detected." " Using dummy APIC emulation.\n"); @@ -1123,6 +1117,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) map_cpu_to_logical_apicid(); return; } +#endif #if 0 /* diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c index 2554f999d7..b1acbcfc3a 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c @@ -486,12 +486,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) preq.nr_sects += seg[i].nsec; aop[i].u.map_grant_ref.host_virt_addr = MMAP_VADDR(pending_idx, i); - aop[i].u.map_grant_ref.dom = blkif->domid; aop[i].u.map_grant_ref.ref = blkif_gref_from_fas(fas); - aop[i].u.map_grant_ref.flags = ( GNTMAP_host_map | - ( ( operation == READ ) ? 
- 0 : GNTMAP_readonly ) ); + aop[i].u.map_grant_ref.flags = GNTMAP_host_map; + if ( operation == WRITE ) + aop[i].u.map_grant_ref.flags |= GNTMAP_readonly; } if ( unlikely(HYPERVISOR_grant_table_op( diff --git a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c index 311b8398ed..f15fcab014 100644 --- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c @@ -824,7 +824,7 @@ static int blkif_queue_request(unsigned long id, buffer_ma >> PAGE_SHIFT, ( operation == BLKIF_OP_WRITE ? 1 : 0 ) ); - blk_shadow[id].frame[req->nr_segments] = + blk_shadow[req->id].frame[req->nr_segments] = buffer_ma >> PAGE_SHIFT; req->frame_and_sects[req->nr_segments] = diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h index 7479f82686..28adeaf244 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/smpboot_hooks.h @@ -3,9 +3,7 @@ static inline void smpboot_clear_io_apic_irqs(void) { -#if 1 - printk("smpboot_clear_io_apic_irqs\n"); -#else +#ifdef CONFIG_X86_IO_APIC io_apic_irqs = 0; #endif } @@ -43,12 +41,14 @@ static inline void smpboot_restore_warm_reset_vector(void) static inline void smpboot_setup_io_apic(void) { +#ifdef CONFIG_X86_IO_APIC /* * Here we can be sure that there is an IO-APIC in the system. 
Let's * go and set it up: */ if (!skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); +#endif } diff --git a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h index 7479f82686..28adeaf244 100644 --- a/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/smpboot_hooks.h @@ -3,9 +3,7 @@ static inline void smpboot_clear_io_apic_irqs(void) { -#if 1 - printk("smpboot_clear_io_apic_irqs\n"); -#else +#ifdef CONFIG_X86_IO_APIC io_apic_irqs = 0; #endif } @@ -43,12 +41,14 @@ static inline void smpboot_restore_warm_reset_vector(void) static inline void smpboot_setup_io_apic(void) { +#ifdef CONFIG_X86_IO_APIC /* * Here we can be sure that there is an IO-APIC in the system. Let's * go and set it up: */ if (!skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); +#endif } diff --git a/tools/libxc/xc.h b/tools/libxc/xc.h index dd7a712ad6..3b464eb683 100644 --- a/tools/libxc/xc.h +++ b/tools/libxc/xc.h @@ -87,22 +87,22 @@ typedef struct xc_core_header { long xc_ptrace(enum __ptrace_request request, - u32 domid, - long addr, - long data); + u32 domid, + long addr, + long data); long xc_ptrace_core(enum __ptrace_request request, - u32 domid, - long addr, - long data); + u32 domid, + long addr, + long data); int xc_waitdomain(int domain, - int *status, - int options); + int *status, + int options); int xc_waitdomain_core(int domain, - int *status, - int options); + int *status, + int options); /* * DOMAIN MANAGEMENT FUNCTIONS @@ -110,7 +110,6 @@ int xc_waitdomain_core(int domain, typedef struct { u32 domid; - unsigned int cpu; unsigned int dying:1, crashed:1, shutdown:1, paused:1, blocked:1, running:1; unsigned int shutdown_reason; /* only meaningful if shutdown==1 */ @@ -118,6 +117,9 @@ typedef struct { unsigned long shared_info_frame; u64 cpu_time; unsigned long max_memkb; + unsigned int vcpus; + 
s32 vcpu_to_cpu[MAX_VIRT_CPUS]; + cpumap_t cpumap[MAX_VIRT_CPUS]; } xc_dominfo_t; typedef dom0_getdomaininfo_t xc_domaininfo_t; @@ -129,8 +131,8 @@ int xc_domain_create(int xc_handle, int xc_domain_dumpcore(int xc_handle, - u32 domid, - const char *corename); + u32 domid, + const char *corename); /** @@ -167,7 +169,8 @@ int xc_domain_destroy(int xc_handle, u32 domid); int xc_domain_pincpu(int xc_handle, u32 domid, - int cpu); + int vcpu, + cpumap_t *cpumap); /** * This function will return information about one or more domains. * @@ -195,11 +198,11 @@ int xc_domain_getinfo(int xc_handle, * domain * @return 0 on success, -1 on failure */ -int xc_domain_getfullinfo(int xc_handle, - u32 domid, - u32 vcpu, - xc_domaininfo_t *info, - vcpu_guest_context_t *ctxt); +int xc_domain_get_vcpu_context(int xc_handle, + u32 domid, + u32 vcpu, + vcpu_guest_context_t *ctxt); + int xc_domain_setcpuweight(int xc_handle, u32 domid, float weight); @@ -260,8 +263,8 @@ xc_plan9_build (int xc_handle, u32 domid, const char *image_name, const char *cmdline, - unsigned int control_evtchn, - unsigned long flags); + unsigned int control_evtchn, + unsigned long flags); struct mem_map; int xc_vmx_build(int xc_handle, @@ -418,7 +421,7 @@ int xc_msr_write(int xc_handle, int cpu_mask, int msr, unsigned int low, /** * Memory maps a range within one domain to a local address range. Mappings * should be unmapped with munmap and should follow the same rules as mmap - * regarding page alignment. + * regarding page alignment. Returns NULL on failure. * * In Linux, the ring queue for the control channel is accessible by mapping * the shared_info_frame (from xc_domain_getinfo()) + 2048. 
The structure @@ -438,7 +441,7 @@ void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot, unsigned long *arr, int num ); int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, - unsigned long max_pfns); + unsigned long max_pfns); /*\ * GRANT TABLE FUNCTIONS diff --git a/tools/libxc/xc_core.c b/tools/libxc/xc_core.c index 84ef16bf37..2d6b7d0fa2 100644 --- a/tools/libxc/xc_core.c +++ b/tools/libxc/xc_core.c @@ -7,6 +7,7 @@ /* number of pages to write at a time */ #define DUMP_INCREMENT 4 * 1024 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) + static int copy_from_domain_page(int xc_handle, u32 domid, @@ -28,13 +29,14 @@ xc_domain_dumpcore(int xc_handle, u32 domid, const char *corename) { - vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt; unsigned long nr_pages; unsigned long *page_array; - xc_domaininfo_t st_info, *info = &st_info; + xc_dominfo_t info; int i, dump_fd; char *dump_mem, *dump_mem_start = NULL; struct xc_core_header header; + vcpu_guest_context_t ctxt[MAX_VIRT_CPUS]; + if ((dump_fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0) { PERROR("Could not open corefile %s: %s", corename, strerror(errno)); @@ -46,14 +48,25 @@ xc_domain_dumpcore(int xc_handle, goto error_out; } - if (xc_domain_getfullinfo(xc_handle, domid, 0/* XXX hardcode */, info, ctxt)) { - PERROR("Could not get full info for domain"); + if (xc_domain_getinfo(xc_handle, domid, 1, &info)) { + PERROR("Could not get info for domain"); goto error_out; } + + for (i = 0; i < sizeof(info.vcpu_to_cpu) / sizeof(info.vcpu_to_cpu[0]); + i++) { + if (info.vcpu_to_cpu[i] == -1) + continue; + if (xc_domain_get_vcpu_context(xc_handle, domid, i, &ctxt[i])) { + PERROR("Could not get all vcpu contexts for domain"); + goto error_out; + } + } + + nr_pages = info.nr_pages; - nr_pages = info->tot_pages; header.xch_magic = 0xF00FEBED; - header.xch_nr_vcpus = 1; /* no interface to query at the moment */ + header.xch_nr_vcpus = info.vcpus; header.xch_nr_pages = nr_pages; 
header.xch_ctxt_offset = sizeof(struct xc_core_header); header.xch_index_offset = sizeof(struct xc_core_header) + @@ -62,7 +75,7 @@ xc_domain_dumpcore(int xc_handle, sizeof(vcpu_guest_context_t) + nr_pages * sizeof(unsigned long)); write(dump_fd, &header, sizeof(struct xc_core_header)); - write(dump_fd, ctxt, sizeof(st_ctxt)); + write(dump_fd, &ctxt, sizeof(ctxt[0]) * info.vcpus); if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) { printf("Could not allocate memory\n"); diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index 488139cfd1..ed0149c60d 100644 --- a/tools/libxc/xc_domain.c +++ b/tools/libxc/xc_domain.c @@ -16,6 +16,8 @@ int xc_domain_create(int xc_handle, { int err, errno_saved; dom0_op_t op; + u32 vcpu = 0; /* FIXME, hard coded initial pin to vcpu 0 */ + cpumap_t cpumap = 1 << cpu; op.cmd = DOM0_CREATEDOMAIN; op.u.createdomain.domain = (domid_t)*pdomid; @@ -25,7 +27,7 @@ int xc_domain_create(int xc_handle, *pdomid = (u16)op.u.createdomain.domain; if ( (cpu != -1) && - ((err = xc_domain_pincpu(xc_handle, *pdomid, cpu)) != 0) ) + ((err = xc_domain_pincpu(xc_handle, *pdomid, vcpu, &cpumap)) != 0) ) goto fail; if ( (err = xc_domain_setcpuweight(xc_handle, *pdomid, cpu_weight)) != 0 ) @@ -84,13 +86,14 @@ int xc_domain_destroy(int xc_handle, int xc_domain_pincpu(int xc_handle, u32 domid, - int cpu) + int vcpu, + cpumap_t *cpumap) { dom0_op_t op; op.cmd = DOM0_PINCPUDOMAIN; op.u.pincpudomain.domain = (domid_t)domid; - op.u.pincpudomain.exec_domain = 0; - op.u.pincpudomain.cpu = cpu; + op.u.pincpudomain.exec_domain = vcpu; + op.u.pincpudomain.cpumap = cpumap; return do_dom0_op(xc_handle, &op); } @@ -109,14 +112,9 @@ int xc_domain_getinfo(int xc_handle, { op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)next_domid; - op.u.getdomaininfo.exec_domain = 0; // FIX ME?!? - op.u.getdomaininfo.ctxt = NULL; /* no exec context info, thanks. 
*/ if ( (rc = do_dom0_op(xc_handle, &op)) < 0 ) break; - info->domid = (u16)op.u.getdomaininfo.domain; - - info->cpu = - (op.u.getdomaininfo.flags>>DOMFLAGS_CPUSHIFT) & DOMFLAGS_CPUMASK; + info->domid = (u16)op.u.getdomaininfo.domain; info->dying = !!(op.u.getdomaininfo.flags & DOMFLAGS_DYING); info->crashed = !!(op.u.getdomaininfo.flags & DOMFLAGS_CRASHED); @@ -133,29 +131,33 @@ int xc_domain_getinfo(int xc_handle, info->max_memkb = op.u.getdomaininfo.max_pages<<(PAGE_SHIFT); info->shared_info_frame = op.u.getdomaininfo.shared_info_frame; info->cpu_time = op.u.getdomaininfo.cpu_time; + info->vcpus = op.u.getdomaininfo.n_vcpu; + memcpy(&info->vcpu_to_cpu, &op.u.getdomaininfo.vcpu_to_cpu, + sizeof(info->vcpu_to_cpu)); + memcpy(&info->cpumap, &op.u.getdomaininfo.cpumap, + sizeof(info->cpumap)); next_domid = (u16)op.u.getdomaininfo.domain + 1; info++; } - if(!nr_doms) return rc; + if( !nr_doms ) return rc; return nr_doms; } -int xc_domain_getfullinfo(int xc_handle, - u32 domid, - u32 vcpu, - xc_domaininfo_t *info, - vcpu_guest_context_t *ctxt) +int xc_domain_get_vcpu_context(int xc_handle, + u32 domid, + u32 vcpu, + vcpu_guest_context_t *ctxt) { int rc, errno_saved; dom0_op_t op; - op.cmd = DOM0_GETDOMAININFO; - op.u.getdomaininfo.domain = (domid_t)domid; - op.u.getdomaininfo.exec_domain = (u16)vcpu; - op.u.getdomaininfo.ctxt = ctxt; + op.cmd = DOM0_GETVCPUCONTEXT; + op.u.getvcpucontext.domain = (domid_t)domid; + op.u.getvcpucontext.exec_domain = (u16)vcpu; + op.u.getvcpucontext.ctxt = ctxt; if ( (ctxt != NULL) && ((rc = mlock(ctxt, sizeof(*ctxt))) != 0) ) @@ -170,10 +172,7 @@ int xc_domain_getfullinfo(int xc_handle, errno = errno_saved; } - if ( info != NULL ) - memcpy(info, &op.u.getdomaininfo, sizeof(*info)); - - if ( ((u16)op.u.getdomaininfo.domain != domid) && (rc > 0) ) + if ( rc > 0 ) return -ESRCH; else return rc; diff --git a/tools/libxc/xc_linux_build.c b/tools/libxc/xc_linux_build.c index abf427f87f..364453190d 100644 --- a/tools/libxc/xc_linux_build.c +++ 
b/tools/libxc/xc_linux_build.c @@ -356,14 +356,19 @@ int xc_linux_build(int xc_handle, op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = ctxt; if ( (do_dom0_op(xc_handle, &op) < 0) || ((u16)op.u.getdomaininfo.domain != domid) ) { PERROR("Could not get info on domain"); goto error_out; } + + if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ) + { + PERROR("Could not get vcpu context"); + goto error_out; + } + if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || (ctxt->pt_base != 0) ) { @@ -409,7 +414,7 @@ int xc_linux_build(int xc_handle, ctxt->user_regs.eip = vkern_entry; ctxt->user_regs.esp = vstartinfo_start + 2*PAGE_SIZE; ctxt->user_regs.esi = vstartinfo_start; - ctxt->user_regs.eflags = (1<<9) | (1<<2); + ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */ /* FPU is set up to default initial state. */ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); diff --git a/tools/libxc/xc_linux_restore.c b/tools/libxc/xc_linux_restore.c index 3d3f4c1e18..6442e3926a 100644 --- a/tools/libxc/xc_linux_restore.c +++ b/tools/libxc/xc_linux_restore.c @@ -181,8 +181,6 @@ int xc_linux_restore(int xc_handle, XcIOContext *ioctxt) /* Get the domain's shared-info frame. 
*/ op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)dom; - op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = NULL; if ( do_dom0_op(xc_handle, &op) < 0 ) { xcio_error(ioctxt, "Could not get information on new domain"); diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c index f8a28cf43f..17000d668e 100644 --- a/tools/libxc/xc_linux_save.c +++ b/tools/libxc/xc_linux_save.c @@ -324,7 +324,7 @@ static int analysis_phase( int xc_handle, u32 domid, int suspend_and_state(int xc_handle, XcIOContext *ioctxt, - xc_domaininfo_t *info, + xc_dominfo_t *info, vcpu_guest_context_t *ctxt) { int i=0; @@ -333,27 +333,29 @@ int suspend_and_state(int xc_handle, XcIOContext *ioctxt, retry: - if ( xc_domain_getfullinfo(xc_handle, ioctxt->domain, /* FIXME */ 0, - info, ctxt) ) + if ( xc_domain_getinfo(xc_handle, ioctxt->domain, 1, info) ) { xcio_error(ioctxt, "Could not get full domain info"); return -1; } - if ( (info->flags & - (DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT))) == - (DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT)) ) + if ( xc_domain_get_vcpu_context(xc_handle, ioctxt->domain, 0 /* XXX */, + ctxt) ) + { + xcio_error(ioctxt, "Could not get vcpu context"); + } + + if ( info->shutdown && info->shutdown_reason == SHUTDOWN_suspend ) { return 0; // success } - if ( info->flags & DOMFLAGS_PAUSED ) + if ( info->paused ) { // try unpausing domain, wait, and retest xc_domain_unpause( xc_handle, ioctxt->domain ); - xcio_error(ioctxt, "Domain was paused. Wait and re-test. (%u)", - info->flags); + xcio_error(ioctxt, "Domain was paused. Wait and re-test."); usleep(10000); // 10ms goto retry; @@ -362,19 +364,19 @@ retry: if( ++i < 100 ) { - xcio_error(ioctxt, "Retry suspend domain (%u)", info->flags); + xcio_error(ioctxt, "Retry suspend domain."); usleep(10000); // 10ms goto retry; } - xcio_error(ioctxt, "Unable to suspend domain. 
(%u)", info->flags); + xcio_error(ioctxt, "Unable to suspend domain."); return -1; } int xc_linux_save(int xc_handle, XcIOContext *ioctxt) { - xc_domaininfo_t info; + xc_dominfo_t info; int rc = 1, i, j, k, last_iter, iter = 0; unsigned long mfn; @@ -444,13 +446,18 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) xcio_perror(ioctxt, "Unable to mlock ctxt"); return 1; } - - if ( xc_domain_getfullinfo( xc_handle, domid, /* FIXME */ 0, - &info, &ctxt) ) + + if ( xc_domain_getinfo(xc_handle, domid, 1, &info) ) { xcio_error(ioctxt, "Could not get full domain info"); goto out; } + if ( xc_domain_get_vcpu_context( xc_handle, domid, /* FIXME */ 0, + &ctxt) ) + { + xcio_error(ioctxt, "Could not get vcpu context"); + goto out; + } shared_info_frame = info.shared_info_frame; /* A cheesy test to see whether the domain contains valid state. */ @@ -459,7 +466,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) goto out; } - nr_pfns = info.max_pages; + nr_pfns = info.max_memkb >> PAGE_SHIFT; /* cheesy sanity check */ if ( nr_pfns > 1024*1024 ){ @@ -546,8 +553,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) if ( suspend_and_state( xc_handle, ioctxt, &info, &ctxt) ) { - xcio_error(ioctxt, "Domain appears not to have suspended: %u", - info.flags); + xcio_error(ioctxt, "Domain appears not to have suspended"); goto out; } @@ -913,14 +919,12 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) if ( suspend_and_state( xc_handle, ioctxt, &info, &ctxt) ) { xcio_error(ioctxt, - "Domain appears not to have suspended: %u", - info.flags); + "Domain appears not to have suspended"); goto out; } xcio_info(ioctxt, - "SUSPEND flags %08u shinfo %08lx eip %08u " - "esi %08u\n",info.flags, + "SUSPEND shinfo %08lx eip %08u esi %08u\n", info.shared_info_frame, ctxt.user_regs.eip, ctxt.user_regs.esi ); } diff --git a/tools/libxc/xc_plan9_build.c b/tools/libxc/xc_plan9_build.c index 3260661acc..ab8886a5d3 100644 --- a/tools/libxc/xc_plan9_build.c +++ 
b/tools/libxc/xc_plan9_build.c @@ -440,17 +440,21 @@ xc_plan9_build(int xc_handle, op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t) domid; - op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = ctxt; if ((do_dom0_op(xc_handle, &op) < 0) || ((u32) op.u.getdomaininfo.domain != domid)) { PERROR("Could not get info on domain"); goto error_out; } DPRINTF(("xc_get_tot_pages returns %ld pages\n", tot_pages)); + + if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ) + { + PERROR("Could not get vcpu context"); + goto error_out; + } if (!(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) - || (op.u.getdomaininfo.ctxt->pt_base != 0)) { + || (ctxt->pt_base != 0)) { ERROR("Domain is already constructed"); goto error_out; } @@ -495,7 +499,7 @@ xc_plan9_build(int xc_handle, /* why is this set? */ ctxt->user_regs.esi = ctxt->user_regs.esp; - ctxt->user_regs.eflags = (1 << 9) | (1 << 2); + ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */ /* FPU is set up to default initial state. 
*/ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c index 15327ccdbb..550989e3e0 100644 --- a/tools/libxc/xc_private.c +++ b/tools/libxc/xc_private.c @@ -13,18 +13,18 @@ void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot, privcmd_mmapbatch_t ioctlx; void *addr; addr = mmap(NULL, num*PAGE_SIZE, prot, MAP_SHARED, xc_handle, 0); - if ( addr != NULL ) + if ( addr == MAP_FAILED ) + return NULL; + + ioctlx.num=num; + ioctlx.dom=dom; + ioctlx.addr=(unsigned long)addr; + ioctlx.arr=arr; + if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 ) { - ioctlx.num=num; - ioctlx.dom=dom; - ioctlx.addr=(unsigned long)addr; - ioctlx.arr=arr; - if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAPBATCH, &ioctlx ) < 0 ) - { - perror("XXXXXXXX"); - munmap(addr, num*PAGE_SIZE); - return 0; - } + perror("XXXXXXXX"); + munmap(addr, num*PAGE_SIZE); + return NULL; } return addr; @@ -40,19 +40,19 @@ void *xc_map_foreign_range(int xc_handle, u32 dom, privcmd_mmap_entry_t entry; void *addr; addr = mmap(NULL, size, prot, MAP_SHARED, xc_handle, 0); - if ( addr != NULL ) + if ( addr == MAP_FAILED ) + return NULL; + + ioctlx.num=1; + ioctlx.dom=dom; + ioctlx.entry=&entry; + entry.va=(unsigned long) addr; + entry.mfn=mfn; + entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT; + if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 ) { - ioctlx.num=1; - ioctlx.dom=dom; - ioctlx.entry=&entry; - entry.va=(unsigned long) addr; - entry.mfn=mfn; - entry.npages=(size+PAGE_SIZE-1)>>PAGE_SHIFT; - if ( ioctl( xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx ) < 0 ) - { - munmap(addr, size); - return 0; - } + munmap(addr, size); + return NULL; } return addr; } @@ -173,17 +173,16 @@ long long xc_domain_get_cpu_usage( int xc_handle, domid_t domid, int vcpu ) { dom0_op_t op; - op.cmd = DOM0_GETDOMAININFO; - op.u.getdomaininfo.domain = (domid_t)domid; - op.u.getdomaininfo.exec_domain = (u16)vcpu; - op.u.getdomaininfo.ctxt = NULL; - if ( 
(do_dom0_op(xc_handle, &op) < 0) || - ((u16)op.u.getdomaininfo.domain != domid) ) + op.cmd = DOM0_GETVCPUCONTEXT; + op.u.getvcpucontext.domain = (domid_t)domid; + op.u.getvcpucontext.exec_domain = (u16)vcpu; + op.u.getvcpucontext.ctxt = NULL; + if ( (do_dom0_op(xc_handle, &op) < 0) ) { PERROR("Could not get info on domain"); return -1; } - return op.u.getdomaininfo.cpu_time; + return op.u.getvcpucontext.cpu_time; } @@ -258,8 +257,6 @@ long xc_get_tot_pages(int xc_handle, u32 domid) dom0_op_t op; op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = NULL; return (do_dom0_op(xc_handle, &op) < 0) ? -1 : op.u.getdomaininfo.tot_pages; } diff --git a/tools/libxc/xc_ptrace.c b/tools/libxc/xc_ptrace.c index 63efdd7cb9..ca051fab97 100644 --- a/tools/libxc/xc_ptrace.c +++ b/tools/libxc/xc_ptrace.c @@ -71,7 +71,7 @@ struct gdb_regs { #define FETCH_REGS(cpu) \ if (!regs_valid[cpu]) \ { \ - int retval = xc_domain_getfullinfo(xc_handle, domid, cpu, NULL, &ctxt[cpu]); \ + int retval = xc_domain_get_vcpu_context(xc_handle, domid, cpu, &ctxt[cpu]); \ if (retval) \ goto error_out; \ cr3[cpu] = ctxt[cpu].pt_base; /* physical address */ \ @@ -221,7 +221,6 @@ xc_waitdomain(int domain, int *status, int options) { dom0_op_t op; int retval; - vcpu_guest_context_t ctxt; struct timespec ts; ts.tv_sec = 0; ts.tv_nsec = 10*1000*1000; @@ -234,12 +233,10 @@ xc_waitdomain(int domain, int *status, int options) } op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = domain; - op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = &ctxt; retry: retval = do_dom0_op(xc_handle, &op); - if (retval) { + if (retval || op.u.getdomaininfo.domain != domain) { printf("getdomaininfo failed\n"); goto done; } @@ -325,10 +322,8 @@ xc_ptrace(enum __ptrace_request request, u32 domid, long eaddr, long edata) case PTRACE_ATTACH: op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = domid; - 
op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = NULL; retval = do_dom0_op(xc_handle, &op); - if (retval) { + if (retval || op.u.getdomaininfo.domain != domid) { perror("dom0 op failed"); goto error_out; } diff --git a/tools/libxc/xc_ptrace_core.c b/tools/libxc/xc_ptrace_core.c index a936bd8b42..7a5eeea21d 100644 --- a/tools/libxc/xc_ptrace_core.c +++ b/tools/libxc/xc_ptrace_core.c @@ -107,6 +107,7 @@ map_domain_va(unsigned long domfd, int cpu, void * guest_va) { unsigned long pde, page; unsigned long va = (unsigned long)guest_va; + void *v; static unsigned long cr3_phys[MAX_VIRT_CPUS]; static unsigned long *cr3_virt[MAX_VIRT_CPUS]; @@ -120,13 +121,15 @@ map_domain_va(unsigned long domfd, int cpu, void * guest_va) cr3_phys[cpu] = cr3[cpu]; if (cr3_virt[cpu]) munmap(cr3_virt[cpu], PAGE_SIZE); - if ((cr3_virt[cpu] = mmap(NULL, PAGE_SIZE, PROT_READ, - MAP_PRIVATE, domfd, map_mtop_offset(cr3_phys[cpu]))) == - (unsigned long*)0xffffffff) + v = mmap( + NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd, + map_mtop_offset(cr3_phys[cpu])); + if (v == MAP_FAILED) { perror("mmap failed"); goto error_out; } + cr3_virt[cpu] = v; } if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */ goto error_out; @@ -137,9 +140,12 @@ map_domain_va(unsigned long domfd, int cpu, void * guest_va) pde_phys[cpu] = pde; if (pde_virt[cpu]) munmap(pde_virt[cpu], PAGE_SIZE); - if ((pde_virt[cpu] = mmap(NULL, PAGE_SIZE, PROT_READ, - MAP_PRIVATE, domfd, map_mtop_offset(pde_phys[cpu]))) == NULL) + v = mmap( + NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd, + map_mtop_offset(pde_phys[cpu])); + if (v == MAP_FAILED) goto error_out; + pde_virt[cpu] = v; } if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */ goto error_out; @@ -150,12 +156,15 @@ map_domain_va(unsigned long domfd, int cpu, void * guest_va) page_phys[cpu] = page; if (page_virt[cpu]) munmap(page_virt[cpu], PAGE_SIZE); - if ((page_virt[cpu] = mmap(NULL, PAGE_SIZE, PROT_READ, - MAP_PRIVATE, domfd, 
map_mtop_offset(page_phys[cpu]))) == NULL) { + v = mmap( + NULL, PAGE_SIZE, PROT_READ, MAP_PRIVATE, domfd, + map_mtop_offset(page_phys[cpu])); + if (v == MAP_FAILED) { printf("cr3 %lx pde %lx page %lx pti %lx\n", cr3[cpu], pde, page, vtopti(va)); page_phys[cpu] = 0; goto error_out; } + page_virt[cpu] = v; } return (void *)(((unsigned long)page_virt[cpu]) | (va & BSD_PAGE_MASK)); diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c index a89b783fee..32c9a14786 100644 --- a/tools/libxc/xc_vmx_build.c +++ b/tools/libxc/xc_vmx_build.c @@ -447,7 +447,7 @@ static int setup_guest(int xc_handle, ctxt->user_regs.esi = vboot_params_start; ctxt->user_regs.edi = vboot_params_start + 0x2d0; - ctxt->user_regs.eflags = (1<<2); + ctxt->user_regs.eflags = 0; return 0; @@ -543,14 +543,19 @@ int xc_vmx_build(int xc_handle, op.cmd = DOM0_GETDOMAININFO; op.u.getdomaininfo.domain = (domid_t)domid; - op.u.getdomaininfo.exec_domain = 0; - op.u.getdomaininfo.ctxt = ctxt; if ( (do_dom0_op(xc_handle, &op) < 0) || ((u16)op.u.getdomaininfo.domain != domid) ) { PERROR("Could not get info on domain"); goto error_out; } + + if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ) + { + PERROR("Could not get vcpu context"); + goto error_out; + } + if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || (ctxt->pt_base != 0) ) { diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index faa6e06d62..a7b4aba7c6 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -155,15 +155,16 @@ static PyObject *pyxc_domain_pincpu(PyObject *self, XcObject *xc = (XcObject *)self; u32 dom; - int cpu = -1; + int vcpu = 0; + cpumap_t cpumap = 0xFFFFFFFF; - static char *kwd_list[] = { "dom", "cpu", NULL }; + static char *kwd_list[] = { "dom", "vcpu", "cpumap", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|i", kwd_list, - &dom, &cpu) ) + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|ii", kwd_list, + &dom, &vcpu, &cpumap) 
) return NULL; - if ( xc_domain_pincpu(xc->xc_handle, dom, cpu) != 0 ) + if ( xc_domain_pincpu(xc->xc_handle, dom, vcpu, &cpumap) != 0 ) return PyErr_SetFromErrno(xc_error); Py_INCREF(zero); @@ -175,10 +176,10 @@ static PyObject *pyxc_domain_getinfo(PyObject *self, PyObject *kwds) { XcObject *xc = (XcObject *)self; - PyObject *list; + PyObject *list, *vcpu_list, *cpumap_list, *info_dict; u32 first_dom = 0; - int max_doms = 1024, nr_doms, i; + int max_doms = 1024, nr_doms, i, j; xc_dominfo_t *info; static char *kwd_list[] = { "first_dom", "max_doms", NULL }; @@ -195,23 +196,33 @@ static PyObject *pyxc_domain_getinfo(PyObject *self, list = PyList_New(nr_doms); for ( i = 0 ; i < nr_doms; i++ ) { - PyList_SetItem( - list, i, - Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:i,s:i" - ",s:l,s:L,s:l,s:i}", - "dom", info[i].domid, - "cpu", info[i].cpu, - "dying", info[i].dying, - "crashed", info[i].crashed, - "shutdown", info[i].shutdown, - "paused", info[i].paused, - "blocked", info[i].blocked, - "running", info[i].running, - "mem_kb", info[i].nr_pages*4, - "cpu_time", info[i].cpu_time, - "maxmem_kb", info[i].max_memkb, - "shutdown_reason", info[i].shutdown_reason - )); + vcpu_list = PyList_New(MAX_VIRT_CPUS); + cpumap_list = PyList_New(MAX_VIRT_CPUS); + for ( j = 0; j < MAX_VIRT_CPUS; j++ ) { + PyList_SetItem( vcpu_list, j, + Py_BuildValue("i", info[i].vcpu_to_cpu[j])); + PyList_SetItem( cpumap_list, j, + Py_BuildValue("i", info[i].cpumap[j])); + } + + info_dict = Py_BuildValue("{s:i,s:i,s:i,s:i,s:i,s:i,s:i,s:i" + ",s:l,s:L,s:l,s:i}", + "dom", info[i].domid, + "vcpus", info[i].vcpus, + "dying", info[i].dying, + "crashed", info[i].crashed, + "shutdown", info[i].shutdown, + "paused", info[i].paused, + "blocked", info[i].blocked, + "running", info[i].running, + "mem_kb", info[i].nr_pages*4, + "cpu_time", info[i].cpu_time, + "maxmem_kb", info[i].max_memkb, + "shutdown_reason", info[i].shutdown_reason); + PyDict_SetItemString( info_dict, "vcpu_to_cpu", vcpu_list ); + 
PyDict_SetItemString( info_dict, "cpumap", cpumap_list ); + PyList_SetItem( list, i, info_dict); + } free(info); @@ -959,9 +970,10 @@ static PyMethodDef pyxc_methods[] = { { "domain_pincpu", (PyCFunction)pyxc_domain_pincpu, METH_VARARGS | METH_KEYWORDS, "\n" - "Pin a domain to a specified CPU.\n" - " dom [int]: Identifier of domain to be pinned.\n" - " cpu [int, -1]: CPU to pin to, or -1 to unpin\n\n" + "Pin a VCPU to a specified set CPUs.\n" + " dom [int]: Identifier of domain to which VCPU belongs.\n" + " vcpu [int, 0]: VCPU being pinned.\n" + " cpumap [int, -1]: Bitmap of usable CPUs.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, { "domain_getinfo", @@ -976,6 +988,7 @@ static PyMethodDef pyxc_methods[] = { " domain-id space was reached.\n" " dom [int]: Identifier of domain to which this info pertains\n" " cpu [int]: CPU to which this domain is bound\n" + " vcpus [int]: Number of Virtual CPUS in this domain\n" " dying [int]: Bool - is the domain dying?\n" " crashed [int]: Bool - has the domain crashed?\n" " shutdown [int]: Bool - has the domain shut itself down?\n" @@ -986,7 +999,8 @@ static PyMethodDef pyxc_methods[] = { " maxmem_kb [int]: Maximum memory limit, in kilobytes\n" " cpu_time [long]: CPU time consumed, in nanoseconds\n" " shutdown_reason [int]: Numeric code from guest OS, explaining " - "reason why it shut itself down.\n" }, + "reason why it shut itself down.\n" + " vcpu_to_cpu [[int]]: List that maps VCPUS to CPUS\n" }, { "linux_save", (PyCFunction)pyxc_linux_save, diff --git a/tools/python/xen/xend/XendClient.py b/tools/python/xen/xend/XendClient.py index c762cb64a9..39a72a43b7 100644 --- a/tools/python/xen/xend/XendClient.py +++ b/tools/python/xen/xend/XendClient.py @@ -246,10 +246,11 @@ class Xend: 'live' : live, 'resource' : resource }) - def xend_domain_pincpu(self, id, cpu): + def xend_domain_pincpu(self, id, vcpu, cpumap): return self.xendPost(self.domainurl(id), {'op' : 'pincpu', - 'cpu' : cpu }) + 'vcpu' : vcpu, + 'cpumap' : cpumap 
}) def xend_domain_cpu_bvt_set(self, id, mcuadv, warpback, warpvalue, warpl, warpu): return self.xendPost(self.domainurl(id), diff --git a/tools/python/xen/xend/XendDomain.py b/tools/python/xen/xend/XendDomain.py index 2a4d472143..71f2cd8020 100644 --- a/tools/python/xen/xend/XendDomain.py +++ b/tools/python/xen/xend/XendDomain.py @@ -612,15 +612,16 @@ class XendDomain: xmigrate = XendMigrate.instance() return xmigrate.save_begin(dominfo, dst) - def domain_pincpu(self, id, cpu): - """Pin a domain to a cpu. + def domain_pincpu(self, id, vcpu, cpumap): + """Set which cpus vcpu can use - @param id: domain - @param cpu: cpu number + @param id: domain + @param vcpu: vcpu number + @param cpumap: bitmap of usable cpus """ dominfo = self.domain_lookup(id) try: - return xc.domain_pincpu(int(dominfo.id), cpu) + return xc.domain_pincpu(int(dominfo.id), vcpu, cpumap) except Exception, ex: raise XendError(str(ex)) diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index e97266b303..2af99694ab 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -370,8 +370,12 @@ class XendDomainInfo: if self.info['shutdown']: reason = shutdown_reason(self.info['shutdown_reason']) sxpr.append(['shutdown_reason', reason]) - sxpr.append(['cpu', self.info['cpu']]) + sxpr.append(['cpu', self.info['vcpu_to_cpu'][0]]) sxpr.append(['cpu_time', self.info['cpu_time']/1e9]) + sxpr.append(['vcpus', self.info['vcpus']]) + sxpr.append(['cpumap', self.info['cpumap']]) + sxpr.append(['vcpu_to_cpu', ''.join(map(lambda x: str(x), + self.info['vcpu_to_cpu'][0:self.info['vcpus']]))]) if self.start_time: up_time = time.time() - self.start_time @@ -449,7 +453,7 @@ class XendDomainInfo: raise VmError('missing memory size') cpu = sxp.child_value(config, 'cpu') if self.recreate and self.dom and cpu is not None: - xc.domain_pincpu(self.dom, int(cpu)) + xc.domain_pincpu(self.dom, 0, 1<<int(cpu)) try: image =
sxp.child_value(self.config, 'image') self.vcpus = int(sxp.child_value(image, 'vcpus')) diff --git a/tools/python/xen/xend/server/SrvDomain.py b/tools/python/xen/xend/server/SrvDomain.py index 0bc011337e..8bbf2e424a 100644 --- a/tools/python/xen/xend/server/SrvDomain.py +++ b/tools/python/xen/xend/server/SrvDomain.py @@ -92,7 +92,8 @@ class SrvDomain(SrvDir): def op_pincpu(self, op, req): fn = FormFn(self.xd.domain_pincpu, [['dom', 'str'], - ['cpu', 'int']]) + ['vcpu', 'int'], + ['cpumap', 'int']]) val = fn(req.args, {'dom': self.dom.id}) return val diff --git a/tools/python/xen/xend/server/SrvUsbif.py b/tools/python/xen/xend/server/SrvUsbif.py index 0062ab531b..2360c0cbb6 100644 --- a/tools/python/xen/xend/server/SrvUsbif.py +++ b/tools/python/xen/xend/server/SrvUsbif.py @@ -107,6 +107,7 @@ class SrvDomain(SrvDir): def op_pincpu(self, op, req): fn = FormFn(self.xd.domain_pincpu, [['dom', 'str'], + ['vcpu', 'int'], ['cpu', 'int']]) val = fn(req.args, {'dom': self.dom.id}) return val diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py index f7d29ff5ab..b0c027e4b7 100644 --- a/tools/python/xen/xm/main.py +++ b/tools/python/xen/xm/main.py @@ -6,6 +6,8 @@ import os.path import sys from getopt import getopt import socket +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) from xen.xend import PrettyPrint from xen.xend import sxp @@ -340,8 +342,8 @@ class ProgList(Prog): name = "list" info = """List information about domains.""" - short_options = 'l' - long_options = ['long'] + short_options = 'lv' + long_options = ['long','vcpus'] def help(self, args): if help: @@ -350,11 +352,13 @@ class ProgList(Prog): Either all domains or the domains given. -l, --long Get more detailed information. + -v, --vcpus Show VCPU to CPU mapping. 
""" return def main(self, args): use_long = 0 + show_vcpus = 0 (options, params) = getopt(args[1:], self.short_options, self.long_options) @@ -362,6 +366,8 @@ class ProgList(Prog): for (k, v) in options: if k in ['-l', '--long']: use_long = 1 + if k in ['-v', '--vcpus']: + show_vcpus = 1 if n == 0: doms = server.xend_domains() @@ -371,11 +377,13 @@ class ProgList(Prog): if use_long: self.long_list(doms) + elif show_vcpus: + self.show_vcpus(doms) else: self.brief_list(doms) def brief_list(self, doms): - print 'Name Id Mem(MB) CPU State Time(s) Console' + print 'Name Id Mem(MB) CPU VCPU(s) State Time(s) Console' for dom in doms: info = server.xend_domain(dom) d = {} @@ -383,6 +391,7 @@ class ProgList(Prog): d['name'] = sxp.child_value(info, 'name', '??') d['mem'] = int(sxp.child_value(info, 'memory', '0')) d['cpu'] = int(sxp.child_value(info, 'cpu', '0')) + d['vcpus'] = int(sxp.child_value(info, 'vcpus', '0')) d['state'] = sxp.child_value(info, 'state', '??') d['cpu_time'] = float(sxp.child_value(info, 'cpu_time', '0')) console = sxp.child(info, 'console') @@ -390,9 +399,27 @@ class ProgList(Prog): d['port'] = sxp.child_value(console, 'console_port') else: d['port'] = '' - print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(state)5s %(cpu_time)7.1f %(port)4s" + print ("%(name)-16s %(dom)3d %(mem)7d %(cpu)3d %(vcpus)5d %(state)5s %(cpu_time)7.1f %(port)4s" % d) + def show_vcpus(self, doms): + print 'Name Id VCPU CPU CPUMAP' + for dom in doms: + info = server.xend_domain(dom) + vcpu_to_cpu = sxp.child_value(info, 'vcpu_to_cpu', '?').replace('-','') + cpumap = sxp.child_value(info, 'cpumap', []) + mask = ((int(sxp.child_value(info, 'vcpus', '0')))**2) - 1 + count = 0 + for cpu in vcpu_to_cpu: + d = {} + d['name'] = sxp.child_value(info, 'name', '??') + d['dom'] = int(sxp.child_value(info, 'id', '-1')) + d['vcpu'] = int(count) + d['cpu'] = int(cpu) + d['cpumap'] = int(cpumap[count])&mask + count = count + 1 + print ("%(name)-16s %(dom)3d %(vcpu)4d %(cpu)3d 0x%(cpumap)x" % 
d) + def long_list(self, doms): for dom in doms: info = server.xend_domain(dom) @@ -474,17 +501,35 @@ xm.prog(ProgUnpause) class ProgPincpu(Prog): group = 'domain' name = "pincpu" - info = """Pin a domain to a cpu. """ + info = """Set which cpus a VCPU can use. """ def help(self, args): - print args[0],'DOM CPU' - print '\nPin domain DOM to cpu CPU.' + print args[0],'DOM VCPU CPUS' + print '\nSet which cpus VCPU in domain DOM can use.' + + # convert list of cpus to bitmap integer value + def make_map(self, cpulist): + cpus = [] + cpumap = 0 + for c in cpulist.split(','): + if len(c) > 1: + (x,y) = c.split('-') + for i in range(int(x),int(y)+1): + cpus.append(int(i)) + else: + cpus.append(int(c)) + cpus.sort() + for c in cpus: + cpumap = cpumap | 1<<c + + return cpumap def main(self, args): - if len(args) != 3: self.err("%s: Invalid argument(s)" % args[0]) - dom = args[1] - cpu = int(args[2]) - server.xend_domain_pincpu(dom, cpu) + if len(args) != 4: self.err("%s: Invalid argument(s)" % args[0]) + dom = args[1] + vcpu = int(args[2]) + cpumap = self.make_map(args[3]); + server.xend_domain_pincpu(dom, vcpu, cpumap) xm.prog(ProgPincpu) diff --git a/xen/arch/ia64/asm-offsets.c b/xen/arch/ia64/asm-offsets.c index 73f2500f81..4326ea0078 100644 --- a/xen/arch/ia64/asm-offsets.c +++ b/xen/arch/ia64/asm-offsets.c @@ -8,6 +8,7 @@ #include <xen/sched.h> #include <asm/processor.h> #include <asm/ptrace.h> +#include <public/xen.h> #define task_struct exec_domain @@ -37,6 +38,9 @@ void foo(void) BLANK(); + DEFINE(XSI_PSR_IC_OFS, offsetof(vcpu_info_t, arch.interrupt_collection_enabled)); + DEFINE(XSI_PSR_IC, (SHAREDINFO_ADDR+offsetof(vcpu_info_t, arch.interrupt_collection_enabled))); + DEFINE(XSI_PSR_I_OFS, offsetof(vcpu_info_t, arch.interrupt_delivery_enabled)); //DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); //DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); //DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct 
task_struct, group_leader)); @@ -46,8 +50,8 @@ void foo(void) //DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); //DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); //DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); - DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); - DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct exec_domain, arch._thread.ksp)); + DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct exec_domain, arch._thread.on_ustack)); BLANK(); diff --git a/xen/arch/ia64/domain.c b/xen/arch/ia64/domain.c index 19a608e1ac..eb342e7021 100644 --- a/xen/arch/ia64/domain.c +++ b/xen/arch/ia64/domain.c @@ -191,7 +191,7 @@ void arch_do_createdomain(struct exec_domain *ed) // stay on kernel stack because may get interrupts! // ia64_ret_from_clone (which b0 gets in new_thread) switches // to user stack - ed->thread.on_ustack = 0; + ed->arch._thread.on_ustack = 0; } void arch_do_boot_vcpu(struct exec_domain *p) @@ -261,7 +261,7 @@ void new_thread(struct exec_domain *ed, printf("new_thread: ed=%p, start_pc=%p, regs=%p, sw=%p, new_rbs=%p, IA64_STK_OFFSET=%p, &r8=%p\n", ed,start_pc,regs,sw,new_rbs,IA64_STK_OFFSET,&regs->r8); sw->b0 = (unsigned long) &ia64_ret_from_clone; - ed->thread.ksp = (unsigned long) sw - 16; + ed->arch._thread.ksp = (unsigned long) sw - 16; //ed->thread_info->flags = 0; printk("new_thread, about to call init_all_rr\n"); init_all_rr(ed); diff --git a/xen/arch/ia64/irq.c b/xen/arch/ia64/irq.c index 1b4e277343..b04d49cc70 100644 --- a/xen/arch/ia64/irq.c +++ b/xen/arch/ia64/irq.c @@ -1406,9 +1406,11 @@ int pirq_guest_bind(struct exec_domain *d, int irq, int will_share) desc->handler->startup(irq); /* Attempt to bind the interrupt target to the correct CPU. */ +#if 0 /* FIXME CONFIG_SMP ???
*/ if ( desc->handler->set_affinity != NULL ) desc->handler->set_affinity( irq, apicid_to_phys_cpu_present(d->processor)); +#endif } else if ( !will_share || !action->shareable ) { diff --git a/xen/arch/ia64/ivt.S b/xen/arch/ia64/ivt.S index f68e78a834..a44f5eb8a9 100644 --- a/xen/arch/ia64/ivt.S +++ b/xen/arch/ia64/ivt.S @@ -778,10 +778,22 @@ ENTRY(break_fault) mov r17=cr.iim mov r31=pr ;; + movl r18=XSI_PSR_IC + ;; + ld8 r19=[r18] + ;; cmp.eq p7,p0=r0,r17 // is this a psuedo-cover? - // FIXME: may also need to check slot==2? (p7) br.sptk.many dispatch_privop_fault + ;; + cmp.ne p7,p0=r0,r19 +(p7) br.sptk.many dispatch_break_fault + // If we get to here, we have a hyperprivop + // For now, hyperprivops are handled through the break mechanism + // Later, they will be fast hand-coded assembly with psr.ic off + // which means no calls, no use of r1-r15 and no memory accesses + // except to pinned addresses! br.sptk.many dispatch_break_fault + ;; #endif mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat. 
mov r17=cr.iim diff --git a/xen/arch/ia64/mm_init.c b/xen/arch/ia64/mm_init.c index d316e05357..0e3ce45dec 100644 --- a/xen/arch/ia64/mm_init.c +++ b/xen/arch/ia64/mm_init.c @@ -227,7 +227,7 @@ ia64_set_rbs_bot (void) if (stack_size > MAX_USER_STACK_SIZE) stack_size = MAX_USER_STACK_SIZE; - current->thread.rbs_bot = STACK_TOP - stack_size; + current->arch._thread.rbs_bot = STACK_TOP - stack_size; } /* @@ -255,7 +255,7 @@ printf("ia64_init_addr_space: called, not implemented\n"); if (vma) { memset(vma, 0, sizeof(*vma)); vma->vm_mm = current->mm; - vma->vm_start = current->thread.rbs_bot & PAGE_MASK; + vma->vm_start = current->arch._thread.rbs_bot & PAGE_MASK; vma->vm_end = vma->vm_start + PAGE_SIZE; vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7]; vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE|VM_GROWSUP; diff --git a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c index bcc165e95e..0ed114e064 100644 --- a/xen/arch/ia64/patch/linux-2.6.11/unaligned.c +++ b/xen/arch/ia64/patch/linux-2.6.11/unaligned.c @@ -1,5 +1,5 @@ --- ../../linux-2.6.11/arch/ia64/kernel/unaligned.c 2005-03-02 00:38:25.000000000 -0700 -+++ arch/ia64/unaligned.c 2005-04-28 15:40:13.000000000 -0600 ++++ arch/ia64/unaligned.c 2005-05-10 15:46:09.000000000 -0600 @@ -437,7 +437,11 @@ } @@ -12,7 +12,31 @@ setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *) regs - 1; -@@ -611,7 +615,11 @@ +@@ -522,7 +526,11 @@ + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_sync_fph(current); ++#ifdef XEN ++ current->arch._thread.fph[fph_index(regs, regnum)] = *fpval; ++#else + current->thread.fph[fph_index(regs, regnum)] = *fpval; ++#endif + } else { + /* + * pt_regs or switch_stack ? 
+@@ -581,7 +589,11 @@ + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_flush_fph(current); ++#ifdef XEN ++ *fpval = current->arch._thread.fph[fph_index(regs, regnum)]; ++#else + *fpval = current->thread.fph[fph_index(regs, regnum)]; ++#endif + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus +@@ -611,7 +623,11 @@ } @@ -24,7 +48,7 @@ getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *) regs - 1; -@@ -1294,6 +1302,9 @@ +@@ -1294,6 +1310,9 @@ void ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) { @@ -34,7 +58,7 @@ struct ia64_psr *ipsr = ia64_psr(regs); mm_segment_t old_fs = get_fs(); unsigned long bundle[2]; -@@ -1502,4 +1513,5 @@ +@@ -1502,4 +1521,5 @@ si.si_imm = 0; force_sig_info(SIGBUS, &si, current); goto done; diff --git a/xen/arch/ia64/privop.c b/xen/arch/ia64/privop.c index 0b59b60ad8..aec763f80d 100644 --- a/xen/arch/ia64/privop.c +++ b/xen/arch/ia64/privop.c @@ -205,7 +205,8 @@ IA64FAULT priv_itc_d(VCPU *vcpu, INST64 inst) return(IA64_ILLOP_FAULT); if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) return(IA64_ILLOP_FAULT); - pte = vcpu_get_gr(vcpu,inst.M41.r2); + if (!inst.inst) pte = vcpu_get_tmp(vcpu,0); + else pte = vcpu_get_gr(vcpu,inst.M41.r2); return (vcpu_itc_d(vcpu,pte,itir,ifa)); } @@ -219,7 +220,8 @@ IA64FAULT priv_itc_i(VCPU *vcpu, INST64 inst) return(IA64_ILLOP_FAULT); if ((fault = vcpu_get_ifa(vcpu,&ifa)) != IA64_NO_FAULT) return(IA64_ILLOP_FAULT); - pte = vcpu_get_gr(vcpu,inst.M41.r2); + if (!inst.inst) pte = vcpu_get_tmp(vcpu,0); + else pte = vcpu_get_gr(vcpu,inst.M41.r2); return (vcpu_itc_i(vcpu,pte,itir,ifa)); } @@ -417,10 +419,17 @@ IA64FAULT priv_mov_from_pmc(VCPU *vcpu, INST64 inst) UINT64 val; IA64FAULT fault; - fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); - if (fault == IA64_NO_FAULT) - return vcpu_set_gr(vcpu, inst.M43.r1, val); - else return fault; + if (inst.M43.r1 > 63) { // 
privified mov from pmd + fault = vcpu_get_pmd(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1-64, val); + } + else { + fault = vcpu_get_pmc(vcpu,vcpu_get_gr(vcpu,inst.M43.r3),&val); + if (fault == IA64_NO_FAULT) + return vcpu_set_gr(vcpu, inst.M43.r1, val); + } + return fault; } unsigned long from_cr_cnt[128] = { 0 }; @@ -531,6 +540,8 @@ struct { unsigned long bsw0; unsigned long bsw1; unsigned long cover; + unsigned long fc; + unsigned long cpuid; unsigned long Mpriv_cnt[64]; } privcnt = { 0 }; @@ -631,7 +642,11 @@ priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl) else x6 = 0x1a; } } - privcnt.Mpriv_cnt[x6]++; + if (x6 == 52 && inst.M28.r3 > 63) + privcnt.fc++; + else if (x6 == 16 && inst.M43.r3 > 63) + privcnt.cpuid++; + else privcnt.Mpriv_cnt[x6]++; return (*pfunc)(vcpu,inst); break; case B: @@ -682,7 +697,7 @@ priv_handle_op(VCPU *vcpu, REGS *regs, int privlvl) //printf("We who are about do die salute you\n"); printf("handle_op: can't handle privop at 0x%lx (op=0x%016lx) slot %d (type=%d)\n", iip, (UINT64)inst.inst, slot, slot_type); - //printf("vtop(0x%lx)==0x%lx\r\n", iip, tr_vtop(iip)); + //printf("vtop(0x%lx)==0x%lx\n", iip, tr_vtop(iip)); //thread_mozambique("privop fault\n"); return (IA64_ILLOP_FAULT); } @@ -745,6 +760,64 @@ priv_emulate(VCPU *vcpu, REGS *regs, UINT64 isr) } +// FIXME: Move these to include/public/arch-ia64? 
+#define HYPERPRIVOP_RFI 0x1 +#define HYPERPRIVOP_RSM_DT 0x2 +#define HYPERPRIVOP_SSM_DT 0x3 +#define HYPERPRIVOP_COVER 0x4 +#define HYPERPRIVOP_ITC_D 0x5 +#define HYPERPRIVOP_ITC_I 0x6 +#define HYPERPRIVOP_MAX 0x6 + +char *hyperpriv_str[HYPERPRIVOP_MAX+1] = { + 0, "rfi", "rsm.dt", "ssm.dt", "cover", "itc.d", "itc.i", + 0 +}; + +unsigned long hyperpriv_cnt[HYPERPRIVOP_MAX+1] = { 0 }; + +/* hyperprivops are generally executed in assembly (with physical psr.ic off) + * so this code is primarily used for debugging them */ +int +ia64_hyperprivop(unsigned long iim, REGS *regs) +{ + struct exec_domain *ed = (struct domain *) current; + INST64 inst; + UINT64 val; + +// FIXME: Add instrumentation for these +// FIXME: Handle faults appropriately for these + if (!iim || iim > HYPERPRIVOP_MAX) { + printf("bad hyperprivop; ignored\n"); + return 1; + } + hyperpriv_cnt[iim]++; + switch(iim) { + case HYPERPRIVOP_RFI: + (void)vcpu_rfi(ed); + return 0; // don't update iip + case HYPERPRIVOP_RSM_DT: + (void)vcpu_reset_psr_dt(ed); + return 1; + case HYPERPRIVOP_SSM_DT: + (void)vcpu_set_psr_dt(ed); + return 1; + case HYPERPRIVOP_COVER: + (void)vcpu_cover(ed); + return 1; + case HYPERPRIVOP_ITC_D: + inst.inst = 0; + (void)priv_itc_d(ed,inst); + return 1; + case HYPERPRIVOP_ITC_I: + inst.inst = 0; + (void)priv_itc_i(ed,inst); + return 1; + } + return 0; +} + + /************************************************************************** Privileged operation instrumentation routines **************************************************************************/ @@ -798,55 +871,61 @@ int dump_privop_counts(char *buf) sum += privcnt.rfi; sum += privcnt.bsw0; sum += privcnt.bsw1; sum += privcnt.cover; for (i=0; i < 64; i++) sum += privcnt.Mpriv_cnt[i]; - s += sprintf(s,"Privop statistics: (Total privops: %ld)\r\n",sum); + s += sprintf(s,"Privop statistics: (Total privops: %ld)\n",sum); if (privcnt.mov_to_ar_imm) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.mov_to_ar_imm, + s += sprintf(s,"%10d 
%s [%d%%]\n", privcnt.mov_to_ar_imm, "mov_to_ar_imm", (privcnt.mov_to_ar_imm*100L)/sum); if (privcnt.mov_to_ar_reg) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.mov_to_ar_reg, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_to_ar_reg, "mov_to_ar_reg", (privcnt.mov_to_ar_reg*100L)/sum); if (privcnt.mov_from_ar) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.mov_from_ar, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.mov_from_ar, "privified-mov_from_ar", (privcnt.mov_from_ar*100L)/sum); if (privcnt.ssm) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.ssm, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.ssm, "ssm", (privcnt.ssm*100L)/sum); if (privcnt.rsm) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.rsm, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rsm, "rsm", (privcnt.rsm*100L)/sum); if (privcnt.rfi) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.rfi, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.rfi, "rfi", (privcnt.rfi*100L)/sum); if (privcnt.bsw0) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.bsw0, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw0, "bsw0", (privcnt.bsw0*100L)/sum); if (privcnt.bsw1) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.bsw1, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.bsw1, "bsw1", (privcnt.bsw1*100L)/sum); if (privcnt.cover) - s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.cover, + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cover, "cover", (privcnt.cover*100L)/sum); + if (privcnt.fc) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.fc, + "privified-fc", (privcnt.fc*100L)/sum); + if (privcnt.cpuid) + s += sprintf(s,"%10d %s [%d%%]\n", privcnt.cpuid, + "privified-getcpuid", (privcnt.cpuid*100L)/sum); for (i=0; i < 64; i++) if (privcnt.Mpriv_cnt[i]) { - if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\r\n"); - else s += sprintf(s,"%10d %s [%d%%]\r\n", privcnt.Mpriv_cnt[i], + if (!Mpriv_str[i]) s += sprintf(s,"PRIVSTRING NULL!!\n"); + else s += sprintf(s,"%10d %s [%d%%]\n", privcnt.Mpriv_cnt[i], Mpriv_str[i], 
(privcnt.Mpriv_cnt[i]*100L)/sum); if (i == 0x24) { // mov from CR s += sprintf(s," ["); for (j=0; j < 128; j++) if (from_cr_cnt[j]) { if (!cr_str[j]) - s += sprintf(s,"PRIVSTRING NULL!!\r\n"); + s += sprintf(s,"PRIVSTRING NULL!!\n"); s += sprintf(s,"%s(%d),",cr_str[j],from_cr_cnt[j]); } - s += sprintf(s,"]\r\n"); + s += sprintf(s,"]\n"); } else if (i == 0x2c) { // mov to CR s += sprintf(s," ["); for (j=0; j < 128; j++) if (to_cr_cnt[j]) { if (!cr_str[j]) - s += sprintf(s,"PRIVSTRING NULL!!\r\n"); + s += sprintf(s,"PRIVSTRING NULL!!\n"); s += sprintf(s,"%s(%d),",cr_str[j],to_cr_cnt[j]); } - s += sprintf(s,"]\r\n"); + s += sprintf(s,"]\n"); } } return s - buf; @@ -864,19 +943,88 @@ int zero_privop_counts(char *buf) privcnt.ssm = 0; privcnt.rsm = 0; privcnt.rfi = 0; privcnt.bsw0 = 0; privcnt.bsw1 = 0; privcnt.cover = 0; + privcnt.fc = 0; privcnt.cpuid = 0; for (i=0; i < 64; i++) privcnt.Mpriv_cnt[i] = 0; for (j=0; j < 128; j++) from_cr_cnt[j] = 0; for (j=0; j < 128; j++) to_cr_cnt[j] = 0; - s += sprintf(s,"All privop statistics zeroed\r\n"); + s += sprintf(s,"All privop statistics zeroed\n"); return s - buf; } +#ifdef PRIVOP_ADDR_COUNT + +extern struct privop_addr_count privop_addr_counter[]; + +void privop_count_addr(unsigned long iip, int inst) +{ + struct privop_addr_count *v = &privop_addr_counter[inst]; + int i; + + for (i = 0; i < PRIVOP_COUNT_NADDRS; i++) { + if (!v->addr[i]) { v->addr[i] = iip; v->count[i]++; return; } + else if (v->addr[i] == iip) { v->count[i]++; return; } + } + v->overflow++;; +} + +int dump_privop_addrs(char *buf) +{ + int i,j; + char *s = buf; + s += sprintf(s,"Privop addresses:\n"); + for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { + struct privop_addr_count *v = &privop_addr_counter[i]; + s += sprintf(s,"%s:\n",v->instname); + for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) { + if (!v->addr[j]) break; + s += sprintf(s," @%p #%ld\n",v->addr[j],v->count[j]); + } + if (v->overflow) + s += sprintf(s," other #%ld\n",v->overflow); + } + return s - buf; 
+} + +void zero_privop_addrs(void) +{ + int i,j; + for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) { + struct privop_addr_count *v = &privop_addr_counter[i]; + for (j = 0; j < PRIVOP_COUNT_NADDRS; j++) + v->addr[j] = v->count[j] = 0; + v->overflow = 0; + } +} +#endif + +int dump_hyperprivop_counts(char *buf) +{ + int i; + char *s = buf; + s += sprintf(s,"Hyperprivops:\n"); + for (i = 1; i <= HYPERPRIVOP_MAX; i++) + if (hyperpriv_cnt[i]) + s += sprintf(s,"%10d %s\n", + hyperpriv_cnt[i], hyperpriv_str[i]); + return s - buf; +} + +void zero_hyperprivop_counts(void) +{ + int i; + for (i = 0; i <= HYPERPRIVOP_MAX; i++) hyperpriv_cnt[i] = 0; +} + #define TMPBUFLEN 8*1024 int dump_privop_counts_to_user(char __user *ubuf, int len) { char buf[TMPBUFLEN]; int n = dump_privop_counts(buf); + n += dump_hyperprivop_counts(buf + n); +#ifdef PRIVOP_ADDR_COUNT + n += dump_privop_addrs(buf + n); +#endif if (len < TMPBUFLEN) return -1; if (__copy_to_user(ubuf,buf,n)) return -1; return n; @@ -887,6 +1035,10 @@ int zero_privop_counts_to_user(char __user *ubuf, int len) char buf[TMPBUFLEN]; int n = zero_privop_counts(buf); + zero_hyperprivop_counts(); +#ifdef PRIVOP_ADDR_COUNT + zero_privop_addrs(); +#endif if (len < TMPBUFLEN) return -1; if (__copy_to_user(ubuf,buf,n)) return -1; return n; diff --git a/xen/arch/ia64/process.c b/xen/arch/ia64/process.c index 221812123d..dd9e58071f 100644 --- a/xen/arch/ia64/process.c +++ b/xen/arch/ia64/process.c @@ -51,6 +51,7 @@ extern unsigned long dom0_start, dom0_size; IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | IA64_PSR_IA) #define PSCB(x,y) x->vcpu_info->arch.y +#define PSCBX(x,y) x->arch.y extern unsigned long vcpu_verbose; @@ -154,7 +155,7 @@ panic_domain(regs,"psr.ic off, delivering fault=%lx,iip=%p,ifa=%p,isr=%p,PSCB.ii } //printf("Delivering NESTED DATA TLB fault\n"); vector = IA64_DATA_NESTED_TLB_VECTOR; - regs->cr_iip = ((unsigned long) PSCB(ed,iva) + vector) & ~0xffUL; + regs->cr_iip = ((unsigned long) PSCBX(ed,iva) + vector) & ~0xffUL; 
regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; // NOTE: nested trap must NOT pass PSCB address //regs->r31 = (unsigned long) &PSCB(ed); @@ -187,7 +188,7 @@ panic_domain(regs,"psr.ic off, delivering fault=%lx,iip=%p,ifa=%p,isr=%p,PSCB.ii PSCB(ed,ifs) = 0; PSCB(ed,incomplete_regframe) = 0; - regs->cr_iip = ((unsigned long) PSCB(ed,iva) + vector) & ~0xffUL; + regs->cr_iip = ((unsigned long) PSCBX(ed,iva) + vector) & ~0xffUL; regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; #ifdef CONFIG_SMP #error "sharedinfo doesn't handle smp yet" @@ -516,7 +517,7 @@ printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n", case 32: /* fp fault */ case 33: /* fp trap */ //result = handle_fpu_swa((vector == 32) ? 1 : 0, regs, isr); - if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { + //if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { //siginfo.si_signo = SIGFPE; //siginfo.si_errno = 0; //siginfo.si_code = FPE_FLTINV; @@ -525,7 +526,7 @@ printf("ia64_fault, vector=0x%p, ifa=%p, iip=%p, ipsr=%p, isr=%p\n", //siginfo.si_isr = isr; //siginfo.si_imm = 0; //force_sig_info(SIGFPE, &siginfo, current); - } + //} //return; sprintf(buf, "FP fault/trap"); break; @@ -722,6 +723,10 @@ ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, u if (ia64_hypercall(regs)) vcpu_increment_iip(current); } + else if (!PSCB(ed,interrupt_collection_enabled)) { + if (ia64_hyperprivop(iim,regs)) + vcpu_increment_iip(current); + } else reflect_interruption(ifa,isr,iim,regs,IA64_BREAK_VECTOR); } diff --git a/xen/arch/ia64/vcpu.c b/xen/arch/ia64/vcpu.c index d1769a20c3..29b20a4df0 100644 --- a/xen/arch/ia64/vcpu.c +++ b/xen/arch/ia64/vcpu.c @@ -1,6 +1,6 @@ /* * Virtualized CPU functions - * + * * Copyright (C) 2004 Hewlett-Packard Co. 
* Dan Magenheimer (dan.magenheimer@hp.com) * @@ -26,6 +26,7 @@ typedef union { // this def for vcpu_regs won't work if kernel stack is present #define vcpu_regs(vcpu) ((struct pt_regs *) vcpu->arch.regs) #define PSCB(x,y) x->vcpu_info->arch.y +#define PSCBX(x,y) x->arch.y #define TRUE 1 #define FALSE 0 @@ -37,6 +38,17 @@ typedef union { #define STATIC +#ifdef PRIVOP_ADDR_COUNT +struct privop_addr_count privop_addr_counter[PRIVOP_COUNT_NINSTS] = { + { "rsm", { 0 }, { 0 }, 0 }, + { "ssm", { 0 }, { 0 }, 0 } +}; +extern void privop_count_addr(unsigned long addr, int inst); +#define PRIVOP_COUNT_ADDR(regs,inst) privop_count_addr(regs->cr_iip,inst) +#else +#define PRIVOP_COUNT_ADDR(x,y) do {} while (0) +#endif + unsigned long vcpu_verbose = 0; #define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0) @@ -77,30 +89,20 @@ vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value) IA64FAULT vcpu_set_ar(VCPU *vcpu, UINT64 reg, UINT64 val) { if (reg == 44) return (vcpu_set_itc(vcpu,val)); - if (reg == 27) return (IA64_ILLOP_FAULT); - if (reg > 7) return (IA64_ILLOP_FAULT); - PSCB(vcpu,krs[reg]) = val; -#if 0 -// for now, privify kr read's so all kr accesses are privileged - switch (reg) { - case 0: asm volatile ("mov ar.k0=%0" :: "r"(val)); break; - case 1: asm volatile ("mov ar.k1=%0" :: "r"(val)); break; - case 2: asm volatile ("mov ar.k2=%0" :: "r"(val)); break; - case 3: asm volatile ("mov ar.k3=%0" :: "r"(val)); break; - case 4: asm volatile ("mov ar.k4=%0" :: "r"(val)); break; - case 5: asm volatile ("mov ar.k5=%0" :: "r"(val)); break; - case 6: asm volatile ("mov ar.k6=%0" :: "r"(val)); break; - case 7: asm volatile ("mov ar.k7=%0" :: "r"(val)); break; - case 27: asm volatile ("mov ar.cflg=%0" :: "r"(val)); break; - } -#endif + else if (reg == 27) return (IA64_ILLOP_FAULT); + else if (reg == 24) + printf("warning: setting ar.eflg is a no-op; no IA-32 support\n"); + else if (reg > 7) return (IA64_ILLOP_FAULT); + else PSCB(vcpu,krs[reg]) = val; return IA64_NO_FAULT; } 
IA64FAULT vcpu_get_ar(VCPU *vcpu, UINT64 reg, UINT64 *val) { - if (reg > 7) return (IA64_ILLOP_FAULT); - *val = PSCB(vcpu,krs[reg]); + if (reg == 24) + printf("warning: getting ar.eflg is a no-op; no IA-32 support\n"); + else if (reg > 7) return (IA64_ILLOP_FAULT); + else *val = PSCB(vcpu,krs[reg]); return IA64_NO_FAULT; } @@ -119,11 +121,18 @@ void vcpu_set_metaphysical_mode(VCPU *vcpu, BOOLEAN newmode) } } +IA64FAULT vcpu_reset_psr_dt(VCPU *vcpu) +{ + vcpu_set_metaphysical_mode(vcpu,TRUE); + return IA64_NO_FAULT; +} + IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24) { struct ia64_psr psr, imm, *ipsr; REGS *regs = vcpu_regs(vcpu); + PRIVOP_COUNT_ADDR(regs,_RSM); // TODO: All of these bits need to be virtualized // TODO: Only allowed for current vcpu __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); @@ -152,12 +161,19 @@ IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, UINT64 imm24) extern UINT64 vcpu_check_pending_interrupts(VCPU *vcpu); #define SPURIOUS_VECTOR 0xf +IA64FAULT vcpu_set_psr_dt(VCPU *vcpu) +{ + vcpu_set_metaphysical_mode(vcpu,FALSE); + return IA64_NO_FAULT; +} + IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UINT64 imm24) { struct ia64_psr psr, imm, *ipsr; REGS *regs = vcpu_regs(vcpu); UINT64 mask, enabling_interrupts = 0; + PRIVOP_COUNT_ADDR(regs,_SSM); // TODO: All of these bits need to be virtualized __asm__ __volatile ("mov %0=psr;;" : "=r"(psr) :: "memory"); imm = *(struct ia64_psr *)&imm24; @@ -274,9 +290,9 @@ BOOLEAN vcpu_get_psr_i(VCPU *vcpu) UINT64 vcpu_get_ipsr_int_state(VCPU *vcpu,UINT64 prevpsr) { - UINT64 dcr = PSCB(vcpu,dcr); + UINT64 dcr = PSCBX(vcpu,dcr); PSR psr = {0}; - + //printf("*** vcpu_get_ipsr_int_state (0x%016lx)...",prevpsr); psr.i64 = prevpsr; psr.ia64_psr.be = 0; if (dcr & IA64_DCR_BE) psr.ia64_psr.be = 1; @@ -302,13 +318,13 @@ extern unsigned long privop_trace; //verbose("vcpu_get_dcr: called @%p\n",PSCB(vcpu,iip)); // Reads of cr.dcr on Xen always have the sign bit set, so // a domain can differentiate whether it is 
running on SP or not - *pval = PSCB(vcpu,dcr) | 0x8000000000000000L; + *pval = PSCBX(vcpu,dcr) | 0x8000000000000000L; return (IA64_NO_FAULT); } IA64FAULT vcpu_get_iva(VCPU *vcpu, UINT64 *pval) { - *pval = PSCB(vcpu,iva) & ~0x7fffL; + *pval = PSCBX(vcpu,iva) & ~0x7fffL; return (IA64_NO_FAULT); } @@ -408,13 +424,13 @@ extern unsigned long privop_trace; // a domain can differentiate whether it is running on SP or not // Thus, writes of DCR should ignore the sign bit //verbose("vcpu_set_dcr: called\n"); - PSCB(vcpu,dcr) = val & ~0x8000000000000000L; + PSCBX(vcpu,dcr) = val & ~0x8000000000000000L; return (IA64_NO_FAULT); } IA64FAULT vcpu_set_iva(VCPU *vcpu, UINT64 val) { - PSCB(vcpu,iva) = val & ~0x7fffL; + PSCBX(vcpu,iva) = val & ~0x7fffL; return (IA64_NO_FAULT); } @@ -508,16 +524,16 @@ void vcpu_pend_interrupt(VCPU *vcpu, UINT64 vector) return; } if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; - if (test_bit(vector,PSCB(vcpu,irr))) { + if (test_bit(vector,PSCBX(vcpu,irr))) { //printf("vcpu_pend_interrupt: overrun\n"); } - set_bit(vector,PSCB(vcpu,irr)); + set_bit(vector,PSCBX(vcpu,irr)); PSCB(vcpu,pending_interruption) = 1; } void early_tick(VCPU *vcpu) { - UINT64 *p = &PSCB(vcpu,irr[3]); + UINT64 *p = &PSCBX(vcpu,irr[3]); printf("vcpu_check_pending: about to deliver early tick\n"); printf("&irr[0]=%p, irr[0]=0x%lx\n",p,*p); } @@ -535,9 +551,9 @@ UINT64 vcpu_check_pending_interrupts(VCPU *vcpu) { UINT64 *p, *q, *r, bits, bitnum, mask, i, vector; - p = &PSCB(vcpu,irr[3]); + p = &PSCBX(vcpu,irr[3]); q = &PSCB(vcpu,delivery_mask[3]); - r = &PSCB(vcpu,insvc[3]); + r = &PSCBX(vcpu,insvc[3]); for (i = 3; ; p--, q--, r--, i--) { bits = *p & *q; if (bits) break; // got a potential interrupt @@ -577,9 +593,9 @@ UINT64 vcpu_check_pending_interrupts(VCPU *vcpu) #if 0 if (vector == (PSCB(vcpu,itv) & 0xff)) { UINT64 now = ia64_get_itc(); - UINT64 itm = PSCB(vcpu,domain_itm); + UINT64 itm = PSCBX(vcpu,domain_itm); if (now < itm) early_tick(vcpu); - + } #endif return vector; 
@@ -639,13 +655,13 @@ IA64FAULT vcpu_get_ivr(VCPU *vcpu, UINT64 *pval) i = vector >> 6; mask = 1L << (vector & 0x3f); //printf("ZZZZZZ vcpu_get_ivr: setting insvc mask for vector %ld\n",vector); - PSCB(vcpu,insvc[i]) |= mask; - PSCB(vcpu,irr[i]) &= ~mask; + PSCBX(vcpu,insvc[i]) |= mask; + PSCBX(vcpu,irr[i]) &= ~mask; //PSCB(vcpu,pending_interruption)--; *pval = vector; // if delivering a timer interrupt, remember domain_itm if (vector == (PSCB(vcpu,itv) & 0xff)) { - PSCB(vcpu,domain_itm_last) = PSCB(vcpu,domain_itm); + PSCBX(vcpu,domain_itm_last) = PSCBX(vcpu,domain_itm); } return IA64_NO_FAULT; } @@ -760,7 +776,7 @@ IA64FAULT vcpu_set_eoi(VCPU *vcpu, UINT64 val) UINT64 *p, bits, vec, bitnum; int i; - p = &PSCB(vcpu,insvc[3]); + p = &PSCBX(vcpu,insvc[3]); for (i = 3; (i >= 0) && !(bits = *p); i--, p--); if (i < 0) { printf("Trying to EOI interrupt when none are in-service.\r\n"); @@ -811,8 +827,8 @@ extern unsigned long privop_trace; if (val & 0xef00) return (IA64_ILLOP_FAULT); PSCB(vcpu,itv) = val; if (val & 0x10000) { -printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCB(vcpu,domain_itm)); - PSCB(vcpu,domain_itm) = 0; +printf("**** vcpu_set_itv(%d): vitm=%lx, setting to 0\n",val,PSCBX(vcpu,domain_itm)); + PSCBX(vcpu,domain_itm) = 0; } else vcpu_enable_timer(vcpu,1000000L); return (IA64_NO_FAULT); @@ -833,6 +849,20 @@ IA64FAULT vcpu_set_cmcv(VCPU *vcpu, UINT64 val) } /************************************************************************** + VCPU temporary register access routines +**************************************************************************/ +UINT64 vcpu_get_tmp(VCPU *vcpu, UINT64 index) +{ + if (index > 7) return 0; + return PSCB(vcpu,tmp[index]); +} + +void vcpu_set_tmp(VCPU *vcpu, UINT64 index, UINT64 val) +{ + if (index <= 7) PSCB(vcpu,tmp[index]) = val; +} + +/************************************************************************** Interval timer routines 
**************************************************************************/ @@ -845,14 +875,14 @@ BOOLEAN vcpu_timer_disabled(VCPU *vcpu) BOOLEAN vcpu_timer_inservice(VCPU *vcpu) { UINT64 itv = PSCB(vcpu,itv); - return (test_bit(itv, PSCB(vcpu,insvc))); + return (test_bit(itv, PSCBX(vcpu,insvc))); } BOOLEAN vcpu_timer_expired(VCPU *vcpu) { - unsigned long domain_itm = PSCB(vcpu,domain_itm); + unsigned long domain_itm = PSCBX(vcpu,domain_itm); unsigned long now = ia64_get_itc(); - + if (!domain_itm) return FALSE; if (now < domain_itm) return FALSE; if (vcpu_timer_disabled(vcpu)) return FALSE; @@ -877,36 +907,36 @@ void vcpu_safe_set_itm(unsigned long val) void vcpu_set_next_timer(VCPU *vcpu) { - UINT64 d = PSCB(vcpu,domain_itm); - //UINT64 s = PSCB(vcpu,xen_itm); + UINT64 d = PSCBX(vcpu,domain_itm); + //UINT64 s = PSCBX(vcpu,xen_itm); UINT64 s = local_cpu_data->itm_next; UINT64 now = ia64_get_itc(); - //UINT64 interval = PSCB(vcpu,xen_timer_interval); + //UINT64 interval = PSCBX(vcpu,xen_timer_interval); /* gloss over the wraparound problem for now... we know it exists * but it doesn't matter right now */ #if 0 /* ensure at least next SP tick is in the future */ - if (!interval) PSCB(vcpu,xen_itm) = now + + if (!interval) PSCBX(vcpu,xen_itm) = now + #if 0 (running_on_sim() ? 
SIM_DEFAULT_CLOCK_RATE : - DEFAULT_CLOCK_RATE); + DEFAULT_CLOCK_RATE); #else 3000000; //printf("vcpu_set_next_timer: HACK!\n"); #endif #if 0 - if (PSCB(vcpu,xen_itm) < now) - while (PSCB(vcpu,xen_itm) < now + (interval>>1)) - PSCB(vcpu,xen_itm) += interval; + if (PSCBX(vcpu,xen_itm) < now) + while (PSCBX(vcpu,xen_itm) < now + (interval>>1)) + PSCBX(vcpu,xen_itm) += interval; #endif #endif if (is_idle_task(vcpu->domain)) { printf("****** vcpu_set_next_timer called during idle!!\n"); } - //s = PSCB(vcpu,xen_itm); + //s = PSCBX(vcpu,xen_itm); if (d && (d > now) && (d < s)) { vcpu_safe_set_itm(d); //using_domain_as_itm++; @@ -920,10 +950,10 @@ void vcpu_set_next_timer(VCPU *vcpu) // parameter is a time interval specified in cycles void vcpu_enable_timer(VCPU *vcpu,UINT64 cycles) { - PSCB(vcpu,xen_timer_interval) = cycles; + PSCBX(vcpu,xen_timer_interval) = cycles; vcpu_set_next_timer(vcpu); printf("vcpu_enable_timer(%d): interval set to %d cycles\n", - PSCB(vcpu,xen_timer_interval)); + PSCBX(vcpu,xen_timer_interval)); __set_bit(PSCB(vcpu,itv), PSCB(vcpu,delivery_mask)); } @@ -933,30 +963,30 @@ IA64FAULT vcpu_set_itm(VCPU *vcpu, UINT64 val) //if (val < now) val = now + 1000; //printf("*** vcpu_set_itm: called with %lx\n",val); - PSCB(vcpu,domain_itm) = val; + PSCBX(vcpu,domain_itm) = val; vcpu_set_next_timer(vcpu); return (IA64_NO_FAULT); } IA64FAULT vcpu_set_itc(VCPU *vcpu, UINT64 val) { - + UINT64 oldnow = ia64_get_itc(); - UINT64 olditm = PSCB(vcpu,domain_itm); + UINT64 olditm = PSCBX(vcpu,domain_itm); unsigned long d = olditm - oldnow; unsigned long x = local_cpu_data->itm_next - oldnow; - + UINT64 newnow = val, min_delta; local_irq_disable(); if (olditm) { printf("**** vcpu_set_itc(%lx): vitm changed to %lx\n",val,newnow+d); - PSCB(vcpu,domain_itm) = newnow + d; + PSCBX(vcpu,domain_itm) = newnow + d; } local_cpu_data->itm_next = newnow + x; - d = PSCB(vcpu,domain_itm); + d = PSCBX(vcpu,domain_itm); x = local_cpu_data->itm_next; - + ia64_set_itc(newnow); if (d && (d 
> newnow) && (d < x)) { vcpu_safe_set_itm(d); @@ -991,7 +1021,7 @@ void vcpu_pend_timer(VCPU *vcpu) if (vcpu_timer_disabled(vcpu)) return; //if (vcpu_timer_inservice(vcpu)) return; - if (PSCB(vcpu,domain_itm_last) == PSCB(vcpu,domain_itm)) { + if (PSCBX(vcpu,domain_itm_last) == PSCBX(vcpu,domain_itm)) { // already delivered an interrupt for this so // don't deliver another return; @@ -999,7 +1029,7 @@ void vcpu_pend_timer(VCPU *vcpu) #if 0 // attempt to flag "timer tick before its due" source { - UINT64 itm = PSCB(vcpu,domain_itm); + UINT64 itm = PSCBX(vcpu,domain_itm); UINT64 now = ia64_get_itc(); if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n"); } @@ -1011,7 +1041,7 @@ void vcpu_pend_timer(VCPU *vcpu) UINT64 vcpu_timer_pending_early(VCPU *vcpu) { UINT64 now = ia64_get_itc(); - UINT64 itm = PSCB(vcpu,domain_itm); + UINT64 itm = PSCBX(vcpu,domain_itm); if (vcpu_timer_disabled(vcpu)) return 0; if (!itm) return 0; @@ -1023,7 +1053,7 @@ void vcpu_poke_timer(VCPU *vcpu) { UINT64 itv = PSCB(vcpu,itv) & 0xff; UINT64 now = ia64_get_itc(); - UINT64 itm = PSCB(vcpu,domain_itm); + UINT64 itm = PSCBX(vcpu,domain_itm); UINT64 irr; if (vcpu_timer_disabled(vcpu)) return; @@ -1033,8 +1063,8 @@ void vcpu_poke_timer(VCPU *vcpu) while(1); } // using 0xef instead of itv so can get real irr - if (now > itm && !test_bit(0xefL, PSCB(vcpu,insvc))) { - if (!test_bit(0xefL,PSCB(vcpu,irr))) { + if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) { + if (!test_bit(0xefL,PSCBX(vcpu,irr))) { irr = ia64_getreg(_IA64_REG_CR_IRR3); if (irr & (1L<<(0xef-0xc0))) return; if (now-itm>0x800000) @@ -1091,7 +1121,7 @@ printf("SI_CR_IIP/IPSR/IFS_OFFSET CHANGED, SEE dorfirfi\n"); printf("SI_CR_IIP=0x%x,IPSR=0x%x,IFS_OFFSET=0x%x\n",SI_OFS(iip),SI_OFS(ipsr),SI_OFS(ifs)); while(1); } - // TODO: validate PSCB(vcpu,iip) + // TODO: validate PSCB(vcpu,iip) // TODO: PSCB(vcpu,ipsr) = psr; PSCB(vcpu,ipsr) = psr.i64; // now set up the trampoline @@ -1338,7 +1368,6 @@ IA64FAULT 
vcpu_get_cpuid(VCPU *vcpu, UINT64 reg, UINT64 *pval) unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr) { - ia64_rr rr; rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; @@ -1348,7 +1377,6 @@ unsigned long vcpu_get_rr_ve(VCPU *vcpu,UINT64 vadr) unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr) { - ia64_rr rr; rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; @@ -1358,7 +1386,6 @@ unsigned long vcpu_get_rr_ps(VCPU *vcpu,UINT64 vadr) unsigned long vcpu_get_rr_rid(VCPU *vcpu,UINT64 vadr) { - ia64_rr rr; rr.rrval = PSCB(vcpu,rrs)[vadr>>61]; @@ -1445,8 +1472,8 @@ TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count) for (i = 0; i < count; i++, trp++) { if (!trp->p) continue; if (physicalize_rid(vcpu,trp->rid) != rid) continue; - if (ifa < trp->vadr) continue; - if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue; + if (ifa < trp->vadr) continue; + if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue; //if (trp->key && !match_pkr(vcpu,trp->key)) continue; return trp; } @@ -1457,9 +1484,9 @@ TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa) { TR_ENTRY *trp; - trp = vcpu_match_tr_entry(vcpu,vcpu->vcpu_info->arch.dtrs,ifa,NDTRS); + trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS); if (trp) return trp; - trp = vcpu_match_tr_entry(vcpu,vcpu->vcpu_info->arch.itrs,ifa,NITRS); + trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS); if (trp) return trp; return 0; } @@ -1470,7 +1497,8 @@ IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte, TR_ENTRY *trp; if (slot >= NDTRS) return IA64_RSVDREG_FAULT; - trp = &PSCB(vcpu,dtrs[slot]); + trp = &PSCBX(vcpu,dtrs[slot]); +//printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa); vcpu_set_tr_entry(trp,pte,itir,ifa); return IA64_NO_FAULT; } @@ -1481,7 +1509,8 @@ IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte, TR_ENTRY *trp; if (slot >= NITRS) return IA64_RSVDREG_FAULT; - trp = &PSCB(vcpu,itrs[slot]); + trp = &PSCBX(vcpu,itrs[slot]); +//printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa); 
vcpu_set_tr_entry(trp,pte,itir,ifa); return IA64_NO_FAULT; } @@ -1524,12 +1553,12 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 #endif if (IorD & 0x4) return; // don't place in 1-entry TLB if (IorD & 0x1) { - vcpu_set_tr_entry(&PSCB(vcpu,itlb),pte,ps<<2,vaddr); - PSCB(vcpu,itlb_pte) = mp_pte; + vcpu_set_tr_entry(&PSCBX(vcpu,itlb),pte,ps<<2,vaddr); + PSCBX(vcpu,itlb_pte) = mp_pte; } if (IorD & 0x2) { - vcpu_set_tr_entry(&PSCB(vcpu,dtlb),pte,ps<<2,vaddr); - PSCB(vcpu,dtlb_pte) = mp_pte; + vcpu_set_tr_entry(&PSCBX(vcpu,dtlb),pte,ps<<2,vaddr); + PSCBX(vcpu,dtlb_pte) = mp_pte; } } @@ -1539,9 +1568,9 @@ unsigned long match_dtlb(VCPU *vcpu, unsigned long ifa, unsigned long *ps, unsig { TR_ENTRY *trp; - if (trp = vcpu_match_tr_entry(vcpu,&vcpu->vcpu_info->arch.dtlb,ifa,1)) { + if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) { if (ps) *ps = trp->ps; - if (mp_pte) *mp_pte = vcpu->vcpu_info->arch.dtlb_pte; + if (mp_pte) *mp_pte = vcpu->arch.dtlb_pte; return (trp->page_flags); } return 0UL; @@ -1645,8 +1674,8 @@ IA64FAULT vcpu_ptc_e(VCPU *vcpu, UINT64 vadr) #endif local_flush_tlb_all(); // just invalidate the "whole" tlb - vcpu_purge_tr_entry(&PSCB(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCB(vcpu,itlb)); + vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); + vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); return IA64_NO_FAULT; } @@ -1666,8 +1695,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) vhpt_flush_address(vadr,addr_range); #endif ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); - vcpu_purge_tr_entry(&PSCB(vcpu,dtlb)); - vcpu_purge_tr_entry(&PSCB(vcpu,itlb)); + vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); + vcpu_purge_tr_entry(&PSCBX(vcpu,itlb)); return IA64_NO_FAULT; } diff --git a/xen/arch/ia64/xenmisc.c b/xen/arch/ia64/xenmisc.c index 59a2ebc739..2f5562c46a 100644 --- a/xen/arch/ia64/xenmisc.c +++ b/xen/arch/ia64/xenmisc.c @@ -79,6 +79,14 @@ void raise_actimer_softirq(void) raise_softirq(AC_TIMER_SOFTIRQ); } +unsigned 
long __hypercall_create_continuation( + unsigned int op, unsigned int nr_args, ...) +{ + printf("__hypercall_create_continuation: not implemented!!!\n"); +} + +/////////////////////////////// + /////////////////////////////// // from arch/x86/apic.c /////////////////////////////// @@ -139,7 +147,7 @@ void free_page_type(struct pfn_info *page, unsigned int type) void show_registers(struct pt_regs *regs) { printf("*** ADD REGISTER DUMP HERE FOR DEBUGGING\n"); -} +} /////////////////////////////// // from common/keyhandler.c diff --git a/xen/arch/ia64/xensetup.c b/xen/arch/ia64/xensetup.c index ec864e1ef2..9de0a0eed5 100644 --- a/xen/arch/ia64/xensetup.c +++ b/xen/arch/ia64/xensetup.c @@ -52,15 +52,10 @@ unsigned char opt_com1[30] = "", opt_com2[30] = ""; unsigned int opt_dom0_mem = 16000; /* opt_noht: If true, Hyperthreading is ignored. */ int opt_noht=0; -/* opt_noacpi: If true, ACPI tables are not parsed. */ -int opt_noacpi=0; /* opt_nosmp: If true, secondary processors are ignored. */ int opt_nosmp=0; /* opt_noreboot: If true, machine will need manual reset on error. */ int opt_noreboot=0; -/* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */ -/* NB. This flag implies 'nosmp' and 'noacpi'. */ -int opt_ignorebiostables=0; /* opt_watchdog: If true, run a watchdog NMI on each processor. */ int opt_watchdog=0; /* opt_pdb: Name of serial port for Xen pervasive debugger (and enable pdb) */ diff --git a/xen/arch/ia64/xentime.c b/xen/arch/ia64/xentime.c index 20d13eb7ef..a3790b4577 100644 --- a/xen/arch/ia64/xentime.c +++ b/xen/arch/ia64/xentime.c @@ -84,6 +84,17 @@ xen_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) { unsigned long new_itm; +#define HEARTBEAT_FREQ 16 // period in seconds +#ifdef HEARTBEAT_FREQ + static long count = 0; + if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) { + printf("Heartbeat... 
iip=%p,psr.i=%d,pend=%d\n", + regs->cr_iip, + current->vcpu_info->arch.interrupt_delivery_enabled, + current->vcpu_info->arch.pending_interruption); + count = 0; + } +#endif #ifndef XEN if (unlikely(cpu_is_offline(smp_processor_id()))) { return IRQ_HANDLED; diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c index 7237d5e7e2..99fc21b018 100644 --- a/xen/arch/x86/acpi/boot.c +++ b/xen/arch/x86/acpi/boot.c @@ -36,23 +36,12 @@ #include <asm/io.h> #include <asm/irq.h> #include <asm/mpspec.h> - -int sbf_port; /* XXX XEN */ - -#ifdef CONFIG_X86_64 - -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) { } -extern void __init clustered_apic_check(void); -static inline int ioapic_setup_disabled(void) { return 0; } - -#else /* X86 */ - -#ifdef CONFIG_X86_LOCAL_APIC #include <mach_apic.h> #include <mach_mpparse.h> -#endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* X86 */ +int sbf_port; +#define end_pfn_map max_page +#define CONFIG_ACPI_PCI #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ @@ -109,7 +98,7 @@ char *__acpi_map_table(unsigned long phys_addr, unsigned long size) if (!phys_addr || !size) return NULL; - if (phys_addr < (max_page << PAGE_SHIFT)) + if (phys_addr < (end_pfn_map << PAGE_SHIFT)) return __va(phys_addr); return NULL; @@ -279,7 +268,7 @@ acpi_parse_lapic_nmi ( #endif /*CONFIG_X86_LOCAL_APIC*/ -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/ static int __init acpi_parse_ioapic ( @@ -302,6 +291,7 @@ acpi_parse_ioapic ( return 0; } +#ifdef CONFIG_ACPI_INTERPRETER /* * Parse Interrupt Source Override for the ACPI SCI */ @@ -335,6 +325,7 @@ acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) acpi_sci_override_gsi = gsi; return; } +#endif static int __init acpi_parse_int_src_ovr ( @@ -349,11 +340,13 @@ acpi_parse_int_src_ovr ( acpi_table_print_madt_entry(header); +#ifdef 
CONFIG_ACPI_INTERPRETER if (intsrc->bus_irq == acpi_fadt.sci_int) { acpi_sci_ioapic_setup(intsrc->global_irq, intsrc->flags.polarity, intsrc->flags.trigger); return 0; } +#endif if (acpi_skip_timer_override && intsrc->bus_irq == 0 && intsrc->global_irq == 2) { @@ -698,7 +691,7 @@ acpi_parse_madt_lapic_entries(void) } #endif /* CONFIG_X86_LOCAL_APIC */ -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#if defined(CONFIG_X86_IO_APIC) /*&& defined(CONFIG_ACPI_INTERPRETER)*/ /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error @@ -744,12 +737,14 @@ acpi_parse_madt_ioapic_entries(void) return count; } +#ifdef CONFIG_ACPI_INTERPRETER /* * If BIOS did not supply an INT_SRC_OVR for the SCI * pretend we got one so we can set the SCI flags. */ if (!acpi_sci_override_gsi) acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); +#endif /* Fill in identity legacy mapings where no override */ mp_config_acpi_legacy_irqs(); @@ -856,10 +851,6 @@ acpi_boot_table_init(void) return error; } -#if 0 /*def __i386__*/ - check_acpi_pci(); -#endif - acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); /* diff --git a/xen/arch/x86/cdb.c b/xen/arch/x86/cdb.c index 6eb3515ec2..451ce46853 100644 --- a/xen/arch/x86/cdb.c +++ b/xen/arch/x86/cdb.c @@ -12,7 +12,9 @@ #include <asm/irq.h> #include <xen/spinlock.h> #include <asm/debugger.h> -#include <asm/init.h> +#include <xen/init.h> +#include <xen/smp.h> +#include <asm/apic.h> /* Printk isn't particularly safe just after we've trapped to the debugger. so avoid it. 
*/ diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index ebfc81387c..298d65d4aa 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -253,6 +253,7 @@ void arch_do_createdomain(struct exec_domain *ed) d->shared_info = (void *)alloc_xenheap_page(); memset(d->shared_info, 0, PAGE_SIZE); ed->vcpu_info = &d->shared_info->vcpu_data[ed->id]; + ed->cpumap = CPUMAP_RUNANYWHERE; SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); machine_to_phys_mapping[virt_to_phys(d->shared_info) >> PAGE_SHIFT] = INVALID_M2P_ENTRY; @@ -408,13 +409,15 @@ int arch_set_info_guest( memcpy(&ed->arch.guest_context, c, sizeof(*c)); - /* IOPL privileges are virtualised. */ - ed->arch.iopl = (ed->arch.guest_context.user_regs.eflags >> 12) & 3; - ed->arch.guest_context.user_regs.eflags &= ~EF_IOPL; + if ( !(c->flags & VGCF_VMX_GUEST) ) + { + /* IOPL privileges are virtualised. */ + ed->arch.iopl = (ed->arch.guest_context.user_regs.eflags >> 12) & 3; + ed->arch.guest_context.user_regs.eflags &= ~EF_IOPL; - /* Clear IOPL for unprivileged domains. */ - if ( !IS_PRIV(d) ) - ed->arch.guest_context.user_regs.eflags &= 0xffffcfff; + /* Ensure real hardware interrupts are enabled. */ + ed->arch.guest_context.user_regs.eflags |= EF_IE; + } if ( test_bit(EDF_DONEINIT, &ed->flags) ) return 0; diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c index 7ed67f4b6b..6d0b12f03b 100644 --- a/xen/arch/x86/domain_build.c +++ b/xen/arch/x86/domain_build.c @@ -7,6 +7,7 @@ #include <xen/config.h> #include <xen/init.h> #include <xen/lib.h> +#include <xen/ctype.h> #include <xen/sched.h> #include <xen/smp.h> #include <xen/delay.h> @@ -21,9 +22,18 @@ #include <asm/i387.h> #include <asm/shadow.h> -/* opt_dom0_mem: Kilobytes of memory allocated to domain 0. */ -static unsigned int opt_dom0_mem = 0; -integer_unit_param("dom0_mem", opt_dom0_mem); +/* opt_dom0_mem: memory allocated to domain 0. 
*/ +static unsigned int opt_dom0_mem; +static void parse_dom0_mem(char *s) +{ + unsigned long long bytes = memparse(s); + /* If no unit is specified we default to kB units, not bytes. */ + if ( isdigit(s[strlen(s)-1]) ) + opt_dom0_mem = (unsigned int)bytes; + else + opt_dom0_mem = (unsigned int)(bytes >> 10); +} +custom_param("dom0_mem", parse_dom0_mem); static unsigned int opt_dom0_shadow = 0; boolean_param("dom0_shadow", opt_dom0_shadow); diff --git a/xen/arch/x86/e820.c b/xen/arch/x86/e820.c index 30c8d8b9bb..7c0050185f 100644 --- a/xen/arch/x86/e820.c +++ b/xen/arch/x86/e820.c @@ -3,6 +3,11 @@ #include <xen/lib.h> #include <asm/e820.h> +/* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */ +unsigned long long opt_mem; +static void parse_mem(char *s) { opt_mem = memparse(s); } +custom_param("mem", parse_mem); + struct e820map e820; static void __init add_memory_region(unsigned long long start, @@ -341,6 +346,31 @@ static void __init clip_4gb(void) #define clip_4gb() ((void)0) #endif +static void __init clip_mem(void) +{ + int i; + + if ( !opt_mem ) + return; + + for ( i = 0; i < e820.nr_map; i++ ) + { + if ( (e820.map[i].addr + e820.map[i].size) <= opt_mem ) + continue; + printk("Truncating memory map to %lukB\n", + (unsigned long)(opt_mem >> 10)); + if ( e820.map[i].addr >= opt_mem ) + { + e820.nr_map = i; + } + else + { + e820.map[i].size = opt_mem - e820.map[i].addr; + e820.nr_map = i + 1; + } + } +} + static void __init machine_specific_memory_setup( struct e820entry *raw, int raw_nr) { @@ -348,6 +378,7 @@ static void __init machine_specific_memory_setup( sanitize_e820_map(raw, &nr); (void)copy_e820_map(raw, nr); clip_4gb(); + clip_mem(); } unsigned long __init init_e820(struct e820entry *raw, int raw_nr) diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c index 3e7c89cf0e..bb69348b14 100644 --- a/xen/arch/x86/io_apic.c +++ b/xen/arch/x86/io_apic.c @@ -145,16 +145,16 @@ static void __unmask_IO_APIC_irq (unsigned int irq) 
__modify_IO_APIC_irq(irq, 0, 0x00010000); } -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq (unsigned int irq) +/* trigger = 0 */ +static void __edge_IO_APIC_irq (unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000); + __modify_IO_APIC_irq(irq, 0, 0x00008000); } -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq (unsigned int irq) +/* trigger = 1 */ +static void __level_IO_APIC_irq (unsigned int irq) { - __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000); + __modify_IO_APIC_irq(irq, 0x00008000, 0); } static void mask_IO_APIC_irq (unsigned int irq) @@ -1637,12 +1637,14 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq) return 0; /* don't check for pending */ } -static void end_level_ioapic_irq (unsigned int irq) +static void mask_and_ack_level_ioapic_irq (unsigned int irq) { unsigned long v; int i; move_irq(irq); + + mask_IO_APIC_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -1671,12 +1673,17 @@ static void end_level_ioapic_irq (unsigned int irq) if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + __edge_IO_APIC_irq(irq); + __level_IO_APIC_irq(irq); spin_unlock(&ioapic_lock); } } +static void end_level_ioapic_irq (unsigned int irq) +{ + unmask_IO_APIC_irq(irq); +} + #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1699,6 +1706,13 @@ static unsigned int startup_level_ioapic_vector (unsigned int vector) return startup_level_ioapic_irq (irq); } +static void mask_and_ack_level_ioapic_vector (unsigned int vector) +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); diff --git a/xen/arch/x86/mtrr/main.c 
b/xen/arch/x86/mtrr/main.c index b7c4713c69..b6122d9d02 100644 --- a/xen/arch/x86/mtrr/main.c +++ b/xen/arch/x86/mtrr/main.c @@ -33,7 +33,6 @@ #include <xen/config.h> #include <xen/init.h> -#include <xen/pci.h> #include <xen/slab.h> #include <xen/smp.h> #include <xen/spinlock.h> @@ -97,25 +96,6 @@ void set_mtrr_ops(struct mtrr_ops * ops) /* Returns non-zero if we have the write-combining memory type */ static int have_wrcomb(void) { - struct pci_dev *dev; - - if ((dev = pci_find_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { - /* ServerWorks LE chipsets have problems with write-combining - Don't allow it and leave room for other chipsets to be tagged */ - if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && - dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { - printk(KERN_INFO "mtrr: Serverworks LE detected. Write-combining disabled.\n"); - return 0; - } - /* Intel 450NX errata # 23. Non ascending cachline evictions to - write combining memory may resulting in data corruption */ - if (dev->vendor == PCI_VENDOR_ID_INTEL && - dev->device == PCI_DEVICE_ID_INTEL_82451NX) - { - printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); - return 0; - } - } return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); } diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index ac659a52de..b28e0c58e1 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -3,7 +3,6 @@ #include <xen/init.h> #include <xen/lib.h> #include <xen/sched.h> -#include <xen/pci.h> #include <xen/serial.h> #include <xen/softirq.h> #include <xen/acpi.h> @@ -34,23 +33,29 @@ integer_param("xenheap_megabytes", opt_xenheap_megabytes); int opt_noht = 0; boolean_param("noht", opt_noht); -/* opt_noacpi: If true, ACPI tables are not parsed. */ -static int opt_noacpi = 0; -boolean_param("noacpi", opt_noacpi); - -/* opt_nosmp: If true, secondary processors are ignored. 
*/ -static int opt_nosmp = 0; -boolean_param("nosmp", opt_nosmp); - -/* opt_ignorebiostables: If true, ACPI and MP tables are ignored. */ -/* NB. This flag implies 'nosmp' and 'noacpi'. */ -static int opt_ignorebiostables = 0; -boolean_param("ignorebiostables", opt_ignorebiostables); - /* opt_watchdog: If true, run a watchdog NMI on each processor. */ static int opt_watchdog = 0; boolean_param("watchdog", opt_watchdog); +/* **** Linux config option: propagated to domain0. */ +/* "acpi=off": Disables both ACPI table parsing and interpreter. */ +/* "acpi=force": Override the disable blacklist. */ +/* "acpi=strict": Disables out-of-spec workarounds. */ +/* "acpi=ht": Limit ACPI just to boot-time to enable HT. */ +/* "acpi=noirq": Disables ACPI interrupt routing. */ +static void parse_acpi_param(char *s); +custom_param("acpi", parse_acpi_param); + +/* **** Linux config option: propagated to domain0. */ +/* acpi_skip_timer_override: Skip IRQ0 overrides. */ +extern int acpi_skip_timer_override; +boolean_param("acpi_skip_timer_override", acpi_skip_timer_override); + +/* **** Linux config option: propagated to domain0. */ +/* noapic: Disable IOAPIC setup. */ +extern int skip_ioapic_setup; +boolean_param("noapic", skip_ioapic_setup); + int early_boot = 1; unsigned long xenheap_phys_end; @@ -63,7 +68,6 @@ extern void ac_timer_init(void); extern void initialize_keytable(); extern int do_timer_lists_from_pit; -char ignore_irq13; /* set if exception 16 works */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 }; #if defined(__x86_64__) @@ -329,6 +333,41 @@ void __init cpu_init(void) init_idle_task(); } +int acpi_force; +char acpi_param[10] = ""; +static void parse_acpi_param(char *s) +{ + /* Save the parameter so it can be propagated to domain0. */ + strncpy(acpi_param, s, sizeof(acpi_param)); + acpi_param[sizeof(acpi_param)-1] = '\0'; + + /* Interpret the parameter for use within Xen.
*/ + if ( !strcmp(s, "off") ) + { + disable_acpi(); + } + else if ( !strcmp(s, "force") ) + { + acpi_force = 1; + acpi_ht = 1; + acpi_disabled = 0; + } + else if ( !strcmp(s, "strict") ) + { + acpi_strict = 1; + } + else if ( !strcmp(s, "ht") ) + { + if ( !acpi_force ) + disable_acpi(); + acpi_ht = 1; + } + else if ( !strcmp(s, "noirq") ) + { + acpi_noirq_set(); + } +} + static void __init do_initcalls(void) { initcall_t *call; @@ -356,54 +395,36 @@ static void __init start_of_day(void) identify_cpu(&boot_cpu_data); /* get CPU type info */ if ( cpu_has_fxsr ) set_in_cr4(X86_CR4_OSFXSR); if ( cpu_has_xmm ) set_in_cr4(X86_CR4_OSXMMEXCPT); -#ifdef CONFIG_SMP - if ( opt_ignorebiostables ) - { - opt_nosmp = 1; /* No SMP without configuration */ - opt_noacpi = 1; /* ACPI will just confuse matters also */ - } - else - { - find_smp_config(); - smp_alloc_memory(); /* trampoline which other CPUs jump at */ - } -#endif - paging_init(); /* not much here now, but sets up fixmap */ - if ( !opt_noacpi ) - { - acpi_boot_table_init(); - acpi_boot_init(); - } -#ifdef CONFIG_SMP + + find_smp_config(); + + smp_alloc_memory(); + + paging_init(); + + acpi_boot_table_init(); + acpi_boot_init(); + if ( smp_found_config ) get_smp_config(); -#endif - init_apic_mappings(); /* make APICs addressable in our pagetables. */ + + init_apic_mappings(); + scheduler_init(); - init_IRQ(); /* installs simple interrupt wrappers. Starts HZ clock. */ + + init_IRQ(); + trap_init(); - time_init(); /* installs software handler for HZ clock. */ + + time_init(); arch_init_memory(); -#ifndef CONFIG_SMP - APIC_init_uniprocessor(); -#else - if ( opt_nosmp ) - APIC_init_uniprocessor(); - else - smp_boot_cpus(); - /* - * Does loads of stuff, including kicking the local - * APIC, and the IO APIC after other CPUs are booted. - * Each IRQ is preferably handled by IO-APIC, but - * fall thru to 8259A if we have to (but slower). 
- */ -#endif + smp_boot_cpus(); __sti(); - initialize_keytable(); /* call back handling for key codes */ + initialize_keytable(); serial_init_stage2(); @@ -420,19 +441,14 @@ static void __init start_of_day(void) check_nmi_watchdog(); -#ifdef CONFIG_PCI - pci_init(); -#endif do_initcalls(); -#ifdef CONFIG_SMP wait_init_idle = cpu_online_map; clear_bit(smp_processor_id(), &wait_init_idle); smp_threads_ready = 1; smp_commence(); /* Tell other CPUs that state of the world is stable. */ while ( wait_init_idle != 0 ) cpu_relax(); -#endif watchdog_on = 1; #ifdef __x86_64__ /* x86_32 uses low mappings when building DOM0. */ @@ -573,13 +589,32 @@ void __init __start_xen(multiboot_info_t *mbi) set_bit(DF_PRIVILEGED, &dom0->flags); - /* Grab the DOM0 command line. Skip past the image name. */ + /* Grab the DOM0 command line. */ cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL); if ( cmdline != NULL ) { + static char dom0_cmdline[256]; + + /* Skip past the image name. */ while ( *cmdline == ' ' ) cmdline++; if ( (cmdline = strchr(cmdline, ' ')) != NULL ) while ( *cmdline == ' ' ) cmdline++; + + /* Copy the command line to a local buffer. */ + strcpy(dom0_cmdline, cmdline); + cmdline = dom0_cmdline; + + /* Append any extra parameters. 
*/ + if ( skip_ioapic_setup && !strstr(cmdline, "noapic") ) + strcat(cmdline, " noapic"); + if ( acpi_skip_timer_override && + !strstr(cmdline, "acpi_skip_timer_override") ) + strcat(cmdline, " acpi_skip_timer_override"); + if ( (strlen(acpi_param) != 0) && !strstr(cmdline, "acpi=") ) + { + strcat(cmdline, " acpi="); + strcat(cmdline, acpi_param); + } } /* diff --git a/xen/arch/x86/shadow.c b/xen/arch/x86/shadow.c index 3aa12ee5ea..4dbfa6b02d 100644 --- a/xen/arch/x86/shadow.c +++ b/xen/arch/x86/shadow.c @@ -1217,7 +1217,7 @@ static int shadow_mode_table_op( int i, rc = 0; struct exec_domain *ed; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); SH_VLOG("shadow mode table op %lx %lx count %d", pagetable_val(d->exec_domain[0]->arch.guest_table), /* XXX SMP */ @@ -1813,7 +1813,7 @@ shadow_mark_mfn_out_of_sync(struct exec_domain *ed, unsigned long gpfn, struct pfn_info *page = &frame_table[mfn]; struct out_of_sync_entry *entry = shadow_alloc_oos_entry(d); - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(pfn_valid(mfn)); #ifndef NDEBUG @@ -1943,7 +1943,7 @@ int __shadow_out_of_sync(struct exec_domain *ed, unsigned long va) l2_pgentry_t l2e; unsigned long l1pfn, l1mfn; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(VALID_M2P(l2pfn)); perfc_incrc(shadow_out_of_sync_calls); @@ -2127,7 +2127,7 @@ int shadow_remove_all_write_access( u32 found = 0, fixups, write_refs; unsigned long prediction, predicted_gpfn, predicted_smfn; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(VALID_MFN(readonly_gmfn)); perfc_incrc(remove_write_access); @@ -2245,7 +2245,7 @@ u32 shadow_remove_all_access(struct domain *d, unsigned long forbidden_gmfn) if ( unlikely(!shadow_mode_enabled(d)) ) return 0; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); perfc_incrc(remove_all_access); for (i = 0; 
i < shadow_ht_buckets; i++) @@ -2287,7 +2287,7 @@ static int resync_all(struct domain *d, u32 stype) int unshadow; int changed; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); for ( entry = d->arch.out_of_sync; entry; entry = entry->next) { @@ -2485,7 +2485,7 @@ void __shadow_sync_all(struct domain *d) perfc_incrc(shadow_sync_all); - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); // First, remove all write permissions to the page tables // diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 7a3abe9913..0ab5868273 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -51,8 +51,11 @@ #include <mach_apic.h> #include <mach_wakecpu.h> -/* Cconfigured maximum number of CPUs to activate. We name the parameter -"maxcpus" rather than max_cpus to be compatible with Linux */ +/* opt_nosmp: If true, secondary processors are ignored. */ +static int opt_nosmp = 0; +boolean_param("nosmp", opt_nosmp); + +/* maxcpus: maximum number of CPUs to activate. */ static int max_cpus = -1; integer_param("maxcpus", max_cpus); @@ -797,8 +800,7 @@ void __init smp_boot_cpus(void) * If we couldnt find an SMP configuration at boot time, * get out of here now! */ - if (!smp_found_config) { - printk("SMP motherboard not detected.\n"); + if (!smp_found_config || opt_nosmp) { io_apic_irqs = 0; phys_cpu_present_map = physid_mask_of_physid(0); cpu_online_map = 1; diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 52bd64ac9b..c340169f46 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -433,10 +433,19 @@ static inline int admin_io_okay( #define outl_user(_v, _p, _d, _r) \ (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0)) +/* Propagate a fault back to the guest kernel. 
*/ +#define USER_READ_FAULT 4 /* user mode, read fault */ +#define USER_WRITE_FAULT 6 /* user mode, write fault */ +#define PAGE_FAULT(_faultaddr, _errcode) \ +({ propagate_page_fault(_faultaddr, _errcode); \ + return EXCRET_fault_fixed; \ +}) + +/* Instruction fetch with error handling. */ #define insn_fetch(_type, _size, _ptr) \ ({ unsigned long _x; \ if ( get_user(_x, (_type *)eip) ) \ - goto read_fault; \ + PAGE_FAULT(eip, USER_READ_FAULT); \ eip += _size; (_type)_x; }) static int emulate_privileged_op(struct cpu_user_regs *regs) @@ -502,17 +511,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) case 1: data = (u8)inb_user((u16)regs->edx, ed, regs); if ( put_user((u8)data, (u8 *)regs->edi) ) - goto write_fault; + PAGE_FAULT(regs->edi, USER_WRITE_FAULT); break; case 2: data = (u16)inw_user((u16)regs->edx, ed, regs); if ( put_user((u16)data, (u16 *)regs->edi) ) - goto write_fault; + PAGE_FAULT(regs->edi, USER_WRITE_FAULT); break; case 4: data = (u32)inl_user((u16)regs->edx, ed, regs); if ( put_user((u32)data, (u32 *)regs->edi) ) - goto write_fault; + PAGE_FAULT(regs->edi, USER_WRITE_FAULT); break; } regs->edi += (regs->eflags & EF_DF) ? 
-op_bytes : op_bytes; @@ -527,17 +536,17 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) { case 1: if ( get_user(data, (u8 *)regs->esi) ) - goto read_fault; + PAGE_FAULT(regs->esi, USER_READ_FAULT); outb_user((u8)data, (u16)regs->edx, ed, regs); break; case 2: if ( get_user(data, (u16 *)regs->esi) ) - goto read_fault; + PAGE_FAULT(regs->esi, USER_READ_FAULT); outw_user((u16)data, (u16)regs->edx, ed, regs); break; case 4: if ( get_user(data, (u32 *)regs->esi) ) - goto read_fault; + PAGE_FAULT(regs->esi, USER_READ_FAULT); outl_user((u32)data, (u16)regs->edx, ed, regs); break; } @@ -736,14 +745,6 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) fail: return 0; - - read_fault: - propagate_page_fault(eip, 4); /* user mode, read fault */ - return EXCRET_fault_fixed; - - write_fault: - propagate_page_fault(eip, 6); /* user mode, write fault */ - return EXCRET_fault_fixed; } asmlinkage int do_general_protection(struct cpu_user_regs *regs) @@ -919,13 +920,7 @@ asmlinkage int math_state_restore(struct cpu_user_regs *regs) /* Prevent recursion. 
*/ clts(); - if ( !test_and_set_bit(EDF_USEDFPU, &current->flags) ) - { - if ( test_bit(EDF_DONEFPUINIT, &current->flags) ) - restore_fpu(current); - else - init_fpu(); - } + setup_fpu(current); if ( test_and_clear_bit(EDF_GUEST_STTS, &current->flags) ) { diff --git a/xen/arch/x86/vmx.c b/xen/arch/x86/vmx.c index 7fa433c0c1..75714157a8 100644 --- a/xen/arch/x86/vmx.c +++ b/xen/arch/x86/vmx.c @@ -154,6 +154,21 @@ static int vmx_do_page_fault(unsigned long va, struct cpu_user_regs *regs) return result; } +static void vmx_do_no_device_fault() +{ + unsigned long cr0; + + clts(); + setup_fpu(current); + __vmread(CR0_READ_SHADOW, &cr0); + if (!(cr0 & X86_CR0_TS)) { + __vmread(GUEST_CR0, &cr0); + cr0 &= ~X86_CR0_TS; + __vmwrite(GUEST_CR0, cr0); + } + __vmwrite(EXCEPTION_BITMAP, MONITOR_DEFAULT_EXCEPTION_BITMAP); +} + static void vmx_do_general_protection_fault(struct cpu_user_regs *regs) { unsigned long eip, error_code; @@ -894,6 +909,9 @@ static int vmx_cr_access(unsigned long exit_qualification, struct cpu_user_regs mov_from_cr(cr, gp, regs); break; case TYPE_CLTS: + clts(); + setup_fpu(current); + __vmread(GUEST_CR0, &value); value &= ~X86_CR0_TS; /* clear TS */ __vmwrite(GUEST_CR0, value); @@ -1093,6 +1111,11 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs regs) break; } #endif + case TRAP_no_device: + { + vmx_do_no_device_fault(); + break; + } case TRAP_gp_fault: { vmx_do_general_protection_fault(&regs); diff --git a/xen/arch/x86/vmx_io.c b/xen/arch/x86/vmx_io.c index 05780a2157..49742286c5 100644 --- a/xen/arch/x86/vmx_io.c +++ b/xen/arch/x86/vmx_io.c @@ -429,6 +429,7 @@ void vmx_intr_assist(struct exec_domain *d) void vmx_do_resume(struct exec_domain *d) { + vmx_stts(); if ( test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state) ) __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table)); else diff --git a/xen/arch/x86/vmx_vmcs.c b/xen/arch/x86/vmx_vmcs.c index 90cc88122c..375d20da48 100644 --- a/xen/arch/x86/vmx_vmcs.c +++ b/xen/arch/x86/vmx_vmcs.c @@ -164,6 +164,9 
@@ void vmx_do_launch(struct exec_domain *ed) struct pfn_info *page; struct cpu_user_regs *regs = get_cpu_user_regs(); + vmx_stts(); + set_bit(EDF_GUEST_STTS, &ed->flags); + cpu = smp_processor_id(); page = (struct pfn_info *) alloc_domheap_page(NULL); diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c index 23fc589d4d..b4fe12ecb9 100644 --- a/xen/common/dom0_ops.c +++ b/xen/common/dom0_ops.c @@ -221,7 +221,8 @@ long do_dom0_op(dom0_op_t *u_dom0_op) domid_t dom = op->u.pincpudomain.domain; struct domain *d = find_domain_by_id(dom); struct exec_domain *ed; - int cpu = op->u.pincpudomain.cpu; + cpumap_t cpumap; + if ( d == NULL ) { @@ -229,6 +230,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op) break; } + if ( (op->u.pincpudomain.exec_domain >= MAX_VIRT_CPUS) || + !d->exec_domain[op->u.pincpudomain.exec_domain] ) + { + ret = -EINVAL; + put_domain(d); + break; + } + ed = d->exec_domain[op->u.pincpudomain.exec_domain]; if ( ed == NULL ) { @@ -244,17 +253,29 @@ long do_dom0_op(dom0_op_t *u_dom0_op) break; } - if ( cpu == -1 ) + if ( copy_from_user(&cpumap, op->u.pincpudomain.cpumap, + sizeof(cpumap)) ) { - clear_bit(EDF_CPUPINNED, &ed->flags); + ret = -EFAULT; + put_domain(d); + break; } + + /* update cpumap for this ed */ + ed->cpumap = cpumap; + + if ( cpumap == CPUMAP_RUNANYWHERE ) + clear_bit(EDF_CPUPINNED, &ed->flags); else { + /* pick a new cpu from the usable map */ + int new_cpu = (int)find_first_set_bit(cpumap) % smp_num_cpus; + exec_domain_pause(ed); - if ( ed->processor != (cpu % smp_num_cpus) ) + if ( ed->processor != new_cpu ) set_bit(EDF_MIGRATED, &ed->flags); set_bit(EDF_CPUPINNED, &ed->flags); - ed->processor = cpu % smp_num_cpus; + ed->processor = new_cpu; exec_domain_unpause(ed); } @@ -278,9 +299,11 @@ long do_dom0_op(dom0_op_t *u_dom0_op) case DOM0_GETDOMAININFO: { - struct vcpu_guest_context *c; - struct domain *d; - struct exec_domain *ed; + struct domain *d; + struct exec_domain *ed; + u64 cpu_time = 0; + int vcpu_count = 0; + int flags = 
DOMFLAGS_PAUSED | DOMFLAGS_BLOCKED; read_lock(&domlist_lock); @@ -301,34 +324,82 @@ long do_dom0_op(dom0_op_t *u_dom0_op) op->u.getdomaininfo.domain = d->id; - if ( (op->u.getdomaininfo.exec_domain >= MAX_VIRT_CPUS) || - !d->exec_domain[op->u.getdomaininfo.exec_domain] ) - { - ret = -EINVAL; - break; + memset(&op->u.getdomaininfo.vcpu_to_cpu, -1, + sizeof(op->u.getdomaininfo.vcpu_to_cpu)); + memset(&op->u.getdomaininfo.cpumap, 0, + sizeof(op->u.getdomaininfo.cpumap)); + + /* + * - domain is marked as paused or blocked only if all its vcpus + * are paused or blocked + * - domain is marked as running if any of its vcpus is running + */ + for_each_exec_domain ( d, ed ) { + op->u.getdomaininfo.vcpu_to_cpu[ed->id] = ed->processor; + op->u.getdomaininfo.cpumap[ed->id] = ed->cpumap; + if (!test_bit(EDF_CTRLPAUSE, &ed->flags)) + flags &= ~DOMFLAGS_PAUSED; + if (!test_bit(EDF_BLOCKED, &ed->flags)) + flags &= ~DOMFLAGS_BLOCKED; + if (test_bit(EDF_RUNNING, &ed->flags)) + flags |= DOMFLAGS_RUNNING; + if ( ed->cpu_time > cpu_time ) + cpu_time += ed->cpu_time; + vcpu_count++; } - - ed = d->exec_domain[op->u.getdomaininfo.exec_domain]; - op->u.getdomaininfo.flags = + op->u.getdomaininfo.cpu_time = cpu_time; + op->u.getdomaininfo.n_vcpu = vcpu_count; + + op->u.getdomaininfo.flags = flags | (test_bit( DF_DYING, &d->flags) ? DOMFLAGS_DYING : 0) | (test_bit( DF_CRASHED, &d->flags) ? DOMFLAGS_CRASHED : 0) | (test_bit( DF_SHUTDOWN, &d->flags) ? DOMFLAGS_SHUTDOWN : 0) | - (test_bit(EDF_CTRLPAUSE, &ed->flags) ? DOMFLAGS_PAUSED : 0) | - (test_bit(EDF_BLOCKED, &ed->flags) ? DOMFLAGS_BLOCKED : 0) | - (test_bit(EDF_RUNNING, &ed->flags) ? 
DOMFLAGS_RUNNING : 0); - - op->u.getdomaininfo.flags |= ed->processor << DOMFLAGS_CPUSHIFT; - op->u.getdomaininfo.flags |= d->shutdown_code << DOMFLAGS_SHUTDOWNSHIFT; op->u.getdomaininfo.tot_pages = d->tot_pages; op->u.getdomaininfo.max_pages = d->max_pages; - op->u.getdomaininfo.cpu_time = ed->cpu_time; op->u.getdomaininfo.shared_info_frame = __pa(d->shared_info) >> PAGE_SHIFT; - if ( op->u.getdomaininfo.ctxt != NULL ) + if ( copy_to_user(u_dom0_op, op, sizeof(*op)) ) + ret = -EINVAL; + + put_domain(d); + } + break; + + case DOM0_GETVCPUCONTEXT: + { + struct vcpu_guest_context *c; + struct domain *d; + struct exec_domain *ed; + + d = find_domain_by_id(op->u.getvcpucontext.domain); + if ( d == NULL ) + { + ret = -ESRCH; + break; + } + + if ( op->u.getvcpucontext.exec_domain >= MAX_VIRT_CPUS ) + { + ret = -EINVAL; + put_domain(d); + break; + } + + ed = d->exec_domain[op->u.getvcpucontext.exec_domain]; + if ( ed == NULL ) + { + ret = -ESRCH; + put_domain(d); + break; + } + + op->u.getvcpucontext.cpu_time = ed->cpu_time; + + if ( op->u.getvcpucontext.ctxt != NULL ) { if ( (c = xmalloc(struct vcpu_guest_context)) == NULL ) { @@ -345,7 +416,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op) if ( ed != current ) exec_domain_unpause(ed); - if ( copy_to_user(op->u.getdomaininfo.ctxt, c, sizeof(*c)) ) + if ( copy_to_user(op->u.getvcpucontext.ctxt, c, sizeof(*c)) ) ret = -EINVAL; xfree(c); diff --git a/xen/common/domain.c b/xen/common/domain.c index 7ab17768c6..6f729eb2ec 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -283,6 +283,7 @@ long do_boot_vcpu(unsigned long vcpu, struct vcpu_guest_context *ctxt) ed = d->exec_domain[vcpu]; atomic_set(&ed->pausecnt, 0); + ed->cpumap = CPUMAP_RUNANYWHERE; memcpy(&ed->arch, &idle0_exec_domain.arch, sizeof(ed->arch)); diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 33122e9333..46d63901da 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -55,27 +55,13 @@ void cmdline_parse(char *cmdline) *(unsigned int 
*)param->var = simple_strtol(opt, (char **)&opt, 0); break; - case OPT_UINT_UNIT: - if ( opt != NULL ) - { - int base = 1; - unsigned int value; - - value = simple_strtoul(opt, (char **)&opt, 0); - if (*opt == 'G' || *opt == 'g') { - base = 1024 * 1024; - opt++; - } if (*opt == 'M' || *opt == 'm') { - base = 1024; - opt++; - } else if (*opt == 'K' || *opt == 'k') - opt++; - *(unsigned int *) param->var = value * base; - } - break; case OPT_BOOL: *(int *)param->var = 1; break; + case OPT_CUSTOM: + if ( opt != NULL ) + ((void (*)(char *))param->var)(opt); + break; } } cmdline = opt_end; diff --git a/xen/common/lib.c b/xen/common/lib.c index 2e94339025..6c0706f1f1 100644 --- a/xen/common/lib.c +++ b/xen/common/lib.c @@ -441,6 +441,22 @@ s64 __moddi3(s64 a, s64 b) #endif /* BITS_PER_LONG == 32 */ +unsigned long long memparse(char *s) +{ + unsigned long long ret = simple_strtoull(s, &s, 0); + + switch (*s) { + case 'G': case 'g': + ret <<= 10; + case 'M': case 'm': + ret <<= 10; + case 'K': case 'k': + ret <<= 10; + } + + return ret; +} + /* * Local variables: * mode: C diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index fb26edc69c..5a4115c82e 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -28,6 +28,7 @@ #include <xen/sched.h> #include <xen/spinlock.h> #include <xen/slab.h> +#include <xen/mm.h> #include <xen/irq.h> #include <xen/softirq.h> #include <xen/shadow.h> diff --git a/xen/common/resource.c b/xen/common/resource.c deleted file mode 100644 index 5f264c9806..0000000000 --- a/xen/common/resource.c +++ /dev/null @@ -1,329 +0,0 @@ -/* - * linux/kernel/resource.c - * - * Copyright (C) 1999 Linus Torvalds - * Copyright (C) 1999 Martin Mares <mj@ucw.cz> - * - * Arbitrary resource management. 
- */ - -#include <xen/config.h> -#include <xen/lib.h> -#include <xen/sched.h> -#include <xen/errno.h> -#include <xen/ioport.h> -#include <xen/init.h> -#include <xen/slab.h> -#include <xen/spinlock.h> -#include <asm/io.h> - -struct resource ioport_resource = { "PCI IO", 0x0000, IO_SPACE_LIMIT, IORESOURCE_IO }; -struct resource iomem_resource = { "PCI mem", 0x00000000, 0xffffffff, IORESOURCE_MEM }; - -static rwlock_t resource_lock = RW_LOCK_UNLOCKED; - -/* - * This generates reports for /proc/ioports and /proc/iomem - */ -static char * do_resource_list(struct resource *entry, const char *fmt, int offset, char *buf, char *end) -{ - if (offset < 0) - offset = 0; - - while (entry) { - const char *name = entry->name; - unsigned long from, to; - - if ((int) (end-buf) < 80) - return buf; - - from = entry->start; - to = entry->end; - if (!name) - name = "<BAD>"; - - buf += sprintf(buf, fmt + offset, from, to, name); - if (entry->child) - buf = do_resource_list(entry->child, fmt, offset-2, buf, end); - entry = entry->sibling; - } - - return buf; -} - -int get_resource_list(struct resource *root, char *buf, int size) -{ - char *fmt; - int retval; - - fmt = " %08lx-%08lx : %s\n"; - if (root->end < 0x10000) - fmt = " %04lx-%04lx : %s\n"; - read_lock(&resource_lock); - retval = do_resource_list(root->child, fmt, 8, buf, buf + size) - buf; - read_unlock(&resource_lock); - return retval; -} - -/* Return the conflict entry if you can't request it */ -static struct resource * __request_resource(struct resource *root, struct resource *new) -{ - unsigned long start = new->start; - unsigned long end = new->end; - struct resource *tmp, **p; - - if (end < start) - return root; - if (start < root->start) - return root; - if (end > root->end) - return root; - p = &root->child; - for (;;) { - tmp = *p; - if (!tmp || tmp->start > end) { - new->sibling = tmp; - *p = new; - new->parent = root; - return NULL; - } - p = &tmp->sibling; - if (tmp->end < start) - continue; - return tmp; - } -} - 
-static int __release_resource(struct resource *old) -{ - struct resource *tmp, **p; - - p = &old->parent->child; - for (;;) { - tmp = *p; - if (!tmp) - break; - if (tmp == old) { - *p = tmp->sibling; - old->parent = NULL; - return 0; - } - p = &tmp->sibling; - } - return -EINVAL; -} - -int request_resource(struct resource *root, struct resource *new) -{ - struct resource *conflict; - - write_lock(&resource_lock); - conflict = __request_resource(root, new); - write_unlock(&resource_lock); - return conflict ? -EBUSY : 0; -} - -int release_resource(struct resource *old) -{ - int retval; - - write_lock(&resource_lock); - retval = __release_resource(old); - write_unlock(&resource_lock); - return retval; -} - -int check_resource(struct resource *root, unsigned long start, unsigned long len) -{ - struct resource *conflict, tmp; - - tmp.start = start; - tmp.end = start + len - 1; - write_lock(&resource_lock); - conflict = __request_resource(root, &tmp); - if (!conflict) - __release_resource(&tmp); - write_unlock(&resource_lock); - return conflict ? -EBUSY : 0; -} - -/* - * Find empty slot in the resource tree given range and alignment. 
- */ -static int find_resource(struct resource *root, struct resource *new, - unsigned long size, - unsigned long min, unsigned long max, - unsigned long align, - void (*alignf)(void *, struct resource *, - unsigned long, unsigned long), - void *alignf_data) -{ - struct resource *this = root->child; - - new->start = root->start; - for(;;) { - if (this) - new->end = this->start; - else - new->end = root->end; - if (new->start < min) - new->start = min; - if (new->end > max) - new->end = max; - new->start = (new->start + align - 1) & ~(align - 1); - if (alignf) - alignf(alignf_data, new, size, align); - if (new->start < new->end && new->end - new->start + 1 >= size) { - new->end = new->start + size - 1; - return 0; - } - if (!this) - break; - new->start = this->end + 1; - this = this->sibling; - } - return -EBUSY; -} - -/* - * Allocate empty slot in the resource tree given range and alignment. - */ -int allocate_resource(struct resource *root, struct resource *new, - unsigned long size, - unsigned long min, unsigned long max, - unsigned long align, - void (*alignf)(void *, struct resource *, - unsigned long, unsigned long), - void *alignf_data) -{ - int err; - - write_lock(&resource_lock); - err = find_resource(root, new, size, min, max, align, alignf, alignf_data); - if (err >= 0 && __request_resource(root, new)) - err = -EBUSY; - write_unlock(&resource_lock); - return err; -} - -/* - * This is compatibility stuff for IO resources. - * - * Note how this, unlike the above, knows about - * the IO flag meanings (busy etc). - * - * Request-region creates a new busy region. - * - * Check-region returns non-zero if the area is already busy - * - * Release-region releases a matching busy region. 
- */ -struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) -{ - struct resource *res = xmalloc(struct resource); - - if (res) { - memset(res, 0, sizeof(*res)); - res->name = name; - res->start = start; - res->end = start + n - 1; - res->flags = IORESOURCE_BUSY; - - write_lock(&resource_lock); - - for (;;) { - struct resource *conflict; - - conflict = __request_resource(parent, res); - if (!conflict) - break; - if (conflict != parent) { - parent = conflict; - if (!(conflict->flags & IORESOURCE_BUSY)) - continue; - } - - /* Uhhuh, that didn't work out.. */ - xfree(res); - res = NULL; - break; - } - write_unlock(&resource_lock); - } - return res; -} - -void __release_region(struct resource *parent, unsigned long start, unsigned long n) -{ - struct resource **p; - unsigned long end; - - p = &parent->child; - end = start + n - 1; - - for (;;) { - struct resource *res = *p; - - if (!res) - break; - if (res->start <= start && res->end >= end) { - if (!(res->flags & IORESOURCE_BUSY)) { - p = &res->child; - continue; - } - if (res->start != start || res->end != end) - break; - *p = res->sibling; - xfree(res); - return; - } - p = &res->sibling; - } - printk("Trying to free nonexistent resource <%08lx-%08lx>\n", start, end); -} - - -#if 0 -/* - * Called from init/main.c to reserve IO ports. - */ -#define MAXRESERVE 4 -static int __init reserve_setup(char *str) -{ - static int reserved = 0; - static struct resource reserve[MAXRESERVE]; - - for (;;) { - int io_start, io_num; - int x = reserved; - - if (get_option (&str, &io_start) != 2) - break; - if (get_option (&str, &io_num) == 0) - break; - if (x < MAXRESERVE) { - struct resource *res = reserve + x; - res->name = "reserved"; - res->start = io_start; - res->end = io_start + io_num - 1; - res->flags = IORESOURCE_BUSY; - res->child = NULL; - if (request_resource(res->start >= 0x10000 ? 
&iomem_resource : &ioport_resource, res) == 0) - reserved = x+1; - } - } - return 1; -} - -__setup("reserve=", reserve_setup); -#endif - -/* - * Local variables: - * mode: C - * c-set-style: "BSD" - * c-basic-offset: 8 - * tab-width: 8 - * indent-tabs-mode: t - * End: - */ diff --git a/xen/common/schedule.c b/xen/common/schedule.c index fb2db1b30f..491c2f22ac 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -34,6 +34,7 @@ #include <xen/sched-if.h> #include <xen/softirq.h> #include <xen/trace.h> +#include <xen/mm.h> #include <public/sched_ctl.h> /* opt_sched: scheduler - default to Borrowed Virtual Time */ diff --git a/xen/drivers/char/console.c b/xen/drivers/char/console.c index f20d664c91..d9eb0c474a 100644 --- a/xen/drivers/char/console.c +++ b/xen/drivers/char/console.c @@ -17,8 +17,8 @@ #include <xen/console.h> #include <xen/serial.h> #include <xen/keyhandler.h> +#include <xen/mm.h> #include <asm/uaccess.h> -#include <asm/mm.h> #include <asm/debugger.h> #include <asm/io.h> diff --git a/xen/include/asm-ia64/config.h b/xen/include/asm-ia64/config.h index 27f1e43620..9b656e493b 100644 --- a/xen/include/asm-ia64/config.h +++ b/xen/include/asm-ia64/config.h @@ -166,12 +166,6 @@ struct device { #endif }; -// from linux/include/linux/pci.h -struct pci_bus_region { - unsigned long start; - unsigned long end; -}; - // warning: unless search_extable is declared, the return value gets // truncated to 32-bits, causing a very strange error in privop handling struct exception_table_entry; @@ -256,6 +250,14 @@ struct screen_info { }; #define seq_printf(a,b...) 
printf(b) #define CONFIG_BLK_DEV_INITRD // needed to reserve memory for domain0 +// +#define __smp_processor_id() (current->processor) + +// needed for newer ACPI code +#define asmlinkage + +#define FORCE_CRASH() asm("break 0;;"); + // these declarations got moved at some point, find a better place for them extern int opt_noht; extern int ht_per_core; diff --git a/xen/include/asm-ia64/domain.h b/xen/include/asm-ia64/domain.h index 6425368f51..7b6758078e 100644 --- a/xen/include/asm-ia64/domain.h +++ b/xen/include/asm-ia64/domain.h @@ -37,12 +37,30 @@ struct arch_domain { #define shared_info_va arch.shared_info_va struct arch_exec_domain { +#if 1 + TR_ENTRY itrs[NITRS]; + TR_ENTRY dtrs[NDTRS]; + TR_ENTRY itlb; + TR_ENTRY dtlb; + unsigned long itlb_pte; + unsigned long dtlb_pte; + unsigned long irr[4]; + unsigned long insvc[4]; + unsigned long iva; + unsigned long dcr; + unsigned long itc; + unsigned long domain_itm; + unsigned long domain_itm_last; + unsigned long xen_itm; + unsigned long xen_timer_interval; +#endif void *regs; /* temporary until find a better way to do privops */ - struct thread_struct _thread; struct mm_struct *active_mm; + struct thread_struct _thread; // this must be last }; + #define active_mm arch.active_mm -#define thread arch._thread +//#define thread arch._thread // FOLLOWING FROM linux-2.6.7/include/sched.h diff --git a/xen/include/asm-ia64/vcpu.h b/xen/include/asm-ia64/vcpu.h index 6ee2e73dde..bb24bad2b2 100644 --- a/xen/include/asm-ia64/vcpu.h +++ b/xen/include/asm-ia64/vcpu.h @@ -21,6 +21,21 @@ typedef struct pt_regs REGS; //#define vcpu_regs(vcpu) &((struct spk_thread_t *)vcpu)->thread_regs //#define vcpu_thread(vcpu) ((struct spk_thread_t *)vcpu) +#define PRIVOP_ADDR_COUNT +#ifdef PRIVOP_ADDR_COUNT +#define _RSM 0 +#define _SSM 1 +#define PRIVOP_COUNT_NINSTS 2 +#define PRIVOP_COUNT_NADDRS 30 + +struct privop_addr_count { + char *instname; + unsigned long addr[PRIVOP_COUNT_NADDRS]; + unsigned long count[PRIVOP_COUNT_NADDRS]; + unsigned 
long overflow; +}; +#endif + /* general registers */ extern UINT64 vcpu_get_gr(VCPU *vcpu, unsigned reg); extern IA64FAULT vcpu_set_gr(VCPU *vcpu, unsigned reg, UINT64 value); @@ -132,6 +147,8 @@ extern void vcpu_set_next_timer(VCPU *vcpu); extern BOOLEAN vcpu_timer_expired(VCPU *vcpu); extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu); extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64); +extern UINT64 vcpu_get_tmp(VCPU *, UINT64); +extern void vcpu_set_tmp(VCPU *, UINT64, UINT64); #endif diff --git a/xen/include/asm-x86/acpi.h b/xen/include/asm-x86/acpi.h index 3a15181819..b13f8ff79a 100644 --- a/xen/include/asm-x86/acpi.h +++ b/xen/include/asm-x86/acpi.h @@ -27,7 +27,7 @@ #define _ASM_ACPI_H #include <xen/config.h> -#include <asm/system.h> +#include <asm/system.h> /* defines cmpxchg */ #define COMPILER_DEPENDENT_INT64 long long #define COMPILER_DEPENDENT_UINT64 unsigned long long @@ -49,8 +49,8 @@ #define ACPI_ASM_MACROS #define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() __cli() -#define ACPI_ENABLE_IRQS() __sti() +#define ACPI_DISABLE_IRQS() local_irq_disable() +#define ACPI_ENABLE_IRQS() local_irq_enable() #define ACPI_FLUSH_CPU_CACHE() wbinvd() @@ -100,6 +100,11 @@ __acpi_release_global_lock (unsigned int *lock) :"=r"(n_hi), "=r"(n_lo) \ :"0"(n_hi), "1"(n_lo)) +/* + * Refer Intel ACPI _PDC support document for bit definitions + */ +#define ACPI_PDC_EST_CAPABILITY_SMP 0xa +#define ACPI_PDC_EST_CAPABILITY_MSR 0x1 #ifdef CONFIG_ACPI_BOOT extern int acpi_lapic; @@ -108,46 +113,52 @@ extern int acpi_noirq; extern int acpi_strict; extern int acpi_disabled; extern int acpi_ht; -static inline void disable_acpi(void) { acpi_disabled = 1; acpi_ht = 0; } +extern int acpi_pci_disabled; +static inline void disable_acpi(void) +{ + acpi_disabled = 1; + acpi_ht = 0; + acpi_pci_disabled = 1; + acpi_noirq = 1; +} /* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ #define FIX_ACPI_PAGES 4 -#else /* !CONFIG_ACPI_BOOT */ -# 
define acpi_lapic 0 -# define acpi_ioapic 0 - -#endif /* !CONFIG_ACPI_BOOT */ +extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); -#ifdef CONFIG_ACPI_PCI -static inline void acpi_noirq_set(void) { acpi_noirq = 1; } -extern int acpi_irq_balance_set(char *str); -#else -static inline void acpi_noirq_set(void) { } -static inline int acpi_irq_balance_set(char *str) { return 0; } -#endif +#ifdef CONFIG_X86_IO_APIC +extern int skip_ioapic_setup; +extern int acpi_skip_timer_override; -#ifdef CONFIG_ACPI_SLEEP +extern void check_acpi_pci(void); -extern unsigned long saved_eip; -extern unsigned long saved_esp; -extern unsigned long saved_ebp; -extern unsigned long saved_ebx; -extern unsigned long saved_esi; -extern unsigned long saved_edi; +static inline void disable_ioapic_setup(void) +{ + skip_ioapic_setup = 1; +} -static inline void acpi_save_register_state(unsigned long return_point) +static inline int ioapic_setup_disabled(void) { - saved_eip = return_point; - asm volatile ("movl %%esp,(%0)" : "=m" (saved_esp)); - asm volatile ("movl %%ebp,(%0)" : "=m" (saved_ebp)); - asm volatile ("movl %%ebx,(%0)" : "=m" (saved_ebx)); - asm volatile ("movl %%edi,(%0)" : "=m" (saved_edi)); - asm volatile ("movl %%esi,(%0)" : "=m" (saved_esi)); + return skip_ioapic_setup; } -#define acpi_restore_register_state() do {} while (0) +#else +static inline void disable_ioapic_setup(void) { } +static inline void check_acpi_pci(void) { } +#endif + +#else /* CONFIG_ACPI_BOOT */ +# define acpi_lapic 0 +# define acpi_ioapic 0 + +#endif + +static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +static inline int acpi_irq_balance_set(char *str) { return 0; } + +#ifdef CONFIG_ACPI_SLEEP /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); @@ -156,11 +167,11 @@ extern void acpi_restore_state_mem(void); extern unsigned long acpi_wakeup_address; -extern void do_suspend_lowlevel_s4bios(int resume); - /* early initialization routine */ extern void 
acpi_reserve_bootmem(void); #endif /*CONFIG_ACPI_SLEEP*/ +extern u8 x86_acpiid_to_apicid[]; + #endif /*_ASM_ACPI_H*/ diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h index 683ba2ac0c..86c806d2fd 100644 --- a/xen/include/asm-x86/domain.h +++ b/xen/include/asm-x86/domain.h @@ -30,7 +30,7 @@ struct arch_domain /* Shadow mode status and controls. */ unsigned int shadow_mode; /* flags to control shadow table operation */ - spinlock_t shadow_lock; + unsigned int shadow_nest; /* Recursive depth of shadow_lock() nesting */ /* Shadow mode has tainted page reference counts? */ unsigned int shadow_tainted_refcnts; diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h index ca81778089..01039ab648 100644 --- a/xen/include/asm-x86/i387.h +++ b/xen/include/asm-x86/i387.h @@ -28,4 +28,16 @@ extern void restore_fpu(struct exec_domain *tsk); __asm__ __volatile__ ( "ldmxcsr %0" : : "m" (__mxcsr) ); \ } while ( 0 ) +/* Make domain the FPU owner */ +static inline void setup_fpu(struct exec_domain *ed) +{ + if ( !test_and_set_bit(EDF_USEDFPU, &ed->flags) ) + { + if ( test_bit(EDF_DONEFPUINIT, &ed->flags) ) + restore_fpu(ed); + else + init_fpu(); + } +} + #endif /* __ASM_I386_I387_H */ diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h index 5c71c08f58..5e9448a847 100644 --- a/xen/include/asm-x86/io_apic.h +++ b/xen/include/asm-x86/io_apic.h @@ -17,7 +17,6 @@ #ifdef CONFIG_PCI_MSI static inline int use_pci_vector(void) {return 1;} static inline void disable_edge_ioapic_vector(unsigned int vector) { } -static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { } static inline void end_edge_ioapic_vector (unsigned int vector) { } #define startup_level_ioapic startup_level_ioapic_vector #define shutdown_level_ioapic mask_IO_APIC_vector @@ -36,7 +35,6 @@ static inline void end_edge_ioapic_vector (unsigned int vector) { } #else static inline int use_pci_vector(void) {return 0;} static inline void 
disable_edge_ioapic_irq(unsigned int irq) { } -static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { } static inline void end_edge_ioapic_irq (unsigned int irq) { } #define startup_level_ioapic startup_level_ioapic_irq #define shutdown_level_ioapic mask_IO_APIC_irq diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h index d991a51f93..5ba7d72bbb 100644 --- a/xen/include/asm-x86/shadow.h +++ b/xen/include/asm-x86/shadow.h @@ -26,6 +26,7 @@ #include <xen/types.h> #include <xen/perfc.h> #include <xen/sched.h> +#include <xen/mm.h> #include <asm/processor.h> #include <asm/domain_page.h> #include <public/dom0_ops.h> @@ -59,9 +60,45 @@ #define __linear_hl2_table ((l1_pgentry_t *)(LINEAR_PT_VIRT_START + \ (PERDOMAIN_VIRT_START >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))) -#define shadow_lock_init(_d) spin_lock_init(&(_d)->arch.shadow_lock) -#define shadow_lock(_d) do { ASSERT(!spin_is_locked(&(_d)->arch.shadow_lock)); spin_lock(&(_d)->arch.shadow_lock); } while (0) -#define shadow_unlock(_d) spin_unlock(&(_d)->arch.shadow_lock) +/* + * For now we use the per-domain BIGLOCK rather than a shadow-specific lock. + * We usually have the BIGLOCK already acquired anyway, so this is unlikely + * to cause much unnecessary extra serialisation. Also it's a recursive + * lock, and there are some code paths containing nested shadow_lock(). + * The #if0'ed code below is therefore broken until such nesting is removed. 
+ */ +#if 0 +#define shadow_lock_init(_d) \ + spin_lock_init(&(_d)->arch.shadow_lock) +#define shadow_lock_is_acquired(_d) \ + spin_is_locked(&(_d)->arch.shadow_lock) +#define shadow_lock(_d) \ +do { \ + ASSERT(!shadow_lock_is_acquired(_d)); \ + spin_lock(&(_d)->arch.shadow_lock); \ +} while (0) +#define shadow_unlock(_d) \ +do { \ + ASSERT(!shadow_lock_is_acquired(_d)); \ + spin_unlock(&(_d)->arch.shadow_lock); \ +} while (0) +#else +#define shadow_lock_init(_d) \ + ((_d)->arch.shadow_nest = 0) +#define shadow_lock_is_acquired(_d) \ + (spin_is_locked(&(_d)->big_lock) && ((_d)->arch.shadow_nest != 0)) +#define shadow_lock(_d) \ +do { \ + LOCK_BIGLOCK(_d); \ + (_d)->arch.shadow_nest++; \ +} while (0) +#define shadow_unlock(_d) \ +do { \ + ASSERT(shadow_lock_is_acquired(_d)); \ + (_d)->arch.shadow_nest--; \ + UNLOCK_BIGLOCK(_d); \ +} while (0) +#endif #define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min)) #define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1)) @@ -402,7 +439,7 @@ static inline int __mark_dirty(struct domain *d, unsigned int mfn) unsigned long pfn; int rc = 0; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(d->arch.shadow_dirty_bitmap != NULL); if ( !VALID_MFN(mfn) ) @@ -1136,7 +1173,7 @@ static inline unsigned long __shadow_status( ? 
__gpfn_to_mfn(d, gpfn) : INVALID_MFN); - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(gpfn == (gpfn & PGT_mfn_mask)); ASSERT(stype && !(stype & ~PGT_type_mask)); @@ -1185,7 +1222,7 @@ shadow_max_pgtable_type(struct domain *d, unsigned long gpfn, struct shadow_status *x; u32 pttype = PGT_none, type; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(gpfn == (gpfn & PGT_mfn_mask)); perfc_incrc(shadow_max_type); @@ -1279,7 +1316,7 @@ static inline void delete_shadow_status( struct shadow_status *p, *x, *n, *head; unsigned long key = gpfn | stype; - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(!(gpfn & ~PGT_mfn_mask)); ASSERT(stype && !(stype & ~PGT_type_mask)); @@ -1361,7 +1398,7 @@ static inline void set_shadow_status( SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype); - ASSERT(spin_is_locked(&d->arch.shadow_lock)); + ASSERT(shadow_lock_is_acquired(d)); ASSERT(shadow_mode_translate(d) || gpfn); ASSERT(!(gpfn & ~PGT_mfn_mask)); diff --git a/xen/include/asm-x86/vmx.h b/xen/include/asm-x86/vmx.h index 334afcf6be..3041a334f1 100644 --- a/xen/include/asm-x86/vmx.h +++ b/xen/include/asm-x86/vmx.h @@ -24,6 +24,7 @@ #include <asm/regs.h> #include <asm/processor.h> #include <asm/vmx_vmcs.h> +#include <asm/i387.h> extern void vmx_asm_vmexit_handler(struct cpu_user_regs); extern void vmx_asm_do_resume(void); @@ -251,4 +252,19 @@ static inline int __vmxon (u64 addr) return 0; } +/* Make sure that xen intercepts any FP accesses from current */ +static inline void vmx_stts() +{ + unsigned long cr0; + + __vmread(GUEST_CR0, &cr0); + if (!(cr0 & X86_CR0_TS)) + __vmwrite(GUEST_CR0, cr0 | X86_CR0_TS); + + __vmread(CR0_READ_SHADOW, &cr0); + if (!(cr0 & X86_CR0_TS)) + __vmwrite(EXCEPTION_BITMAP, MONITOR_DEFAULT_EXCEPTION_BITMAP | + EXCEPTION_BITMAP_NM); +} + #endif /* __ASM_X86_VMX_H__ */ diff --git a/xen/include/public/arch-ia64.h 
b/xen/include/public/arch-ia64.h index ad24299a0d..8c785d65d6 100644 --- a/xen/include/public/arch-ia64.h +++ b/xen/include/public/arch-ia64.h @@ -63,24 +63,8 @@ typedef struct { unsigned long bank1_regs[16]; // bank1 regs (r16-r31) when bank0 active unsigned long rrs[8]; // region registers unsigned long krs[8]; // kernel registers - unsigned long pkrs[8]; // protection key registers - // FIXME: These shouldn't be here as they can be overwritten by guests - // and validation at TLB miss time would be too expensive. - TR_ENTRY itrs[NITRS]; - TR_ENTRY dtrs[NDTRS]; - TR_ENTRY itlb; - TR_ENTRY dtlb; - unsigned long itlb_pte; - unsigned long dtlb_pte; - unsigned long irr[4]; - unsigned long insvc[4]; - unsigned long iva; - unsigned long dcr; - unsigned long itc; - unsigned long domain_itm; - unsigned long domain_itm_last; - unsigned long xen_itm; - unsigned long xen_timer_interval; + unsigned long pkrs[8]; // protection key registers + unsigned long tmp[8]; // temp registers (e.g. for hyperprivops) //} PACKED arch_shared_info_t; } arch_vcpu_info_t; // DON'T PACK diff --git a/xen/include/public/dom0_ops.h b/xen/include/public/dom0_ops.h index 869a959f6c..734b7c5667 100644 --- a/xen/include/public/dom0_ops.h +++ b/xen/include/public/dom0_ops.h @@ -19,7 +19,7 @@ * This makes sure that old versions of dom0 tools will stop working in a * well-defined way (rather than crashing the machine, for instance). */ -#define DOM0_INTERFACE_VERSION 0xAAAA1004 +#define DOM0_INTERFACE_VERSION 0xAAAA1005 /************************************************************************/ @@ -70,7 +70,6 @@ typedef struct { typedef struct { /* IN variables. */ domid_t domain; /* NB. IN/OUT variable. */ - u16 exec_domain; /* OUT variables. */ #define DOMFLAGS_DYING (1<<0) /* Domain is scheduled to die. */ #define DOMFLAGS_CRASHED (1<<1) /* Crashed domain; frozen for postmortem. */ @@ -83,11 +82,13 @@ typedef struct { #define DOMFLAGS_SHUTDOWNMASK 255 /* DOMFLAGS_SHUTDOWN guest-supplied code. 
*/ #define DOMFLAGS_SHUTDOWNSHIFT 16 u32 flags; - vcpu_guest_context_t *ctxt; /* NB. IN/OUT variable. */ memory_t tot_pages; memory_t max_pages; memory_t shared_info_frame; /* MFN of shared_info struct */ u64 cpu_time; + u32 n_vcpu; + s32 vcpu_to_cpu[MAX_VIRT_CPUS]; /* current mapping */ + cpumap_t cpumap[MAX_VIRT_CPUS]; /* allowable mapping */ } dom0_getdomaininfo_t; #define DOM0_SETDOMAININFO 13 @@ -170,14 +171,14 @@ typedef struct { } dom0_readconsole_t; /* - * Pin Domain to a particular CPU (use -1 to unpin) + * Set which cpus an exec_domain can use */ #define DOM0_PINCPUDOMAIN 20 typedef struct { /* IN variables. */ domid_t domain; u16 exec_domain; - s32 cpu; /* -1 implies unpin */ + cpumap_t *cpumap; } dom0_pincpudomain_t; /* Get trace buffers machine base address */ @@ -342,6 +343,14 @@ typedef struct { u16 allow_access; /* allow or deny access to range? */ } dom0_ioport_permission_t; +#define DOM0_GETVCPUCONTEXT 37 +typedef struct { + domid_t domain; /* domain to be affected */ + u16 exec_domain; /* vcpu # */ + vcpu_guest_context_t *ctxt; /* NB. IN/OUT variable. */ + u64 cpu_time; +} dom0_getvcpucontext_t; + typedef struct { u32 cmd; u32 interface_version; /* DOM0_INTERFACE_VERSION */ @@ -373,6 +382,7 @@ typedef struct { dom0_perfccontrol_t perfccontrol; dom0_microcode_t microcode; dom0_ioport_permission_t ioport_permission; + dom0_getvcpucontext_t getvcpucontext; } u; } dom0_op_t; diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h index 7dc30c9dc6..e3ba5e9b06 100644 --- a/xen/include/public/xen.h +++ b/xen/include/public/xen.h @@ -473,6 +473,8 @@ typedef struct { /* For use in guest OSes. 
*/ extern shared_info_t *HYPERVISOR_shared_info; +typedef u64 cpumap_t; + #endif /* !__ASSEMBLY__ */ #endif /* __XEN_PUBLIC_XEN_H__ */ diff --git a/xen/include/xen/cpumask.h b/xen/include/xen/cpumask.h index 33723fbd9a..4881447842 100644 --- a/xen/include/xen/cpumask.h +++ b/xen/include/xen/cpumask.h @@ -9,7 +9,9 @@ typedef u32 cpumask_t; +#ifndef cpu_online_map extern cpumask_t cpu_online_map; +#endif static inline int cpus_weight(cpumask_t w) { diff --git a/xen/include/xen/init.h b/xen/include/xen/init.h index 991abc6c24..0709c9abd8 100644 --- a/xen/include/xen/init.h +++ b/xen/include/xen/init.h @@ -59,13 +59,17 @@ extern initcall_t __initcall_start, __initcall_end; */ struct kernel_param { const char *name; - enum { OPT_STR, OPT_UINT, OPT_UINT_UNIT, OPT_BOOL } type; + enum { OPT_STR, OPT_UINT, OPT_BOOL, OPT_CUSTOM } type; void *var; unsigned int len; }; extern struct kernel_param __setup_start, __setup_end; +#define custom_param(_name, _var) \ + static char __setup_str_##_var[] __initdata = _name; \ + static struct kernel_param __setup_##_var __attribute_used__ \ + __initsetup = { __setup_str_##_var, OPT_CUSTOM, &_var, 0 } #define boolean_param(_name, _var) \ static char __setup_str_##_var[] __initdata = _name; \ static struct kernel_param __setup_##_var __attribute_used__ \ @@ -74,10 +78,6 @@ extern struct kernel_param __setup_start, __setup_end; static char __setup_str_##_var[] __initdata = _name; \ static struct kernel_param __setup_##_var __attribute_used__ \ __initsetup = { __setup_str_##_var, OPT_UINT, &_var, sizeof(_var) } -#define integer_unit_param(_name, _var) \ - static char __setup_str_##_var[] __initdata = _name; \ - static struct kernel_param __setup_##_var __attribute_used__ \ - __initsetup = { __setup_str_##_var, OPT_UINT_UNIT, &_var, sizeof(_var) } #define string_param(_name, _var) \ static char __setup_str_##_var[] __initdata = _name; \ static struct kernel_param __setup_##_var __attribute_used__ \ diff --git a/xen/include/xen/ioport.h 
b/xen/include/xen/ioport.h deleted file mode 100644 index b39d07ae32..0000000000 --- a/xen/include/xen/ioport.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * ioport.h Definitions of routines for detecting, reserving and - * allocating system resources. - * - * Authors: Linus Torvalds - */ - -#ifndef _LINUX_IOPORT_H -#define _LINUX_IOPORT_H - -/* - * Resources are tree-like, allowing - * nesting etc.. - */ -struct resource { - const char *name; - unsigned long start, end; - unsigned long flags; - struct resource *parent, *sibling, *child; -}; - -struct resource_list { - struct resource_list *next; - struct resource *res; - struct pci_dev *dev; -}; - -/* - * IO resources have these defined flags. - */ -#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ - -#define IORESOURCE_IO 0x00000100 /* Resource type */ -#define IORESOURCE_MEM 0x00000200 -#define IORESOURCE_IRQ 0x00000400 -#define IORESOURCE_DMA 0x00000800 - -#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ -#define IORESOURCE_READONLY 0x00002000 -#define IORESOURCE_CACHEABLE 0x00004000 -#define IORESOURCE_RANGELENGTH 0x00008000 -#define IORESOURCE_SHADOWABLE 0x00010000 -#define IORESOURCE_BUS_HAS_VGA 0x00080000 - -#define IORESOURCE_UNSET 0x20000000 -#define IORESOURCE_AUTO 0x40000000 -#define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ - -/* ISA PnP IRQ specific bits (IORESOURCE_BITS) */ -#define IORESOURCE_IRQ_HIGHEDGE (1<<0) -#define IORESOURCE_IRQ_LOWEDGE (1<<1) -#define IORESOURCE_IRQ_HIGHLEVEL (1<<2) -#define IORESOURCE_IRQ_LOWLEVEL (1<<3) - -/* ISA PnP DMA specific bits (IORESOURCE_BITS) */ -#define IORESOURCE_DMA_TYPE_MASK (3<<0) -#define IORESOURCE_DMA_8BIT (0<<0) -#define IORESOURCE_DMA_8AND16BIT (1<<0) -#define IORESOURCE_DMA_16BIT (2<<0) - -#define IORESOURCE_DMA_MASTER (1<<2) -#define IORESOURCE_DMA_BYTE (1<<3) -#define IORESOURCE_DMA_WORD (1<<4) - -#define IORESOURCE_DMA_SPEED_MASK (3<<6) -#define IORESOURCE_DMA_COMPATIBLE (0<<6) -#define IORESOURCE_DMA_TYPEA 
(1<<6) -#define IORESOURCE_DMA_TYPEB (2<<6) -#define IORESOURCE_DMA_TYPEF (3<<6) - -/* ISA PnP memory I/O specific bits (IORESOURCE_BITS) */ -#define IORESOURCE_MEM_WRITEABLE (1<<0) /* dup: IORESOURCE_READONLY */ -#define IORESOURCE_MEM_CACHEABLE (1<<1) /* dup: IORESOURCE_CACHEABLE */ -#define IORESOURCE_MEM_RANGELENGTH (1<<2) /* dup: IORESOURCE_RANGELENGTH */ -#define IORESOURCE_MEM_TYPE_MASK (3<<3) -#define IORESOURCE_MEM_8BIT (0<<3) -#define IORESOURCE_MEM_16BIT (1<<3) -#define IORESOURCE_MEM_8AND16BIT (2<<3) -#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ -#define IORESOURCE_MEM_EXPANSIONROM (1<<6) - -/* PC/ISA/whatever - the normal PC address spaces: IO and memory */ -extern struct resource ioport_resource; -extern struct resource iomem_resource; - -extern int get_resource_list(struct resource *, char *buf, int size); - -extern int check_resource(struct resource *root, unsigned long, unsigned long); -extern int request_resource(struct resource *root, struct resource *new); -extern int release_resource(struct resource *new); -extern int allocate_resource(struct resource *root, struct resource *new, - unsigned long size, - unsigned long min, unsigned long max, - unsigned long align, - void (*alignf)(void *, struct resource *, - unsigned long, unsigned long), - void *alignf_data); - -/* Convenience shorthand with allocation */ -#define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name)) -#define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name)) - -#define release_region(start,n) __release_region(&ioport_resource, (start), (n)) -#define release_mem_region(start,n) __release_region(&iomem_resource, (start), (n)) - -extern void __release_region(struct resource *, unsigned long, unsigned long); - -extern struct resource * __request_region(struct resource *, unsigned long start, unsigned long n, const char *name); - -#define get_ioport_list(buf) 
get_resource_list(&ioport_resource, buf, PAGE_SIZE) -#define get_mem_list(buf) get_resource_list(&iomem_resource, buf, PAGE_SIZE) - -#define HAVE_AUTOIRQ -extern void autoirq_setup(int waittime); -extern int autoirq_report(int waittime); - -#endif /* _LINUX_IOPORT_H */ diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h index b8a981a632..56eae9d9b5 100644 --- a/xen/include/xen/lib.h +++ b/xen/include/xen/lib.h @@ -57,8 +57,15 @@ extern int snprintf(char * buf, size_t size, const char * fmt, ...) __attribute__ ((format (printf, 3, 4))); extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); -long simple_strtol(const char *cp,char **endp,unsigned int base); -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base); -long long simple_strtoll(const char *cp,char **endp,unsigned int base); +long simple_strtol( + const char *cp,char **endp, unsigned int base); +unsigned long simple_strtoul( + const char *cp,char **endp, unsigned int base); +long long simple_strtoll( + const char *cp,char **endp, unsigned int base); +unsigned long long simple_strtoull( + const char *cp,char **endp, unsigned int base); + +unsigned long long memparse(char *s); #endif /* __LIB_H__ */ diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h deleted file mode 100644 index 046328d11e..0000000000 --- a/xen/include/xen/pci.h +++ /dev/null @@ -1,834 +0,0 @@ -/* - * $Id: pci.h,v 1.87 1998/10/11 15:13:12 mj Exp $ - * - * PCI defines and function prototypes - * Copyright 1994, Drew Eckhardt - * Copyright 1997--1999 Martin Mares <mj@ucw.cz> - * - * For more information, please consult the following manuals (look at - * http://www.pcisig.com/ for how to get them): - * - * PCI BIOS Specification - * PCI Local Bus Specification - * PCI to PCI Bridge Specification - * PCI System Design Guide - */ - -#ifndef LINUX_PCI_H -#define LINUX_PCI_H - -/* - * Under PCI, each device has 256 bytes of configuration address space, - * of which the first 64 bytes are 
standardized as follows: - */ -#define PCI_VENDOR_ID 0x00 /* 16 bits */ -#define PCI_DEVICE_ID 0x02 /* 16 bits */ -#define PCI_COMMAND 0x04 /* 16 bits */ -#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ -#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ -#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ -#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ -#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ -#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ -#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ -#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ -#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ -#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ - -#define PCI_STATUS 0x06 /* 16 bits */ -#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ -#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ -#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ -#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ -#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ -#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ -#define PCI_STATUS_DEVSEL_FAST 0x000 -#define PCI_STATUS_DEVSEL_MEDIUM 0x200 -#define PCI_STATUS_DEVSEL_SLOW 0x400 -#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ -#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ -#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ -#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ -#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ - -#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 - revision */ -#define PCI_REVISION_ID 0x08 /* Revision ID */ -#define PCI_CLASS_PROG 0x09 /* Reg. 
Level Programming Interface */ -#define PCI_CLASS_DEVICE 0x0a /* Device class */ - -#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ -#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ -#define PCI_HEADER_TYPE 0x0e /* 8 bits */ -#define PCI_HEADER_TYPE_NORMAL 0 -#define PCI_HEADER_TYPE_BRIDGE 1 -#define PCI_HEADER_TYPE_CARDBUS 2 - -#define PCI_BIST 0x0f /* 8 bits */ -#define PCI_BIST_CODE_MASK 0x0f /* Return result */ -#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ -#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ - -/* - * Base addresses specify locations in memory or I/O space. - * Decoded size can be determined by writing a value of - * 0xffffffff to the register, and reading it back. Only - * 1 bits are decoded. - */ -#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ -#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ -#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ -#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ -#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ -#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ -#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ -#define PCI_BASE_ADDRESS_SPACE_IO 0x01 -#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 -#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 -#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ -#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ -#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ -#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? 
*/ -#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) -#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) -/* bit 1 is reserved if address_space = 1 */ - -/* Header type 0 (normal devices) */ -#define PCI_CARDBUS_CIS 0x28 -#define PCI_SUBSYSTEM_VENDOR_ID 0x2c -#define PCI_SUBSYSTEM_ID 0x2e -#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ -#define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) - -#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ - -/* 0x35-0x3b are reserved */ -#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ -#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ -#define PCI_MIN_GNT 0x3e /* 8 bits */ -#define PCI_MAX_LAT 0x3f /* 8 bits */ - -/* Header type 1 (PCI-to-PCI bridges) */ -#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ -#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ -#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ -#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ -#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ -#define PCI_IO_LIMIT 0x1d -#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ -#define PCI_IO_RANGE_TYPE_16 0x00 -#define PCI_IO_RANGE_TYPE_32 0x01 -#define PCI_IO_RANGE_MASK (~0x0fUL) -#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ -#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ -#define PCI_MEMORY_LIMIT 0x22 -#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL -#define PCI_MEMORY_RANGE_MASK (~0x0fUL) -#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ -#define PCI_PREF_MEMORY_LIMIT 0x26 -#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL -#define PCI_PREF_RANGE_TYPE_32 0x00 -#define PCI_PREF_RANGE_TYPE_64 0x01 -#define PCI_PREF_RANGE_MASK (~0x0fUL) -#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ -#define PCI_PREF_LIMIT_UPPER32 0x2c -#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ -#define 
PCI_IO_LIMIT_UPPER16 0x32 -/* 0x34 same as for htype 0 */ -/* 0x35-0x3b is reserved */ -#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ -/* 0x3c-0x3d are same as for htype 0 */ -#define PCI_BRIDGE_CONTROL 0x3e -#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ -#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ -#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ -#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ -#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ -#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ -#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ - -/* Header type 2 (CardBus bridges) */ -#define PCI_CB_CAPABILITY_LIST 0x14 -/* 0x15 reserved */ -#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ -#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ -#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ -#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ -#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ -#define PCI_CB_MEMORY_BASE_0 0x1c -#define PCI_CB_MEMORY_LIMIT_0 0x20 -#define PCI_CB_MEMORY_BASE_1 0x24 -#define PCI_CB_MEMORY_LIMIT_1 0x28 -#define PCI_CB_IO_BASE_0 0x2c -#define PCI_CB_IO_BASE_0_HI 0x2e -#define PCI_CB_IO_LIMIT_0 0x30 -#define PCI_CB_IO_LIMIT_0_HI 0x32 -#define PCI_CB_IO_BASE_1 0x34 -#define PCI_CB_IO_BASE_1_HI 0x36 -#define PCI_CB_IO_LIMIT_1 0x38 -#define PCI_CB_IO_LIMIT_1_HI 0x3a -#define PCI_CB_IO_RANGE_MASK (~0x03UL) -/* 0x3c-0x3d are same as for htype 0 */ -#define PCI_CB_BRIDGE_CONTROL 0x3e -#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ -#define PCI_CB_BRIDGE_CTL_SERR 0x02 -#define PCI_CB_BRIDGE_CTL_ISA 0x04 -#define PCI_CB_BRIDGE_CTL_VGA 0x08 -#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 -#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ -#define 
PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ -#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ -#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 -#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 -#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 -#define PCI_CB_SUBSYSTEM_ID 0x42 -#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ -/* 0x48-0x7f reserved */ - -/* Capability lists */ - -#define PCI_CAP_LIST_ID 0 /* Capability ID */ -#define PCI_CAP_ID_PM 0x01 /* Power Management */ -#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ -#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ -#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ -#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ -#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ -#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ -#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ -#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ -#define PCI_CAP_SIZEOF 4 - -/* Power Management Registers */ - -#define PCI_PM_PMC 2 /* PM Capabilities Register */ -#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ -#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ -#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ -#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ -#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxilliary power support mask */ -#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ -#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ -#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ -#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ -#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ -#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ -#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ -#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ -#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ 
-#define PCI_PM_CTRL 4 /* PM control and status register */ -#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ -#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ -#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ -#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ -#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ -#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ -#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ -#define PCI_PM_DATA_REGISTER 7 /* (??) */ -#define PCI_PM_SIZEOF 8 - -/* AGP registers */ - -#define PCI_AGP_VERSION 2 /* BCD version number */ -#define PCI_AGP_RFU 3 /* Rest of capability flags */ -#define PCI_AGP_STATUS 4 /* Status register */ -#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ -#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ -#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ -#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ -#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ -#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ -#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ -#define PCI_AGP_COMMAND 8 /* Control register */ -#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ -#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ -#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ -#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ -#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ -#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ -#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ -#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ -#define PCI_AGP_SIZEOF 12 - -/* Slot Identification */ - -#define 
PCI_SID_ESR 2 /* Expansion Slot Register */ -#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ -#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ -#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ - -/* Message Signalled Interrupts registers */ - -#define PCI_MSI_FLAGS 2 /* Various flags */ -#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ -#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ -#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ -#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ -#define PCI_MSI_RFU 3 /* Rest of capability flags */ -#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ -#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ -#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ -#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ - -/* CompactPCI Hotswap Register */ - -#define PCI_CHSWP_CSR 2 /* Control and Status Register */ -#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ -#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ -#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ -#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ -#define PCI_CHSWP_PI 0x30 /* Programming Interface */ -#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ -#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ - -/* PCI-X registers */ - -#define PCI_X_CMD 2 /* Modes & Features */ -#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ -#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ -#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ -#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_DEVFN 4 /* A copy of devfn. 
*/ -#define PCI_X_BUSNR 5 /* Bus segment number */ -#define PCI_X_STATUS 6 /* PCI-X capabilities */ -#define PCI_X_STATUS_64BIT 0x0001 /* 64-bit device */ -#define PCI_X_STATUS_133MHZ 0x0002 /* 133 MHz capable */ -#define PCI_X_STATUS_SPL_DISC 0x0004 /* Split Completion Discarded */ -#define PCI_X_STATUS_UNX_SPL 0x0008 /* Unexpected Split Completion */ -#define PCI_X_STATUS_COMPLEX 0x0010 /* Device Complexity */ -#define PCI_X_STATUS_MAX_READ 0x0060 /* Designed Maximum Memory Read Count */ -#define PCI_X_STATUS_MAX_SPLIT 0x0380 /* Design Max Outstanding Split Trans */ -#define PCI_X_STATUS_MAX_CUM 0x1c00 /* Designed Max Cumulative Read Size */ -#define PCI_X_STATUS_SPL_ERR 0x2000 /* Rcvd Split Completion Error Msg */ - -/* Include the ID list */ - -#include <xen/pci_ids.h> - -/* - * The PCI interface treats multi-function devices as independent - * devices. The slot/function address of each device is encoded - * in a single byte as follows: - * - * 7:3 = slot - * 2:0 = function - */ -#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) -#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) -#define PCI_FUNC(devfn) ((devfn) & 0x07) - -/* Ioctls for /proc/bus/pci/X/Y nodes. */ -#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8) -#define PCIIOC_CONTROLLER (PCIIOC_BASE | 0x00) /* Get controller for PCI device. */ -#define PCIIOC_MMAP_IS_IO (PCIIOC_BASE | 0x01) /* Set mmap state to I/O space. */ -#define PCIIOC_MMAP_IS_MEM (PCIIOC_BASE | 0x02) /* Set mmap state to MEM space. */ -#define PCIIOC_WRITE_COMBINE (PCIIOC_BASE | 0x03) /* Enable/disable write-combining. */ - -#include <xen/types.h> -#include <xen/config.h> -#include <xen/ioport.h> -#include <xen/list.h> -#include <xen/errno.h> - -/* File state for mmap()s on /proc/bus/pci/X/Y */ -enum pci_mmap_state { - pci_mmap_io, - pci_mmap_mem -}; - -/* This defines the direction arg to the DMA mapping routines. 
*/ -#define PCI_DMA_BIDIRECTIONAL 0 -#define PCI_DMA_TODEVICE 1 -#define PCI_DMA_FROMDEVICE 2 -#define PCI_DMA_NONE 3 - -#define DEVICE_COUNT_COMPATIBLE 4 -#define DEVICE_COUNT_IRQ 2 -#define DEVICE_COUNT_DMA 2 -#define DEVICE_COUNT_RESOURCE 12 - -#define PCI_ANY_ID (~0) - -#define pci_present pcibios_present - - -#define pci_for_each_dev_reverse(dev) \ - for(dev = pci_dev_g(pci_devices.prev); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.prev)) - -#define pci_for_each_bus(bus) \ - list_for_each_entry(bus, &pci_root_buses, node) - -/* - * The pci_dev structure is used to describe both PCI and ISAPnP devices. - */ -struct pci_dev { - struct list_head global_list; /* node in list of all PCI devices */ - struct list_head bus_list; /* node in per-bus list */ - struct pci_bus *bus; /* bus this device is on */ - struct pci_bus *subordinate; /* bus this device bridges to */ - - void *sysdata; /* hook for sys-specific extension */ - struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ - - unsigned int devfn; /* encoded device & function index */ - unsigned short vendor; - unsigned short device; - unsigned short subsystem_vendor; - unsigned short subsystem_device; - unsigned int class; /* 3 bytes: (base,sub,prog-if) */ - u8 hdr_type; /* PCI header type (`multi' flag masked out) */ - u8 rom_base_reg; /* which config register controls the ROM */ - - struct pci_driver *driver; /* which driver has allocated this device */ - void *driver_data; /* data private to the driver */ - u64 dma_mask; /* Mask of the bits of bus address this - device implements. Normally this is - 0xffffffff. You only need to change - this if your device has broken DMA - or supports 64-bit transfers. */ - - u32 current_state; /* Current operating state. In ACPI-speak, - this is D0-D3, D0 being fully functional, - and D3 being off. 
*/ - -#ifdef LINUX_2_6 - struct device dev; /* Generic device interface */ -#endif - - /* device is compatible with these IDs */ - unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE]; - unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE]; - - /* - * Instead of touching interrupt line and base address registers - * directly, use the values stored here. They might be different! - */ - unsigned int irq; - struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ - struct resource dma_resource[DEVICE_COUNT_DMA]; - struct resource irq_resource[DEVICE_COUNT_IRQ]; - - char name[90]; /* device name */ - char slot_name[8]; /* slot name */ - int active; /* ISAPnP: device is active */ - int ro; /* ISAPnP: read only */ - unsigned short regs; /* ISAPnP: supported registers */ - - /* These fields are used by common fixups */ - unsigned short transparent:1; /* Transparent PCI bridge */ - - int (*prepare)(struct pci_dev *dev); /* ISAPnP hooks */ - int (*activate)(struct pci_dev *dev); - int (*deactivate)(struct pci_dev *dev); -}; - -#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list) -#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list) - -/* - * For PCI devices, the region numbers are assigned this way: - * - * 0-5 standard PCI regions - * 6 expansion ROM - * 7-10 bridges: address space assigned to buses behind the bridge - */ - -#define PCI_ROM_RESOURCE 6 -#define PCI_BRIDGE_RESOURCES 7 -#define PCI_NUM_RESOURCES 11 - -#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ - -struct pci_bus { - struct list_head node; /* node in list of buses */ - struct pci_bus *parent; /* parent bus this bridge is on */ - struct list_head children; /* list of child buses */ - struct list_head devices; /* list of devices on this bus */ - struct pci_dev *self; /* bridge device as seen by parent */ - struct resource *resource[4]; /* address space routed to this bus */ - - struct pci_ops 
*ops; /* configuration access functions */ - void *sysdata; /* hook for sys-specific extension */ - struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */ - - unsigned char number; /* bus number */ - unsigned char primary; /* number of primary bridge */ - unsigned char secondary; /* number of secondary bridge */ - unsigned char subordinate; /* max number of subordinate buses */ - - char name[48]; - unsigned short vendor; - unsigned short device; - unsigned int serial; /* serial number */ - unsigned char pnpver; /* Plug & Play version */ - unsigned char productver; /* product version */ - unsigned char checksum; /* if zero - checksum passed */ - unsigned char pad1; -}; - -#define pci_bus_b(n) list_entry(n, struct pci_bus, node) - -extern struct list_head pci_root_buses; /* list of all known PCI buses */ -extern struct list_head pci_devices; /* list of all devices */ - -extern struct proc_dir_entry *proc_bus_pci_dir; -/* - * Error values that may be returned by PCI functions. 
- */ -#define PCIBIOS_SUCCESSFUL 0x00 -#define PCIBIOS_FUNC_NOT_SUPPORTED 0x81 -#define PCIBIOS_BAD_VENDOR_ID 0x83 -#define PCIBIOS_DEVICE_NOT_FOUND 0x86 -#define PCIBIOS_BAD_REGISTER_NUMBER 0x87 -#define PCIBIOS_SET_FAILED 0x88 -#define PCIBIOS_BUFFER_TOO_SMALL 0x89 - -/* Low-level architecture-dependent routines */ - -struct pci_ops { - int (*read_byte)(struct pci_dev *, int where, u8 *val); - int (*read_word)(struct pci_dev *, int where, u16 *val); - int (*read_dword)(struct pci_dev *, int where, u32 *val); - int (*write_byte)(struct pci_dev *, int where, u8 val); - int (*write_word)(struct pci_dev *, int where, u16 val); - int (*write_dword)(struct pci_dev *, int where, u32 val); -}; - -struct pbus_set_ranges_data -{ - unsigned long io_start, io_end; - unsigned long mem_start, mem_end; - unsigned long prefetch_start, prefetch_end; -}; - -struct pci_device_id { - unsigned int vendor, device; /* Vendor and device ID or PCI_ANY_ID */ - unsigned int subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */ - unsigned int class, class_mask; /* (class,subclass,prog-if) triplet */ - unsigned long driver_data; /* Data private to the driver */ -}; - -struct pci_driver { - struct list_head node; - char *name; - const struct pci_device_id *id_table; /* NULL if wants all devices */ - int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ - void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ - int (*save_state) (struct pci_dev *dev, u32 state); /* Save Device Context */ - int (*suspend) (struct pci_dev *dev, u32 state); /* Device suspended */ - int (*resume) (struct pci_dev *dev); /* Device woken up */ - int (*enable_wake) (struct pci_dev *dev, u32 state, int enable); /* Enable wake event */ -}; - -/** - * PCI_DEVICE - macro used to describe a specific pci device - * @vend: the 16 bit PCI Vendor ID - * @dev: the 16 bit PCI Device ID - * - * This macro is used to create a struct 
pci_device_id that matches a - * specific device. The subvendor and subdevice fields will be set to - * PCI_ANY_ID. - */ -#define PCI_DEVICE(vend,dev) \ - .vendor = (vend), .device = (dev), \ - .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID - -/** - * PCI_DEVICE_CLASS - macro used to describe a specific pci device class - * @dev_class: the class, subclass, prog-if triple for this device - * @dev_class_mask: the class mask for this device - * - * This macro is used to create a struct pci_device_id that matches a - * specific PCI class. The vendor, device, subvendor, and subdevice - * fields will be set to PCI_ANY_ID. - */ -#define PCI_DEVICE_CLASS(dev_class,dev_class_mask) \ - .class = (dev_class), .class_mask = (dev_class_mask), \ - .vendor = PCI_ANY_ID, .device = PCI_ANY_ID, \ - .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID - -/* these external functions are only available when PCI support is enabled */ -#ifdef CONFIG_PCI - -#define pci_for_each_dev(dev) \ - for(dev = pci_dev_g(pci_devices.next); dev != pci_dev_g(&pci_devices); dev = pci_dev_g(dev->global_list.next)) - -void pcibios_init(void); -void pcibios_fixup_bus(struct pci_bus *); -int pcibios_enable_device(struct pci_dev *, int mask); -char *pcibios_setup (char *str); - -/* Used only when drivers/pci/setup.c is used */ -void pcibios_align_resource(void *, struct resource *, - unsigned long, unsigned long); -void pcibios_update_resource(struct pci_dev *, struct resource *, - struct resource *, int); -void pcibios_update_irq(struct pci_dev *, int irq); -void pcibios_fixup_pbus_ranges(struct pci_bus *, struct pbus_set_ranges_data *); - -/* Backward compatibility, don't use in new code! 
*/ - -int pcibios_present(void); -int pcibios_read_config_byte (unsigned char bus, unsigned char dev_fn, - unsigned char where, unsigned char *val); -int pcibios_read_config_word (unsigned char bus, unsigned char dev_fn, - unsigned char where, unsigned short *val); -int pcibios_read_config_dword (unsigned char bus, unsigned char dev_fn, - unsigned char where, unsigned int *val); -int pcibios_write_config_byte (unsigned char bus, unsigned char dev_fn, - unsigned char where, unsigned char val); -int pcibios_write_config_word (unsigned char bus, unsigned char dev_fn, - unsigned char where, unsigned short val); -int pcibios_write_config_dword (unsigned char bus, unsigned char dev_fn, - unsigned char where, unsigned int val); -int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn); -int pcibios_find_device (unsigned short vendor, unsigned short dev_id, - unsigned short index, unsigned char *bus, - unsigned char *dev_fn); - -/* Generic PCI functions used internally */ - -void pci_init(void); -int pci_bus_exists(const struct list_head *list, int nr); -struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata); -struct pci_bus *pci_alloc_primary_bus(int bus); -struct pci_dev *pci_scan_slot(struct pci_dev *temp); -int pci_proc_attach_device(struct pci_dev *dev); -int pci_proc_detach_device(struct pci_dev *dev); -int pci_proc_attach_bus(struct pci_bus *bus); -int pci_proc_detach_bus(struct pci_bus *bus); -void pci_name_device(struct pci_dev *dev); -char *pci_class_name(u32 class); -void pci_read_bridge_bases(struct pci_bus *child); -struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); -int pci_setup_device(struct pci_dev *dev); -int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); - -/* Generic PCI functions exported to card drivers */ - -struct pci_dev *pci_find_device (unsigned int vendor, unsigned int device, const struct pci_dev *from); 
-struct pci_dev *pci_find_subsys (unsigned int vendor, unsigned int device, - unsigned int ss_vendor, unsigned int ss_device, - const struct pci_dev *from); -struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from); -struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn); -int pci_find_capability (struct pci_dev *dev, int cap); - -int pci_read_config_byte(struct pci_dev *dev, int where, u8 *val); -int pci_read_config_word(struct pci_dev *dev, int where, u16 *val); -int pci_read_config_dword(struct pci_dev *dev, int where, u32 *val); -int pci_write_config_byte(struct pci_dev *dev, int where, u8 val); -int pci_write_config_word(struct pci_dev *dev, int where, u16 val); -int pci_write_config_dword(struct pci_dev *dev, int where, u32 val); - -int pci_enable_device(struct pci_dev *dev); -int pci_enable_device_bars(struct pci_dev *dev, int mask); -void pci_disable_device(struct pci_dev *dev); -void pci_set_master(struct pci_dev *dev); -#define HAVE_PCI_SET_MWI -int pci_set_mwi(struct pci_dev *dev); -void pci_clear_mwi(struct pci_dev *dev); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask); -int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask); -int pci_assign_resource(struct pci_dev *dev, int i); - -/* Power management related routines */ -int pci_save_state(struct pci_dev *dev, u32 *buffer); -int pci_restore_state(struct pci_dev *dev, u32 *buffer); -int pci_set_power_state(struct pci_dev *dev, int state); -int pci_enable_wake(struct pci_dev *dev, u32 state, int enable); - -/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ - -int pci_claim_resource(struct pci_dev *, int); -void pci_assign_unassigned_resources(void); -void pdev_enable_device(struct pci_dev *); -void pdev_sort_resources(struct pci_dev *, struct resource_list *); -unsigned long pci_bridge_check_io(struct pci_dev *); -void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), - int (*)(struct pci_dev *, u8, u8)); -#define HAVE_PCI_REQ_REGIONS 2 -int 
pci_request_regions(struct pci_dev *, char *); -void pci_release_regions(struct pci_dev *); -int pci_request_region(struct pci_dev *, int, char *); -void pci_release_region(struct pci_dev *, int); - -/* New-style probing supporting hot-pluggable devices */ -int pci_register_driver(struct pci_driver *); -void pci_unregister_driver(struct pci_driver *); -void pci_insert_device(struct pci_dev *, struct pci_bus *); -void pci_remove_device(struct pci_dev *); -struct pci_driver *pci_dev_driver(const struct pci_dev *); -const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev); -void pci_announce_device_to_drivers(struct pci_dev *); -unsigned int pci_do_scan_bus(struct pci_bus *bus); -struct pci_bus * pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); - -#if 0 -/* xmem_cache style wrapper around pci_alloc_consistent() */ -struct pci_pool *pci_pool_create (const char *name, struct pci_dev *dev, - size_t size, size_t align, size_t allocation, int flags); -void pci_pool_destroy (struct pci_pool *pool); - -void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle); -void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr); -#endif - -#endif /* CONFIG_PCI */ - -/* - * If the system does not have PCI, clearly these return errors. Define - * these as simple inline functions to avoid hair in drivers. 
- */ - -#ifndef CONFIG_PCI -static inline int pcibios_present(void) { return 0; } -static inline int pcibios_find_class (unsigned int class_code, unsigned short index, unsigned char *bus, unsigned char *dev_fn) -{ return PCIBIOS_DEVICE_NOT_FOUND; } - -#define _PCI_NOP(o,s,t) \ - static inline int pcibios_##o##_config_##s (u8 bus, u8 dfn, u8 where, t val) \ - { return PCIBIOS_FUNC_NOT_SUPPORTED; } \ - static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \ - { return PCIBIOS_FUNC_NOT_SUPPORTED; } -#define _PCI_NOP_ALL(o,x) _PCI_NOP(o,byte,u8 x) \ - _PCI_NOP(o,word,u16 x) \ - _PCI_NOP(o,dword,u32 x) -_PCI_NOP_ALL(read, *) -_PCI_NOP_ALL(write,) - -static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int device, const struct pci_dev *from) -{ return NULL; } - -static inline struct pci_dev *pci_find_class(unsigned int class, const struct pci_dev *from) -{ return NULL; } - -static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn) -{ return NULL; } - -static inline struct pci_dev *pci_find_subsys(unsigned int vendor, unsigned int device, -unsigned int ss_vendor, unsigned int ss_device, const struct pci_dev *from) -{ return NULL; } - -static inline void pci_set_master(struct pci_dev *dev) { } -static inline int pci_enable_device_bars(struct pci_dev *dev, int mask) { return -EBUSY; } -static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } -static inline void pci_disable_device(struct pci_dev *dev) { } -static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; } -static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; } -static inline int pci_dac_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; } -static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;} -static inline int pci_register_driver(struct pci_driver *drv) { return 0;} -static inline void pci_unregister_driver(struct pci_driver *drv) { } -static 
inline int scsi_to_pci_dma_dir(unsigned char scsi_dir) { return scsi_dir; } -static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; } -static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; } - -/* Power management related routines */ -static inline int pci_save_state(struct pci_dev *dev, u32 *buffer) { return 0; } -static inline int pci_restore_state(struct pci_dev *dev, u32 *buffer) { return 0; } -static inline int pci_set_power_state(struct pci_dev *dev, int state) { return 0; } -static inline int pci_enable_wake(struct pci_dev *dev, u32 state, int enable) { return 0; } - -#define pci_for_each_dev(dev) \ - for(dev = NULL; 0; ) - -#else - -/* - * a helper function which helps ensure correct pci_driver - * setup and cleanup for commonly-encountered hotplug/modular cases - * - * This MUST stay in a header, as it checks for -DMODULE - */ -static inline int pci_module_init(struct pci_driver *drv) -{ - int rc = pci_register_driver (drv); - - if (rc > 0) - return 0; - - /* iff CONFIG_HOTPLUG and built into kernel, we should - * leave the driver around for future hotplug events. - * For the module case, a hotplug daemon of some sort - * should load a module in response to an insert event. 
*/ -#if defined(CONFIG_HOTPLUG) && !defined(MODULE) - if (rc == 0) - return 0; -#else - if (rc == 0) - rc = -ENODEV; -#endif - - /* if we get here, we need to clean up pci driver instance - * and return some sort of error */ - pci_unregister_driver (drv); - - return rc; -} - -#endif /* !CONFIG_PCI */ - -/* these helpers provide future and backwards compatibility - * for accessing popular PCI BAR info */ -#define pci_resource_start(dev,bar) ((dev)->resource[(bar)].start) -#define pci_resource_end(dev,bar) ((dev)->resource[(bar)].end) -#define pci_resource_flags(dev,bar) ((dev)->resource[(bar)].flags) -#define pci_resource_len(dev,bar) \ - ((pci_resource_start((dev),(bar)) == 0 && \ - pci_resource_end((dev),(bar)) == \ - pci_resource_start((dev),(bar))) ? 0 : \ - \ - (pci_resource_end((dev),(bar)) - \ - pci_resource_start((dev),(bar)) + 1)) - -/* Similar to the helpers above, these manipulate per-pci_dev - * driver-specific data. Currently stored as pci_dev::driver_data, - * a void pointer, but it is not present on older kernels. - */ -static inline void *pci_get_drvdata (struct pci_dev *pdev) -{ - return pdev->driver_data; -} - -static inline void pci_set_drvdata (struct pci_dev *pdev, void *data) -{ - pdev->driver_data = data; -} - -static inline char *pci_name(struct pci_dev *pdev) -{ - return pdev->slot_name; -} - -/* - * The world is not perfect and supplies us with broken PCI devices. - * For at least a part of these bugs we need a work-around, so both - * generic (drivers/pci/quirks.c) and per-architecture code can define - * fixup hooks to be called for particular buggy devices. 
- */ - -struct pci_fixup { - int pass; - u16 vendor, device; /* You can use PCI_ANY_ID here of course */ - void (*hook)(struct pci_dev *dev); -}; - -extern struct pci_fixup pcibios_fixups[]; - -#define PCI_FIXUP_HEADER 1 /* Called immediately after reading configuration header */ -#define PCI_FIXUP_FINAL 2 /* Final phase of device fixups */ - -void pci_fixup_device(int pass, struct pci_dev *dev); - -extern int pci_pci_problems; -#define PCIPCI_FAIL 1 -#define PCIPCI_TRITON 2 -#define PCIPCI_NATOMA 4 -#define PCIPCI_VIAETBF 8 -#define PCIPCI_VSFX 16 -#define PCIPCI_ALIMAGIK 32 - -#endif /* LINUX_PCI_H */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 6f64e0c057..dd4b95b1dc 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -58,6 +58,8 @@ int init_event_channels(struct domain *d); void destroy_event_channels(struct domain *d); int init_exec_domain_event_channels(struct exec_domain *ed); +#define CPUMAP_RUNANYWHERE 0xFFFFFFFF + struct exec_domain { int id; @@ -84,6 +86,8 @@ struct exec_domain atomic_t pausecnt; + cpumap_t cpumap; /* which cpus this domain can run on */ + struct arch_exec_domain arch; }; diff --git a/xen/include/xen/slab.h b/xen/include/xen/slab.h index c4c8231f78..a3a7daf7d6 100644 --- a/xen/include/xen/slab.h +++ b/xen/include/xen/slab.h @@ -3,12 +3,7 @@ #define __SLAB_H__ #include <xen/config.h> - -#ifdef __ARCH_HAS_SLAB_ALLOCATOR - -#include <asm/slab.h> - -#else +#include <xen/mm.h> /* Allocate space for typed object. */ #define xmalloc(_type) ((_type *)_xmalloc(sizeof(_type), __alignof__(_type))) @@ -32,6 +27,4 @@ static inline void *_xmalloc_array(size_t size, size_t align, size_t num) return _xmalloc(size * num, align); } -#endif /* __ARCH_HAS_SLAB_ALLOCATOR */ - #endif /* __SLAB_H__ */ |