diff options
author | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2004-05-13 10:51:36 +0000 |
---|---|---|
committer | iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> | 2004-05-13 10:51:36 +0000 |
commit | 34907c0472b945d27b9763363892248f72446d44 (patch) | |
tree | 572ed34b90c849da2f7b9a27ae7844babc666ebb /xen | |
parent | c4572920faddad16c0637fed15cb7e5559def08d (diff) | |
parent | 462dfb2f90b56afcdebe67d1f0302c059e0bbb1b (diff) | |
download | xen-34907c0472b945d27b9763363892248f72446d44.tar.gz xen-34907c0472b945d27b9763363892248f72446d44.tar.bz2 xen-34907c0472b945d27b9763363892248f72446d44.zip |
bitkeeper revision 1.903 (40a35338ZpUgNaqcF2wHoseyC85T4Q)
manual merge
Diffstat (limited to 'xen')
-rw-r--r-- | xen/arch/i386/entry.S | 4 | ||||
-rw-r--r-- | xen/arch/i386/io_apic.c | 136 | ||||
-rw-r--r-- | xen/arch/i386/irq.c | 6 | ||||
-rw-r--r-- | xen/arch/i386/process.c | 22 | ||||
-rw-r--r-- | xen/common/domain.c | 17 | ||||
-rw-r--r-- | xen/common/kernel.c | 44 | ||||
-rw-r--r-- | xen/common/memory.c | 14 | ||||
-rw-r--r-- | xen/common/physdev.c | 19 | ||||
-rw-r--r-- | xen/common/schedule.c | 2 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/physdev.h | 62 |
10 files changed, 247 insertions, 79 deletions
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index 1fd1d9ebb0..b522f4f754 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -245,6 +245,10 @@ restore_all_guest: movsl movsl movsl + # Third, reenable interrupts. They will definitely be reenabled by IRET + # in any case. They could be disabled here if we are returning from an + # interrupt. We need interrupts enabled if we take a fault. + sti # Finally, restore guest registers -- faults will cause failsafe popl %ebx popl %ecx diff --git a/xen/arch/i386/io_apic.c b/xen/arch/i386/io_apic.c index 3f0c81be7a..7c307922b3 100644 --- a/xen/arch/i386/io_apic.c +++ b/xen/arch/i386/io_apic.c @@ -208,7 +208,11 @@ static void set_ioapic_affinity (unsigned int irq, unsigned long mask) spin_unlock_irqrestore(&ioapic_lock, flags); } -#if CONFIG_SMP +/* + * In new I/O model, the interrupt is pinned to the CPU of the first + * device-driver domain that attaches. Dynamic balancing is pointless. + */ +#if defined(CONFIG_SMP) && !defined(NO_DEVICES_IN_XEN) typedef struct { unsigned int cpu; @@ -220,8 +224,6 @@ static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned extern unsigned long irq_affinity [NR_IRQS]; -#endif - #define IDLE_ENOUGH(cpu,now) \ (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) @@ -256,7 +258,6 @@ inside: static inline void balance_irq(int irq) { -#if CONFIG_SMP irq_balance_t *entry = irq_balance + irq; unsigned long now = jiffies; @@ -272,9 +273,14 @@ static inline void balance_irq(int irq) entry->cpu = move(entry->cpu, allowed_mask, now, random_number); set_ioapic_affinity(irq, apicid_to_phys_cpu_present(entry->cpu)); } -#endif } +#else + +#define balance_irq(_irq) ((void)0) + +#endif + /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to * specific CPU-side IRQs. 
@@ -883,6 +889,7 @@ void __init UNEXPECTED_IO_APIC(void) void __init print_IO_APIC(void) { +#ifndef NDEBUG int apic, i; struct IO_APIC_reg_00 reg_00; struct IO_APIC_reg_01 reg_01; @@ -1019,10 +1026,12 @@ void __init print_IO_APIC(void) } printk(KERN_INFO ".................................... done.\n"); - - return; +#endif } + +#if 0 /* Maybe useful for debugging, but not currently used anywhere. */ + static void print_APIC_bitfield (int base) { unsigned int v; @@ -1041,6 +1050,7 @@ static void print_APIC_bitfield (int base) } } + void /*__init*/ print_local_APIC(void * dummy) { unsigned int v, ver, maxlvt; @@ -1156,6 +1166,9 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } +#endif /* 0 */ + + static void __init enable_IO_APIC(void) { struct IO_APIC_reg_01 reg_01; @@ -1874,7 +1887,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low); if (edge_level) { - irq_desc[irq].handler = &ioapic_level_irq_type; + irq_desc[irq].handler = &ioapic_level_irq_type; } else { irq_desc[irq].handler = &ioapic_edge_irq_type; } @@ -1893,3 +1906,110 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a } #endif /*CONFIG_ACPI_BOOT*/ + +extern char opt_leveltrigger[], opt_edgetrigger[]; + +static int __init ioapic_trigger_setup(void) +{ + char *p; + irq_desc_t *desc; + long irq; + + p = opt_leveltrigger; + while ( *p != '\0' ) + { + irq = simple_strtol(p, &p, 10); + if ( (irq <= 0) || (irq >= NR_IRQS) ) + { + printk("IRQ '%ld' out of range in level-trigger list '%s'\n", + irq, opt_leveltrigger); + break; + } + + printk("Forcing IRQ %ld to level-trigger: ", irq); + + desc = &irq_desc[irq]; + spin_lock_irq(&desc->lock); + + if ( desc->handler == &ioapic_level_irq_type ) + { + printk("already level-triggered (no force applied).\n"); + } + else if ( desc->handler != &ioapic_edge_irq_type ) + { + printk("cannot force 
(can only force IO-APIC-edge IRQs).\n"); + } + else + { + desc->handler = &ioapic_level_irq_type; + __mask_IO_APIC_irq(irq); + __level_IO_APIC_irq(irq); + printk("done.\n"); + } + + spin_unlock_irq(&desc->lock); + + if ( *p == '\0' ) + break; + + if ( *p != ',' ) + { + printk("Unexpected character '%c' in level-trigger list '%s'\n", + *p, opt_leveltrigger); + break; + } + + p++; + } + + p = opt_edgetrigger; + while ( *p != '\0' ) + { + irq = simple_strtol(p, &p, 10); + if ( (irq <= 0) || (irq >= NR_IRQS) ) + { + printk("IRQ '%ld' out of range in edge-trigger list '%s'\n", + irq, opt_edgetrigger); + break; + } + + printk("Forcing IRQ %ld to edge-trigger: ", irq); + + desc = &irq_desc[irq]; + spin_lock_irq(&desc->lock); + + if ( desc->handler == &ioapic_edge_irq_type ) + { + printk("already edge-triggered (no force applied).\n"); + } + else if ( desc->handler != &ioapic_level_irq_type ) + { + printk("cannot force (can only force IO-APIC-level IRQs).\n"); + } + else + { + desc->handler = &ioapic_edge_irq_type; + __edge_IO_APIC_irq(irq); + desc->status |= IRQ_PENDING; /* may have lost a masked edge */ + printk("done.\n"); + } + + spin_unlock_irq(&desc->lock); + + if ( *p == '\0' ) + break; + + if ( *p != ',' ) + { + printk("Unexpected character '%c' in edge-trigger list '%s'\n", + *p, opt_edgetrigger); + break; + } + + p++; + } + + return 0; +} + +__initcall(ioapic_trigger_setup); diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c index d3eaf6af12..5b16bb0e63 100644 --- a/xen/arch/i386/irq.c +++ b/xen/arch/i386/irq.c @@ -39,6 +39,7 @@ #include <xen/delay.h> #include <xen/timex.h> #include <xen/perfc.h> +#include <asm/smpboot.h> /* * Linux has a controller-independent x86 interrupt architecture. @@ -1034,6 +1035,11 @@ int pirq_guest_bind(struct task_struct *p, int irq, int will_share) desc->status |= IRQ_GUEST; desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); desc->handler->startup(irq); + + /* Attempt to bind the interrupt target to the correct CPU. 
*/ + if ( desc->handler->set_affinity != NULL ) + desc->handler->set_affinity( + irq, apicid_to_phys_cpu_present(p->processor)); } else if ( !will_share || !action->shareable ) { diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c index 29c4fde8cb..408daa1f14 100644 --- a/xen/arch/i386/process.c +++ b/xen/arch/i386/process.c @@ -270,7 +270,7 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p) tss->ss1 = next->guestos_ss; /* Maybe switch the debug registers. */ - if ( next->debugreg[7] ) + if ( unlikely(next->debugreg[7]) ) { loaddebug(next, 0); loaddebug(next, 1); @@ -280,10 +280,17 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p) loaddebug(next, 6); loaddebug(next, 7); } + + /* Switch page tables. */ + write_ptbase(&next_p->mm); + tlb_clocktick(); } - if ( ( prev_p->io_bitmap != NULL ) || ( next_p->io_bitmap != NULL ) ) { - if ( next_p->io_bitmap != NULL ) { + if ( unlikely(prev_p->io_bitmap != NULL) || + unlikely(next_p->io_bitmap != NULL) ) + { + if ( next_p->io_bitmap != NULL ) + { /* Copy in the appropriate parts of the IO bitmap. We use the * selector to copy only the interesting parts of the bitmap. */ @@ -314,7 +321,9 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p) tss->bitmap = IO_BITMAP_OFFSET; - } else { + } + else + { /* In this case, we're switching FROM a task with IO port access, * to a task that doesn't use the IO bitmap. We set any TSS bits * that might have been cleared, ready for future use. */ @@ -332,11 +341,6 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p) tss->bitmap = INVALID_IO_BITMAP_OFFSET; } } - - - /* Switch page tables. 
*/ - write_ptbase(&next_p->mm); - tlb_clocktick(); set_current(next_p); diff --git a/xen/common/domain.c b/xen/common/domain.c index b3a2e0ffa8..d55f65d5ae 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -222,10 +222,6 @@ void __kill_domain(struct task_struct *p) *pp = p->next_hash; write_unlock_irqrestore(&tasklist_lock, flags); - if ( atomic_read(&p->refcnt) >2 ) - DPRINTK("Domain refcnt>1 so kil deferred. Missing put_task? p=%p cur=%p cnt=%d\n",p,current,atomic_read(&p->refcnt)); - - if ( p == current ) { __enter_scheduler(); @@ -420,7 +416,16 @@ void free_all_dom_mem(struct task_struct *p) INIT_LIST_HEAD(&zombies); - if ( p->mm.shadow_mode ) shadow_mode_disable(p); + /* + * If we're executing the idle task then we may still be running over the + * dead domain's page tables. We'd better fix that before freeing them! + */ + if ( is_idle_task(current) ) + write_ptbase(&current->mm); + + /* Exit shadow mode before deconstructing final guest page table. */ + if ( p->mm.shadow_mode ) + shadow_mode_disable(p); /* STEP 1. Drop the in-use reference to the page-table base. */ put_page_and_type(&frame_table[pagetable_val(p->mm.pagetable) >> @@ -1078,7 +1083,7 @@ int construct_dom0(struct task_struct *p, set_bit(PF_CONSTRUCTED, &p->flags); -#if 0 // XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) +#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! 
(but useful for testing so leave) */ shadow_mode_enable(&p->mm, SHM_test); #endif diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 76747fa981..3e8e617cb3 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -75,31 +75,37 @@ unsigned char opt_pdb[10] = "none"; unsigned int opt_tbuf_size = 1; /* opt_sched: scheduler - default to Borrowed Virtual Time */ char opt_sched[10] = "bvt"; -/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0 - * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */ -char opt_physdev_dom0_hide[20] = ""; +/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */ +/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */ +char opt_physdev_dom0_hide[200] = ""; +/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */ +/* level- or edge-triggered. */ +/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */ +char opt_leveltrigger[30] = "", opt_edgetrigger[30] = ""; static struct { unsigned char *name; enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL } type; void *var; } opts[] = { - { "console", OPT_STR, &opt_console }, - { "ser_baud", OPT_UINT, &opt_ser_baud }, - { "com1", OPT_STR, &opt_com1 }, - { "com2", OPT_STR, &opt_com2 }, - { "dom0_mem", OPT_UINT, &opt_dom0_mem }, - { "ifname", OPT_STR, &opt_ifname }, - { "noht", OPT_BOOL, &opt_noht }, - { "noacpi", OPT_BOOL, &opt_noacpi }, - { "nosmp", OPT_BOOL, &opt_nosmp }, - { "noreboot", OPT_BOOL, &opt_noreboot }, - { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables }, - { "watchdog", OPT_BOOL, &opt_watchdog }, - { "pdb", OPT_STR, &opt_pdb }, - { "tbuf_size", OPT_UINT, &opt_tbuf_size }, - { "sched", OPT_STR, &opt_sched }, - { "physdev_dom0_hide",OPT_STR, &opt_physdev_dom0_hide }, + { "console", OPT_STR, &opt_console }, + { "ser_baud", OPT_UINT, &opt_ser_baud }, + { "com1", OPT_STR, &opt_com1 }, + { "com2", OPT_STR, &opt_com2 }, + { "dom0_mem", OPT_UINT, &opt_dom0_mem }, + { "ifname", OPT_STR, &opt_ifname }, + { "noht", OPT_BOOL, 
&opt_noht }, + { "noacpi", OPT_BOOL, &opt_noacpi }, + { "nosmp", OPT_BOOL, &opt_nosmp }, + { "noreboot", OPT_BOOL, &opt_noreboot }, + { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables }, + { "watchdog", OPT_BOOL, &opt_watchdog }, + { "pdb", OPT_STR, &opt_pdb }, + { "tbuf_size", OPT_UINT, &opt_tbuf_size }, + { "sched", OPT_STR, &opt_sched }, + { "physdev_dom0_hide", OPT_STR, &opt_physdev_dom0_hide }, + { "leveltrigger", OPT_STR, &opt_leveltrigger }, + { "edgetrigger", OPT_STR, &opt_edgetrigger }, { NULL, 0, NULL } }; diff --git a/xen/common/memory.c b/xen/common/memory.c index 99931bb515..ddb2778bc3 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -194,7 +194,6 @@ static struct { */ void __init init_frametable(unsigned long nr_pages) { - int i; unsigned long mfn; memset(percpu_info, 0, sizeof(percpu_info)); @@ -209,20 +208,19 @@ void __init init_frametable(unsigned long nr_pages) INIT_LIST_HEAD(&free_list); free_pfns = 0; - /* so that we can map them latter, set the ownership of pages - belonging to the machine_to_phys_mapping to CPU0 idle task */ - - mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT; - /* initialise to a magic of 0x55555555 so easier to spot bugs later */ memset( machine_to_phys_mapping, 0x55, 4*1024*1024 ); /* The array is sized for a 4GB machine regardless of actuall mem size. This costs 4MB -- may want to fix some day */ - for(i=0;i<1024*1024;i+=1024,mfn++) + + /* Pin the ownership of the MP table so that DOM0 can map it later. 
*/ + for ( mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT; + mfn < virt_to_phys((void *)RDWR_MPT_VIRT_END)>>PAGE_SHIFT; + mfn++ ) { frame_table[mfn].count_and_flags = 1 | PGC_allocated; - frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; // anything non RW + frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; /* non-RW type */ frame_table[mfn].u.domain = &idle0_task; } } diff --git a/xen/common/physdev.c b/xen/common/physdev.c index d15183cb6e..61b7b22cb2 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -634,9 +634,10 @@ static long pci_probe_root_buses(u32 *busmask) */ long do_physdev_op(physdev_op_t *uop) { - phys_dev_t *pdev; + phys_dev_t *pdev; physdev_op_t op; - long ret; + long ret; + int irq; if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) ) return -EFAULT; @@ -674,10 +675,22 @@ long do_physdev_op(physdev_op_t *uop) ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask); break; - case PHYSDEVOP_UNMASK_IRQ: + case PHYSDEVOP_IRQ_UNMASK_NOTIFY: ret = pirq_guest_unmask(current); break; + case PHYSDEVOP_IRQ_STATUS_QUERY: + irq = op.u.irq_status_query.irq; + ret = -EINVAL; + if ( (irq < 0) || (irq >= NR_IRQS) ) + break; + op.u.irq_status_query.flags = 0; + /* Edge-triggered interrupts don't need an explicit unmask downcall. */ + if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL ) + op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY; + ret = 0; + break; + default: ret = -EINVAL; break; diff --git a/xen/common/schedule.c b/xen/common/schedule.c index dfacb65bd0..5aa12bf8cb 100644 --- a/xen/common/schedule.c +++ b/xen/common/schedule.c @@ -520,7 +520,7 @@ asmlinkage void __enter_scheduler(void) /* Mark a timer event for the newly-scheduled domain. 
*/ if ( !is_idle_task(next) ) - evtchn_set_pending(next, VIRQ_TIMER); + send_guest_virq(next, VIRQ_TIMER); schedule_tail(next); diff --git a/xen/include/hypervisor-ifs/physdev.h b/xen/include/hypervisor-ifs/physdev.h index 914a555981..50372bf2be 100644 --- a/xen/include/hypervisor-ifs/physdev.h +++ b/xen/include/hypervisor-ifs/physdev.h @@ -14,44 +14,55 @@ #define PHYSDEVOP_PCI_CFGREG_WRITE 1 #define PHYSDEVOP_PCI_INITIALISE_DEVICE 2 #define PHYSDEVOP_PCI_PROBE_ROOT_BUSES 3 -#define PHYSDEVOP_UNMASK_IRQ 4 +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 +#define PHYSDEVOP_IRQ_STATUS_QUERY 5 /* Read from PCI configuration space. */ -typedef struct physdevop_pci_cfgreg_read_st -{ - int bus; /* IN */ - int dev; /* IN */ - int func; /* IN */ - int reg; /* IN */ - int len; /* IN */ - u32 value; /* OUT */ +typedef struct { + /* IN */ + int bus; + int dev; + int func; + int reg; + int len; + /* OUT */ + u32 value; } physdevop_pci_cfgreg_read_t; /* Write to PCI configuration space. */ -typedef struct physdevop_pci_cfgreg_write_st -{ - int bus; /* IN */ - int dev; /* IN */ - int func; /* IN */ - int reg; /* IN */ - int len; /* IN */ - u32 value; /* IN */ +typedef struct { + /* IN */ + int bus; + int dev; + int func; + int reg; + int len; + u32 value; } physdevop_pci_cfgreg_write_t; /* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). */ -typedef struct physdevop_pci_initialise_device_st -{ - int bus; /* IN */ - int dev; /* IN */ - int func; /* IN */ +typedef struct { + /* IN */ + int bus; + int dev; + int func; } physdevop_pci_initialise_device_t; /* Find the root buses for subsequent scanning. */ -typedef struct physdevop_pci_probe_root_buses_st -{ - u32 busmask[256/32]; /* OUT */ +typedef struct { + /* OUT */ + u32 busmask[256/32]; } physdevop_pci_probe_root_buses_t; +typedef struct { + /* IN */ + int irq; + /* OUT */ +/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? 
*/ +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0) + unsigned long flags; +} physdevop_irq_status_query_t; + typedef struct _physdev_op_st { unsigned long cmd; @@ -61,6 +72,7 @@ typedef struct _physdev_op_st physdevop_pci_cfgreg_write_t pci_cfgreg_write; physdevop_pci_initialise_device_t pci_initialise_device; physdevop_pci_probe_root_buses_t pci_probe_root_buses; + physdevop_irq_status_query_t irq_status_query; } u; } physdev_op_t; |