about summary refs log tree commit diff stats
path: root/xen
diff options
context:
space:
mode:
author iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> 2004-05-13 10:51:36 +0000
committer iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk> 2004-05-13 10:51:36 +0000
commit 34907c0472b945d27b9763363892248f72446d44 (patch)
tree 572ed34b90c849da2f7b9a27ae7844babc666ebb /xen
parent c4572920faddad16c0637fed15cb7e5559def08d (diff)
parent 462dfb2f90b56afcdebe67d1f0302c059e0bbb1b (diff)
download xen-34907c0472b945d27b9763363892248f72446d44.tar.gz
xen-34907c0472b945d27b9763363892248f72446d44.tar.bz2
xen-34907c0472b945d27b9763363892248f72446d44.zip
bitkeeper revision 1.903 (40a35338ZpUgNaqcF2wHoseyC85T4Q)
manual merge
Diffstat (limited to 'xen')
-rw-r--r-- xen/arch/i386/entry.S 4
-rw-r--r-- xen/arch/i386/io_apic.c 136
-rw-r--r-- xen/arch/i386/irq.c 6
-rw-r--r-- xen/arch/i386/process.c 22
-rw-r--r-- xen/common/domain.c 17
-rw-r--r-- xen/common/kernel.c 44
-rw-r--r-- xen/common/memory.c 14
-rw-r--r-- xen/common/physdev.c 19
-rw-r--r-- xen/common/schedule.c 2
-rw-r--r-- xen/include/hypervisor-ifs/physdev.h 62
10 files changed, 247 insertions, 79 deletions
diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S
index 1fd1d9ebb0..b522f4f754 100644
--- a/xen/arch/i386/entry.S
+++ b/xen/arch/i386/entry.S
@@ -245,6 +245,10 @@ restore_all_guest:
movsl
movsl
movsl
+ # Third, reenable interrupts. They will definitely be reenabled by IRET
+ # in any case. They could be disabled here if we are returning from an
+ # interrupt. We need interrupts enabled if we take a fault.
+ sti
# Finally, restore guest registers -- faults will cause failsafe
popl %ebx
popl %ecx
diff --git a/xen/arch/i386/io_apic.c b/xen/arch/i386/io_apic.c
index 3f0c81be7a..7c307922b3 100644
--- a/xen/arch/i386/io_apic.c
+++ b/xen/arch/i386/io_apic.c
@@ -208,7 +208,11 @@ static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#if CONFIG_SMP
+/*
+ * In new I/O model, the interrupt is pinned to the CPU of the first
+ * device-driver domain that attaches. Dynamic balancing is pointless.
+ */
+#if defined(CONFIG_SMP) && !defined(NO_DEVICES_IN_XEN)
typedef struct {
unsigned int cpu;
@@ -220,8 +224,6 @@ static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned
extern unsigned long irq_affinity [NR_IRQS];
-#endif
-
#define IDLE_ENOUGH(cpu,now) \
(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
@@ -256,7 +258,6 @@ inside:
static inline void balance_irq(int irq)
{
-#if CONFIG_SMP
irq_balance_t *entry = irq_balance + irq;
unsigned long now = jiffies;
@@ -272,9 +273,14 @@ static inline void balance_irq(int irq)
entry->cpu = move(entry->cpu, allowed_mask, now, random_number);
set_ioapic_affinity(irq, apicid_to_phys_cpu_present(entry->cpu));
}
-#endif
}
+#else
+
+#define balance_irq(_irq) ((void)0)
+
+#endif
+
/*
* support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
* specific CPU-side IRQs.
@@ -883,6 +889,7 @@ void __init UNEXPECTED_IO_APIC(void)
void __init print_IO_APIC(void)
{
+#ifndef NDEBUG
int apic, i;
struct IO_APIC_reg_00 reg_00;
struct IO_APIC_reg_01 reg_01;
@@ -1019,10 +1026,12 @@ void __init print_IO_APIC(void)
}
printk(KERN_INFO ".................................... done.\n");
-
- return;
+#endif
}
+
+#if 0 /* Maybe useful for debugging, but not currently used anywhere. */
+
static void print_APIC_bitfield (int base)
{
unsigned int v;
@@ -1041,6 +1050,7 @@ static void print_APIC_bitfield (int base)
}
}
+
void /*__init*/ print_local_APIC(void * dummy)
{
unsigned int v, ver, maxlvt;
@@ -1156,6 +1166,9 @@ void /*__init*/ print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
+#endif /* 0 */
+
+
static void __init enable_IO_APIC(void)
{
struct IO_APIC_reg_01 reg_01;
@@ -1874,7 +1887,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq, edge_level, active_high_low);
if (edge_level) {
- irq_desc[irq].handler = &ioapic_level_irq_type;
+ irq_desc[irq].handler = &ioapic_level_irq_type;
} else {
irq_desc[irq].handler = &ioapic_edge_irq_type;
}
@@ -1893,3 +1906,110 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
}
#endif /*CONFIG_ACPI_BOOT*/
+
+extern char opt_leveltrigger[], opt_edgetrigger[];
+
+static int __init ioapic_trigger_setup(void)
+{
+ char *p;
+ irq_desc_t *desc;
+ long irq;
+
+ p = opt_leveltrigger;
+ while ( *p != '\0' )
+ {
+ irq = simple_strtol(p, &p, 10);
+ if ( (irq <= 0) || (irq >= NR_IRQS) )
+ {
+ printk("IRQ '%ld' out of range in level-trigger list '%s'\n",
+ irq, opt_leveltrigger);
+ break;
+ }
+
+ printk("Forcing IRQ %ld to level-trigger: ", irq);
+
+ desc = &irq_desc[irq];
+ spin_lock_irq(&desc->lock);
+
+ if ( desc->handler == &ioapic_level_irq_type )
+ {
+ printk("already level-triggered (no force applied).\n");
+ }
+ else if ( desc->handler != &ioapic_edge_irq_type )
+ {
+ printk("cannot force (can only force IO-APIC-edge IRQs).\n");
+ }
+ else
+ {
+ desc->handler = &ioapic_level_irq_type;
+ __mask_IO_APIC_irq(irq);
+ __level_IO_APIC_irq(irq);
+ printk("done.\n");
+ }
+
+ spin_unlock_irq(&desc->lock);
+
+ if ( *p == '\0' )
+ break;
+
+ if ( *p != ',' )
+ {
+ printk("Unexpected character '%c' in level-trigger list '%s'\n",
+ *p, opt_leveltrigger);
+ break;
+ }
+
+ p++;
+ }
+
+ p = opt_edgetrigger;
+ while ( *p != '\0' )
+ {
+ irq = simple_strtol(p, &p, 10);
+ if ( (irq <= 0) || (irq >= NR_IRQS) )
+ {
+ printk("IRQ '%ld' out of range in edge-trigger list '%s'\n",
+ irq, opt_edgetrigger);
+ break;
+ }
+
+ printk("Forcing IRQ %ld to edge-trigger: ", irq);
+
+ desc = &irq_desc[irq];
+ spin_lock_irq(&desc->lock);
+
+ if ( desc->handler == &ioapic_edge_irq_type )
+ {
+ printk("already edge-triggered (no force applied).\n");
+ }
+ else if ( desc->handler != &ioapic_level_irq_type )
+ {
+ printk("cannot force (can only force IO-APIC-level IRQs).\n");
+ }
+ else
+ {
+ desc->handler = &ioapic_edge_irq_type;
+ __edge_IO_APIC_irq(irq);
+ desc->status |= IRQ_PENDING; /* may have lost a masked edge */
+ printk("done.\n");
+ }
+
+ spin_unlock_irq(&desc->lock);
+
+ if ( *p == '\0' )
+ break;
+
+ if ( *p != ',' )
+ {
+ printk("Unexpected character '%c' in edge-trigger list '%s'\n",
+ *p, opt_edgetrigger);
+ break;
+ }
+
+ p++;
+ }
+
+ return 0;
+}
+
+__initcall(ioapic_trigger_setup);
diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c
index d3eaf6af12..5b16bb0e63 100644
--- a/xen/arch/i386/irq.c
+++ b/xen/arch/i386/irq.c
@@ -39,6 +39,7 @@
#include <xen/delay.h>
#include <xen/timex.h>
#include <xen/perfc.h>
+#include <asm/smpboot.h>
/*
* Linux has a controller-independent x86 interrupt architecture.
@@ -1034,6 +1035,11 @@ int pirq_guest_bind(struct task_struct *p, int irq, int will_share)
desc->status |= IRQ_GUEST;
desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
desc->handler->startup(irq);
+
+ /* Attempt to bind the interrupt target to the correct CPU. */
+ if ( desc->handler->set_affinity != NULL )
+ desc->handler->set_affinity(
+ irq, apicid_to_phys_cpu_present(p->processor));
}
else if ( !will_share || !action->shareable )
{
diff --git a/xen/arch/i386/process.c b/xen/arch/i386/process.c
index 29c4fde8cb..408daa1f14 100644
--- a/xen/arch/i386/process.c
+++ b/xen/arch/i386/process.c
@@ -270,7 +270,7 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
tss->ss1 = next->guestos_ss;
/* Maybe switch the debug registers. */
- if ( next->debugreg[7] )
+ if ( unlikely(next->debugreg[7]) )
{
loaddebug(next, 0);
loaddebug(next, 1);
@@ -280,10 +280,17 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
loaddebug(next, 6);
loaddebug(next, 7);
}
+
+ /* Switch page tables. */
+ write_ptbase(&next_p->mm);
+ tlb_clocktick();
}
- if ( ( prev_p->io_bitmap != NULL ) || ( next_p->io_bitmap != NULL ) ) {
- if ( next_p->io_bitmap != NULL ) {
+ if ( unlikely(prev_p->io_bitmap != NULL) ||
+ unlikely(next_p->io_bitmap != NULL) )
+ {
+ if ( next_p->io_bitmap != NULL )
+ {
/* Copy in the appropriate parts of the IO bitmap. We use the
* selector to copy only the interesting parts of the bitmap. */
@@ -314,7 +321,9 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
tss->bitmap = IO_BITMAP_OFFSET;
- } else {
+ }
+ else
+ {
/* In this case, we're switching FROM a task with IO port access,
* to a task that doesn't use the IO bitmap. We set any TSS bits
* that might have been cleared, ready for future use. */
@@ -332,11 +341,6 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
tss->bitmap = INVALID_IO_BITMAP_OFFSET;
}
}
-
-
- /* Switch page tables. */
- write_ptbase(&next_p->mm);
- tlb_clocktick();
set_current(next_p);
diff --git a/xen/common/domain.c b/xen/common/domain.c
index b3a2e0ffa8..d55f65d5ae 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -222,10 +222,6 @@ void __kill_domain(struct task_struct *p)
*pp = p->next_hash;
write_unlock_irqrestore(&tasklist_lock, flags);
- if ( atomic_read(&p->refcnt) >2 )
- DPRINTK("Domain refcnt>1 so kil deferred. Missing put_task? p=%p cur=%p cnt=%d\n",p,current,atomic_read(&p->refcnt));
-
-
if ( p == current )
{
__enter_scheduler();
@@ -420,7 +416,16 @@ void free_all_dom_mem(struct task_struct *p)
INIT_LIST_HEAD(&zombies);
- if ( p->mm.shadow_mode ) shadow_mode_disable(p);
+ /*
+ * If we're executing the idle task then we may still be running over the
+ * dead domain's page tables. We'd better fix that before freeing them!
+ */
+ if ( is_idle_task(current) )
+ write_ptbase(&current->mm);
+
+ /* Exit shadow mode before deconstructing final guest page table. */
+ if ( p->mm.shadow_mode )
+ shadow_mode_disable(p);
/* STEP 1. Drop the in-use reference to the page-table base. */
put_page_and_type(&frame_table[pagetable_val(p->mm.pagetable) >>
@@ -1078,7 +1083,7 @@ int construct_dom0(struct task_struct *p,
set_bit(PF_CONSTRUCTED, &p->flags);
-#if 0 // XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave)
+#if 0 /* XXXXX DO NOT CHECK IN ENABLED !!! (but useful for testing so leave) */
shadow_mode_enable(&p->mm, SHM_test);
#endif
diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 76747fa981..3e8e617cb3 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -75,31 +75,37 @@ unsigned char opt_pdb[10] = "none";
unsigned int opt_tbuf_size = 1;
/* opt_sched: scheduler - default to Borrowed Virtual Time */
char opt_sched[10] = "bvt";
-/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0
- * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */
-char opt_physdev_dom0_hide[20] = "";
+/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
+/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
+char opt_physdev_dom0_hide[200] = "";
+/* opt_leveltrigger, opt_edgetrigger: Force an IO-APIC-routed IRQ to be */
+/* level- or edge-triggered. */
+/* Example: 'leveltrigger=4,5,6,20 edgetrigger=21'. */
+char opt_leveltrigger[30] = "", opt_edgetrigger[30] = "";
static struct {
unsigned char *name;
enum { OPT_IP, OPT_STR, OPT_UINT, OPT_BOOL } type;
void *var;
} opts[] = {
- { "console", OPT_STR, &opt_console },
- { "ser_baud", OPT_UINT, &opt_ser_baud },
- { "com1", OPT_STR, &opt_com1 },
- { "com2", OPT_STR, &opt_com2 },
- { "dom0_mem", OPT_UINT, &opt_dom0_mem },
- { "ifname", OPT_STR, &opt_ifname },
- { "noht", OPT_BOOL, &opt_noht },
- { "noacpi", OPT_BOOL, &opt_noacpi },
- { "nosmp", OPT_BOOL, &opt_nosmp },
- { "noreboot", OPT_BOOL, &opt_noreboot },
- { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables },
- { "watchdog", OPT_BOOL, &opt_watchdog },
- { "pdb", OPT_STR, &opt_pdb },
- { "tbuf_size", OPT_UINT, &opt_tbuf_size },
- { "sched", OPT_STR, &opt_sched },
- { "physdev_dom0_hide",OPT_STR, &opt_physdev_dom0_hide },
+ { "console", OPT_STR, &opt_console },
+ { "ser_baud", OPT_UINT, &opt_ser_baud },
+ { "com1", OPT_STR, &opt_com1 },
+ { "com2", OPT_STR, &opt_com2 },
+ { "dom0_mem", OPT_UINT, &opt_dom0_mem },
+ { "ifname", OPT_STR, &opt_ifname },
+ { "noht", OPT_BOOL, &opt_noht },
+ { "noacpi", OPT_BOOL, &opt_noacpi },
+ { "nosmp", OPT_BOOL, &opt_nosmp },
+ { "noreboot", OPT_BOOL, &opt_noreboot },
+ { "ignorebiostables", OPT_BOOL, &opt_ignorebiostables },
+ { "watchdog", OPT_BOOL, &opt_watchdog },
+ { "pdb", OPT_STR, &opt_pdb },
+ { "tbuf_size", OPT_UINT, &opt_tbuf_size },
+ { "sched", OPT_STR, &opt_sched },
+ { "physdev_dom0_hide", OPT_STR, &opt_physdev_dom0_hide },
+ { "leveltrigger", OPT_STR, &opt_leveltrigger },
+ { "edgetrigger", OPT_STR, &opt_edgetrigger },
{ NULL, 0, NULL }
};
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 99931bb515..ddb2778bc3 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -194,7 +194,6 @@ static struct {
*/
void __init init_frametable(unsigned long nr_pages)
{
- int i;
unsigned long mfn;
memset(percpu_info, 0, sizeof(percpu_info));
@@ -209,20 +208,19 @@ void __init init_frametable(unsigned long nr_pages)
INIT_LIST_HEAD(&free_list);
free_pfns = 0;
- /* so that we can map them latter, set the ownership of pages
- belonging to the machine_to_phys_mapping to CPU0 idle task */
-
- mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
-
/* initialise to a magic of 0x55555555 so easier to spot bugs later */
memset( machine_to_phys_mapping, 0x55, 4*1024*1024 );
/* The array is sized for a 4GB machine regardless of actuall mem size.
This costs 4MB -- may want to fix some day */
- for(i=0;i<1024*1024;i+=1024,mfn++)
+
+ /* Pin the ownership of the MP table so that DOM0 can map it later. */
+ for ( mfn = virt_to_phys((void *)RDWR_MPT_VIRT_START)>>PAGE_SHIFT;
+ mfn < virt_to_phys((void *)RDWR_MPT_VIRT_END)>>PAGE_SHIFT;
+ mfn++ )
{
frame_table[mfn].count_and_flags = 1 | PGC_allocated;
- frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; // anything non RW
+ frame_table[mfn].type_and_flags = 1 | PGT_gdt_page; /* non-RW type */
frame_table[mfn].u.domain = &idle0_task;
}
}
diff --git a/xen/common/physdev.c b/xen/common/physdev.c
index d15183cb6e..61b7b22cb2 100644
--- a/xen/common/physdev.c
+++ b/xen/common/physdev.c
@@ -634,9 +634,10 @@ static long pci_probe_root_buses(u32 *busmask)
*/
long do_physdev_op(physdev_op_t *uop)
{
- phys_dev_t *pdev;
+ phys_dev_t *pdev;
physdev_op_t op;
- long ret;
+ long ret;
+ int irq;
if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
return -EFAULT;
@@ -674,10 +675,22 @@ long do_physdev_op(physdev_op_t *uop)
ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask);
break;
- case PHYSDEVOP_UNMASK_IRQ:
+ case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
ret = pirq_guest_unmask(current);
break;
+ case PHYSDEVOP_IRQ_STATUS_QUERY:
+ irq = op.u.irq_status_query.irq;
+ ret = -EINVAL;
+ if ( (irq < 0) || (irq >= NR_IRQS) )
+ break;
+ op.u.irq_status_query.flags = 0;
+ /* Edge-triggered interrupts don't need an explicit unmask downcall. */
+ if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL )
+ op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
+ ret = 0;
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index dfacb65bd0..5aa12bf8cb 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -520,7 +520,7 @@ asmlinkage void __enter_scheduler(void)
/* Mark a timer event for the newly-scheduled domain. */
if ( !is_idle_task(next) )
- evtchn_set_pending(next, VIRQ_TIMER);
+ send_guest_virq(next, VIRQ_TIMER);
schedule_tail(next);
diff --git a/xen/include/hypervisor-ifs/physdev.h b/xen/include/hypervisor-ifs/physdev.h
index 914a555981..50372bf2be 100644
--- a/xen/include/hypervisor-ifs/physdev.h
+++ b/xen/include/hypervisor-ifs/physdev.h
@@ -14,44 +14,55 @@
#define PHYSDEVOP_PCI_CFGREG_WRITE 1
#define PHYSDEVOP_PCI_INITIALISE_DEVICE 2
#define PHYSDEVOP_PCI_PROBE_ROOT_BUSES 3
-#define PHYSDEVOP_UNMASK_IRQ 4
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4
+#define PHYSDEVOP_IRQ_STATUS_QUERY 5
/* Read from PCI configuration space. */
-typedef struct physdevop_pci_cfgreg_read_st
-{
- int bus; /* IN */
- int dev; /* IN */
- int func; /* IN */
- int reg; /* IN */
- int len; /* IN */
- u32 value; /* OUT */
+typedef struct {
+ /* IN */
+ int bus;
+ int dev;
+ int func;
+ int reg;
+ int len;
+ /* OUT */
+ u32 value;
} physdevop_pci_cfgreg_read_t;
/* Write to PCI configuration space. */
-typedef struct physdevop_pci_cfgreg_write_st
-{
- int bus; /* IN */
- int dev; /* IN */
- int func; /* IN */
- int reg; /* IN */
- int len; /* IN */
- u32 value; /* IN */
+typedef struct {
+ /* IN */
+ int bus;
+ int dev;
+ int func;
+ int reg;
+ int len;
+ u32 value;
} physdevop_pci_cfgreg_write_t;
/* Do final initialisation of a PCI device (e.g., last-moment IRQ routing). */
-typedef struct physdevop_pci_initialise_device_st
-{
- int bus; /* IN */
- int dev; /* IN */
- int func; /* IN */
+typedef struct {
+ /* IN */
+ int bus;
+ int dev;
+ int func;
} physdevop_pci_initialise_device_t;
/* Find the root buses for subsequent scanning. */
-typedef struct physdevop_pci_probe_root_buses_st
-{
- u32 busmask[256/32]; /* OUT */
+typedef struct {
+ /* OUT */
+ u32 busmask[256/32];
} physdevop_pci_probe_root_buses_t;
+typedef struct {
+ /* IN */
+ int irq;
+ /* OUT */
+/* Need to call PHYSDEVOP_IRQ_UNMASK_NOTIFY when the IRQ has been serviced? */
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY (1<<0)
+ unsigned long flags;
+} physdevop_irq_status_query_t;
+
typedef struct _physdev_op_st
{
unsigned long cmd;
@@ -61,6 +72,7 @@ typedef struct _physdev_op_st
physdevop_pci_cfgreg_write_t pci_cfgreg_write;
physdevop_pci_initialise_device_t pci_initialise_device;
physdevop_pci_probe_root_buses_t pci_probe_root_buses;
+ physdevop_irq_status_query_t irq_status_query;
} u;
} physdev_op_t;