diff options
author | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-03-29 14:45:20 +0000 |
---|---|---|
committer | kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk> | 2004-03-29 14:45:20 +0000 |
commit | a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da (patch) | |
tree | 8c0eee298aadfd59fbaff28fb1dab8ebaefbe62a | |
parent | 3f211155ec8df4dc24918334dec511ea2595a570 (diff) | |
download | xen-a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da.tar.gz xen-a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da.tar.bz2 xen-a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da.zip |
bitkeeper revision 1.825.3.14 (40683680NZjB1f8PmpgffnMdcNdBjQ)
Many files:
Final IRQ and PCI-access virtualisation fixes.
-rw-r--r-- | xen/arch/i386/irq.c | 18 | ||||
-rw-r--r-- | xen/common/event_channel.c | 4 | ||||
-rw-r--r-- | xen/common/physdev.c | 304 | ||||
-rw-r--r-- | xen/include/hypervisor-ifs/event_channel.h | 2 | ||||
-rw-r--r-- | xen/include/xen/irq.h | 2 | ||||
-rw-r--r-- | xen/include/xen/sched.h | 2 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/Makefile | 13 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev | 41 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c | 45 | ||||
-rw-r--r-- | xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c | 83 |
10 files changed, 334 insertions, 180 deletions
diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c index 7e035d0a66..b280daf63f 100644 --- a/xen/arch/i386/irq.c +++ b/xen/arch/i386/irq.c @@ -941,8 +941,9 @@ int setup_irq(unsigned int irq, struct irqaction * new) #define IRQ_MAX_GUESTS 7 typedef struct { - unsigned short nr_guests; - unsigned short in_flight; + u8 nr_guests; + u8 in_flight; + u8 shareable; struct task_struct *guest[IRQ_MAX_GUESTS]; } irq_guest_action_t; @@ -989,7 +990,7 @@ int pirq_guest_unmask(struct task_struct *p) return 0; } -int pirq_guest_bind(struct task_struct *p, int irq) +int pirq_guest_bind(struct task_struct *p, int irq, int will_share) { unsigned long flags; irq_desc_t *desc = &irq_desc[irq]; @@ -1001,6 +1002,8 @@ int pirq_guest_bind(struct task_struct *p, int irq) spin_lock_irqsave(&desc->lock, flags); + action = (irq_guest_action_t *)desc->action; + if ( !(desc->status & IRQ_GUEST) ) { rc = -EBUSY; @@ -1021,14 +1024,19 @@ int pirq_guest_bind(struct task_struct *p, int irq) action->nr_guests = 0; action->in_flight = 0; + action->shareable = will_share; desc->depth = 0; desc->status |= IRQ_GUEST; desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING); desc->handler->startup(irq); } - - action = (irq_guest_action_t *)desc->action; + else if ( !will_share || !action->shareable ) + { + DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n"); + rc = -EBUSY; + goto out; + } rc = -EBUSY; if ( action->nr_guests == IRQ_MAX_GUESTS ) diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c index 8824c15890..3db38b763e 100644 --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -194,7 +194,9 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind) goto out; p->pirq_to_evtchn[pirq] = port; - if ( (rc = pirq_guest_bind(p, pirq)) != 0 ) + rc = pirq_guest_bind(p, pirq, + !!(bind->flags & BIND_PIRQ__WILL_SHARE)); + if ( rc != 0 ) { p->pirq_to_evtchn[pirq] = 0; DPRINTK("Couldn't bind to PIRQ %d (error=%d)\n", pirq, rc); diff --git a/xen/common/physdev.c b/xen/common/physdev.c index a3f3fb7f52..37ed7c74e3 100644 --- a/xen/common/physdev.c +++ b/xen/common/physdev.c @@ -6,33 +6,18 @@ * * Description: allows a domain to access devices on the PCI bus * - * a guest os may be given access to particular devices on the PCI - * bus. to allow the standard PCI device discovery to work it may - * also have limited access to devices (bridges) in the PCI device - * tree between the device and the PCI root device. - * - * for each domain a list of PCI devices is maintained, describing the + * A guest OS may be given access to particular devices on the PCI bus. + * For each domain a list of PCI devices is maintained, describing the * access mode for the domain. * - * guests can figure out the virtualised, or better, partioned PCI space - * through normal pci config register access. Some of the accesses, in - * particular write access are faked out. For example the sequence for - * for detecting the IO regions, which require writes to determine the - * size of teh region, is faked out by a very simple state machine, - * preventing direct writes to the PCI config registers by a guest. - * - * Interrupt handling is currently done in a very cheese fashion. - * We take the default irq controller code and replace it with our own. - * If an interrupt comes in it is acked using the PICs normal routine. Then - * an event is send to the receiving domain which has to explicitly call - * once it is finished dealing with the interrupt. Only then the PICs end - * handler is called. very cheesy with all sorts of problems but it seems - * to work in normal cases. No shared interrupts are allowed. - * - * XXX this code is not SMP safe at the moment! + * Guests can figure out the virtualised PCI space through normal PCI config + * register access. Some of the accesses, in particular write accesses, are + * faked. For example the sequence for detecting the IO regions, which requires + * writes to determine the size of the region, is faked out by a very simple + * state machine, preventing direct writes to the PCI config registers by a + * guest. */ - #include <xen/config.h> #include <xen/lib.h> #include <xen/types.h> @@ -47,22 +32,29 @@ /* Called by PHYSDEV_PCI_INITIALISE_DEVICE to finalise IRQ routing. */ extern void pcibios_enable_irq(struct pci_dev *dev); -#if 1 -#define DBG(_x...) +#if 0 +#define VERBOSE_INFO(_f, _a...) printk( _f , ## _a ) #else -#define DBG(_x...) printk(_x) +#define VERBOSE_INFO(_f, _a...) ((void)0) #endif +#if 1 || !defined(NDEBUG) +#define INFO(_f, _a...) printk( _f, ## _a ) +#else +#define INFO(_f, _a...) ((void)0) +#endif + + #define ACC_READ 1 #define ACC_WRITE 2 -/* upper bounds for PCI devices */ +/* Upper bounds for PCI-device addressing. */ #define PCI_BUSMAX 255 #define PCI_DEVMAX 31 #define PCI_FUNCMAX 7 #define PCI_REGMAX 255 -/* bit offsets into state */ +/* Bit offsets into state. */ #define ST_BASE_ADDRESS 0 /* bits 0-5: are for base address access */ #define ST_ROM_ADDRESS 6 /* bit 6: is for rom address access */ @@ -75,13 +67,7 @@ typedef struct _phys_dev_st { } phys_dev_t; -/* - * - * General functions - * - */ - -/* find a device on the device list */ +/* Find a device on a per-domain device list. */ static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev) { phys_dev_t *t, *res = NULL; @@ -99,24 +85,22 @@ static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev) return res; } -/* add the device to the list of devices task p can access */ +/* Add a device to a per-domain device-access list. */ static void add_dev_to_task(struct task_struct *p, struct pci_dev *dev, int acc) { - phys_dev_t *pdev; if ( (pdev = find_pdev(p, dev)) ) { - /* device already on list, update access */ + /* Sevice already on list: update access permissions. */ pdev->flags = acc; return; } - /* add device */ if ( !(pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL)) ) { - printk("error allocating pdev structure\n"); + INFO("Error allocating pdev structure.\n"); return; } @@ -127,7 +111,6 @@ static void add_dev_to_task(struct task_struct *p, if ( acc == ACC_WRITE ) pdev->owner = p; - } /* @@ -151,11 +134,11 @@ int physdev_pci_access_modify( if ( !enable ) { - DPRINTK("Disallowing access is not yet supported.\n"); + INFO("Disallowing access is not yet supported.\n"); return -EINVAL; } - DPRINTK("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func); + INFO("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func); if ( (p = find_domain_by_id(dom)) == NULL ) return -ESRCH; @@ -166,36 +149,36 @@ int physdev_pci_access_modify( /* Grant write access to the specified device. */ if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL ) { - DPRINTK(" dev does not exist\n"); + INFO(" dev does not exist\n"); return -ENODEV; } add_dev_to_task(p, pdev, ACC_WRITE); - DPRINTK(" add RW %02x:%02x:%02x\n", pdev->bus->number, - PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + INFO(" add RW %02x:%02x:%02x\n", pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); /* Grant read access to the root device. */ if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL ) { - DPRINTK(" bizarre -- no PCI root dev\n"); + INFO(" bizarre -- no PCI root dev\n"); return -ENODEV; } add_dev_to_task(p, rdev, ACC_READ); - DPRINTK(" add R0 %02x:%02x:%02x\n", 0, 0, 0); + INFO(" add R0 %02x:%02x:%02x\n", 0, 0, 0); /* Grant read access to all devices on the path to the root. */ for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self ) { add_dev_to_task(p, tdev, ACC_READ); - DPRINTK(" add RO %02x:%02x:%02x\n", tdev->bus->number, - PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn)); + INFO(" add RO %02x:%02x:%02x\n", tdev->bus->number, + PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn)); } if ( pdev->hdr_type == PCI_HEADER_TYPE_NORMAL ) return 0; - /* The device is a bridge or cardbus. */ - printk("XXX can't give access to bridge devices yet\n"); + /* The device is a bridge or cardbus. */ + INFO("XXX can't give access to bridge devices yet\n"); return 0; } @@ -217,15 +200,15 @@ inline static int check_dev_acc (struct task_struct *p, if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX ) return -EINVAL; - DBG("a=%c b=%x d=%x f=%x ", (acc == ACC_READ) ? 'R' : 'W', - mask, bus, dev, func); + VERBOSE_INFO("a=%c b=%x d=%x f=%x ", (acc == ACC_READ) ? 'R' : 'W', + mask, bus, dev, func); /* check target device */ target_devfn = PCI_DEVFN(dev, func); target_dev = pci_find_slot(bus, target_devfn); if ( !target_dev ) { - DBG("target does not exist\n"); + VERBOSE_INFO("target does not exist\n"); return -ENODEV; } @@ -233,7 +216,7 @@ inline static int check_dev_acc (struct task_struct *p, target_pdev = find_pdev(p, target_dev); if ( !target_pdev ) { - DBG("dom has no access to target\n"); + VERBOSE_INFO("dom has no access to target\n"); return -EPERM; } @@ -241,11 +224,6 @@ inline static int check_dev_acc (struct task_struct *p, return 0; } -/* - * - * PCI config space access - * - */ /* * Base address registers contain the base address for IO regions. @@ -266,60 +244,58 @@ inline static int check_dev_acc (struct task_struct *p, * cleared again. If the guest attempts to "restores" a wrong value an * error is flagged. */ -static int do_base_address_access(phys_dev_t *pdev, int acc, - int bus, int dev, int func, - int reg, int len, u32 *val) +static int do_base_address_access(phys_dev_t *pdev, int acc, int idx, + int len, u32 *val) { - int idx, st_bit, ret = -EINVAL; + int st_bit, reg = PCI_BASE_ADDRESS_0 + (idx*4), ret = -EINVAL; + struct pci_dev *dev = pdev->dev; u32 orig_val, sz; struct resource *res; - idx = (reg - PCI_BASE_ADDRESS_0)/4; + if ( len != sizeof(u32) ) + { + INFO("Guest attempting sub-dword %s to BASE_ADDRESS %d\n", + (acc == ACC_READ) ? "read" : "write", idx); + return -EPERM; + } + st_bit = idx + ST_BASE_ADDRESS; res = &(pdev->dev->resource[idx]); if ( acc == ACC_WRITE ) { - if ( *val == 0xffffffff || - ((res->flags & IORESOURCE_IO) && *val == 0xffff) ) + if ( (*val == 0xffffffff) || + ((res->flags & IORESOURCE_IO) && (*val == 0xffff)) ) { - /* set bit and return */ + /* Set bit and return. */ set_bit(st_bit, &pdev->state); ret = 0; } else { - /* assume guest wants to set the base address */ + /* Assume guest wants to set the base address. */ clear_bit(st_bit, &pdev->state); /* check if guest tries to restore orig value */ - ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val); - if ( *val != orig_val ) + ret = pci_read_config_dword(dev, reg, &orig_val); + if ( (ret == 0) && (*val != orig_val) ) { - printk("caution: guest tried to change base address range.\n"); + INFO("Guest attempting update to BASE_ADDRESS %d\n", idx); ret = -EPERM; } } - DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x" - " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, - pdev->state); - + VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x" + " val=0x%08x %lx\n", + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), reg, len, *val, pdev->state); } - else if ( acc == ACC_READ ) { - if ( !test_bit(st_bit, &pdev->state) ) + ret = pci_read_config_dword(dev, reg, val); + if ( (ret == 0) && test_bit(st_bit, &pdev->state) ) { - /* just read and return */ - ret = pci_config_read(0, bus, dev, func, reg, len, val); - } - else - { - /* fake value */ - ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val); - + /* Cook the value. */ sz = res->end - res->start; - if ( res->flags & IORESOURCE_MEM ) { /* this is written out explicitly for clarity */ @@ -344,77 +320,75 @@ static int do_base_address_access(phys_dev_t *pdev, int acc, *val = *val & (sz << 2); *val = *val | 0x1; } - ret = 0; } - DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x" - " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state); + VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x" + " val=0x%08x %lx\n", + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), reg, len, *val, pdev->state); } return ret; } -/* - * fake out read/write access to rom address register - * pretty much the same as a above - */ -static int do_rom_address_access(phys_dev_t *pdev, int acc, - int bus, int dev, int func, - int reg, int len, u32 *val) + +static int do_rom_address_access(phys_dev_t *pdev, int acc, int len, u32 *val) { int st_bit, ret = -EINVAL; + struct pci_dev *dev = pdev->dev; u32 orig_val, sz; struct resource *res; + if ( len != sizeof(u32) ) + { + INFO("Guest attempting sub-dword %s to ROM_ADDRESS\n", + (acc == ACC_READ) ? "read" : "write"); + return -EPERM; + } + st_bit = ST_ROM_ADDRESS; res = &(pdev->dev->resource[PCI_ROM_RESOURCE]); if ( acc == ACC_WRITE ) { - if ( *val == 0xffffffff || *val == 0xfffffffe) + if ( (*val == 0xffffffff) || (*val == 0xfffffffe) ) { - /* 0xffffffff would be unusual, but we check anyway */ - /* set bit and return */ + /* NB. 0xffffffff would be unusual, but we trap it anyway. */ set_bit(st_bit, &pdev->state); ret = 0; } else { - /* assume guest wants to set the base address */ + /* Assume guest wants simply to set the base address. */ clear_bit(st_bit, &pdev->state); - /* check if guest tries to restore orig value */ - ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val); - if ( (*val != orig_val) ) + /* Check if guest tries to restore the original value. */ + ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, &orig_val); + if ( (ret == 0) && (*val != orig_val) ) { - if (*val != 0x00000000 ) + if ( (*val != 0x00000000) ) { - printk("caution: guest tried to change rom address.\n"); + INFO("caution: guest tried to change rom address.\n"); ret = -EPERM; } else { - printk ("guest disabled rom access for %02x:%02x:%02x\n", - bus, dev, func); - ret = 0; + INFO("guest disabled rom access for %02x:%02x:%02x\n", + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); } } - } - DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x" - " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state); - + VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x" + " val=0x%08x %lx\n", + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), reg, len, *val, pdev->state); } else if ( acc == ACC_READ ) { - if ( !test_bit(st_bit, &pdev->state) ) - { - /* just read and return */ - ret = pci_config_read(0, bus, dev, func, reg, len, val); - } - else + ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, val); + if ( (ret == 0) && test_bit(st_bit, &pdev->state) ) { - /* fake value */ - ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val); + /* Cook the value. */ sz = res->end - res->start; *val = 0xffffffff; /* leave bit 0 untouched */ @@ -424,19 +398,18 @@ static int do_rom_address_access(phys_dev_t *pdev, int acc, *val = *val & (sz << 11); *val = *val | (orig_val & 0x1); } - - DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x" - " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state); + VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x" + " val=0x%08x %lx\n", + dev->bus->number, PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn), reg, len, *val, pdev->state); } + return ret; } /* - * handle a domains pci config space read access if it has access to - * the device. - * For some registers for read-only devices (e.g. address base registers) - * we need to maintain a state machine. + * Handle a PCI config space read access if the domain has access privileges. */ static long pci_cfgreg_read(int bus, int dev, int func, int reg, int len, u32 *val) @@ -451,39 +424,51 @@ static long pci_cfgreg_read(int bus, int dev, int func, int reg, switch ( reg ) { case PCI_BASE_ADDRESS_0: + ret = do_base_address_access(pdev, ACC_READ, 0, len, val); + break; + case PCI_BASE_ADDRESS_1: + ret = do_base_address_access(pdev, ACC_READ, 1, len, val); + break; + case PCI_BASE_ADDRESS_2: + ret = do_base_address_access(pdev, ACC_READ, 2, len, val); + break; + case PCI_BASE_ADDRESS_3: + ret = do_base_address_access(pdev, ACC_READ, 3, len, val); + break; + case PCI_BASE_ADDRESS_4: + ret = do_base_address_access(pdev, ACC_READ, 4, len, val); + break; + case PCI_BASE_ADDRESS_5: - ret = do_base_address_access(pdev, ACC_READ, bus, dev, - func, reg, len, val); + ret = do_base_address_access(pdev, ACC_READ, 5, len, val); break; case PCI_ROM_ADDRESS: - ret = do_rom_address_access(pdev, ACC_READ, bus, dev, - func, reg, len, val); + ret = do_rom_address_access(pdev, ACC_READ, len, val); break; case PCI_INTERRUPT_LINE: - ret = pdev->dev->irq; + *val = pdev->dev->irq; + ret = 0; break; default: ret = pci_config_read(0, bus, dev, func, reg, len, val); - DBG("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n", - bus, dev, func, reg, len, *val); + VERBOSE_INFO("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x " + "val=0x%08x\n", bus, dev, func, reg, len, *val); break; } return ret; } + /* - * handle a domains pci config space write accesses if it has access to - * the device. - * for some registers a state machine is maintained to fake out r/w access. - * By default no write access is allowed but we may change that in the future. + * Handle a PCI config space write access if the domain has access privileges. */ static long pci_cfgreg_write(int bus, int dev, int func, int reg, int len, u32 val) @@ -498,35 +483,46 @@ static long pci_cfgreg_write(int bus, int dev, int func, int reg, switch (reg) { case PCI_BASE_ADDRESS_0: + ret = do_base_address_access(pdev, ACC_WRITE, 0, len, &val); + break; + case PCI_BASE_ADDRESS_1: + ret = do_base_address_access(pdev, ACC_WRITE, 1, len, &val); + break; + case PCI_BASE_ADDRESS_2: + ret = do_base_address_access(pdev, ACC_WRITE, 2, len, &val); + break; + case PCI_BASE_ADDRESS_3: + ret = do_base_address_access(pdev, ACC_WRITE, 3, len, &val); + break; + case PCI_BASE_ADDRESS_4: + ret = do_base_address_access(pdev, ACC_WRITE, 4, len, &val); + break; + case PCI_BASE_ADDRESS_5: - ret = do_base_address_access (pdev, ACC_WRITE, bus, dev, - func, reg, len, &val); - return ret; + ret = do_base_address_access(pdev, ACC_WRITE, 5, len, &val); break; case PCI_ROM_ADDRESS: - ret = do_rom_address_access (pdev, ACC_WRITE, bus, dev, - func, reg, len, &val); - return ret; + ret = do_rom_address_access(pdev, ACC_WRITE, len, &val); break; default: if ( pdev->flags != ACC_WRITE ) { - printk("pci write not allowed %02x:%02x:%02x: " - "reg=0x%02x len=0x%02x val=0x%08x\n", - bus, dev, func, reg, len, val); + INFO("pci write not allowed %02x:%02x:%02x: " + "reg=0x%02x len=0x%02x val=0x%08x\n", + bus, dev, func, reg, len, val); ret = -EPERM; } else { ret = pci_config_write(0, bus, dev, func, reg, len, val); - DBG("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n", - bus, dev, func, reg, len, val); + VERBOSE_INFO("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x " + "val=0x%08x\n", bus, dev, func, reg, len, val); } break; } @@ -617,11 +613,13 @@ void physdev_init_dom0(struct task_struct *p) struct pci_dev *dev; phys_dev_t *pdev; - printk("Give DOM0 read access to all PCI devices\n"); + INFO("Give DOM0 read access to all PCI devices\n"); pci_for_each_dev(dev) { - /* add device */ + /* Skip bridges and other peculiarities for now. */ + if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL ) + continue; pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL); pdev->dev = dev; pdev->flags = ACC_WRITE; diff --git a/xen/include/hypervisor-ifs/event_channel.h b/xen/include/hypervisor-ifs/event_channel.h index fdc4eaeb1b..20095c91a3 100644 --- a/xen/include/hypervisor-ifs/event_channel.h +++ b/xen/include/hypervisor-ifs/event_channel.h @@ -50,6 +50,8 @@ typedef struct evtchn_bind_pirq { /* IN parameters. */ int pirq; +#define BIND_PIRQ__WILL_SHARE 1 + unsigned int flags; /* BIND_PIRQ__* */ /* OUT parameters. */ int port; } evtchn_bind_pirq_t; diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h index 0e53cc0331..34f54862bd 100644 --- a/xen/include/xen/irq.h +++ b/xen/include/xen/irq.h @@ -59,7 +59,7 @@ extern void no_action(int cpl, void *dev_id, struct pt_regs *regs); struct task_struct; extern int pirq_guest_unmask(struct task_struct *p); -extern int pirq_guest_bind(struct task_struct *p, int irq); +extern int pirq_guest_bind(struct task_struct *p, int irq, int will_share); extern int pirq_guest_unbind(struct task_struct *p, int irq); #endif /* __XEN_IRQ_H__ */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 1b8bd10d3c..123b17773f 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -164,7 +164,7 @@ struct task_struct * domain's event-channel spinlock. Read accesses can also synchronise on * the lock, but races don't usually matter. */ - u16 pirq_to_evtchn[64]; + u16 pirq_to_evtchn[128]; u16 virq_to_evtchn[NR_VIRQS]; u32 pirq_mask[2]; diff --git a/xenolinux-2.4.25-sparse/arch/xen/Makefile b/xenolinux-2.4.25-sparse/arch/xen/Makefile index 04117a9e4b..f52b90632f 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/Makefile +++ b/xenolinux-2.4.25-sparse/arch/xen/Makefile @@ -49,9 +49,14 @@ endif HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib -SUBDIRS += arch/xen/drivers/console arch/xen/drivers/network -SUBDIRS += arch/xen/drivers/evtchn arch/xen/drivers/block -SUBDIRS += arch/xen/drivers/balloon arch/xen/drivers/vnetif +SUBDIRS += arch/xen/drivers/console +ifndef CONFIG_XEN_PHYSDEV_ACCESS +SUBDIRS += arch/xen/drivers/network +endif +SUBDIRS += arch/xen/drivers/evtchn +SUBDIRS += arch/xen/drivers/block +SUBDIRS += arch/xen/drivers/balloon +SUBDIRS += arch/xen/drivers/vnetif ifdef CONFIG_XEN_PRIVILEGED_GUEST SUBDIRS += arch/xen/drivers/dom0 endif @@ -60,7 +65,9 @@ CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o CORE_FILES += arch/xen/drivers/evtchn/drv.o CORE_FILES += arch/xen/drivers/console/drv.o CORE_FILES += arch/xen/drivers/block/drv.o +ifndef CONFIG_XEN_PHYSDEV_ACCESS CORE_FILES += arch/xen/drivers/network/drv.o +endif CORE_FILES += arch/xen/drivers/vnetif/drv.o ifdef CONFIG_XEN_PRIVILEGED_GUEST CORE_FILES += arch/xen/drivers/dom0/drv.o diff --git a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev index e6a213757c..4b32576d59 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev +++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev @@ -401,9 +401,6 @@ CONFIG_SCSI_DTC3280=y # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_GDTH is not set # CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -# CONFIG_SCSI_G_NCR5380_PORT is not set -# CONFIG_SCSI_G_NCR5380_MEM is not set # CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set @@ -500,7 +497,43 @@ CONFIG_VORTEX=y # CONFIG_DEPCA is not set # CONFIG_HP100 is not set # CONFIG_NET_ISA is not set -# CONFIG_NET_PCI is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=y +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_B44 is not set +# CONFIG_CS89x0 is not set +CONFIG_TULIP=y +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +# CONFIG_DE4X5 is not set +# CONFIG_DGRS is not set +# CONFIG_DM9102 is not set +# CONFIG_EEPRO100 is not set +# CONFIG_EEPRO100_PIO is not set +# CONFIG_E100 is not set +# CONFIG_LNE390 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_NE3210 is not set +# CONFIG_ES3210 is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +# CONFIG_8139TOO_8129 is not set +# CONFIG_8139_OLD_RX_RESET is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_SUNDANCE_MMIO is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set +# CONFIG_WINBOND_840 is not set # CONFIG_NET_POCKET is not set # diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c index e46307f33d..c65806a7d9 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c @@ -38,6 +38,8 @@ static int irq_bindcount[NR_IRQS]; /* Upcall to generic IRQ layer. */ extern asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs); +#define VALID_EVTCHN(_chn) ((_chn) != -1) + void evtchn_do_upcall(struct pt_regs *regs) { unsigned long l1, l2; @@ -233,15 +235,27 @@ static inline void pirq_unmask_notify(int pirq) (void)HYPERVISOR_physdev_op(&op); } +/* + * On startup, if there is no action associated with the IRQ then we are + * probing. In this case we should not share with others as it will confuse us. + */ +#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL) + static unsigned int startup_pirq(unsigned int irq) { evtchn_op_t op; int evtchn; - op.cmd = EVTCHNOP_bind_pirq; - op.u.bind_pirq.pirq = irq; + op.cmd = EVTCHNOP_bind_pirq; + op.u.bind_pirq.pirq = irq; + /* NB. We are happy to share unless we are probing. */ + op.u.bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; if ( HYPERVISOR_event_channel_op(&op) != 0 ) - panic("Failed to obtain physical IRQ %d\n", irq); + { + if ( !probing_irq(irq) ) /* Some failures are expected when probing. */ + printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq); + return 0; + } evtchn = op.u.bind_pirq.port; evtchn_to_irq[evtchn] = irq; @@ -258,6 +272,9 @@ static void shutdown_pirq(unsigned int irq) evtchn_op_t op; int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + mask_evtchn(evtchn); op.cmd = EVTCHNOP_close; @@ -272,26 +289,38 @@ static void shutdown_pirq(unsigned int irq) static void enable_pirq(unsigned int irq) { - unmask_evtchn(irq_to_evtchn[irq]); + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + unmask_evtchn(evtchn); pirq_unmask_notify(irq_to_pirq(irq)); } static void disable_pirq(unsigned int irq) { - mask_evtchn(irq_to_evtchn[irq]); + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + mask_evtchn(evtchn); } static void ack_pirq(unsigned int irq) { - mask_evtchn(irq_to_evtchn[irq]); - clear_evtchn(irq_to_evtchn[irq]); + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; + mask_evtchn(evtchn); + clear_evtchn(evtchn); } static void end_pirq(unsigned int irq) { + int evtchn = irq_to_evtchn[irq]; + if ( !VALID_EVTCHN(evtchn) ) + return; if ( !(irq_desc[irq].status & IRQ_DISABLED) ) { - unmask_evtchn(irq_to_evtchn[irq]); + unmask_evtchn(evtchn); pirq_unmask_notify(irq_to_pirq(irq)); } } diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c index c88e976125..abb2b398be 100644 --- a/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c +++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c @@ -86,7 +86,25 @@ static unsigned int startup_none(unsigned int irq) { return 0; } static void disable_none(unsigned int irq) { } static void ack_none(unsigned int irq) { +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesnt deserve + * a generic callback i think. + */ +#if CONFIG_X86 printk("unexpected IRQ trap at vector %02x\n", irq); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + */ + ack_APIC_irq(); +#endif +#endif } /* startup is the same as "enable", shutdown is same as "disable" */ @@ -322,14 +340,38 @@ static inline void get_irqlock(int cpu) global_irq_holder = cpu; } +/* + * A global "cli()" while in an interrupt context + * turns into just a local cli(). Interrupts + * should use spinlocks for the (very unlikely) + * case that they ever want to protect against + * each other. + * + * If we already have local interrupts disabled, + * this will not turn a local disable into a + * global one (problems with spinlocks: this makes + * save_flags+cli+sti usable inside a spinlock). + */ void __global_cli(void) { - panic("__global_cli"); + unsigned int flags; + + __save_flags(flags); + if (!flags) { + int cpu = smp_processor_id(); + __cli(); + if (!local_irq_count(cpu)) + get_irqlock(cpu); + } } void __global_sti(void) { - panic("__global_sti"); + int cpu = smp_processor_id(); + + if (!local_irq_count(cpu)) + release_irqlock(cpu); + __sti(); } /* @@ -341,12 +383,45 @@ void __global_sti(void) */ unsigned long __global_save_flags(void) { - panic("__global_save_flags"); + int retval; + int local_enabled; + unsigned long flags; + int cpu = smp_processor_id(); + + __save_flags(flags); + local_enabled = !flags; + /* default to local */ + retval = 2 + local_enabled; + + /* check for global flags if we're not in an interrupt */ + if (!local_irq_count(cpu)) { + if (local_enabled) + retval = 1; + if (global_irq_holder == cpu) + retval = 0; + } + return retval; } void __global_restore_flags(unsigned long flags) { - panic("__global_restore_flags"); + switch (flags) { + case 0: + __global_cli(); + break; + case 1: + __global_sti(); + break; + case 2: + __cli(); + break; + case 3: + __sti(); + break; + default: + printk("global_restore_flags: %08lx (%08lx)\n", + flags, (&flags)[-1]); + } } #endif |