aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>2004-03-29 14:45:20 +0000
committerkaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>2004-03-29 14:45:20 +0000
commita9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da (patch)
tree8c0eee298aadfd59fbaff28fb1dab8ebaefbe62a
parent3f211155ec8df4dc24918334dec511ea2595a570 (diff)
downloadxen-a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da.tar.gz
xen-a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da.tar.bz2
xen-a9ce6bed10dcb8fe9ef4e88dd25c01ba7f97f8da.zip
bitkeeper revision 1.825.3.14 (40683680NZjB1f8PmpgffnMdcNdBjQ)
Many files: Final IRQ and PCI-access virtualisation fixes.
-rw-r--r--xen/arch/i386/irq.c18
-rw-r--r--xen/common/event_channel.c4
-rw-r--r--xen/common/physdev.c304
-rw-r--r--xen/include/hypervisor-ifs/event_channel.h2
-rw-r--r--xen/include/xen/irq.h2
-rw-r--r--xen/include/xen/sched.h2
-rw-r--r--xenolinux-2.4.25-sparse/arch/xen/Makefile13
-rw-r--r--xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev41
-rw-r--r--xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c45
-rw-r--r--xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c83
10 files changed, 334 insertions, 180 deletions
diff --git a/xen/arch/i386/irq.c b/xen/arch/i386/irq.c
index 7e035d0a66..b280daf63f 100644
--- a/xen/arch/i386/irq.c
+++ b/xen/arch/i386/irq.c
@@ -941,8 +941,9 @@ int setup_irq(unsigned int irq, struct irqaction * new)
#define IRQ_MAX_GUESTS 7
typedef struct {
- unsigned short nr_guests;
- unsigned short in_flight;
+ u8 nr_guests;
+ u8 in_flight;
+ u8 shareable;
struct task_struct *guest[IRQ_MAX_GUESTS];
} irq_guest_action_t;
@@ -989,7 +990,7 @@ int pirq_guest_unmask(struct task_struct *p)
return 0;
}
-int pirq_guest_bind(struct task_struct *p, int irq)
+int pirq_guest_bind(struct task_struct *p, int irq, int will_share)
{
unsigned long flags;
irq_desc_t *desc = &irq_desc[irq];
@@ -1001,6 +1002,8 @@ int pirq_guest_bind(struct task_struct *p, int irq)
spin_lock_irqsave(&desc->lock, flags);
+ action = (irq_guest_action_t *)desc->action;
+
if ( !(desc->status & IRQ_GUEST) )
{
rc = -EBUSY;
@@ -1021,14 +1024,19 @@ int pirq_guest_bind(struct task_struct *p, int irq)
action->nr_guests = 0;
action->in_flight = 0;
+ action->shareable = will_share;
desc->depth = 0;
desc->status |= IRQ_GUEST;
desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
desc->handler->startup(irq);
}
-
- action = (irq_guest_action_t *)desc->action;
+ else if ( !will_share || !action->shareable )
+ {
+ DPRINTK("Cannot bind IRQ %d to guest. Will not share with others.\n", irq);
+ rc = -EBUSY;
+ goto out;
+ }
rc = -EBUSY;
if ( action->nr_guests == IRQ_MAX_GUESTS )
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 8824c15890..3db38b763e 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -194,7 +194,9 @@ static long evtchn_bind_pirq(evtchn_bind_pirq_t *bind)
goto out;
p->pirq_to_evtchn[pirq] = port;
- if ( (rc = pirq_guest_bind(p, pirq)) != 0 )
+ rc = pirq_guest_bind(p, pirq,
+ !!(bind->flags & BIND_PIRQ__WILL_SHARE));
+ if ( rc != 0 )
{
p->pirq_to_evtchn[pirq] = 0;
DPRINTK("Couldn't bind to PIRQ %d (error=%d)\n", pirq, rc);
diff --git a/xen/common/physdev.c b/xen/common/physdev.c
index a3f3fb7f52..37ed7c74e3 100644
--- a/xen/common/physdev.c
+++ b/xen/common/physdev.c
@@ -6,33 +6,18 @@
*
* Description: allows a domain to access devices on the PCI bus
*
- * a guest os may be given access to particular devices on the PCI
- * bus. to allow the standard PCI device discovery to work it may
- * also have limited access to devices (bridges) in the PCI device
- * tree between the device and the PCI root device.
- *
- * for each domain a list of PCI devices is maintained, describing the
+ * A guest OS may be given access to particular devices on the PCI bus.
+ * For each domain a list of PCI devices is maintained, describing the
* access mode for the domain.
*
- * guests can figure out the virtualised, or better, partioned PCI space
- * through normal pci config register access. Some of the accesses, in
- * particular write access are faked out. For example the sequence for
- * for detecting the IO regions, which require writes to determine the
- * size of teh region, is faked out by a very simple state machine,
- * preventing direct writes to the PCI config registers by a guest.
- *
- * Interrupt handling is currently done in a very cheese fashion.
- * We take the default irq controller code and replace it with our own.
- * If an interrupt comes in it is acked using the PICs normal routine. Then
- * an event is send to the receiving domain which has to explicitly call
- * once it is finished dealing with the interrupt. Only then the PICs end
- * handler is called. very cheesy with all sorts of problems but it seems
- * to work in normal cases. No shared interrupts are allowed.
- *
- * XXX this code is not SMP safe at the moment!
+ * Guests can figure out the virtualised PCI space through normal PCI config
+ * register access. Some of the accesses, in particular write accesses, are
+ * faked. For example the sequence for detecting the IO regions, which requires
+ * writes to determine the size of the region, is faked out by a very simple
+ * state machine, preventing direct writes to the PCI config registers by a
+ * guest.
*/
-
#include <xen/config.h>
#include <xen/lib.h>
#include <xen/types.h>
@@ -47,22 +32,29 @@
/* Called by PHYSDEV_PCI_INITIALISE_DEVICE to finalise IRQ routing. */
extern void pcibios_enable_irq(struct pci_dev *dev);
-#if 1
-#define DBG(_x...)
+#if 0
+#define VERBOSE_INFO(_f, _a...) printk( _f , ## _a )
#else
-#define DBG(_x...) printk(_x)
+#define VERBOSE_INFO(_f, _a...) ((void)0)
#endif
+#if 1 || !defined(NDEBUG)
+#define INFO(_f, _a...) printk( _f, ## _a )
+#else
+#define INFO(_f, _a...) ((void)0)
+#endif
+
+
#define ACC_READ 1
#define ACC_WRITE 2
-/* upper bounds for PCI devices */
+/* Upper bounds for PCI-device addressing. */
#define PCI_BUSMAX 255
#define PCI_DEVMAX 31
#define PCI_FUNCMAX 7
#define PCI_REGMAX 255
-/* bit offsets into state */
+/* Bit offsets into state. */
#define ST_BASE_ADDRESS 0 /* bits 0-5: are for base address access */
#define ST_ROM_ADDRESS 6 /* bit 6: is for rom address access */
@@ -75,13 +67,7 @@ typedef struct _phys_dev_st {
} phys_dev_t;
-/*
- *
- * General functions
- *
- */
-
-/* find a device on the device list */
+/* Find a device on a per-domain device list. */
static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev)
{
phys_dev_t *t, *res = NULL;
@@ -99,24 +85,22 @@ static phys_dev_t *find_pdev(struct task_struct *p, struct pci_dev *dev)
return res;
}
-/* add the device to the list of devices task p can access */
+/* Add a device to a per-domain device-access list. */
static void add_dev_to_task(struct task_struct *p,
struct pci_dev *dev, int acc)
{
-
phys_dev_t *pdev;
if ( (pdev = find_pdev(p, dev)) )
{
- /* device already on list, update access */
+ /* Device already on list: update access permissions. */
pdev->flags = acc;
return;
}
- /* add device */
if ( !(pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL)) )
{
- printk("error allocating pdev structure\n");
+ INFO("Error allocating pdev structure.\n");
return;
}
@@ -127,7 +111,6 @@ static void add_dev_to_task(struct task_struct *p,
if ( acc == ACC_WRITE )
pdev->owner = p;
-
}
/*
@@ -151,11 +134,11 @@ int physdev_pci_access_modify(
if ( !enable )
{
- DPRINTK("Disallowing access is not yet supported.\n");
+ INFO("Disallowing access is not yet supported.\n");
return -EINVAL;
}
- DPRINTK("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
+ INFO("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
if ( (p = find_domain_by_id(dom)) == NULL )
return -ESRCH;
@@ -166,36 +149,36 @@ int physdev_pci_access_modify(
/* Grant write access to the specified device. */
if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
{
- DPRINTK(" dev does not exist\n");
+ INFO(" dev does not exist\n");
return -ENODEV;
}
add_dev_to_task(p, pdev, ACC_WRITE);
- DPRINTK(" add RW %02x:%02x:%02x\n", pdev->bus->number,
- PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ INFO(" add RW %02x:%02x:%02x\n", pdev->bus->number,
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
/* Grant read access to the root device. */
if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL )
{
- DPRINTK(" bizarre -- no PCI root dev\n");
+ INFO(" bizarre -- no PCI root dev\n");
return -ENODEV;
}
add_dev_to_task(p, rdev, ACC_READ);
- DPRINTK(" add R0 %02x:%02x:%02x\n", 0, 0, 0);
+ INFO(" add R0 %02x:%02x:%02x\n", 0, 0, 0);
/* Grant read access to all devices on the path to the root. */
for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self )
{
add_dev_to_task(p, tdev, ACC_READ);
- DPRINTK(" add RO %02x:%02x:%02x\n", tdev->bus->number,
- PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
+ INFO(" add RO %02x:%02x:%02x\n", tdev->bus->number,
+ PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
}
if ( pdev->hdr_type == PCI_HEADER_TYPE_NORMAL )
return 0;
- /* The device is a bridge or cardbus. */
- printk("XXX can't give access to bridge devices yet\n");
+ /* The device is a bridge or cardbus. */
+ INFO("XXX can't give access to bridge devices yet\n");
return 0;
}
@@ -217,15 +200,15 @@ inline static int check_dev_acc (struct task_struct *p,
if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
return -EINVAL;
- DBG("a=%c b=%x d=%x f=%x ", (acc == ACC_READ) ? 'R' : 'W',
- mask, bus, dev, func);
+ VERBOSE_INFO("a=%c b=%x d=%x f=%x ", (acc == ACC_READ) ? 'R' : 'W',
+ mask, bus, dev, func);
/* check target device */
target_devfn = PCI_DEVFN(dev, func);
target_dev = pci_find_slot(bus, target_devfn);
if ( !target_dev )
{
- DBG("target does not exist\n");
+ VERBOSE_INFO("target does not exist\n");
return -ENODEV;
}
@@ -233,7 +216,7 @@ inline static int check_dev_acc (struct task_struct *p,
target_pdev = find_pdev(p, target_dev);
if ( !target_pdev )
{
- DBG("dom has no access to target\n");
+ VERBOSE_INFO("dom has no access to target\n");
return -EPERM;
}
@@ -241,11 +224,6 @@ inline static int check_dev_acc (struct task_struct *p,
return 0;
}
-/*
- *
- * PCI config space access
- *
- */
/*
* Base address registers contain the base address for IO regions.
@@ -266,60 +244,58 @@ inline static int check_dev_acc (struct task_struct *p,
* cleared again. If the guest attempts to "restores" a wrong value an
* error is flagged.
*/
-static int do_base_address_access(phys_dev_t *pdev, int acc,
- int bus, int dev, int func,
- int reg, int len, u32 *val)
+static int do_base_address_access(phys_dev_t *pdev, int acc, int idx,
+ int len, u32 *val)
{
- int idx, st_bit, ret = -EINVAL;
+ int st_bit, reg = PCI_BASE_ADDRESS_0 + (idx*4), ret = -EINVAL;
+ struct pci_dev *dev = pdev->dev;
u32 orig_val, sz;
struct resource *res;
- idx = (reg - PCI_BASE_ADDRESS_0)/4;
+ if ( len != sizeof(u32) )
+ {
+ INFO("Guest attempting sub-dword %s to BASE_ADDRESS %d\n",
+ (acc == ACC_READ) ? "read" : "write", idx);
+ return -EPERM;
+ }
+
st_bit = idx + ST_BASE_ADDRESS;
res = &(pdev->dev->resource[idx]);
if ( acc == ACC_WRITE )
{
- if ( *val == 0xffffffff ||
- ((res->flags & IORESOURCE_IO) && *val == 0xffff) )
+ if ( (*val == 0xffffffff) ||
+ ((res->flags & IORESOURCE_IO) && (*val == 0xffff)) )
{
- /* set bit and return */
+ /* Set bit and return. */
set_bit(st_bit, &pdev->state);
ret = 0;
}
else
{
- /* assume guest wants to set the base address */
+ /* Assume guest wants to set the base address. */
clear_bit(st_bit, &pdev->state);
/* check if guest tries to restore orig value */
- ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val);
- if ( *val != orig_val )
+ ret = pci_read_config_dword(dev, reg, &orig_val);
+ if ( (ret == 0) && (*val != orig_val) )
{
- printk("caution: guest tried to change base address range.\n");
+ INFO("Guest attempting update to BASE_ADDRESS %d\n", idx);
ret = -EPERM;
}
}
- DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %lx\n", bus, dev, func, reg, len, *val,
- pdev->state);
-
+ VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
+ " val=0x%08x %lx\n",
+ dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
}
-
else if ( acc == ACC_READ )
{
- if ( !test_bit(st_bit, &pdev->state) )
+ ret = pci_read_config_dword(dev, reg, val);
+ if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
{
- /* just read and return */
- ret = pci_config_read(0, bus, dev, func, reg, len, val);
- }
- else
- {
- /* fake value */
- ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val);
-
+ /* Cook the value. */
sz = res->end - res->start;
-
if ( res->flags & IORESOURCE_MEM )
{
/* this is written out explicitly for clarity */
@@ -344,77 +320,75 @@ static int do_base_address_access(phys_dev_t *pdev, int acc,
*val = *val & (sz << 2);
*val = *val | 0x1;
}
- ret = 0;
}
- DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state);
+ VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
+ " val=0x%08x %lx\n",
+ dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
}
return ret;
}
-/*
- * fake out read/write access to rom address register
- * pretty much the same as a above
- */
-static int do_rom_address_access(phys_dev_t *pdev, int acc,
- int bus, int dev, int func,
- int reg, int len, u32 *val)
+
+static int do_rom_address_access(phys_dev_t *pdev, int acc, int len, u32 *val)
{
int st_bit, ret = -EINVAL;
+ struct pci_dev *dev = pdev->dev;
u32 orig_val, sz;
struct resource *res;
+ if ( len != sizeof(u32) )
+ {
+ INFO("Guest attempting sub-dword %s to ROM_ADDRESS\n",
+ (acc == ACC_READ) ? "read" : "write");
+ return -EPERM;
+ }
+
st_bit = ST_ROM_ADDRESS;
res = &(pdev->dev->resource[PCI_ROM_RESOURCE]);
if ( acc == ACC_WRITE )
{
- if ( *val == 0xffffffff || *val == 0xfffffffe)
+ if ( (*val == 0xffffffff) || (*val == 0xfffffffe) )
{
- /* 0xffffffff would be unusual, but we check anyway */
- /* set bit and return */
+ /* NB. 0xffffffff would be unusual, but we trap it anyway. */
set_bit(st_bit, &pdev->state);
ret = 0;
}
else
{
- /* assume guest wants to set the base address */
+ /* Assume guest wants simply to set the base address. */
clear_bit(st_bit, &pdev->state);
- /* check if guest tries to restore orig value */
- ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val);
- if ( (*val != orig_val) )
+ /* Check if guest tries to restore the original value. */
+ ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, &orig_val);
+ if ( (ret == 0) && (*val != orig_val) )
{
- if (*val != 0x00000000 )
+ if ( (*val != 0x00000000) )
{
- printk("caution: guest tried to change rom address.\n");
+ INFO("caution: guest tried to change rom address.\n");
ret = -EPERM;
}
else
{
- printk ("guest disabled rom access for %02x:%02x:%02x\n",
- bus, dev, func);
- ret = 0;
+ INFO("guest disabled rom access for %02x:%02x:%02x\n",
+ dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn));
}
}
-
}
- DBG("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state);
-
+ VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
+ " val=0x%08x %lx\n",
+ dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), PCI_ROM_ADDRESS, len, *val, pdev->state);
}
else if ( acc == ACC_READ )
{
- if ( !test_bit(st_bit, &pdev->state) )
- {
- /* just read and return */
- ret = pci_config_read(0, bus, dev, func, reg, len, val);
- }
- else
+ ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, val);
+ if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
{
- /* fake value */
- ret = pci_config_read(0, bus, dev, func, reg, len, &orig_val);
+ orig_val = *val; /* Cook the value; keep the real one for the bit-0 merge below. */
sz = res->end - res->start;
*val = 0xffffffff;
/* leave bit 0 untouched */
@@ -424,19 +398,18 @@ static int do_rom_address_access(phys_dev_t *pdev, int acc,
*val = *val & (sz << 11);
*val = *val | (orig_val & 0x1);
}
-
- DBG("fixed pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x"
- " val=0x%08x %lx\n", bus, dev, func, reg, len, *val, pdev->state);
+ VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
+ " val=0x%08x %lx\n",
+ dev->bus->number, PCI_SLOT(dev->devfn),
+ PCI_FUNC(dev->devfn), PCI_ROM_ADDRESS, len, *val, pdev->state);
}
+
return ret;
}
/*
- * handle a domains pci config space read access if it has access to
- * the device.
- * For some registers for read-only devices (e.g. address base registers)
- * we need to maintain a state machine.
+ * Handle a PCI config space read access if the domain has access privileges.
*/
static long pci_cfgreg_read(int bus, int dev, int func, int reg,
int len, u32 *val)
@@ -451,39 +424,51 @@ static long pci_cfgreg_read(int bus, int dev, int func, int reg,
switch ( reg )
{
case PCI_BASE_ADDRESS_0:
+ ret = do_base_address_access(pdev, ACC_READ, 0, len, val);
+ break;
+
case PCI_BASE_ADDRESS_1:
+ ret = do_base_address_access(pdev, ACC_READ, 1, len, val);
+ break;
+
case PCI_BASE_ADDRESS_2:
+ ret = do_base_address_access(pdev, ACC_READ, 2, len, val);
+ break;
+
case PCI_BASE_ADDRESS_3:
+ ret = do_base_address_access(pdev, ACC_READ, 3, len, val);
+ break;
+
case PCI_BASE_ADDRESS_4:
+ ret = do_base_address_access(pdev, ACC_READ, 4, len, val);
+ break;
+
case PCI_BASE_ADDRESS_5:
- ret = do_base_address_access(pdev, ACC_READ, bus, dev,
- func, reg, len, val);
+ ret = do_base_address_access(pdev, ACC_READ, 5, len, val);
break;
case PCI_ROM_ADDRESS:
- ret = do_rom_address_access(pdev, ACC_READ, bus, dev,
- func, reg, len, val);
+ ret = do_rom_address_access(pdev, ACC_READ, len, val);
break;
case PCI_INTERRUPT_LINE:
- ret = pdev->dev->irq;
+ *val = pdev->dev->irq;
+ ret = 0;
break;
default:
ret = pci_config_read(0, bus, dev, func, reg, len, val);
- DBG("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n",
- bus, dev, func, reg, len, *val);
+ VERBOSE_INFO("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x "
+ "val=0x%08x\n", bus, dev, func, reg, len, *val);
break;
}
return ret;
}
+
/*
- * handle a domains pci config space write accesses if it has access to
- * the device.
- * for some registers a state machine is maintained to fake out r/w access.
- * By default no write access is allowed but we may change that in the future.
+ * Handle a PCI config space write access if the domain has access privileges.
*/
static long pci_cfgreg_write(int bus, int dev, int func, int reg,
int len, u32 val)
@@ -498,35 +483,46 @@ static long pci_cfgreg_write(int bus, int dev, int func, int reg,
switch (reg)
{
case PCI_BASE_ADDRESS_0:
+ ret = do_base_address_access(pdev, ACC_WRITE, 0, len, &val);
+ break;
+
case PCI_BASE_ADDRESS_1:
+ ret = do_base_address_access(pdev, ACC_WRITE, 1, len, &val);
+ break;
+
case PCI_BASE_ADDRESS_2:
+ ret = do_base_address_access(pdev, ACC_WRITE, 2, len, &val);
+ break;
+
case PCI_BASE_ADDRESS_3:
+ ret = do_base_address_access(pdev, ACC_WRITE, 3, len, &val);
+ break;
+
case PCI_BASE_ADDRESS_4:
+ ret = do_base_address_access(pdev, ACC_WRITE, 4, len, &val);
+ break;
+
case PCI_BASE_ADDRESS_5:
- ret = do_base_address_access (pdev, ACC_WRITE, bus, dev,
- func, reg, len, &val);
- return ret;
+ ret = do_base_address_access(pdev, ACC_WRITE, 5, len, &val);
break;
case PCI_ROM_ADDRESS:
- ret = do_rom_address_access (pdev, ACC_WRITE, bus, dev,
- func, reg, len, &val);
- return ret;
+ ret = do_rom_address_access(pdev, ACC_WRITE, len, &val);
break;
default:
if ( pdev->flags != ACC_WRITE )
{
- printk("pci write not allowed %02x:%02x:%02x: "
- "reg=0x%02x len=0x%02x val=0x%08x\n",
- bus, dev, func, reg, len, val);
+ INFO("pci write not allowed %02x:%02x:%02x: "
+ "reg=0x%02x len=0x%02x val=0x%08x\n",
+ bus, dev, func, reg, len, val);
ret = -EPERM;
}
else
{
ret = pci_config_write(0, bus, dev, func, reg, len, val);
- DBG("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x val=0x%08x\n",
- bus, dev, func, reg, len, val);
+ VERBOSE_INFO("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x "
+ "val=0x%08x\n", bus, dev, func, reg, len, val);
}
break;
}
@@ -617,11 +613,13 @@ void physdev_init_dom0(struct task_struct *p)
struct pci_dev *dev;
phys_dev_t *pdev;
- printk("Give DOM0 read access to all PCI devices\n");
+ INFO("Give DOM0 read access to all PCI devices\n");
pci_for_each_dev(dev)
{
- /* add device */
+ /* Skip bridges and other peculiarities for now. */
+ if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL )
+ continue;
pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
pdev->dev = dev;
pdev->flags = ACC_WRITE;
diff --git a/xen/include/hypervisor-ifs/event_channel.h b/xen/include/hypervisor-ifs/event_channel.h
index fdc4eaeb1b..20095c91a3 100644
--- a/xen/include/hypervisor-ifs/event_channel.h
+++ b/xen/include/hypervisor-ifs/event_channel.h
@@ -50,6 +50,8 @@ typedef struct evtchn_bind_pirq
{
/* IN parameters. */
int pirq;
+#define BIND_PIRQ__WILL_SHARE 1
+ unsigned int flags; /* BIND_PIRQ__* */
/* OUT parameters. */
int port;
} evtchn_bind_pirq_t;
diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h
index 0e53cc0331..34f54862bd 100644
--- a/xen/include/xen/irq.h
+++ b/xen/include/xen/irq.h
@@ -59,7 +59,7 @@ extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
struct task_struct;
extern int pirq_guest_unmask(struct task_struct *p);
-extern int pirq_guest_bind(struct task_struct *p, int irq);
+extern int pirq_guest_bind(struct task_struct *p, int irq, int will_share);
extern int pirq_guest_unbind(struct task_struct *p, int irq);
#endif /* __XEN_IRQ_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1b8bd10d3c..123b17773f 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -164,7 +164,7 @@ struct task_struct
* domain's event-channel spinlock. Read accesses can also synchronise on
* the lock, but races don't usually matter.
*/
- u16 pirq_to_evtchn[64];
+ u16 pirq_to_evtchn[128];
u16 virq_to_evtchn[NR_VIRQS];
u32 pirq_mask[2];
diff --git a/xenolinux-2.4.25-sparse/arch/xen/Makefile b/xenolinux-2.4.25-sparse/arch/xen/Makefile
index 04117a9e4b..f52b90632f 100644
--- a/xenolinux-2.4.25-sparse/arch/xen/Makefile
+++ b/xenolinux-2.4.25-sparse/arch/xen/Makefile
@@ -49,9 +49,14 @@ endif
HEAD := arch/xen/kernel/head.o arch/xen/kernel/init_task.o
SUBDIRS += arch/xen/kernel arch/xen/mm arch/xen/lib
-SUBDIRS += arch/xen/drivers/console arch/xen/drivers/network
-SUBDIRS += arch/xen/drivers/evtchn arch/xen/drivers/block
-SUBDIRS += arch/xen/drivers/balloon arch/xen/drivers/vnetif
+SUBDIRS += arch/xen/drivers/console
+ifndef CONFIG_XEN_PHYSDEV_ACCESS
+SUBDIRS += arch/xen/drivers/network
+endif
+SUBDIRS += arch/xen/drivers/evtchn
+SUBDIRS += arch/xen/drivers/block
+SUBDIRS += arch/xen/drivers/balloon
+SUBDIRS += arch/xen/drivers/vnetif
ifdef CONFIG_XEN_PRIVILEGED_GUEST
SUBDIRS += arch/xen/drivers/dom0
endif
@@ -60,7 +65,9 @@ CORE_FILES += arch/xen/kernel/kernel.o arch/xen/mm/mm.o
CORE_FILES += arch/xen/drivers/evtchn/drv.o
CORE_FILES += arch/xen/drivers/console/drv.o
CORE_FILES += arch/xen/drivers/block/drv.o
+ifndef CONFIG_XEN_PHYSDEV_ACCESS
CORE_FILES += arch/xen/drivers/network/drv.o
+endif
CORE_FILES += arch/xen/drivers/vnetif/drv.o
ifdef CONFIG_XEN_PRIVILEGED_GUEST
CORE_FILES += arch/xen/drivers/dom0/drv.o
diff --git a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev
index e6a213757c..4b32576d59 100644
--- a/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev
+++ b/xenolinux-2.4.25-sparse/arch/xen/defconfig-physdev
@@ -401,9 +401,6 @@ CONFIG_SCSI_DTC3280=y
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_GDTH is not set
# CONFIG_SCSI_GENERIC_NCR5380 is not set
-# CONFIG_SCSI_GENERIC_NCR53C400 is not set
-# CONFIG_SCSI_G_NCR5380_PORT is not set
-# CONFIG_SCSI_G_NCR5380_MEM is not set
# CONFIG_SCSI_IPS is not set
# CONFIG_SCSI_INITIO is not set
# CONFIG_SCSI_INIA100 is not set
@@ -500,7 +497,43 @@ CONFIG_VORTEX=y
# CONFIG_DEPCA is not set
# CONFIG_HP100 is not set
# CONFIG_NET_ISA is not set
-# CONFIG_NET_PCI is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=y
+# CONFIG_AMD8111_ETH is not set
+# CONFIG_ADAPTEC_STARFIRE is not set
+# CONFIG_AC3200 is not set
+# CONFIG_APRICOT is not set
+# CONFIG_B44 is not set
+# CONFIG_CS89x0 is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_DGRS is not set
+# CONFIG_DM9102 is not set
+# CONFIG_EEPRO100 is not set
+# CONFIG_EEPRO100_PIO is not set
+# CONFIG_E100 is not set
+# CONFIG_LNE390 is not set
+# CONFIG_FEALNX is not set
+# CONFIG_NATSEMI is not set
+# CONFIG_NE2K_PCI is not set
+# CONFIG_NE3210 is not set
+# CONFIG_ES3210 is not set
+# CONFIG_8139CP is not set
+# CONFIG_8139TOO is not set
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
+# CONFIG_SIS900 is not set
+# CONFIG_EPIC100 is not set
+# CONFIG_SUNDANCE is not set
+# CONFIG_SUNDANCE_MMIO is not set
+# CONFIG_TLAN is not set
+# CONFIG_VIA_RHINE is not set
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_WINBOND_840 is not set
# CONFIG_NET_POCKET is not set
#
diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c
index e46307f33d..c65806a7d9 100644
--- a/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c
+++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/evtchn.c
@@ -38,6 +38,8 @@ static int irq_bindcount[NR_IRQS];
/* Upcall to generic IRQ layer. */
extern asmlinkage unsigned int do_IRQ(int irq, struct pt_regs *regs);
+#define VALID_EVTCHN(_chn) ((_chn) != -1)
+
void evtchn_do_upcall(struct pt_regs *regs)
{
unsigned long l1, l2;
@@ -233,15 +235,27 @@ static inline void pirq_unmask_notify(int pirq)
(void)HYPERVISOR_physdev_op(&op);
}
+/*
+ * On startup, if there is no action associated with the IRQ then we are
+ * probing. In this case we should not share with others as it will confuse us.
+ */
+#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
+
static unsigned int startup_pirq(unsigned int irq)
{
evtchn_op_t op;
int evtchn;
- op.cmd = EVTCHNOP_bind_pirq;
- op.u.bind_pirq.pirq = irq;
+ op.cmd = EVTCHNOP_bind_pirq;
+ op.u.bind_pirq.pirq = irq;
+ /* NB. We are happy to share unless we are probing. */
+ op.u.bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
if ( HYPERVISOR_event_channel_op(&op) != 0 )
- panic("Failed to obtain physical IRQ %d\n", irq);
+ {
+ if ( !probing_irq(irq) ) /* Some failures are expected when probing. */
+ printk(KERN_INFO "Failed to obtain physical IRQ %d\n", irq);
+ return 0;
+ }
evtchn = op.u.bind_pirq.port;
evtchn_to_irq[evtchn] = irq;
@@ -258,6 +272,9 @@ static void shutdown_pirq(unsigned int irq)
evtchn_op_t op;
int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+
mask_evtchn(evtchn);
op.cmd = EVTCHNOP_close;
@@ -272,26 +289,38 @@ static void shutdown_pirq(unsigned int irq)
static void enable_pirq(unsigned int irq)
{
- unmask_evtchn(irq_to_evtchn[irq]);
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ unmask_evtchn(evtchn);
pirq_unmask_notify(irq_to_pirq(irq));
}
static void disable_pirq(unsigned int irq)
{
- mask_evtchn(irq_to_evtchn[irq]);
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ mask_evtchn(evtchn);
}
static void ack_pirq(unsigned int irq)
{
- mask_evtchn(irq_to_evtchn[irq]);
- clear_evtchn(irq_to_evtchn[irq]);
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
+ mask_evtchn(evtchn);
+ clear_evtchn(evtchn);
}
static void end_pirq(unsigned int irq)
{
+ int evtchn = irq_to_evtchn[irq];
+ if ( !VALID_EVTCHN(evtchn) )
+ return;
if ( !(irq_desc[irq].status & IRQ_DISABLED) )
{
- unmask_evtchn(irq_to_evtchn[irq]);
+ unmask_evtchn(evtchn);
pirq_unmask_notify(irq_to_pirq(irq));
}
}
diff --git a/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c b/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c
index c88e976125..abb2b398be 100644
--- a/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c
+++ b/xenolinux-2.4.25-sparse/arch/xen/kernel/irq.c
@@ -86,7 +86,25 @@ static unsigned int startup_none(unsigned int irq) { return 0; }
static void disable_none(unsigned int irq) { }
static void ack_none(unsigned int irq)
{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves, it doesnt deserve
+ * a generic callback i think.
+ */
+#if CONFIG_X86
printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+ * irq slots per priority level, and a 'hanging, unacked' IRQ
+ * holds up an irq slot - in excessive cases (when multiple
+ * unexpected vectors occur) that might lock up the APIC
+ * completely.
+ */
+ ack_APIC_irq();
+#endif
+#endif
}
/* startup is the same as "enable", shutdown is same as "disable" */
@@ -322,14 +340,38 @@ static inline void get_irqlock(int cpu)
global_irq_holder = cpu;
}
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
void __global_cli(void)
{
- panic("__global_cli");
+ unsigned int flags;
+
+ __save_flags(flags);
+ if (!flags) {
+ int cpu = smp_processor_id();
+ __cli();
+ if (!local_irq_count(cpu))
+ get_irqlock(cpu);
+ }
}
void __global_sti(void)
{
- panic("__global_sti");
+ int cpu = smp_processor_id();
+
+ if (!local_irq_count(cpu))
+ release_irqlock(cpu);
+ __sti();
}
/*
@@ -341,12 +383,45 @@ void __global_sti(void)
*/
unsigned long __global_save_flags(void)
{
- panic("__global_save_flags");
+ int retval;
+ int local_enabled;
+ unsigned long flags;
+ int cpu = smp_processor_id();
+
+ __save_flags(flags);
+ local_enabled = !flags;
+ /* default to local */
+ retval = 2 + local_enabled;
+
+ /* check for global flags if we're not in an interrupt */
+ if (!local_irq_count(cpu)) {
+ if (local_enabled)
+ retval = 1;
+ if (global_irq_holder == cpu)
+ retval = 0;
+ }
+ return retval;
}
void __global_restore_flags(unsigned long flags)
{
- panic("__global_restore_flags");
+ switch (flags) {
+ case 0:
+ __global_cli();
+ break;
+ case 1:
+ __global_sti();
+ break;
+ case 2:
+ __cli();
+ break;
+ case 3:
+ __sti();
+ break;
+ default:
+ printk("global_restore_flags: %08lx (%08lx)\n",
+ flags, (&flags)[-1]);
+ }
}
#endif