diff -pruN ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c ./arch/i386/pci/mmconfig.c --- ../orig-linux-2.6.16.29/arch/i386/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100 +++ ./arch/i386/pci/mmconfig.c 2006-09-21 09:35:27.000000000 +0100 @@ -12,14 +12,22 @@ #include #include #include +#include #include "pci.h" +/* aperture is up to 256MB but BIOS may reserve less */ +#define MMCONFIG_APER_MIN (2 * 1024*1024) +#define MMCONFIG_APER_MAX (256 * 1024*1024) + +/* Assume systems with more busses have correct MCFG */ +#define MAX_CHECK_BUS 16 + #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) /* The base address of the last MMCONFIG device accessed */ static u32 mmcfg_last_accessed_device; -static DECLARE_BITMAP(fallback_slots, 32); +static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32); /* * Functions for accessing PCI configuration space with MMCONFIG accesses @@ -29,8 +37,8 @@ static u32 get_base_addr(unsigned int se int cfg_num = -1; struct acpi_table_mcfg_config *cfg; - if (seg == 0 && bus == 0 && - test_bit(PCI_SLOT(devfn), fallback_slots)) + if (seg == 0 && bus < MAX_CHECK_BUS && + test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) return 0; while (1) { @@ -74,8 +82,10 @@ static int pci_mmcfg_read(unsigned int s unsigned long flags; u32 base; - if (!value || (bus > 255) || (devfn > 255) || (reg > 4095)) + if ((bus > 255) || (devfn > 255) || (reg > 4095)) { + *value = -1; return -EINVAL; + } base = get_base_addr(seg, bus, devfn); if (!base) @@ -146,30 +156,66 @@ static struct pci_raw_ops pci_mmcfg = { Normally this can be expressed in the MCFG by not listing them and assigning suitable _SEGs, but this isn't implemented in some BIOS. Instead try to discover all devices on bus 0 that are unreachable using MM - and fallback for them. - We only do this for bus 0/seg 0 */ + and fallback for them. */ static __init void unreachable_devices(void) { - int i; + int i, k; unsigned long flags; - for (i = 0; i < 32; i++) { - u32 val1; - u32 addr; + for (k = 0; k < MAX_CHECK_BUS; k++) { + for (i = 0; i < 32; i++) { + u32 val1; + u32 addr; + + pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + + /* Locking probably not needed, but safer */ + spin_lock_irqsave(&pci_config_lock, flags); + addr = get_base_addr(0, k, PCI_DEVFN(i, 0)); + if (addr != 0) + pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0)); + if (addr == 0 || + readl((u32 __iomem *)mmcfg_virt_addr) != val1) { + set_bit(i, fallback_slots); + printk(KERN_NOTICE + "PCI: No mmconfig possible on %x:%x\n", k, i); + } + spin_unlock_irqrestore(&pci_config_lock, flags); + } + } +} - pci_conf1_read(0, 0, PCI_DEVFN(i, 0), 0, 4, &val1); - if (val1 == 0xffffffff) +/* NB. Ripped from arch/i386/kernel/setup.c for this Xen bugfix patch. */ +#ifdef CONFIG_XEN +extern struct e820map machine_e820; +#define e820 machine_e820 +#endif +static int __init +e820_all_mapped(unsigned long s, unsigned long e, unsigned type) +{ + u64 start = s; + u64 end = e; + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) continue; - - /* Locking probably not needed, but safer */ - spin_lock_irqsave(&pci_config_lock, flags); - addr = get_base_addr(0, 0, PCI_DEVFN(i, 0)); - if (addr != 0) - pci_exp_set_dev_base(addr, 0, PCI_DEVFN(i, 0)); - if (addr == 0 || readl((u32 __iomem *)mmcfg_virt_addr) != val1) - set_bit(i, fallback_slots); - spin_unlock_irqrestore(&pci_config_lock, flags); + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full + * coverage */ + if (start >= end) + return 1; /* we're done */ } + return 0; } static int __init pci_mmcfg_init(void) @@ -183,6 +229,15 @@ static int __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) goto out; + if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, + E820_RESERVED)) { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", + pci_mmcfg_config[0].base_address); + printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + goto out; + } + printk(KERN_INFO "PCI: Using MMCONFIG\n"); raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; diff -pruN ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c ./arch/x86_64/pci/mmconfig.c --- ../orig-linux-2.6.16.29/arch/x86_64/pci/mmconfig.c 2006-09-12 19:02:10.000000000 +0100 +++ ./arch/x86_64/pci/mmconfig.c 2006-09-21 09:35:40.000000000 +0100 @@ -9,11 +9,19 @@ #include #include #include +#include + #include "pci.h" -#define MMCONFIG_APER_SIZE (256*1024*1024) +/* aperture is up to 256MB but BIOS may reserve less */ +#define MMCONFIG_APER_MIN (2 * 1024*1024) +#define MMCONFIG_APER_MAX (256 * 1024*1024) + +/* Verify the first 16 busses. We assume that systems with more busses + get MCFG right. */ +#define MAX_CHECK_BUS 16 -static DECLARE_BITMAP(fallback_slots, 32); +static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS); /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { @@ -55,7 +63,8 @@ static char __iomem *get_virt(unsigned i static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { char __iomem *addr; - if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) + if (seg == 0 && bus < MAX_CHECK_BUS && + test_bit(32*bus + PCI_SLOT(devfn), fallback_slots)) return NULL; addr = get_virt(seg, bus); if (!addr) @@ -69,8 +78,10 @@ static int pci_mmcfg_read(unsigned int s char __iomem *addr; /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ - if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) + if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) { + *value = -1; return -EINVAL; + } addr = pci_dev_base(seg, bus, devfn); if (!addr) @@ -129,23 +140,56 @@ static struct pci_raw_ops pci_mmcfg = { Normally this can be expressed in the MCFG by not listing them and assigning suitable _SEGs, but this isn't implemented in some BIOS. Instead try to discover all devices on bus 0 that are unreachable using MM - and fallback for them. - We only do this for bus 0/seg 0 */ + and fallback for them. */ static __init void unreachable_devices(void) { - int i; - for (i = 0; i < 32; i++) { - u32 val1; - char __iomem *addr; + int i, k; + /* Use the max bus number from ACPI here? */ + for (k = 0; k < MAX_CHECK_BUS; k++) { + for (i = 0; i < 32; i++) { + u32 val1; + char __iomem *addr; + + pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + addr = pci_dev_base(0, k, PCI_DEVFN(i, 0)); + if (addr == NULL|| readl(addr) != val1) { + set_bit(i + 32*k, fallback_slots); + printk(KERN_NOTICE + "PCI: No mmconfig possible on device %x:%x\n", + k, i); + } + } + } +} - pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); - if (val1 == 0xffffffff) +/* NB. Ripped from arch/x86_64/kernel/e820.c for this Xen bugfix patch. */ +#ifdef CONFIG_XEN +extern struct e820map machine_e820; +#define e820 machine_e820 +#endif +static int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type) +{ + int i; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + if (type && ei->type != type) continue; - addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); - if (addr == NULL|| readl(addr) != val1) { - set_bit(i, &fallback_slots); - } + /* is the region (part) in overlap with the current region ?*/ + if (ei->addr >= end || ei->addr + ei->size <= start) + continue; + + /* if the region is at the beginning of we move + * start to the end of the region since it's ok until there + */ + if (ei->addr <= start) + start = ei->addr + ei->size; + /* if start is now at or beyond end, we're done, full coverage */ + if (start >= end) + return 1; /* we're done */ } + return 0; } static int __init pci_mmcfg_init(void) @@ -161,6 +205,15 @@ static int __init pci_mmcfg_init(void) (pci_mmcfg_config[0].base_address == 0)) return 0; + if (!e820_all_mapped(pci_mmcfg_config[0].base_address, + pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN, + E820_RESERVED)) { + printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n", + pci_mmcfg_config[0].base_address); + printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); + return 0; + } + /* RED-PEN i386 doesn't do _nocache right now */ pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { @@ -169,7 +222,8 @@ static int __init pci_mmcfg_init(void) } for (i = 0; i < pci_mmcfg_config_num; ++i) { pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; - pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE); + pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, + MMCONFIG_APER_MAX); if (!pci_mmcfg_virt[i].virt) { printk("PCI: Cannot map mmconfig aperture for segment %d\n", pci_mmcfg_config[i].pci_segment_group_number);