diff options
author | Jan Beulich <jbeulich@novell.com> | 2011-08-16 15:21:46 +0100 |
---|---|---|
committer | Jan Beulich <jbeulich@novell.com> | 2011-08-16 15:21:46 +0100 |
commit | 0258b9db5465123fbcd40e5efa846de1f4a07338 (patch) | |
tree | 522094f213164acab6fdb5ae713c24bf0882af6c | |
parent | 638f141e34103b84810ae8b44fbb809457b6e27c (diff) | |
download | xen-0258b9db5465123fbcd40e5efa846de1f4a07338.tar.gz xen-0258b9db5465123fbcd40e5efa846de1f4a07338.tar.bz2 xen-0258b9db5465123fbcd40e5efa846de1f4a07338.zip |
x86/PCI-MSI: properly determine VF BAR values
As was discussed a couple of times on this list, SR-IOV virtual
functions have their BARs read as zero - the physical function's
SR-IOV capability structure must be consulted instead. The bogus
warnings people complained about are being eliminated with this
change.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
xen-unstable changeset: 23766:8d6edc3d26d2
xen-unstable date: Sat Aug 13 10:14:58 2011 +0100
PCI: consolidate interface for adding devices
The functionality of pci_add_device_ext() can be easily folded into
pci_add_device(); doing so eliminates the need to change two functions
for future adjustments.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
xen-unstable changeset: 23725:4dc6a9ba90d6
xen-unstable date: Tue Jul 19 14:14:08 2011 +0100
-rw-r--r-- | xen/arch/ia64/xen/hypercall.c | 12 | ||||
-rw-r--r-- | xen/arch/x86/msi.c | 73 | ||||
-rw-r--r-- | xen/arch/x86/physdev.c | 8 | ||||
-rw-r--r-- | xen/drivers/passthrough/pci.c | 136 | ||||
-rw-r--r-- | xen/include/xen/pci.h | 4 | ||||
-rw-r--r-- | xen/include/xen/pci_regs.h | 33 |
6 files changed, 195 insertions, 71 deletions
diff --git a/xen/arch/ia64/xen/hypercall.c b/xen/arch/ia64/xen/hypercall.c index 6ea15c290a..6e98289530 100644 --- a/xen/arch/ia64/xen/hypercall.c +++ b/xen/arch/ia64/xen/hypercall.c @@ -662,8 +662,8 @@ long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_add_device(manage_pci.bus, manage_pci.devfn); - break; + ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL); + break; } case PHYSDEVOP_manage_pci_remove: { @@ -695,10 +695,10 @@ long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) pdev_info.is_virtfn = manage_pci_ext.is_virtfn; pdev_info.physfn.bus = manage_pci_ext.physfn.bus; pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn; - ret = pci_add_device_ext(manage_pci_ext.bus, - manage_pci_ext.devfn, - &pdev_info); - break; + ret = pci_add_device(manage_pci_ext.bus, + manage_pci_ext.devfn, + &pdev_info); + break; } default: diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c index 81beb290ed..c9eb1f4772 100644 --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c @@ -521,12 +521,48 @@ static int msi_capability_init(struct pci_dev *dev, return 0; } -static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir) +static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf) { u8 limit; - u32 addr; + u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0; - switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f ) + if ( vf >= 0 ) + { + struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func)); + unsigned int pos = pci_find_ext_capability(0, bus, + PCI_DEVFN(slot, func), + PCI_EXT_CAP_ID_SRIOV); + u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); + u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF); + u16 offset = pci_conf_read16(bus, slot, func, + pos + PCI_SRIOV_VF_OFFSET); + u16 stride = pci_conf_read16(bus, slot, func, + pos + PCI_SRIOV_VF_STRIDE); + + if ( !pdev || !pos || + !(ctrl & PCI_SRIOV_CTRL_VFE) || + !(ctrl & 
PCI_SRIOV_CTRL_MSE) || + !num_vf || !offset || (num_vf > 1 && !stride) || + bir >= PCI_SRIOV_NUM_BARS || + !pdev->vf_rlen[bir] ) + return 0; + base = pos + PCI_SRIOV_BAR; + vf -= PCI_BDF(bus, slot, func) + offset; + if ( vf < 0 || (vf && vf % stride) ) + return 0; + if ( stride ) + { + if ( vf % stride ) + return 0; + vf /= stride; + } + if ( vf >= num_vf ) + return 0; + BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); + disp = vf * pdev->vf_rlen[bir]; + limit = PCI_SRIOV_NUM_BARS; + } + else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f ) { case PCI_HEADER_TYPE_NORMAL: limit = 6; @@ -543,7 +579,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir) if ( bir >= limit ) return 0; - addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4); + addr = pci_conf_read32(bus, slot, func, base + bir * 4); if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) return 0; if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 ) @@ -551,11 +587,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir) addr &= PCI_BASE_ADDRESS_MEM_MASK; if ( ++bir >= limit ) return 0; - return addr | - ((u64)pci_conf_read32(bus, slot, func, - PCI_BASE_ADDRESS_0 + bir * 4) << 32); + return addr + disp + + ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32); } - return addr & PCI_BASE_ADDRESS_MEM_MASK; + return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp; } /** @@ -628,11 +663,29 @@ static int msix_capability_init(struct pci_dev *dev, if ( !dev->msix_nr_entries ) { + u8 pbus, pslot, pfunc; + int vf; u64 pba_paddr; u32 pba_offset; + if ( !dev->info.is_virtfn ) + { + pbus = bus; + pslot = slot; + pfunc = func; + vf = -1; + } + else + { + pbus = dev->info.physfn.bus; + pslot = PCI_SLOT(dev->info.physfn.devfn); + pfunc = PCI_FUNC(dev->info.physfn.devfn); + vf = PCI_BDF2(dev->bus, dev->devfn); + } + ASSERT(!dev->msix_used_entries); - WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, 
bir)); + WARN_ON(msi->table_base != + read_pci_mem_bar(pbus, pslot, pfunc, bir, vf)); dev->msix_nr_entries = nr_entries; dev->msix_table.first = PFN_DOWN(table_paddr); @@ -644,7 +697,7 @@ static int msix_capability_init(struct pci_dev *dev, pba_offset = pci_conf_read32(bus, slot, func, msix_pba_offset_reg(pos)); bir = (u8)(pba_offset & PCI_MSIX_BIRMASK); - pba_paddr = read_pci_mem_bar(bus, slot, func, bir); + pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf); WARN_ON(!pba_paddr); pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK; diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c index 3454c03e03..2701a7f9e6 100644 --- a/xen/arch/x86/physdev.c +++ b/xen/arch/x86/physdev.c @@ -472,7 +472,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) break; - ret = pci_add_device(manage_pci.bus, manage_pci.devfn); + ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL); break; } @@ -509,9 +509,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg) pdev_info.is_virtfn = manage_pci_ext.is_virtfn; pdev_info.physfn.bus = manage_pci_ext.physfn.bus; pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn; - ret = pci_add_device_ext(manage_pci_ext.bus, - manage_pci_ext.devfn, - &pdev_info); + ret = pci_add_device(manage_pci_ext.bus, + manage_pci_ext.devfn, + &pdev_info); break; } diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index da61549177..5b144190cd 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -142,16 +142,101 @@ void pci_enable_acs(struct pci_dev *pdev) pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl); } -int pci_add_device(u8 bus, u8 devfn) +int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info) { struct pci_dev *pdev; + unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn); + const char *pdev_type; int ret = -ENOMEM; + if (!info) + pdev_type = "device"; + else if (info->is_extfn) + pdev_type = 
"extended function"; + else if (info->is_virtfn) + { + spin_lock(&pcidevs_lock); + pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn); + spin_unlock(&pcidevs_lock); + if ( !pdev ) + pci_add_device(info->physfn.bus, info->physfn.devfn, NULL); + pdev_type = "virtual function"; + } + else + return -EINVAL; + spin_lock(&pcidevs_lock); pdev = alloc_pdev(bus, devfn); if ( !pdev ) goto out; + if ( info ) + pdev->info = *info; + else if ( !pdev->vf_rlen[0] ) + { + unsigned int pos = pci_find_ext_capability(0, bus, devfn, + PCI_EXT_CAP_ID_SRIOV); + u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); + + if ( !pos ) + /* Nothing */; + else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) ) + { + unsigned int i; + + BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); + for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i ) + { + unsigned int idx = pos + PCI_SRIOV_BAR + i * 4; + u32 bar = pci_conf_read32(bus, slot, func, idx); + u32 hi = 0; + + if ( (bar & PCI_BASE_ADDRESS_SPACE) == + PCI_BASE_ADDRESS_SPACE_IO ) + { + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf" + " BAR%u in IO space\n", + bus, slot, func, i); + continue; + } + pci_conf_write32(bus, slot, func, idx, ~0); + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == + PCI_BASE_ADDRESS_MEM_TYPE_64 ) + { + if ( i >= PCI_SRIOV_NUM_BARS ) + { + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with" + " 64-bit vf BAR in last slot\n", + bus, slot, func); + break; + } + hi = pci_conf_read32(bus, slot, func, idx + 4); + pci_conf_write32(bus, slot, func, idx + 4, ~0); + } + pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) & + PCI_BASE_ADDRESS_MEM_MASK; + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == + PCI_BASE_ADDRESS_MEM_TYPE_64 ) + { + pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func, + idx + 4) << 32; + pci_conf_write32(bus, slot, func, idx + 4, hi); + } + else if ( pdev->vf_rlen[i] ) + pdev->vf_rlen[i] |= (u64)~0 << 32; + pci_conf_write32(bus, slot, func, idx, bar); + 
pdev->vf_rlen[i] = -pdev->vf_rlen[i]; + if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == + PCI_BASE_ADDRESS_MEM_TYPE_64 ) + ++i; + } + } + else + printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual" + " functions already enabled (%04x)\n", + bus, slot, func, ctrl); + } + ret = 0; if ( !pdev->domain ) { @@ -169,8 +254,8 @@ int pci_add_device(u8 bus, u8 devfn) out: spin_unlock(&pcidevs_lock); - printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus, - PCI_SLOT(devfn), PCI_FUNC(devfn)); + printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, + bus, slot, func); return ret; } @@ -197,51 +282,6 @@ int pci_remove_device(u8 bus, u8 devfn) return ret; } -int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info) -{ - int ret; - char *pdev_type; - struct pci_dev *pdev; - - if (info->is_extfn) - pdev_type = "Extended Function"; - else if (info->is_virtfn) - pdev_type = "Virtual Function"; - else - return -EINVAL; - - - ret = -ENOMEM; - spin_lock(&pcidevs_lock); - pdev = alloc_pdev(bus, devfn); - if ( !pdev ) - goto out; - - pdev->info = *info; - - ret = 0; - if ( !pdev->domain ) - { - pdev->domain = dom0; - ret = iommu_add_device(pdev); - if ( ret ) - { - pdev->domain = NULL; - goto out; - } - - list_add(&pdev->domain_list, &dom0->arch.pdev_list); - pci_enable_acs(pdev); - } - -out: - spin_unlock(&pcidevs_lock); - printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - - return ret; -} - static void pci_clean_dpci_irqs(struct domain *d) { struct hvm_irq_dpci *hvm_irq_dpci = NULL; diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index 67dea10e59..e1ca05ad5e 100644 --- a/xen/include/xen/pci.h +++ b/xen/include/xen/pci.h @@ -57,6 +57,7 @@ struct pci_dev { const u8 bus; const u8 devfn; struct pci_dev_info info; + u64 vf_rlen[6]; }; #define for_each_pdev(domain, pdev) \ @@ -86,9 +87,8 @@ struct pci_dev *pci_lock_pdev(int bus, int devfn); struct pci_dev *pci_lock_domain_pdev(struct domain *d, 
int bus, int devfn); void pci_release_devices(struct domain *d); -int pci_add_device(u8 bus, u8 devfn); +int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *); int pci_remove_device(u8 bus, u8 devfn); -int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info); struct pci_dev *pci_get_pdev(int bus, int devfn); struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn); diff --git a/xen/include/xen/pci_regs.h b/xen/include/xen/pci_regs.h index dfeead1aca..aa5c91204d 100644 --- a/xen/include/xen/pci_regs.h +++ b/xen/include/xen/pci_regs.h @@ -425,7 +425,7 @@ #define PCI_EXT_CAP_ID_ACS 13 #define PCI_EXT_CAP_ID_ARI 14 #define PCI_EXT_CAP_ID_ATS 15 -#define PCI_EXT_CAP_ID_IOV 16 +#define PCI_EXT_CAP_ID_SRIOV 16 /* Advanced Error Reporting */ #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ @@ -545,4 +545,35 @@ #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ +/* Single Root I/O Virtualization */ +#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ +#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ +#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ +#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ +#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ +#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ +#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ +#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ +#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */ +#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ +#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ +#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ +#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ +#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ +#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ +#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ +#define PCI_SRIOV_VF_STRIDE 0x16 /* Following 
VF Stride */ +#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ +#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ +#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ +#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ +#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ +#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ +#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ +#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ +#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ +#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ +#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ +#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ + #endif /* LINUX_PCI_REGS_H */ |