aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jan Beulich <jbeulich@novell.com> 2011-08-16 15:21:46 +0100
committer: Jan Beulich <jbeulich@novell.com> 2011-08-16 15:21:46 +0100
commit: 0258b9db5465123fbcd40e5efa846de1f4a07338 (patch)
tree: 522094f213164acab6fdb5ae713c24bf0882af6c
parent: 638f141e34103b84810ae8b44fbb809457b6e27c (diff)
download: xen-0258b9db5465123fbcd40e5efa846de1f4a07338.tar.gz
xen-0258b9db5465123fbcd40e5efa846de1f4a07338.tar.bz2
xen-0258b9db5465123fbcd40e5efa846de1f4a07338.zip
x86/PCI-MSI: properly determine VF BAR values
As was discussed a couple of times on this list, SR-IOV virtual functions have their BARs read as zero - the physical function's SR-IOV capability structure must be consulted instead. The bogus warnings people complained about are being eliminated with this change.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
xen-unstable changeset: 23766:8d6edc3d26d2
xen-unstable date: Sat Aug 13 10:14:58 2011 +0100

PCI: consolidate interface for adding devices

The functionality of pci_add_device_ext() can be easily folded into pci_add_device(), which eliminates the need to change two functions for future adjustments.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
xen-unstable changeset: 23725:4dc6a9ba90d6
xen-unstable date: Tue Jul 19 14:14:08 2011 +0100
-rw-r--r-- xen/arch/ia64/xen/hypercall.c 12
-rw-r--r-- xen/arch/x86/msi.c 73
-rw-r--r-- xen/arch/x86/physdev.c 8
-rw-r--r-- xen/drivers/passthrough/pci.c 136
-rw-r--r-- xen/include/xen/pci.h 4
-rw-r--r-- xen/include/xen/pci_regs.h 33
6 files changed, 195 insertions, 71 deletions
diff --git a/xen/arch/ia64/xen/hypercall.c b/xen/arch/ia64/xen/hypercall.c
index 6ea15c290a..6e98289530 100644
--- a/xen/arch/ia64/xen/hypercall.c
+++ b/xen/arch/ia64/xen/hypercall.c
@@ -662,8 +662,8 @@ long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
break;
- ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
- break;
+ ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
+ break;
}
case PHYSDEVOP_manage_pci_remove: {
@@ -695,10 +695,10 @@ long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
- ret = pci_add_device_ext(manage_pci_ext.bus,
- manage_pci_ext.devfn,
- &pdev_info);
- break;
+ ret = pci_add_device(manage_pci_ext.bus,
+ manage_pci_ext.devfn,
+ &pdev_info);
+ break;
}
default:
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 81beb290ed..c9eb1f4772 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -521,12 +521,48 @@ static int msi_capability_init(struct pci_dev *dev,
return 0;
}
-static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
+static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf)
{
u8 limit;
- u32 addr;
+ u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0;
- switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
+ if ( vf >= 0 )
+ {
+ struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
+ unsigned int pos = pci_find_ext_capability(0, bus,
+ PCI_DEVFN(slot, func),
+ PCI_EXT_CAP_ID_SRIOV);
+ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
+ u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF);
+ u16 offset = pci_conf_read16(bus, slot, func,
+ pos + PCI_SRIOV_VF_OFFSET);
+ u16 stride = pci_conf_read16(bus, slot, func,
+ pos + PCI_SRIOV_VF_STRIDE);
+
+ if ( !pdev || !pos ||
+ !(ctrl & PCI_SRIOV_CTRL_VFE) ||
+ !(ctrl & PCI_SRIOV_CTRL_MSE) ||
+ !num_vf || !offset || (num_vf > 1 && !stride) ||
+ bir >= PCI_SRIOV_NUM_BARS ||
+ !pdev->vf_rlen[bir] )
+ return 0;
+ base = pos + PCI_SRIOV_BAR;
+ vf -= PCI_BDF(bus, slot, func) + offset;
+ if ( vf < 0 || (vf && vf % stride) )
+ return 0;
+ if ( stride )
+ {
+ if ( vf % stride )
+ return 0;
+ vf /= stride;
+ }
+ if ( vf >= num_vf )
+ return 0;
+ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
+ disp = vf * pdev->vf_rlen[bir];
+ limit = PCI_SRIOV_NUM_BARS;
+ }
+ else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
{
case PCI_HEADER_TYPE_NORMAL:
limit = 6;
@@ -543,7 +579,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
if ( bir >= limit )
return 0;
- addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4);
+ addr = pci_conf_read32(bus, slot, func, base + bir * 4);
if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
return 0;
if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
@@ -551,11 +587,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
addr &= PCI_BASE_ADDRESS_MEM_MASK;
if ( ++bir >= limit )
return 0;
- return addr |
- ((u64)pci_conf_read32(bus, slot, func,
- PCI_BASE_ADDRESS_0 + bir * 4) << 32);
+ return addr + disp +
+ ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32);
}
- return addr & PCI_BASE_ADDRESS_MEM_MASK;
+ return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
}
/**
@@ -628,11 +663,29 @@ static int msix_capability_init(struct pci_dev *dev,
if ( !dev->msix_nr_entries )
{
+ u8 pbus, pslot, pfunc;
+ int vf;
u64 pba_paddr;
u32 pba_offset;
+ if ( !dev->info.is_virtfn )
+ {
+ pbus = bus;
+ pslot = slot;
+ pfunc = func;
+ vf = -1;
+ }
+ else
+ {
+ pbus = dev->info.physfn.bus;
+ pslot = PCI_SLOT(dev->info.physfn.devfn);
+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
+ vf = PCI_BDF2(dev->bus, dev->devfn);
+ }
+
ASSERT(!dev->msix_used_entries);
- WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
+ WARN_ON(msi->table_base !=
+ read_pci_mem_bar(pbus, pslot, pfunc, bir, vf));
dev->msix_nr_entries = nr_entries;
dev->msix_table.first = PFN_DOWN(table_paddr);
@@ -644,7 +697,7 @@ static int msix_capability_init(struct pci_dev *dev,
pba_offset = pci_conf_read32(bus, slot, func,
msix_pba_offset_reg(pos));
bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
- pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
+ pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf);
WARN_ON(!pba_paddr);
pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c
index 3454c03e03..2701a7f9e6 100644
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -472,7 +472,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
break;
- ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+ ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
break;
}
@@ -509,9 +509,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
- ret = pci_add_device_ext(manage_pci_ext.bus,
- manage_pci_ext.devfn,
- &pdev_info);
+ ret = pci_add_device(manage_pci_ext.bus,
+ manage_pci_ext.devfn,
+ &pdev_info);
break;
}
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index da61549177..5b144190cd 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -142,16 +142,101 @@ void pci_enable_acs(struct pci_dev *pdev)
pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
}
-int pci_add_device(u8 bus, u8 devfn)
+int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
{
struct pci_dev *pdev;
+ unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
+ const char *pdev_type;
int ret = -ENOMEM;
+ if (!info)
+ pdev_type = "device";
+ else if (info->is_extfn)
+ pdev_type = "extended function";
+ else if (info->is_virtfn)
+ {
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
+ spin_unlock(&pcidevs_lock);
+ if ( !pdev )
+ pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
+ pdev_type = "virtual function";
+ }
+ else
+ return -EINVAL;
+
spin_lock(&pcidevs_lock);
pdev = alloc_pdev(bus, devfn);
if ( !pdev )
goto out;
+ if ( info )
+ pdev->info = *info;
+ else if ( !pdev->vf_rlen[0] )
+ {
+ unsigned int pos = pci_find_ext_capability(0, bus, devfn,
+ PCI_EXT_CAP_ID_SRIOV);
+ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
+
+ if ( !pos )
+ /* Nothing */;
+ else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
+ {
+ unsigned int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
+ for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
+ {
+ unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
+ u32 bar = pci_conf_read32(bus, slot, func, idx);
+ u32 hi = 0;
+
+ if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
+ PCI_BASE_ADDRESS_SPACE_IO )
+ {
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf"
+ " BAR%u in IO space\n",
+ bus, slot, func, i);
+ continue;
+ }
+ pci_conf_write32(bus, slot, func, idx, ~0);
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
+ {
+ if ( i >= PCI_SRIOV_NUM_BARS )
+ {
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with"
+ " 64-bit vf BAR in last slot\n",
+ bus, slot, func);
+ break;
+ }
+ hi = pci_conf_read32(bus, slot, func, idx + 4);
+ pci_conf_write32(bus, slot, func, idx + 4, ~0);
+ }
+ pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) &
+ PCI_BASE_ADDRESS_MEM_MASK;
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
+ {
+ pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func,
+ idx + 4) << 32;
+ pci_conf_write32(bus, slot, func, idx + 4, hi);
+ }
+ else if ( pdev->vf_rlen[i] )
+ pdev->vf_rlen[i] |= (u64)~0 << 32;
+ pci_conf_write32(bus, slot, func, idx, bar);
+ pdev->vf_rlen[i] = -pdev->vf_rlen[i];
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
+ ++i;
+ }
+ }
+ else
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual"
+ " functions already enabled (%04x)\n",
+ bus, slot, func, ctrl);
+ }
+
ret = 0;
if ( !pdev->domain )
{
@@ -169,8 +254,8 @@ int pci_add_device(u8 bus, u8 devfn)
out:
spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
+ printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
+ bus, slot, func);
return ret;
}
@@ -197,51 +282,6 @@ int pci_remove_device(u8 bus, u8 devfn)
return ret;
}
-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info)
-{
- int ret;
- char *pdev_type;
- struct pci_dev *pdev;
-
- if (info->is_extfn)
- pdev_type = "Extended Function";
- else if (info->is_virtfn)
- pdev_type = "Virtual Function";
- else
- return -EINVAL;
-
-
- ret = -ENOMEM;
- spin_lock(&pcidevs_lock);
- pdev = alloc_pdev(bus, devfn);
- if ( !pdev )
- goto out;
-
- pdev->info = *info;
-
- ret = 0;
- if ( !pdev->domain )
- {
- pdev->domain = dom0;
- ret = iommu_add_device(pdev);
- if ( ret )
- {
- pdev->domain = NULL;
- goto out;
- }
-
- list_add(&pdev->domain_list, &dom0->arch.pdev_list);
- pci_enable_acs(pdev);
- }
-
-out:
- spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-
- return ret;
-}
-
static void pci_clean_dpci_irqs(struct domain *d)
{
struct hvm_irq_dpci *hvm_irq_dpci = NULL;
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 67dea10e59..e1ca05ad5e 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -57,6 +57,7 @@ struct pci_dev {
const u8 bus;
const u8 devfn;
struct pci_dev_info info;
+ u64 vf_rlen[6];
};
#define for_each_pdev(domain, pdev) \
@@ -86,9 +87,8 @@ struct pci_dev *pci_lock_pdev(int bus, int devfn);
struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
void pci_release_devices(struct domain *d);
-int pci_add_device(u8 bus, u8 devfn);
+int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *);
int pci_remove_device(u8 bus, u8 devfn);
-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info);
struct pci_dev *pci_get_pdev(int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
diff --git a/xen/include/xen/pci_regs.h b/xen/include/xen/pci_regs.h
index dfeead1aca..aa5c91204d 100644
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -425,7 +425,7 @@
#define PCI_EXT_CAP_ID_ACS 13
#define PCI_EXT_CAP_ID_ARI 14
#define PCI_EXT_CAP_ID_ATS 15
-#define PCI_EXT_CAP_ID_IOV 16
+#define PCI_EXT_CAP_ID_SRIOV 16
/* Advanced Error Reporting */
#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
@@ -545,4 +545,35 @@
#define PCI_ACS_CTRL 0x06 /* ACS Control Register */
#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
+#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */
+#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */
+#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */
+#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */
+#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */
+#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */
+#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */
+#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
+#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */
+#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */
+#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */
+#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */
+#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/
+#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */
+#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */
+#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */
+#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
+#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
+#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
+
#endif /* LINUX_PCI_REGS_H */