From 8d82d92ea697145c32bb36d9f39afd5bb0927bc2 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Wed, 27 Sep 2017 10:34:46 +0800
Subject: [PATCH] vfio: support layerscape

This is a integrated patch for layerscape vfio support.

Signed-off-by: Bharat Bhushan <Bharat.Bhushan@nxp.com>
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
---
 drivers/vfio/Kconfig                      |   1 +
 drivers/vfio/Makefile                     |   1 +
 drivers/vfio/fsl-mc/Kconfig               |   9 +
 drivers/vfio/fsl-mc/Makefile              |   2 +
 drivers/vfio/fsl-mc/vfio_fsl_mc.c         | 753 ++++++++++++++++++++++++++++++
 drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c    | 199 ++++++++
 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h |  55 +++
 drivers/vfio/vfio_iommu_type1.c           |  39 +-
 include/uapi/linux/vfio.h                 |   1 +
 9 files changed, 1058 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vfio/fsl-mc/Kconfig
 create mode 100644 drivers/vfio/fsl-mc/Makefile
 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc.c
 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
 create mode 100644 drivers/vfio/fsl-mc/vfio_fsl_mc_private.h

--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -48,4 +48,5 @@ menuconfig VFIO_NOIOMMU
 
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
+source "drivers/vfio/fsl-mc/Kconfig"
 source "virt/lib/Kconfig"
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vf
 obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
+obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -0,0 +1,9 @@
+config VFIO_FSL_MC
+	tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
+	depends on VFIO && FSL_MC_BUS && EVENTFD
+	help
+	  Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
+	  (Management Complex) devices. This is required to passthrough
+	  fsl-mc bus devices using the VFIO framework.
+
+	  If you don't know what to do here, say N.
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Makefile
@@ -0,0 +1,2 @@
+vfio-fsl_mc-y := vfio_fsl_mc.o
+obj-$(CONFIG_VFIO_FSL_MC) += vfio_fsl_mc.o vfio_fsl_mc_intr.o
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -0,0 +1,753 @@
+/*
+ * Freescale Management Complex (MC) device passthrough using VFIO
+ *
+ * Copyright (C) 2013-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016-2017 NXP
+ * Author: Bharat Bhushan <bharat.bhushan@nxp.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/delay.h>
+
+#include "../../staging/fsl-mc/include/mc.h"
+#include "../../staging/fsl-mc/include/mc-bus.h"
+#include "../../staging/fsl-mc/include/mc-sys.h"
+#include "../../staging/fsl-mc/bus/dprc-cmd.h"
+
+#include "vfio_fsl_mc_private.h"
+
+#define DRIVER_VERSION	"0.10"
+#define DRIVER_AUTHOR	"Bharat Bhushan <bharat.bhushan@nxp.com>"
+#define DRIVER_DESC	"VFIO for FSL-MC devices - User Level meta-driver"
+
+static DEFINE_MUTEX(driver_lock);
+
+/* FSl-MC device regions (address and size) are aligned to 64K.
+ * While MC firmware reports size less than 64K for some objects (it actually
+ * reports size which does not include reserved space beyond valid bytes).
+ * Align the size to PAGE_SIZE for userspace to mmap.
+ */
+static size_t aligned_region_size(struct fsl_mc_device *mc_dev, int index)
+{
+	size_t size;
+
+	size = resource_size(&mc_dev->regions[index]);
+	return PAGE_ALIGN(size);
+}
+
+static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int count = mc_dev->obj_desc.region_count;
+	int i;
+
+	vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region),
+				GFP_KERNEL);
+	if (!vdev->regions)
+		return -ENOMEM;
+
+	for (i = 0; i < mc_dev->obj_desc.region_count; i++) {
+		vdev->regions[i].addr = mc_dev->regions[i].start;
+		vdev->regions[i].size = aligned_region_size(mc_dev, i);
+		vdev->regions[i].type = VFIO_FSL_MC_REGION_TYPE_MMIO;
+		if (mc_dev->regions[i].flags & IORESOURCE_CACHEABLE)
+			vdev->regions[i].type |=
+					VFIO_FSL_MC_REGION_TYPE_CACHEABLE;
+		vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
+		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+		if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
+			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
+	}
+
+	vdev->num_regions = mc_dev->obj_desc.region_count;
+	return 0;
+}
+
+static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
+{
+	int i;
+
+	for (i = 0; i < vdev->num_regions; i++)
+		iounmap(vdev->regions[i].ioaddr);
+
+	vdev->num_regions = 0;
+	kfree(vdev->regions);
+}
+
+static int vfio_fsl_mc_open(void *device_data)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&driver_lock);
+	if (!vdev->refcnt) {
+		ret = vfio_fsl_mc_regions_init(vdev);
+		if (ret)
+			goto error_region_init;
+
+		ret = vfio_fsl_mc_irqs_init(vdev);
+		if (ret)
+			goto error_irq_init;
+	}
+
+	vdev->refcnt++;
+	mutex_unlock(&driver_lock);
+	return 0;
+
+error_irq_init:
+	vfio_fsl_mc_regions_cleanup(vdev);
+error_region_init:
+	mutex_unlock(&driver_lock);
+	if (ret)
+		module_put(THIS_MODULE);
+
+	return ret;
+}
+
+static void vfio_fsl_mc_release(void *device_data)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+	mutex_lock(&driver_lock);
+
+	if (!(--vdev->refcnt)) {
+		vfio_fsl_mc_regions_cleanup(vdev);
+		vfio_fsl_mc_irqs_cleanup(vdev);
+	}
+
+	if (strcmp(mc_dev->obj_desc.type, "dprc") == 0)
+		dprc_reset_container(mc_dev->mc_io, 0, mc_dev->mc_handle,
+				     mc_dev->obj_desc.id);
+
+	mutex_unlock(&driver_lock);
+
+	module_put(THIS_MODULE);
+}
+
+static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
+			      unsigned long arg)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	unsigned long minsz;
+
+	if (WARN_ON(!mc_dev))
+		return -ENODEV;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		info.flags = VFIO_DEVICE_FLAGS_FSL_MC;
+		info.num_regions = mc_dev->obj_desc.region_count;
+		info.num_irqs = mc_dev->obj_desc.irq_count;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.index >= vdev->num_regions)
+			return -EINVAL;
+
+		/* map offset to the physical address  */
+		info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index);
+		info.size = vdev->regions[info.index].size;
+		info.flags = vdev->regions[info.index].flags;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.index >= mc_dev->obj_desc.irq_count)
+			return -EINVAL;
+
+		if (vdev->mc_irqs != NULL) {
+			info.flags = vdev->mc_irqs[info.index].flags;
+			info.count = vdev->mc_irqs[info.index].count;
+		} else {
+			/*
+			 * If IRQs are not initialized then these can not
+			 * be configuted and used by user-space/
+			 */
+			info.flags = 0;
+			info.count = 0;
+		}
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		u8 *data = NULL;
+		int ret = 0;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz)
+			return -EINVAL;
+
+		if (hdr.index >= mc_dev->obj_desc.irq_count)
+			return -EINVAL;
+
+		if (hdr.start != 0 || hdr.count > 1)
+			return -EINVAL;
+
+		if (hdr.count == 0 &&
+		    (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE) ||
+		    !(hdr.flags & VFIO_IRQ_SET_ACTION_TRIGGER)))
+			return -EINVAL;
+
+		if (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+				  VFIO_IRQ_SET_ACTION_TYPE_MASK))
+			return -EINVAL;
+
+		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
+			size_t size;
+
+			if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
+				size = sizeof(uint8_t);
+			else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
+				size = sizeof(int32_t);
+			else
+				return -EINVAL;
+
+			if (hdr.argsz - minsz < hdr.count * size)
+				return -EINVAL;
+
+			data = memdup_user((void __user *)(arg + minsz),
+					   hdr.count * size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}
+
+		ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags,
+						 hdr.index, hdr.start,
+						 hdr.count, data);
+		return ret;
+	}
+	case VFIO_DEVICE_RESET:
+	{
+		return -EINVAL;
+	}
+	default:
+		return -EINVAL;
+	}
+}
+
+static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+	struct vfio_fsl_mc_region *region;
+	uint64_t data[8];
+	int i;
+
+	/* Read ioctl supported only for DPRC device */
+	if (strcmp(vdev->mc_dev->obj_desc.type, "dprc"))
+		return -EINVAL;
+
+	if (index >= vdev->num_regions)
+		return -EINVAL;
+
+	region = &vdev->regions[index];
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
+		return -EINVAL;
+
+	if (!region->type & VFIO_FSL_MC_REGION_TYPE_MMIO)
+		return -EINVAL;
+
+	if (!region->ioaddr) {
+		region->ioaddr = ioremap_nocache(region->addr, region->size);
+		if (!region->ioaddr)
+			return -ENOMEM;
+	}
+
+	if (count != 64 || off != 0)
+		return -EINVAL;
+
+	for (i = 7; i >= 0; i--)
+		data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
+
+	if (copy_to_user(buf, data, 64))
+		return -EFAULT;
+
+	return count;
+}
+
+#define MC_CMD_COMPLETION_TIMEOUT_MS	5000
+#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
+
+static int vfio_fsl_mc_dprc_wait_for_response(void __iomem *ioaddr)
+{
+	enum mc_cmd_status status;
+	unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
+
+	for (;;) {
+		u64 header;
+		struct mc_cmd_header *resp_hdr;
+
+		__iormb();
+		header = readq(ioaddr);
+		__iormb();
+
+		resp_hdr = (struct mc_cmd_header *)&header;
+		status = (enum mc_cmd_status)resp_hdr->status;
+		if (status != MC_CMD_STATUS_READY)
+			break;
+
+		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
+		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
+		if (timeout_usecs == 0)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
+{
+	int i;
+
+	/* Write at command header in the end */
+	for (i = 7; i >= 0; i--)
+		writeq(cmd_data[i], ioaddr + i * sizeof(uint64_t));
+
+	/* Wait for response before returning to user-space
+	 * This can be optimized in future to even prepare response
+	 * before returning to user-space and avoid read ioctl.
+	 */
+	return vfio_fsl_mc_dprc_wait_for_response(ioaddr);
+}
+
+static int vfio_handle_dprc_commands(void __iomem *ioaddr, uint64_t *cmd_data)
+{
+	uint64_t cmd_hdr = cmd_data[0];
+	int cmd = (cmd_hdr >> 52) & 0xfff;
+
+	switch (cmd) {
+	case DPRC_CMDID_OPEN:
+	default:
+		return vfio_fsl_mc_send_command(ioaddr, cmd_data);
+	}
+
+	return 0;
+}
+
+static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+	struct vfio_fsl_mc_region *region;
+	uint64_t data[8];
+	int ret;
+
+	/* Write ioctl supported only for DPRC device */
+	if (strcmp(vdev->mc_dev->obj_desc.type, "dprc"))
+		return -EINVAL;
+
+	if (index >= vdev->num_regions)
+		return -EINVAL;
+
+	region = &vdev->regions[index];
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
+		return -EINVAL;
+
+	if (!region->type & VFIO_FSL_MC_REGION_TYPE_MMIO)
+		return -EINVAL;
+
+	if (!region->ioaddr) {
+		region->ioaddr = ioremap_nocache(region->addr, region->size);
+		if (!region->ioaddr)
+			return -ENOMEM;
+	}
+
+	if (count != 64 || off != 0)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 64))
+		return -EFAULT;
+
+	ret = vfio_handle_dprc_commands(region->ioaddr, data);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
+				 struct vm_area_struct *vma)
+{
+	u64 size = vma->vm_end - vma->vm_start;
+	u64 pgoff, base;
+
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	base = pgoff << PAGE_SHIFT;
+
+	if (region.size < PAGE_SIZE || base + size > region.size)
+		return -EINVAL;
+	/*
+	 * Set the REGION_TYPE_CACHEABLE (QBman CENA regs) to be the
+	 * cache inhibited area of the portal to avoid coherency issues
+	 * if a user migrates to another core.
+	 */
+	if (region.type & VFIO_FSL_MC_REGION_TYPE_CACHEABLE)
+		vma->vm_page_prot = pgprot_cached_ns(vma->vm_page_prot);
+	else
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       size, vma->vm_page_prot);
+}
+
+/* Allows mmaping fsl_mc device regions in assigned DPRC */
+static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	unsigned long size, addr;
+	int index;
+
+	index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT);
+
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if (vma->vm_start & ~PAGE_MASK)
+		return -EINVAL;
+	if (vma->vm_end & ~PAGE_MASK)
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (index >= vdev->num_regions)
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)
+			&& (vma->vm_flags & VM_READ))
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)
+			&& (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+
+	addr = vdev->regions[index].addr;
+	size = vdev->regions[index].size;
+
+	vma->vm_private_data = mc_dev;
+
+	if (vdev->regions[index].type & VFIO_FSL_MC_REGION_TYPE_MMIO)
+		return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma);
+
+	return -EFAULT;
+}
+
+static const struct vfio_device_ops vfio_fsl_mc_ops = {
+	.name		= "vfio-fsl-mc",
+	.open		= vfio_fsl_mc_open,
+	.release	= vfio_fsl_mc_release,
+	.ioctl		= vfio_fsl_mc_ioctl,
+	.read		= vfio_fsl_mc_read,
+	.write		= vfio_fsl_mc_write,
+	.mmap		= vfio_fsl_mc_mmap,
+};
+
+static int vfio_fsl_mc_initialize_dprc(struct vfio_fsl_mc_device *vdev)
+{
+	struct device *root_dprc_dev;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct device *dev = &mc_dev->dev;
+	struct fsl_mc_bus *mc_bus;
+	struct irq_domain *mc_msi_domain;
+	unsigned int irq_count;
+	int ret;
+
+	/* device must be DPRC */
+	if (strcmp(mc_dev->obj_desc.type, "dprc"))
+		return -EINVAL;
+
+	/* mc_io must be un-initialized */
+	WARN_ON(mc_dev->mc_io);
+
+	/* allocate a portal from the root DPRC for vfio use */
+	fsl_mc_get_root_dprc(dev, &root_dprc_dev);
+	if (WARN_ON(!root_dprc_dev))
+		return -EINVAL;
+
+	ret = fsl_mc_portal_allocate(to_fsl_mc_device(root_dprc_dev),
+				     FSL_MC_IO_ATOMIC_CONTEXT_PORTAL,
+				     &mc_dev->mc_io);
+	if (ret < 0)
+		goto clean_msi_domain;
+
+	/* Reset MCP before move on */
+	ret = fsl_mc_portal_reset(mc_dev->mc_io);
+	if (ret < 0) {
+		dev_err(dev, "dprc portal reset failed: error = %d\n", ret);
+		goto free_mc_portal;
+	}
+
+	/* MSI domain set up */
+	ret = fsl_mc_find_msi_domain(root_dprc_dev->parent, &mc_msi_domain);
+	if (ret < 0)
+		goto free_mc_portal;
+
+	dev_set_msi_domain(&mc_dev->dev, mc_msi_domain);
+
+	ret = dprc_open(mc_dev->mc_io, 0, mc_dev->obj_desc.id,
+			&mc_dev->mc_handle);
+	if (ret) {
+		dev_err(dev, "dprc_open() failed: error = %d\n", ret);
+		goto free_mc_portal;
+	}
+
+	/* Initialize resource pool */
+	fsl_mc_init_all_resource_pools(mc_dev);
+
+	mc_bus = to_fsl_mc_bus(mc_dev);
+
+	if (!mc_bus->irq_resources) {
+		irq_count = FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS;
+		ret = fsl_mc_populate_irq_pool(mc_bus, irq_count);
+		if (ret < 0) {
+			dev_err(dev, "%s: Failed to init irq-pool\n", __func__);
+			goto clean_resource_pool;
+		}
+	}
+
+	mutex_init(&mc_bus->scan_mutex);
+
+	mutex_lock(&mc_bus->scan_mutex);
+	ret = dprc_scan_objects(mc_dev, mc_dev->driver_override,
+				&irq_count);
+	mutex_unlock(&mc_bus->scan_mutex);
+	if (ret) {
+		dev_err(dev, "dprc_scan_objects() fails (%d)\n", ret);
+		goto clean_irq_pool;
+	}
+
+	if (irq_count > FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS) {
+		dev_warn(&mc_dev->dev,
+			 "IRQs needed (%u) exceed IRQs preallocated (%u)\n",
+			 irq_count, FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS);
+	}
+
+	return 0;
+
+clean_irq_pool:
+	fsl_mc_cleanup_irq_pool(mc_bus);
+
+clean_resource_pool:
+	fsl_mc_cleanup_all_resource_pools(mc_dev);
+	dprc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
+
+free_mc_portal:
+	fsl_mc_portal_free(mc_dev->mc_io);
+
+clean_msi_domain:
+	dev_set_msi_domain(&mc_dev->dev, NULL);
+
+	return ret;
+}
+
+static int vfio_fsl_mc_device_remove(struct device *dev, void *data)
+{
+	struct fsl_mc_device *mc_dev;
+
+	WARN_ON(dev == NULL);
+
+	mc_dev = to_fsl_mc_device(dev);
+	if (WARN_ON(mc_dev == NULL))
+		return -ENODEV;
+
+	fsl_mc_device_remove(mc_dev);
+	return 0;
+}
+
+static void vfio_fsl_mc_cleanup_dprc(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct fsl_mc_bus *mc_bus;
+
+	/* device must be DPRC */
+	if (strcmp(mc_dev->obj_desc.type, "dprc"))
+		return;
+
+	device_for_each_child(&mc_dev->dev, NULL, vfio_fsl_mc_device_remove);
+
+	mc_bus = to_fsl_mc_bus(mc_dev);
+	if (dev_get_msi_domain(&mc_dev->dev))
+		fsl_mc_cleanup_irq_pool(mc_bus);
+
+	dev_set_msi_domain(&mc_dev->dev, NULL);
+
+	fsl_mc_cleanup_all_resource_pools(mc_dev);
+	dprc_close(mc_dev->mc_io, 0, mc_dev->mc_handle);
+	fsl_mc_portal_free(mc_dev->mc_io);
+}
+
+static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
+{
+	struct iommu_group *group;
+	struct vfio_fsl_mc_device *vdev;
+	struct device *dev = &mc_dev->dev;
+	int ret;
+
+	group = vfio_iommu_group_get(dev);
+	if (!group) {
+		dev_err(dev, "%s: VFIO: No IOMMU group\n", __func__);
+		return -EINVAL;
+	}
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev) {
+		vfio_iommu_group_put(group, dev);
+		return -ENOMEM;
+	}
+
+	vdev->mc_dev = mc_dev;
+
+	ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
+	if (ret) {
+		dev_err(dev, "%s: Failed to add to vfio group\n", __func__);
+		goto free_vfio_device;
+	}
+
+	/* DPRC container scanned and it's chilren bound with vfio driver */
+	if (strcmp(mc_dev->obj_desc.type, "dprc") == 0) {
+		ret = vfio_fsl_mc_initialize_dprc(vdev);
+		if (ret) {
+			vfio_del_group_dev(dev);
+			goto free_vfio_device;
+		}
+	} else {
+		struct fsl_mc_device *mc_bus_dev;
+
+		/* Non-dprc devices share mc_io from the parent dprc */
+		mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
+		if (mc_bus_dev == NULL) {
+			vfio_del_group_dev(dev);
+			goto free_vfio_device;
+		}
+
+		mc_dev->mc_io = mc_bus_dev->mc_io;
+
+		/* Inherit parent MSI domain */
+		dev_set_msi_domain(&mc_dev->dev,
+				   dev_get_msi_domain(mc_dev->dev.parent));
+	}
+	return 0;
+
+free_vfio_device:
+	kfree(vdev);
+	vfio_iommu_group_put(group, dev);
+	return ret;
+}
+
+static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
+{
+	struct vfio_fsl_mc_device *vdev;
+	struct device *dev = &mc_dev->dev;
+
+	vdev = vfio_del_group_dev(dev);
+	if (!vdev)
+		return -EINVAL;
+
+	if (strcmp(mc_dev->obj_desc.type, "dprc") == 0)
+		vfio_fsl_mc_cleanup_dprc(vdev);
+	else
+		dev_set_msi_domain(&mc_dev->dev, NULL);
+
+	mc_dev->mc_io = NULL;
+
+	vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
+	kfree(vdev);
+
+	return 0;
+}
+
+/*
+ * vfio-fsl_mc is a meta-driver, so use driver_override interface to
+ * bind a fsl_mc container with this driver and match_id_table is NULL.
+ */
+static struct fsl_mc_driver vfio_fsl_mc_driver = {
+	.probe		= vfio_fsl_mc_probe,
+	.remove		= vfio_fsl_mc_remove,
+	.match_id_table = NULL,
+	.driver	= {
+		.name	= "vfio-fsl-mc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init vfio_fsl_mc_driver_init(void)
+{
+	return fsl_mc_driver_register(&vfio_fsl_mc_driver);
+}
+
+static void __exit vfio_fsl_mc_driver_exit(void)
+{
+	fsl_mc_driver_unregister(&vfio_fsl_mc_driver);
+}
+
+module_init(vfio_fsl_mc_driver_init);
+module_exit(vfio_fsl_mc_driver_exit);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
@@ -0,0 +1,199 @@
+/*
+ * Freescale Management Complex (MC) device passthrough using VFIO
+ *
+ * Copyright (C) 2013-2016 Freescale Semiconductor, Inc.
+ * Author: Bharat Bhushan <bharat.bhushan@nxp.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/vfio.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/eventfd.h>
+#include <linux/msi.h>
+
+#include "../../staging/fsl-mc/include/mc.h"
+#include "vfio_fsl_mc_private.h"
+
+static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
+{
+	struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg;
+
+	eventfd_signal(mc_irq->trigger, 1);
+	return IRQ_HANDLED;
+}
+
+static int vfio_fsl_mc_irq_mask(struct vfio_fsl_mc_device *vdev,
+				unsigned int index, unsigned int start,
+				unsigned int count, uint32_t flags,
+				void *data)
+{
+	return -EINVAL;
+}
+
+static int vfio_fsl_mc_irq_unmask(struct vfio_fsl_mc_device *vdev,
+				unsigned int index, unsigned int start,
+				unsigned int count, uint32_t flags,
+				void *data)
+{
+	return -EINVAL;
+}
+
+static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev,
+			    int index, int fd)
+{
+	struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
+	struct eventfd_ctx *trigger;
+	int hwirq;
+	int ret;
+
+	hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
+	if (irq->trigger) {
+		free_irq(hwirq, irq);
+		kfree(irq->name);
+		eventfd_ctx_put(irq->trigger);
+		irq->trigger = NULL;
+	}
+
+	if (fd < 0) /* Disable only */
+		return 0;
+
+	irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
+					hwirq, dev_name(&vdev->mc_dev->dev));
+	if (!irq->name)
+		return -ENOMEM;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		kfree(irq->name);
+		return PTR_ERR(trigger);
+	}
+
+	irq->trigger = trigger;
+
+	ret = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0,
+			  irq->name, irq);
+	if (ret) {
+		kfree(irq->name);
+		eventfd_ctx_put(trigger);
+		irq->trigger = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+int vfio_fsl_mc_irqs_init(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct vfio_fsl_mc_irq *mc_irq;
+	int irq_count;
+	int ret, i;
+
+	/* Device does not support any interrupt */
+	if (mc_dev->obj_desc.irq_count == 0)
+		return 0;
+
+	irq_count = mc_dev->obj_desc.irq_count;
+
+	mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL);
+	if (mc_irq == NULL)
+		return -ENOMEM;
+
+	/* Allocate IRQs */
+	ret = fsl_mc_allocate_irqs(mc_dev);
+	if  (ret) {
+		kfree(mc_irq);
+		return ret;
+	}
+
+	for (i = 0; i < irq_count; i++) {
+		mc_irq[i].count = 1;
+		mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD;
+	}
+
+	vdev->mc_irqs = mc_irq;
+
+	return 0;
+}
+
+/* Free All IRQs for the given MC object */
+void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int irq_count = mc_dev->obj_desc.irq_count;
+	int i;
+
+	/* Device does not support any interrupt */
+	if (mc_dev->obj_desc.irq_count == 0)
+		return;
+
+	for (i = 0; i < irq_count; i++)
+		vfio_set_trigger(vdev, i, -1);
+
+	fsl_mc_free_irqs(mc_dev);
+	kfree(vdev->mc_irqs);
+}
+
+static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
+				       unsigned int index, unsigned int start,
+				       unsigned int count, uint32_t flags,
+				       void *data)
+{
+	struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
+	int hwirq;
+
+	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
+		return vfio_set_trigger(vdev, index, -1);
+
+	if (start != 0 || count != 1)
+		return -EINVAL;
+
+	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+		int32_t fd = *(int32_t *)data;
+
+		return vfio_set_trigger(vdev, index, fd);
+	}
+
+	hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
+
+	if (flags & VFIO_IRQ_SET_DATA_NONE) {
+		vfio_fsl_mc_irq_handler(hwirq, irq);
+
+	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+		uint8_t trigger = *(uint8_t *)data;
+
+		if (trigger)
+			vfio_fsl_mc_irq_handler(hwirq, irq);
+	}
+
+	return 0;
+}
+
+int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+			       uint32_t flags, unsigned int index,
+			       unsigned int start, unsigned int count,
+			       void *data)
+{
+	int ret = -ENOTTY;
+
+	switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+	case VFIO_IRQ_SET_ACTION_MASK:
+		ret = vfio_fsl_mc_irq_mask(vdev, index, start, count,
+					   flags, data);
+		break;
+	case VFIO_IRQ_SET_ACTION_UNMASK:
+		ret = vfio_fsl_mc_irq_unmask(vdev, index, start, count,
+					     flags, data);
+		break;
+	case VFIO_IRQ_SET_ACTION_TRIGGER:
+		ret = vfio_fsl_mc_set_irq_trigger(vdev, index, start,
+						  count, flags, data);
+		break;
+	}
+
+	return ret;
+}
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -0,0 +1,55 @@
+/*
+ * Freescale Management Complex VFIO private declarations
+ *
+ * Copyright (C) 2013-2016 Freescale Semiconductor, Inc.
+ * Copyright 2016 NXP
+ * Author: Bharat Bhushan <bharat.bhushan@nxp.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef VFIO_FSL_MC_PRIVATE_H
+#define VFIO_FSL_MC_PRIVATE_H
+
+#define VFIO_FSL_MC_OFFSET_SHIFT    40
+#define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1)
+
+#define VFIO_FSL_MC_OFFSET_TO_INDEX(off) (off >> VFIO_FSL_MC_OFFSET_SHIFT)
+
+#define VFIO_FSL_MC_INDEX_TO_OFFSET(index)	\
+	((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT)
+
+struct vfio_fsl_mc_irq {
+	u32			flags;
+	u32			count;
+	struct eventfd_ctx	*trigger;
+	char			*name;
+};
+
+struct vfio_fsl_mc_region {
+	u32			flags;
+#define VFIO_FSL_MC_REGION_TYPE_MMIO  1
+#define VFIO_FSL_MC_REGION_TYPE_CACHEABLE  2
+	u32			type;
+	u64			addr;
+	resource_size_t		size;
+	void __iomem		*ioaddr;
+};
+
+struct vfio_fsl_mc_device {
+	struct fsl_mc_device		*mc_dev;
+	int				refcnt;
+	u32				num_regions;
+	struct vfio_fsl_mc_region	*regions;
+	struct vfio_fsl_mc_irq		*mc_irqs;
+};
+
+int vfio_fsl_mc_irqs_init(struct vfio_fsl_mc_device *vdev);
+void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev);
+int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+			       uint32_t flags, unsigned int index,
+			       unsigned int start, unsigned int count,
+			       void *data);
+#endif /* VFIO_PCI_PRIVATE_H */
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -36,6 +36,8 @@
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <linux/workqueue.h>
+#include <linux/dma-iommu.h>
+#include <linux/irqdomain.h>
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -720,6 +722,27 @@ static void vfio_test_domain_fgsp(struct
 	__free_pages(pages, order);
 }
 
+static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
+{
+	struct list_head group_resv_regions;
+	struct iommu_resv_region *region, *next;
+	bool ret = false;
+
+	INIT_LIST_HEAD(&group_resv_regions);
+	iommu_get_group_resv_regions(group, &group_resv_regions);
+	list_for_each_entry(region, &group_resv_regions, list) {
+		if (region->type == IOMMU_RESV_SW_MSI) {
+			*base = region->start;
+			ret = true;
+			goto out;
+		}
+	}
+out:
+	list_for_each_entry_safe(region, next, &group_resv_regions, list)
+		kfree(region);
+	return ret;
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 					 struct iommu_group *iommu_group)
 {
@@ -728,6 +751,8 @@ static int vfio_iommu_type1_attach_group
 	struct vfio_domain *domain, *d;
 	struct bus_type *bus = NULL;
 	int ret;
+	bool resv_msi, msi_remap;
+	phys_addr_t resv_msi_base;
 
 	mutex_lock(&iommu->lock);
 
@@ -774,11 +799,15 @@ static int vfio_iommu_type1_attach_group
 	if (ret)
 		goto out_domain;
 
+	resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
+
 	INIT_LIST_HEAD(&domain->group_list);
 	list_add(&group->next, &domain->group_list);
 
-	if (!allow_unsafe_interrupts &&
-	    !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
+	msi_remap = resv_msi ? irq_domain_check_msi_remap() :
+				iommu_capable(bus, IOMMU_CAP_INTR_REMAP);
+
+	if (!allow_unsafe_interrupts && !msi_remap) {
 		pr_warn("%s: No interrupt remapping support.  Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
 		       __func__);
 		ret = -EPERM;
@@ -820,6 +849,12 @@ static int vfio_iommu_type1_attach_group
 	if (ret)
 		goto out_detach;
 
+	if (resv_msi) {
+		ret = iommu_get_msi_cookie(domain->domain, resv_msi_base);
+		if (ret)
+			goto out_detach;
+	}
+
 	list_add(&domain->next, &iommu->domain_list);
 
 	mutex_unlock(&iommu->lock);
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -198,6 +198,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
+#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 5)	/* vfio-fsl-mc device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };