From cf067bf8bb993d6cfdc42d750ae241c43f88403f Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Mon, 12 May 2014 11:55:20 +0200
Subject: [PATCH 1/2] PCI: BCM5301X: add PCIe2 driver for BCM5301X SoCs

This driver supports the PCIe controller found on the BCM4708 and
similar SoCs. The controller itself is automatically detected by bcma.

This controller is found on SoCs usually used in SOHO routers to
connect the wifi cards to the SoC. All the of the BCM5301X SoCs I know
of have 2 or 3 of these controllers in the SoC.

I had to use PCI domains otherwise the pci_create_root_bus() function
in drivers/pci/probe.c would fail for the second controller being
registered because pci_find_bus() would find the same PCIe bus again
and assume it is already registered, which ends up in a kernel panic in
pcibios_init_hw() in arch/arm/kernel/bios32.c

The ARM PCI code assumes that every controller has an I/O space and
adds a dummy area if the driver does not specify one. This will work
for the first controller, but when we register the second one this will
result in an error. To prevent this problem we add an empty I/O space.

Currently I have problems with probing the devices on the bus, because
pci_bus_add_devices() is called too early in pci_scan_root_bus() in
drivers/pci/probe.c, before pci_bus_assign_resources() was called in
pci_common_init_dev() in arch/arm/kernel/bios32.c. When the devices are
added too early they do not have any resources and adding fails. I have
to remove the call to pci_bus_add_devices() in pci_scan_root_bus() to
make registration work, calling pci_bus_add_devices() later again does
not fix this problem.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
---
 arch/arm/mach-bcm/Kconfig            |   1 +
 drivers/pci/host/Kconfig             |   7 +
 drivers/pci/host/Makefile            |   1 +
 drivers/pci/host/pci-host-bcm5301x.c | 428 +++++++++++++++++++++++++++++++++++
 4 files changed, 437 insertions(+)
 create mode 100644 drivers/pci/host/pci-host-bcm5301x.c

--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -45,6 +45,7 @@ config ARCH_BCM_5301X
 	select ARM_GLOBAL_TIMER
 	select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
 	select MIGHT_HAVE_PCI
+	select PCI_DOMAINS if PCI
 	help
 	  Support for Broadcom BCM470X and BCM5301X SoCs with ARM CPU cores.
 
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -33,4 +33,11 @@ config PCI_RCAR_GEN2
 	  There are 3 internal PCI controllers available with a single
 	  built-in EHCI/OHCI host controller present on each one.
 
+config PCI_BCM5301X
+	bool "BCM5301X PCIe2 host controller"
+	depends on BCMA && OF && ARM && PCI_DOMAINS
+	help
+	  Say Y here if you want to support the PCIe host controller found
+	  on Broadcom BCM5301X and BCM470X (Northstar) SoCs.
+
 endmenu
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
 obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
 obj-$(CONFIG_PCI_RCAR_GEN2) += pci-rcar-gen2.o
+obj-$(CONFIG_PCI_BCM5301X) += pci-host-bcm5301x.o
--- /dev/null
+++ b/drivers/pci/host/pci-host-bcm5301x.c
@@ -0,0 +1,459 @@
+/*
+ * Northstar PCI-Express driver
+ * Only supports Root-Complex (RC) mode
+ *
+ * Notes:
+ * PCI Domains are being used to identify the PCIe port 1:1.
+ *
+ * Only MEM access is supported, PAX does not support IO.
+ *
+ * Copyright 2012-2014, Broadcom Corporation
+ * Copyright 2014, Hauke Mehrtens <hauke@hauke-m.de>
+ *
+ * Licensed under the GNU/GPL. See COPYING for details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/bcma/bcma.h>
+#include <linux/bcma/bcma_driver_pcie2.h>
+
+#define	SOC_PCIE_HDR_OFF	0x400	/* 256 bytes per function */
+
+#define PCI_LINK_STATUS_CTRL_2_OFFSET 0xDC
+#define PCI_TARGET_LINK_SPEED_MASK    0xF
+#define PCI_TARGET_LINK_SPEED_GEN2    0x2
+#define PCI_TARGET_LINK_SPEED_GEN1    0x1
+
+static int bcma_pcie2_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
+{
+	struct pci_sys_data *sys = pdev->sysdata;
+	struct bcma_device *bdev = sys->private_data;
+
+	return bcma_core_irq(bdev, 5);
+}
+
+static u32 bcma_pcie2_cfg_base(struct bcma_device *bdev, int busno,
+			       unsigned int devfn, int where)
+{
+	int slot = PCI_SLOT(devfn);
+	int fn = PCI_FUNC(devfn);
+	u32 addr_reg;
+
+	if (busno == 0) {
+		if (slot >= 1)
+			return 0;
+		bcma_write32(bdev, BCMA_CORE_PCIE2_CONFIGINDADDR,
+			     where & 0xffc);
+		return BCMA_CORE_PCIE2_CONFIGINDDATA;
+	}
+	if (fn > 1)
+		return 0;
+	addr_reg = (busno & 0xff) << 20 | (slot << 15) | (fn << 12) |
+		   (where & 0xffc) | (1 & 0x3);
+
+	bcma_write32(bdev, BCMA_CORE_PCIE2_CFG_ADDR, addr_reg);
+	return BCMA_CORE_PCIE2_CFG_DATA;
+}
+
+static u32 bcma_pcie2_read_config(struct bcma_device *bdev, int busno,
+				  unsigned int devfn, int where, int size)
+{
+	u32 base;
+	u32 data_reg;
+	u32 mask;
+	int shift;
+
+	base = bcma_pcie2_cfg_base(bdev, busno, devfn, where);
+
+	if (!base)
+		return ~0UL;
+
+	data_reg = bcma_read32(bdev, base);
+
+	if (size == 4)
+		return data_reg;
+
+	mask = (1 << (size * 8)) - 1;
+	shift = (where % 4) * 8;
+	return (data_reg >> shift) & mask;
+}
+
+static void bcma_pcie2_write_config(struct bcma_device *bdev, int busno,
+				    unsigned int devfn, int where, int size,
+				    u32 val)
+{
+	u32 base;
+	u32 data_reg;
+
+	base = bcma_pcie2_cfg_base(bdev, busno, devfn, where);
+
+	if (!base)
+		return;
+
+	if (size < 4) {
+		u32 mask = (1 << (size * 8)) - 1;
+		int shift = (where % 4) * 8;
+
+		data_reg = bcma_read32(bdev, base);
+		data_reg &= ~(mask << shift);
+		data_reg |= (val & mask) << shift;
+	} else {
+		data_reg = val;
+	}
+
+	bcma_write32(bdev, base, data_reg);
+}
+
+static int bcma_pcie2_read_config_pci(struct pci_bus *bus, unsigned int devfn,
+				   int where, int size, u32 *val)
+{
+	struct pci_sys_data *sys = bus->sysdata;
+	struct bcma_device *bdev = sys->private_data;
+
+	*val = bcma_pcie2_read_config(bdev, bus->number, devfn, where, size);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int bcma_pcie2_write_config_pci(struct pci_bus *bus, unsigned int devfn,
+				    int where, int size, u32 val)
+{
+	struct pci_sys_data *sys = bus->sysdata;
+	struct bcma_device *bdev = sys->private_data;
+
+	bcma_pcie2_write_config(bdev, bus->number, devfn, where, size, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Methods for accessing configuration registers
+ */
+static struct pci_ops bcma_pcie2_ops = {
+	.read = bcma_pcie2_read_config_pci,
+	.write = bcma_pcie2_write_config_pci,
+};
+
+/* NS: CLASS field is R/O, and set to wrong 0x200 value */
+static void bcma_pcie2_fixup_class(struct pci_dev *dev)
+{
+	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x8011, bcma_pcie2_fixup_class);
+
+/*
+ * Check link status, return 0 if link is up in RC mode,
+ * otherwise return non-zero
+ */
+static int bcma_pcie2_check_link(struct bcma_device *bdev, struct pci_sys_data *sys)
+{
+	u32 tmp32;
+	u16 tmp16;
+	u16 pos;
+	u8 nlw;
+	/*
+	 * Setup callback (bcma_pcie2_setup) is called in pcibios_init_hw before
+	 * creating bus root, so we don't have it here yet. On the other hand
+	 * we really want to use pci_bus_find_capability helper to check NLW.
+	 * Let's fake simple pci_bus just to query for capabilities.
+	 */
+	struct pci_bus bus = {
+		.number = 0,
+		.ops = &bcma_pcie2_ops,
+		.sysdata = sys,
+	};
+
+	tmp32 = bcma_read32(bdev, BCMA_CORE_PCIE2_LINK_STATUS);
+	dev_dbg(&bdev->dev, "link status: 0x%08x\n", tmp32);
+
+	tmp32 = bcma_read32(bdev, BCMA_CORE_PCIE2_STRAP_STATUS);
+	dev_dbg(&bdev->dev, "strap status: 0x%08x\n", tmp32);
+
+	/* check link status to see if link is active */
+	pos = pci_bus_find_capability(&bus, 0, PCI_CAP_ID_EXP);
+	pci_bus_read_config_word(&bus, 0, pos + PCI_EXP_LNKSTA, &tmp16);
+	nlw = (tmp16 & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
+
+	if (nlw == 0) {
+		/* try GEN 1 link speed */
+		tmp32 = bcma_pcie2_read_config(bdev, 0, 0,
+					PCI_LINK_STATUS_CTRL_2_OFFSET, 4);
+		if ((tmp32 & PCI_TARGET_LINK_SPEED_MASK) ==
+				PCI_TARGET_LINK_SPEED_GEN2) {
+			tmp32 &= ~PCI_TARGET_LINK_SPEED_MASK;
+			tmp32 |= PCI_TARGET_LINK_SPEED_GEN1;
+			bcma_pcie2_write_config(bdev, 0, 0,
+					PCI_LINK_STATUS_CTRL_2_OFFSET, 4, tmp32);
+			tmp32 = bcma_pcie2_read_config(bdev, 0, 0,
+					PCI_LINK_STATUS_CTRL_2_OFFSET, 4);
+			msleep(100);
+
+			pos = pci_bus_find_capability(&bus, 0, PCI_CAP_ID_EXP);
+			pci_bus_read_config_word(&bus, 0, pos + PCI_EXP_LNKSTA,
+					&tmp16);
+			nlw = (tmp16 & PCI_EXP_LNKSTA_NLW) >>
+				PCI_EXP_LNKSTA_NLW_SHIFT;
+		}
+	}
+
+	dev_info(&bdev->dev, "link: %s\n", nlw ? "UP" : "DOWN");
+	return nlw ? 0 : -ENODEV;
+}
+
+/*
+ * Initializte the PCIe controller
+ */
+static void bcma_pcie2_hw_init(struct bcma_device *bdev)
+{
+	u32 tmp32;
+	u16 tmp16;
+
+	/* Change MPS and MRRS to 512 */
+	tmp16 = bcma_pcie2_read_config(bdev, 0, 0, 0x4d4, 2);
+	tmp16 &= ~7;
+	tmp16 |= 2;
+	bcma_pcie2_write_config(bdev, 0, 0, 0x4d4, 2, tmp16);
+
+	tmp32 = bcma_pcie2_read_config(bdev, 0, 0, 0xb4, 4);
+	tmp32 &= ~((7 << 12) | (7 << 5));
+	tmp32 |= (2 << 12) | (2 << 5);
+	bcma_pcie2_write_config(bdev, 0, 0, 0xb4, 4, tmp32);
+
+	/*
+	 * Turn-on Root-Complex (RC) mode, from reset default of EP
+	 * The mode is set by straps, can be overwritten via DMU
+	 * register <cru_straps_control> bit 5, "1" means RC
+	 */
+
+	/* Send a downstream reset */
+	bcma_write32(bdev, BCMA_CORE_PCIE2_CLK_CONTROL,
+		     PCIE2_CLKC_RST_OE | PCIE2_CLKC_RST);
+	usleep_range(250, 400);
+	bcma_write32(bdev, BCMA_CORE_PCIE2_CLK_CONTROL, PCIE2_CLKC_RST_OE);
+	msleep(250);
+
+	/* TBD: take care of PM, check we're on */
+}
+
+/*
+ * Setup the address translation
+ *
+ * NOTE: All PCI-to-CPU address mapping are 1:1 for simplicity
+ */
+static int bcma_pcie2_map_init(struct bcma_device *bdev, u32 addr)
+{
+	/* 64MB alignment */
+	if (!addr || (addr & (SZ_64M - 1)))
+		return -EINVAL;
+
+	bcma_write32(bdev, BCMA_CORE_PCIE2_OMAP0_LOWER, addr);
+	bcma_write32(bdev, BCMA_CORE_PCIE2_OARR0, addr | 0x01);
+
+	bcma_write32(bdev, BCMA_CORE_PCIE2_OMAP1_LOWER, addr + SZ_64M);
+	bcma_write32(bdev, BCMA_CORE_PCIE2_OARR1, (addr + SZ_64M) | 0x01);
+
+	/*
+	 * Inbound address translation setup
+	 * Northstar only maps up to 128 MiB inbound, DRAM could be up to 1 GiB.
+	 *
+	 * For now allow access to entire DRAM, assuming it is less than 128MiB,
+	 * otherwise DMA bouncing mechanism may be required.
+	 * Also consider DMA mask to limit DMA physical address
+	 */
+	/* 64-bit LE regs, write low word, high is 0 at reset */
+	bcma_write32(bdev, BCMA_CORE_PCIE2_FUNC0_IMAP1, PHYS_OFFSET | 0x1);
+	bcma_write32(bdev, BCMA_CORE_PCIE2_IARR1_LOWER,
+			   PHYS_OFFSET | ((SZ_128M >> 20) & 0xff));
+	return 0;
+}
+
+/*
+ * Setup PCIE Host bridge
+ */
+static int bcma_pcie2_bridge_init(struct bcma_device *bdev, u32 addr, u32 size)
+{
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_PRIMARY_BUS, 1, 0);
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_SECONDARY_BUS, 1, 1);
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_SUBORDINATE_BUS, 1, 4);
+
+	bcma_pcie2_read_config(bdev, 0, 0, PCI_PRIMARY_BUS, 1);
+	bcma_pcie2_read_config(bdev, 0, 0, PCI_SECONDARY_BUS, 1);
+	bcma_pcie2_read_config(bdev, 0, 0, PCI_SUBORDINATE_BUS, 1);
+
+	/* MEM_BASE, MEM_LIM require 1MB alignment */
+	if (((addr >> 16) & 0xf) || (((addr + size) >> 16) & 0xf))
+		return -EINVAL;
+
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_MEMORY_BASE, 2, addr >> 16);
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_MEMORY_LIMIT, 2,
+				(addr + size) >> 16);
+
+	/* These registers are not supported on the NS */
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_IO_BASE_UPPER16, 2, 0);
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_IO_LIMIT_UPPER16, 2, 0);
+
+	/* Force class to that of a Bridge */
+	bcma_pcie2_write_config(bdev, 0, 0, PCI_CLASS_DEVICE, 2,
+				PCI_CLASS_BRIDGE_PCI);
+
+	bcma_pcie2_read_config(bdev, 0, 0, PCI_CLASS_DEVICE, 2);
+	bcma_pcie2_read_config(bdev, 0, 0, PCI_MEMORY_BASE, 2);
+	bcma_pcie2_read_config(bdev, 0, 0, PCI_MEMORY_LIMIT, 2);
+	return 0;
+}
+
+static void bcma_pcie2_3rd_init(struct bcma_bus *bus)
+{
+	/* PCIE PLL block register (base 0x8000) */
+	bcma_chipco_b_mii_write(&bus->drv_cc_b, 0x00000088, 0x57fe8000);
+	/* Check PCIE PLL lock status */
+	bcma_chipco_b_mii_write(&bus->drv_cc_b, 0x00000088, 0x67c60000);
+}
+
+/* To improve PCIE phy jitter */
+static void bcma_pcie2_improve_phy_jitter(struct bcma_bus *bus, int phyaddr)
+{
+	u32 val;
+
+	/* Change blkaddr */
+	val = (1 << 30) | (1 << 28) | (phyaddr << 23) | (0x1f << 18) |
+		(2 << 16) | (0x863 << 4);
+	bcma_chipco_b_mii_write(&bus->drv_cc_b, 0x0000009a, val);
+
+	/* Write 0x0190 to 0x13 regaddr */
+	val = (1 << 30) | (1 << 28) | (phyaddr << 23) | (0x13 << 18) |
+		(2 << 16) | 0x0190;
+	bcma_chipco_b_mii_write(&bus->drv_cc_b, 0x0000009a, val);
+
+	/* Write 0x0191 to 0x19 regaddr */
+	val = (1 << 30) | (1 << 28) | (phyaddr << 23) | (0x19 << 18) |
+		(2 << 16) | 0x0191;
+	bcma_chipco_b_mii_write(&bus->drv_cc_b, 0x0000009a, val);
+}
+
+static int bcma_pcie2_setup(int nr, struct pci_sys_data *sys)
+{
+	struct bcma_device *bdev = sys->private_data;
+	struct bcma_bus *bus = bdev->bus;
+	struct resource *res;
+	struct bcma_device *arm_core;
+	u32 cru_straps_ctrl;
+	int ret;
+	int phyaddr;
+
+	if (bdev->core_unit == 2) {
+		arm_core = bcma_find_core(bus, BCMA_CORE_ARMCA9);
+		cru_straps_ctrl = bcma_read32(arm_core, 0x2a0);
+
+		/* 3rd PCIE is not selected */
+		if (cru_straps_ctrl & 0x10)
+			return -ENODEV;
+
+		bcma_pcie2_3rd_init(bus);
+		phyaddr = 0xf;
+	} else {
+		phyaddr = bdev->core_unit;
+	}
+	bcma_pcie2_improve_phy_jitter(bus, phyaddr);
+
+	/* create mem resource */
+	res = devm_kzalloc(&bdev->dev, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -EINVAL;
+
+	res->start = bdev->addr_s[0];
+	res->end = bdev->addr_s[0] + SZ_128M -1;
+	res->name = "PCIe dummy IO space";
+	res->flags = IORESOURCE_MEM;
+
+	pci_add_resource(&sys->resources, res);
+
+	/* This PCIe controller does not support IO Mem, so use a dummy one. */
+	res = devm_kzalloc(&bdev->dev, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -EINVAL;
+
+	res->start = 0;
+	res->end = 0;
+	res->name = "PCIe dummy IO space";
+	res->flags = IORESOURCE_IO;
+
+	pci_add_resource(&sys->resources, res);
+
+	bcma_pcie2_hw_init(bdev);
+	ret = bcma_pcie2_map_init(bdev, bdev->addr_s[0]);
+	if (ret)
+		return ret;
+
+	/*
+	 * Skip inactive ports -
+	 * will need to change this for hot-plugging
+	 */
+	ret = bcma_pcie2_check_link(bdev, sys);
+	if (ret)
+		return ret;
+
+	ret = bcma_pcie2_bridge_init(bdev, bdev->addr_s[0], SZ_128M);
+	if (ret)
+		return ret;
+
+	return 1;
+}
+
+static int bcma_pcie2_probe(struct bcma_device *bdev)
+{
+	struct hw_pci hw = {
+		.nr_controllers = 1,
+		.domain		= bdev->core_unit,
+		.private_data	= (void **)&bdev,
+		.setup		= bcma_pcie2_setup,
+		.map_irq	= bcma_pcie2_map_irq,
+		.ops		= &bcma_pcie2_ops,
+	};
+
+	dev_info(&bdev->dev, "initializing PCIe controller\n");
+
+	/* Announce this port to ARM/PCI common code */
+	pci_common_init_dev(&bdev->dev, &hw);
+
+	/* Setup virtual-wire interrupts */
+	bcma_write32(bdev, BCMA_CORE_PCIE2_SYS_RC_INTX_EN, 0xf);
+
+	/* Enable memory and bus master */
+	bcma_write32(bdev, SOC_PCIE_HDR_OFF + 4, 0x6);
+
+	return 0;
+}
+
+static const struct bcma_device_id bcma_pcie2_table[] = {
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_PCIEG2, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORETABLE_END
+};
+MODULE_DEVICE_TABLE(bcma, bcma_pcie2_table);
+
+static struct bcma_driver bcma_pcie2_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= bcma_pcie2_table,
+	.probe		= bcma_pcie2_probe,
+};
+
+static int __init bcma_pcie2_init(void)
+{
+	return bcma_driver_register(&bcma_pcie2_driver);
+}
+module_init(bcma_pcie2_init);
+
+static void __exit bcma_pcie2_exit(void)
+{
+	bcma_driver_unregister(&bcma_pcie2_driver);
+}
+module_exit(bcma_pcie2_exit);
+
+MODULE_AUTHOR("Hauke Mehrtens");
+MODULE_DESCRIPTION("BCM5301X PCIe host controller");
+MODULE_LICENSE("GPLv2");