initial_commit

author: root <root@artemis.panaceas.org> 2015-12-25 04:40:36 +0000
committer: root <root@artemis.panaceas.org> 2015-12-25 04:40:36 +0000
commit: 849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree: 6135abc790ca67dedbe07c39806591e70eda81ce /arch/mips/cavium-octeon
download: linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2
linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip
21 files changed, 6493 insertions, 0 deletions
diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig
new file mode 100644
index 00000000..cad555eb
--- /dev/null
+++ b/arch/mips/cavium-octeon/Kconfig
@@ -0,0 +1,105 @@
+if CPU_CAVIUM_OCTEON
+
+config CAVIUM_CN63XXP1
+	bool "Enable CN63XXP1 errata worarounds"
+	default "n"
+	help
+	  The CN63XXP1 chip requires build time workarounds to
+	  function reliably, select this option to enable them.  These
+	  workarounds will cause a slight decrease in performance on
+	  non-CN63XXP1 hardware, so it is recommended to select "n"
+	  unless it is known the workarounds are needed.
+
+config CAVIUM_OCTEON_2ND_KERNEL
+	bool "Build the kernel to be used as a 2nd kernel on the same chip"
+	default "n"
+	help
+	  This option configures this kernel to be linked at a different
+	  address and use the 2nd uart for output. This allows a kernel built
+	  with this option to be run at the same time as one built without this
+	  option.
+
+config CAVIUM_OCTEON_HW_FIX_UNALIGNED
+	bool "Enable hardware fixups of unaligned loads and stores"
+	default "y"
+	help
+	  Configure the Octeon hardware to automatically fix unaligned loads
+	  and stores. Normally unaligned accesses are fixed using a kernel
+	  exception handler. This option enables the hardware automatic fixups,
+	  which requires only an extra 3 cycles. Disable this option if you
+	  are running code that relies on address exceptions on unaligned
+	  accesses.
+
+config CAVIUM_OCTEON_CVMSEG_SIZE
+	int "Number of L1 cache lines reserved for CVMSEG memory"
+	range 0 54
+	default 1
+	help
+	  CVMSEG LM is a segment that accesses portions of the dcache as a
+	  local memory; the larger CVMSEG is, the smaller the cache is.
+	  This selects the size of CVMSEG LM, which is in cache blocks. The
+	  legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is
+	  between zero and 6192 bytes).
+
+config CAVIUM_OCTEON_LOCK_L2
+	bool "Lock often used kernel code in the L2"
+	default "y"
+	help
+	  Enable locking parts of the kernel into the L2 cache.
+
+config CAVIUM_OCTEON_LOCK_L2_TLB
+	bool "Lock the TLB handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the low level TLB fast path into L2.
+
+config CAVIUM_OCTEON_LOCK_L2_EXCEPTION
+	bool "Lock the exception handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the low level exception handler into L2.
+
+config CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT
+	bool "Lock the interrupt handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the low level interrupt handler into L2.
+
+config CAVIUM_OCTEON_LOCK_L2_INTERRUPT
+	bool "Lock the 2nd level interrupt handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the 2nd level interrupt handler in L2.
+
+config CAVIUM_OCTEON_LOCK_L2_MEMCPY
+	bool "Lock memcpy() in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the kernel's implementation of memcpy() into L2.
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_STATIC
+
+config CAVIUM_OCTEON_HELPER
+	def_bool y
+	depends on OCTEON_ETHERNET || PCI
+
+config IOMMU_HELPER
+	bool
+
+config NEED_SG_DMA_LENGTH
+	bool
+
+config SWIOTLB
+	def_bool y
+	select IOMMU_HELPER
+	select NEED_SG_DMA_LENGTH
+
+
+endif # CPU_CAVIUM_OCTEON
diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile
new file mode 100644
index 00000000..19eb0434
--- /dev/null
+++ b/arch/mips/cavium-octeon/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the Cavium Octeon specific kernel interface routines
+# under Linux.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2005-2009 Cavium Networks
+#
+
+obj-y := cpu.o setup.o serial.o octeon-platform.o octeon-irq.o csrc-octeon.o
+obj-y += dma-octeon.o flash_setup.o
+obj-y += octeon-memcpy.o
+obj-y += executive/
+
+obj-$(CONFIG_SMP)                     += smp.o
diff --git a/arch/mips/cavium-octeon/Platform b/arch/mips/cavium-octeon/Platform
new file mode 100644
index 00000000..1e43ccf1
--- /dev/null
+++ b/arch/mips/cavium-octeon/Platform
@@ -0,0 +1,11 @@
+#
+# Cavium Octeon
+#
+platform-$(CONFIG_CPU_CAVIUM_OCTEON)	+= cavium-octeon/
+cflags-$(CONFIG_CPU_CAVIUM_OCTEON)	+=				\
+		-I$(srctree)/arch/mips/include/asm/mach-cavium-octeon
+ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+load-$(CONFIG_CPU_CAVIUM_OCTEON)	+= 0xffffffff84100000
+else
+load-$(CONFIG_CPU_CAVIUM_OCTEON)	+= 0xffffffff81100000
+endif
diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c
new file mode 100644
index 00000000..a5b42790
--- /dev/null
+++ b/arch/mips/cavium-octeon/cpu.c
@@ -0,0 +1,48 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2009 Wind River Systems,
+ *   written by Ralf Baechle <ralf@linux-mips.org>
+ */
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/prefetch.h>
+#include <linux/sched.h>
+
+#include <asm/cop2.h>
+#include <asm/current.h>
+#include <asm/mipsregs.h>
+#include <asm/page.h>
+#include <asm/octeon/octeon.h>
+
+static int cnmips_cu2_call(struct notifier_block *nfb, unsigned long action,
+	void *data)
+{
+	unsigned long flags;
+	unsigned int status;
+
+	switch (action) {
+	case CU2_EXCEPTION:
+		prefetch(&current->thread.cp2);
+		local_irq_save(flags);
+		KSTK_STATUS(current) |= ST0_CU2;
+		status = read_c0_status();
+		write_c0_status(status | ST0_CU2);
+		octeon_cop2_restore(&(current->thread.cp2));
+		write_c0_status(status & ~ST0_CU2);
+		local_irq_restore(flags);
+
+		return NOTIFY_BAD;	/* Don't call default notifier */
+	}
+
+	return NOTIFY_OK;		/* Let default notifier send signals */
+}
+
+static int __init cnmips_cu2_setup(void)
+{
+	return cu2_notifier(cnmips_cu2_call, 0);
+}
+early_initcall(cnmips_cu2_setup);
diff --git a/arch/mips/cavium-octeon/csrc-octeon.c b/arch/mips/cavium-octeon/csrc-octeon.c
new file mode 100644
index 00000000..29d56afb
--- /dev/null
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@@ -0,0 +1,164 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 by Ralf Baechle
+ * Copyright (C) 2009, 2010 Cavium Networks, Inc.
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/cpu-info.h>
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-ipd-defs.h>
+#include <asm/octeon/cvmx-mio-defs.h>
+
+/*
+ * Set the current core's cvmcount counter to the value of the
+ * IPD_CLK_COUNT.  We do this on all cores as they are brought
+ * on-line.  This allows for a read from a local cpu register to
+ * access a synchronized counter.
+ *
+ * On CPU_CAVIUM_OCTEON2 the IPD_CLK_COUNT is scaled by rdiv/sdiv.
+ */
+void octeon_init_cvmcount(void)
+{
+	unsigned long flags;
+	unsigned loops = 2;
+	u64 f = 0;
+	u64 rdiv = 0;
+	u64 sdiv = 0;
+	if (current_cpu_type() == CPU_CAVIUM_OCTEON2) {
+		union cvmx_mio_rst_boot rst_boot;
+		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
+		rdiv = rst_boot.s.c_mul;	/* CPU clock */
+		sdiv = rst_boot.s.pnr_mul;	/* I/O clock */
+		f = (0x8000000000000000ull / sdiv) * 2;
+	}
+
+
+	/* Clobber loops so GCC will not unroll the following while loop. */
+	asm("" : "+r" (loops));
+
+	local_irq_save(flags);
+	/*
+	 * Loop several times so we are executing from the cache,
+	 * which should give more deterministic timing.
+	 */
+	while (loops--) {
+		u64 ipd_clk_count = cvmx_read_csr(CVMX_IPD_CLK_COUNT);
+		if (rdiv != 0) {
+			ipd_clk_count *= rdiv;
+			if (f != 0) {
+				asm("dmultu\t%[cnt],%[f]\n\t"
+				    "mfhi\t%[cnt]"
+				    : [cnt] "+r" (ipd_clk_count),
+				      [f] "=r" (f)
+				    : : "hi", "lo");
+			}
+		}
+		write_c0_cvmcount(ipd_clk_count);
+	}
+	local_irq_restore(flags);
+}
+
+static cycle_t octeon_cvmcount_read(struct clocksource *cs)
+{
+	return read_c0_cvmcount();
+}
+
+static struct clocksource clocksource_mips = {
+	.name		= "OCTEON_CVMCOUNT",
+	.read		= octeon_cvmcount_read,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+unsigned long long notrace sched_clock(void)
+{
+	/* 64-bit arithmatic can overflow, so use 128-bit.  */
+	u64 t1, t2, t3;
+	unsigned long long rv;
+	u64 mult = clocksource_mips.mult;
+	u64 shift = clocksource_mips.shift;
+	u64 cnt = read_c0_cvmcount();
+
+	asm (
+		"dmultu\t%[cnt],%[mult]\n\t"
+		"nor\t%[t1],$0,%[shift]\n\t"
+		"mfhi\t%[t2]\n\t"
+		"mflo\t%[t3]\n\t"
+		"dsll\t%[t2],%[t2],1\n\t"
+		"dsrlv\t%[rv],%[t3],%[shift]\n\t"
+		"dsllv\t%[t1],%[t2],%[t1]\n\t"
+		"or\t%[rv],%[t1],%[rv]\n\t"
+		: [rv] "=&r" (rv), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3)
+		: [cnt] "r" (cnt), [mult] "r" (mult), [shift] "r" (shift)
+		: "hi", "lo");
+	return rv;
+}
+
+void __init plat_time_init(void)
+{
+	clocksource_mips.rating = 300;
+	clocksource_register_hz(&clocksource_mips, octeon_get_clock_rate());
+}
+
+static u64 octeon_udelay_factor;
+static u64 octeon_ndelay_factor;
+
+void __init octeon_setup_delays(void)
+{
+	octeon_udelay_factor = octeon_get_clock_rate() / 1000000;
+	/*
+	 * For __ndelay we divide by 2^16, so the factor is multiplied
+	 * by the same amount.
+	 */
+	octeon_ndelay_factor = (octeon_udelay_factor * 0x10000ull) / 1000ull;
+
+	preset_lpj = octeon_get_clock_rate() / HZ;
+}
+
+void __udelay(unsigned long us)
+{
+	u64 cur, end, inc;
+
+	cur = read_c0_cvmcount();
+
+	inc = us * octeon_udelay_factor;
+	end = cur + inc;
+
+	while (end > cur)
+		cur = read_c0_cvmcount();
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long ns)
+{
+	u64 cur, end, inc;
+
+	cur = read_c0_cvmcount();
+
+	inc = ((ns * octeon_ndelay_factor) >> 16);
+	end = cur + inc;
+
+	while (end > cur)
+		cur = read_c0_cvmcount();
+}
+EXPORT_SYMBOL(__ndelay);
+
+void __delay(unsigned long loops)
+{
+	u64 cur, end;
+
+	cur = read_c0_cvmcount();
+	end = cur + loops;
+
+	while (end > cur)
+		cur = read_c0_cvmcount();
+}
+EXPORT_SYMBOL(__delay);
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
new file mode 100644
index 00000000..1abb66ca
--- /dev/null
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -0,0 +1,349 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001  Ralf Baechle <ralf@gnu.org>
+ * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ * IP32 changes by Ilya.
+ * Copyright (C) 2010 Cavium Networks, Inc.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/bootmem.h>
+#include <linux/swiotlb.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+
+#include <asm/bootinfo.h>
+
+#include <asm/octeon/octeon.h>
+
+#ifdef CONFIG_PCI
+#include <asm/octeon/pci-octeon.h>
+#include <asm/octeon/cvmx-npi-defs.h>
+#include <asm/octeon/cvmx-pci-defs.h>
+
+static dma_addr_t octeon_hole_phys_to_dma(phys_addr_t paddr)
+{
+	if (paddr >= CVMX_PCIE_BAR1_PHYS_BASE && paddr < (CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_PHYS_SIZE))
+		return paddr - CVMX_PCIE_BAR1_PHYS_BASE + CVMX_PCIE_BAR1_RC_BASE;
+	else
+		return paddr;
+}
+
+static phys_addr_t octeon_hole_dma_to_phys(dma_addr_t daddr)
+{
+	if (daddr >= CVMX_PCIE_BAR1_RC_BASE)
+		return daddr + CVMX_PCIE_BAR1_PHYS_BASE - CVMX_PCIE_BAR1_RC_BASE;
+	else
+		return daddr;
+}
+
+static dma_addr_t octeon_gen1_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+	return octeon_hole_phys_to_dma(paddr);
+}
+
+static phys_addr_t octeon_gen1_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	daddr = octeon_hole_dma_to_phys(daddr);
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+
+	return daddr;
+}
+
+static dma_addr_t octeon_big_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+
+	/* Anything in the BAR1 hole or above goes via BAR2 */
+	if (paddr >= 0xf0000000ull)
+		paddr = OCTEON_BAR2_PCI_ADDRESS + paddr;
+
+	return paddr;
+}
+
+static phys_addr_t octeon_big_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	if (daddr >= OCTEON_BAR2_PCI_ADDRESS)
+		daddr -= OCTEON_BAR2_PCI_ADDRESS;
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+	return daddr;
+}
+
+static dma_addr_t octeon_small_phys_to_dma(struct device *dev,
+					   phys_addr_t paddr)
+{
+	if (paddr >= 0x410000000ull && paddr < 0x420000000ull)
+		paddr -= 0x400000000ull;
+
+	/* Anything not in the BAR1 range goes via BAR2 */
+	if (paddr >= octeon_bar1_pci_phys && paddr < octeon_bar1_pci_phys + 0x8000000ull)
+		paddr = paddr - octeon_bar1_pci_phys;
+	else
+		paddr = OCTEON_BAR2_PCI_ADDRESS + paddr;
+
+	return paddr;
+}
+
+static phys_addr_t octeon_small_dma_to_phys(struct device *dev,
+					    dma_addr_t daddr)
+{
+	if (daddr >= OCTEON_BAR2_PCI_ADDRESS)
+		daddr -= OCTEON_BAR2_PCI_ADDRESS;
+	else
+		daddr += octeon_bar1_pci_phys;
+
+	if (daddr >= 0x10000000ull && daddr < 0x20000000ull)
+		daddr += 0x400000000ull;
+	return daddr;
+}
+
+#endif /* CONFIG_PCI */
+
+static dma_addr_t octeon_dma_map_page(struct device *dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction,
+	struct dma_attrs *attrs)
+{
+	dma_addr_t daddr = swiotlb_map_page(dev, page, offset, size,
+					    direction, attrs);
+	mb();
+
+	return daddr;
+}
+
+static int octeon_dma_map_sg(struct device *dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction, struct dma_attrs *attrs)
+{
+	int r = swiotlb_map_sg_attrs(dev, sg, nents, direction, attrs);
+	mb();
+	return r;
+}
+
+static void octeon_dma_sync_single_for_device(struct device *dev,
+	dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)
+{
+	swiotlb_sync_single_for_device(dev, dma_handle, size, direction);
+	mb();
+}
+
+static void octeon_dma_sync_sg_for_device(struct device *dev,
+	struct scatterlist *sg, int nelems, enum dma_data_direction direction)
+{
+	swiotlb_sync_sg_for_device(dev, sg, nelems, direction);
+	mb();
+}
+
+static void *octeon_dma_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t *dma_handle, gfp_t gfp)
+{
+	void *ret;
+
+	if (dma_alloc_from_coherent(dev, size, dma_handle, &ret))
+		return ret;
+
+	/* ignore region specifiers */
+	gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
+
+#ifdef CONFIG_ZONE_DMA
+	if (dev == NULL)
+		gfp |= __GFP_DMA;
+	else if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+		gfp |= __GFP_DMA;
+	else
+#endif
+#ifdef CONFIG_ZONE_DMA32
+	     if (dev->coherent_dma_mask <= DMA_BIT_MASK(32))
+		gfp |= __GFP_DMA32;
+	else
+#endif
+		;
+
+	/* Don't invoke OOM killer */
+	gfp |= __GFP_NORETRY;
+
+	ret = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
+
+	mb();
+
+	return ret;
+}
+
+static void octeon_dma_free_coherent(struct device *dev, size_t size,
+	void *vaddr, dma_addr_t dma_handle)
+{
+	int order = get_order(size);
+
+	if (dma_release_from_coherent(dev, order, vaddr))
+		return;
+
+	swiotlb_free_coherent(dev, size, vaddr, dma_handle);
+}
+
+static dma_addr_t octeon_unity_phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return paddr;
+}
+
+static phys_addr_t octeon_unity_dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return daddr;
+}
+
+struct octeon_dma_map_ops {
+	struct dma_map_ops dma_map_ops;
+	dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr);
+	phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
+};
+
+dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
+						      struct octeon_dma_map_ops,
+						      dma_map_ops);
+
+	return ops->phys_to_dma(dev, paddr);
+}
+EXPORT_SYMBOL(phys_to_dma);
+
+phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
+						      struct octeon_dma_map_ops,
+						      dma_map_ops);
+
+	return ops->dma_to_phys(dev, daddr);
+}
+EXPORT_SYMBOL(dma_to_phys);
+
+static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
+	.dma_map_ops = {
+		.alloc_coherent = octeon_dma_alloc_coherent,
+		.free_coherent = octeon_dma_free_coherent,
+		.map_page = octeon_dma_map_page,
+		.unmap_page = swiotlb_unmap_page,
+		.map_sg = octeon_dma_map_sg,
+		.unmap_sg = swiotlb_unmap_sg_attrs,
+		.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+		.sync_single_for_device = octeon_dma_sync_single_for_device,
+		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
+		.mapping_error = swiotlb_dma_mapping_error,
+		.dma_supported = swiotlb_dma_supported
+	},
+	.phys_to_dma = octeon_unity_phys_to_dma,
+	.dma_to_phys = octeon_unity_dma_to_phys
+};
+
+char *octeon_swiotlb;
+
+void __init plat_swiotlb_setup(void)
+{
+	int i;
+	phys_t max_addr;
+	phys_t addr_size;
+	size_t swiotlbsize;
+	unsigned long swiotlb_nslabs;
+
+	max_addr = 0;
+	addr_size = 0;
+
+	for (i = 0 ; i < boot_mem_map.nr_map; i++) {
+		struct boot_mem_map_entry *e = &boot_mem_map.map[i];
+		if (e->type != BOOT_MEM_RAM)
+			continue;
+
+		/* These addresses map low for PCI. */
+		if (e->addr > 0x410000000ull)
+			continue;
+
+		addr_size += e->size;
+
+		if (max_addr < e->addr + e->size)
+			max_addr = e->addr + e->size;
+
+	}
+
+	swiotlbsize = PAGE_SIZE;
+
+#ifdef CONFIG_PCI
+	/*
+	 * For OCTEON_DMA_BAR_TYPE_SMALL, size the iotlb at 1/4 memory
+	 * size to a maximum of 64MB
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN31XX)
+	    || OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2)) {
+		swiotlbsize = addr_size / 4;
+		if (swiotlbsize > 64 * (1<<20))
+			swiotlbsize = 64 * (1<<20);
+	} else if (max_addr > 0xf0000000ul) {
+		/*
+		 * Otherwise only allocate a big iotlb if there is
+		 * memory past the BAR1 hole.
+		 */
+		swiotlbsize = 64 * (1<<20);
+	}
+#endif
+	swiotlb_nslabs = swiotlbsize >> IO_TLB_SHIFT;
+	swiotlb_nslabs = ALIGN(swiotlb_nslabs, IO_TLB_SEGSIZE);
+	swiotlbsize = swiotlb_nslabs << IO_TLB_SHIFT;
+
+	octeon_swiotlb = alloc_bootmem_low_pages(swiotlbsize);
+
+	swiotlb_init_with_tbl(octeon_swiotlb, swiotlb_nslabs, 1);
+
+	mips_dma_map_ops = &octeon_linear_dma_map_ops.dma_map_ops;
+}
+
+#ifdef CONFIG_PCI
+static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = {
+	.dma_map_ops = {
+		.alloc_coherent = octeon_dma_alloc_coherent,
+		.free_coherent = octeon_dma_free_coherent,
+		.map_page = octeon_dma_map_page,
+		.unmap_page = swiotlb_unmap_page,
+		.map_sg = octeon_dma_map_sg,
+		.unmap_sg = swiotlb_unmap_sg_attrs,
+		.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
+		.sync_single_for_device = octeon_dma_sync_single_for_device,
+		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
+		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
+		.mapping_error = swiotlb_dma_mapping_error,
+		.dma_supported = swiotlb_dma_supported
+	},
+};
+
+struct dma_map_ops *octeon_pci_dma_map_ops;
+
+void __init octeon_pci_dma_init(void)
+{
+	switch (octeon_dma_bar_type) {
+	case OCTEON_DMA_BAR_TYPE_PCIE:
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_gen1_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_gen1_dma_to_phys;
+		break;
+	case OCTEON_DMA_BAR_TYPE_BIG:
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_big_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_big_dma_to_phys;
+		break;
+	case OCTEON_DMA_BAR_TYPE_SMALL:
+		_octeon_pci_dma_map_ops.phys_to_dma = octeon_small_phys_to_dma;
+		_octeon_pci_dma_map_ops.dma_to_phys = octeon_small_dma_to_phys;
+		break;
+	default:
+		BUG();
+	}
+	octeon_pci_dma_map_ops = &_octeon_pci_dma_map_ops.dma_map_ops;
+}
+#endif /* CONFIG_PCI */
diff --git a/arch/mips/cavium-octeon/executive/Makefile b/arch/mips/cavium-octeon/executive/Makefile
new file mode 100644
index 00000000..7f41c5be
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the Cavium Octeon specific kernel interface routines
+# under Linux.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2005-2008 Cavium Networks
+#
+
+obj-y += cvmx-bootmem.o cvmx-l2c.o cvmx-sysinfo.o octeon-model.o
+
+obj-$(CONFIG_CAVIUM_OCTEON_HELPER) += cvmx-helper-errata.o cvmx-helper-jtag.o
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
new file mode 100644
index 00000000..fdf5f19b
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -0,0 +1,690 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2008 Cavium Networks
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+/*
+ * Simple allocate only memory allocator.  Used to allocate memory at
+ * application start time.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/octeon/cvmx.h>
+#include <asm/octeon/cvmx-spinlock.h>
+#include <asm/octeon/cvmx-bootmem.h>
+
+/*#define DEBUG */
+
+
+static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
+
+/* See header file for descriptions of functions */
+
+/*
+ * Wrapper functions are provided for reading/writing the size and
+ * next block values as these may not be directly addressible (in 32
+ * bit applications, for instance.)  Offsets of data elements in
+ * bootmem list, must match cvmx_bootmem_block_header_t.
+ */
+#define NEXT_OFFSET 0
+#define SIZE_OFFSET 8
+
+static void cvmx_bootmem_phy_set_size(uint64_t addr, uint64_t size)
+{
+	cvmx_write64_uint64((addr + SIZE_OFFSET) | (1ull << 63), size);
+}
+
+static void cvmx_bootmem_phy_set_next(uint64_t addr, uint64_t next)
+{
+	cvmx_write64_uint64((addr + NEXT_OFFSET) | (1ull << 63), next);
+}
+
+static uint64_t cvmx_bootmem_phy_get_size(uint64_t addr)
+{
+	return cvmx_read64_uint64((addr + SIZE_OFFSET) | (1ull << 63));
+}
+
+static uint64_t cvmx_bootmem_phy_get_next(uint64_t addr)
+{
+	return cvmx_read64_uint64((addr + NEXT_OFFSET) | (1ull << 63));
+}
+
+void *cvmx_bootmem_alloc_range(uint64_t size, uint64_t alignment,
+			       uint64_t min_addr, uint64_t max_addr)
+{
+	int64_t address;
+	address =
+	    cvmx_bootmem_phy_alloc(size, min_addr, max_addr, alignment, 0);
+
+	if (address > 0)
+		return cvmx_phys_to_ptr(address);
+	else
+		return NULL;
+}
+
+void *cvmx_bootmem_alloc_address(uint64_t size, uint64_t address,
+				 uint64_t alignment)
+{
+	return cvmx_bootmem_alloc_range(size, alignment, address,
+					address + size);
+}
+
+void *cvmx_bootmem_alloc(uint64_t size, uint64_t alignment)
+{
+	return cvmx_bootmem_alloc_range(size, alignment, 0, 0);
+}
+
+void *cvmx_bootmem_alloc_named_range(uint64_t size, uint64_t min_addr,
+				     uint64_t max_addr, uint64_t align,
+				     char *name)
+{
+	int64_t addr;
+
+	addr = cvmx_bootmem_phy_named_block_alloc(size, min_addr, max_addr,
+						  align, name, 0);
+	if (addr >= 0)
+		return cvmx_phys_to_ptr(addr);
+	else
+		return NULL;
+}
+
+void *cvmx_bootmem_alloc_named_address(uint64_t size, uint64_t address,
+				       char *name)
+{
+    return cvmx_bootmem_alloc_named_range(size, address, address + size,
+					  0, name);
+}
+
+void *cvmx_bootmem_alloc_named(uint64_t size, uint64_t alignment, char *name)
+{
+    return cvmx_bootmem_alloc_named_range(size, 0, 0, alignment, name);
+}
+EXPORT_SYMBOL(cvmx_bootmem_alloc_named);
+
+int cvmx_bootmem_free_named(char *name)
+{
+	return cvmx_bootmem_phy_named_block_free(name, 0);
+}
+
+struct cvmx_bootmem_named_block_desc *cvmx_bootmem_find_named_block(char *name)
+{
+	return cvmx_bootmem_phy_named_block_find(name, 0);
+}
+EXPORT_SYMBOL(cvmx_bootmem_find_named_block);
+
+void cvmx_bootmem_lock(void)
+{
+	cvmx_spinlock_lock((cvmx_spinlock_t *) &(cvmx_bootmem_desc->lock));
+}
+
+void cvmx_bootmem_unlock(void)
+{
+	cvmx_spinlock_unlock((cvmx_spinlock_t *) &(cvmx_bootmem_desc->lock));
+}
+
+int cvmx_bootmem_init(void *mem_desc_ptr)
+{
+	/* Here we set the global pointer to the bootmem descriptor
+	 * block.  This pointer will be used directly, so we will set
+	 * it up to be directly usable by the application.  It is set
+	 * up as follows for the various runtime/ABI combinations:
+	 *
+	 * Linux 64 bit: Set XKPHYS bit
+	 * Linux 32 bit: use mmap to create mapping, use virtual address
+	 * CVMX 64 bit:  use physical address directly
+	 * CVMX 32 bit:  use physical address directly
+	 *
+	 * Note that the CVMX environment assumes the use of 1-1 TLB
+	 * mappings so that the physical addresses can be used
+	 * directly
+	 */
+	if (!cvmx_bootmem_desc) {
+#if   defined(CVMX_ABI_64)
+		/* Set XKPHYS bit */
+		cvmx_bootmem_desc = cvmx_phys_to_ptr(CAST64(mem_desc_ptr));
+#else
+		cvmx_bootmem_desc = (struct cvmx_bootmem_desc *) mem_desc_ptr;
+#endif
+	}
+
+	return 0;
+}
+
+/*
+ * The cvmx_bootmem_phy* functions below return 64 bit physical
+ * addresses, and expose more features that the cvmx_bootmem_functions
+ * above.  These are required for full memory space access in 32 bit
+ * applications, as well as for using some advance features.  Most
+ * applications should not need to use these.
+ */
+
+int64_t cvmx_bootmem_phy_alloc(uint64_t req_size, uint64_t address_min,
+			       uint64_t address_max, uint64_t alignment,
+			       uint32_t flags)
+{
+
+	uint64_t head_addr;
+	uint64_t ent_addr;
+	/* points to previous list entry, NULL current entry is head of list */
+	uint64_t prev_addr = 0;
+	uint64_t new_ent_addr = 0;
+	uint64_t desired_min_addr;
+
+#ifdef DEBUG
+	cvmx_dprintf("cvmx_bootmem_phy_alloc: req_size: 0x%llx, "
+		     "min_addr: 0x%llx, max_addr: 0x%llx, align: 0x%llx\n",
+		     (unsigned long long)req_size,
+		     (unsigned long long)address_min,
+		     (unsigned long long)address_max,
+		     (unsigned long long)alignment);
+#endif
+
+	if (cvmx_bootmem_desc->major_version > 3) {
+		cvmx_dprintf("ERROR: Incompatible bootmem descriptor "
+			     "version: %d.%d at addr: %p\n",
+			     (int)cvmx_bootmem_desc->major_version,
+			     (int)cvmx_bootmem_desc->minor_version,
+			     cvmx_bootmem_desc);
+		goto error_out;
+	}
+
+	/*
+	 * Do a variety of checks to validate the arguments.  The
+	 * allocator code will later assume that these checks have
+	 * been made.  We validate that the requested constraints are
+	 * not self-contradictory before we look through the list of
+	 * available memory.
+	 */
+
+	/* 0 is not a valid req_size for this allocator */
+	if (!req_size)
+		goto error_out;
+
+	/* Round req_size up to mult of minimum alignment bytes */
+	req_size = (req_size + (CVMX_BOOTMEM_ALIGNMENT_SIZE - 1)) &
+		~(CVMX_BOOTMEM_ALIGNMENT_SIZE - 1);
+
+	/*
+	 * Convert !0 address_min and 0 address_max to special case of
+	 * range that specifies an exact memory block to allocate.  Do
+	 * this before other checks and adjustments so that this
+	 * tranformation will be validated.
+	 */
+	if (address_min && !address_max)
+		address_max = address_min + req_size;
+	else if (!address_min && !address_max)
+		address_max = ~0ull;  /* If no limits given, use max limits */
+
+
+	/*
+	 * Enforce minimum alignment (this also keeps the minimum free block
+	 * req_size the same as the alignment req_size.
+	 */
+	if (alignment < CVMX_BOOTMEM_ALIGNMENT_SIZE)
+		alignment = CVMX_BOOTMEM_ALIGNMENT_SIZE;
+
+	/*
+	 * Adjust address minimum based on requested alignment (round
+	 * up to meet alignment).  Do this here so we can reject
+	 * impossible requests up front. (NOP for address_min == 0)
+	 */
+	if (alignment)
+		address_min = ALIGN(address_min, alignment);
+
+	/*
+	 * Reject inconsistent args.  We have adjusted these, so this
+	 * may fail due to our internal changes even if this check
+	 * would pass for the values the user supplied.
+	 */
+	if (req_size > address_max - address_min)
+		goto error_out;
+
+	/* Walk through the list entries - first fit found is returned */
+
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_bootmem_lock();
+	head_addr = cvmx_bootmem_desc->head_addr;
+	ent_addr = head_addr;
+	for (; ent_addr;
+	     prev_addr = ent_addr,
+	     ent_addr = cvmx_bootmem_phy_get_next(ent_addr)) {
+		uint64_t usable_base, usable_max;
+		uint64_t ent_size = cvmx_bootmem_phy_get_size(ent_addr);
+
+		if (cvmx_bootmem_phy_get_next(ent_addr)
+		    && ent_addr > cvmx_bootmem_phy_get_next(ent_addr)) {
+			cvmx_dprintf("Internal bootmem_alloc() error: ent: "
+				"0x%llx, next: 0x%llx\n",
+				(unsigned long long)ent_addr,
+				(unsigned long long)
+				cvmx_bootmem_phy_get_next(ent_addr));
+			goto error_out;
+		}
+
+		/*
+		 * Determine if this is an entry that can satisify the
+		 * request Check to make sure entry is large enough to
+		 * satisfy request.
+		 */
+		usable_base =
+		    ALIGN(max(address_min, ent_addr), alignment);
+		usable_max = min(address_max, ent_addr + ent_size);
+		/*
+		 * We should be able to allocate block at address
+		 * usable_base.
+		 */
+
+		desired_min_addr = usable_base;
+		/*
+		 * Determine if request can be satisfied from the
+		 * current entry.
+		 */
+		if (!((ent_addr + ent_size) > usable_base
+				&& ent_addr < address_max
+				&& req_size <= usable_max - usable_base))
+			continue;
+		/*
+		 * We have found an entry that has room to satisfy the
+		 * request, so allocate it from this entry.  If end
+		 * CVMX_BOOTMEM_FLAG_END_ALLOC set, then allocate from
+		 * the end of this block rather than the beginning.
+		 */
+		if (flags & CVMX_BOOTMEM_FLAG_END_ALLOC) {
+			desired_min_addr = usable_max - req_size;
+			/*
+			 * Align desired address down to required
+			 * alignment.
+			 */
+			desired_min_addr &= ~(alignment - 1);
+		}
+
+		/* Match at start of entry */
+		if (desired_min_addr == ent_addr) {
+			if (req_size < ent_size) {
+				/*
+				 * big enough to create a new block
+				 * from top portion of block.
+				 */
+				new_ent_addr = ent_addr + req_size;
+				cvmx_bootmem_phy_set_next(new_ent_addr,
+					cvmx_bootmem_phy_get_next(ent_addr));
+				cvmx_bootmem_phy_set_size(new_ent_addr,
+							ent_size -
+							req_size);
+
+				/*
+				 * Adjust next pointer as following
+				 * code uses this.
+				 */
+				cvmx_bootmem_phy_set_next(ent_addr,
+							new_ent_addr);
+			}
+
+			/*
+			 * adjust prev ptr or head to remove this
+			 * entry from list.
+			 */
+			if (prev_addr)
+				cvmx_bootmem_phy_set_next(prev_addr,
+					cvmx_bootmem_phy_get_next(ent_addr));
+			else
+				/*
+				 * head of list being returned, so
+				 * update head ptr.
+				 */
+				cvmx_bootmem_desc->head_addr =
+					cvmx_bootmem_phy_get_next(ent_addr);
+
+			if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+				cvmx_bootmem_unlock();
+			return desired_min_addr;
+		}
+		/*
+		 * block returned doesn't start at beginning of entry,
+		 * so we know that we will be splitting a block off
+		 * the front of this one.  Create a new block from the
+		 * beginning, add to list, and go to top of loop
+		 * again.
+		 *
+		 * create new block from high portion of
+		 * block, so that top block starts at desired
+		 * addr.
+		 */
+		new_ent_addr = desired_min_addr;
+		cvmx_bootmem_phy_set_next(new_ent_addr,
+					cvmx_bootmem_phy_get_next
+					(ent_addr));
+		cvmx_bootmem_phy_set_size(new_ent_addr,
+					cvmx_bootmem_phy_get_size
+					(ent_addr) -
+					(desired_min_addr -
+						ent_addr));
+		cvmx_bootmem_phy_set_size(ent_addr,
+					desired_min_addr - ent_addr);
+		cvmx_bootmem_phy_set_next(ent_addr, new_ent_addr);
+		/* Loop again to handle actual alloc from new block */
+	}
+error_out:
+	/* We didn't find anything, so return error */
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_bootmem_unlock();
+	return -1;
+}
+
+int __cvmx_bootmem_phy_free(uint64_t phy_addr, uint64_t size, uint32_t flags)
+{
+	uint64_t cur_addr;
+	uint64_t prev_addr = 0;	/* zero is invalid */
+	int retval = 0;
+
+#ifdef DEBUG
+	cvmx_dprintf("__cvmx_bootmem_phy_free addr: 0x%llx, size: 0x%llx\n",
+		     (unsigned long long)phy_addr, (unsigned long long)size);
+#endif
+	if (cvmx_bootmem_desc->major_version > 3) {
+		cvmx_dprintf("ERROR: Incompatible bootmem descriptor "
+			     "version: %d.%d at addr: %p\n",
+			     (int)cvmx_bootmem_desc->major_version,
+			     (int)cvmx_bootmem_desc->minor_version,
+			     cvmx_bootmem_desc);
+		return 0;
+	}
+
+	/* 0 is not a valid size for this allocator */
+	if (!size)
+		return 0;
+
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_bootmem_lock();
+	cur_addr = cvmx_bootmem_desc->head_addr;
+	if (cur_addr == 0 || phy_addr < cur_addr) {
+		/* add at front of list - special case with changing head ptr */
+		if (cur_addr && phy_addr + size > cur_addr)
+			goto bootmem_free_done;	/* error, overlapping section */
+		else if (phy_addr + size == cur_addr) {
+			/* Add to front of existing first block */
+			cvmx_bootmem_phy_set_next(phy_addr,
+						  cvmx_bootmem_phy_get_next
+						  (cur_addr));
+			cvmx_bootmem_phy_set_size(phy_addr,
+						  cvmx_bootmem_phy_get_size
+						  (cur_addr) + size);
+			cvmx_bootmem_desc->head_addr = phy_addr;
+
+		} else {
+			/* New block before first block.  OK if cur_addr is 0 */
+			cvmx_bootmem_phy_set_next(phy_addr, cur_addr);
+			cvmx_bootmem_phy_set_size(phy_addr, size);
+			cvmx_bootmem_desc->head_addr = phy_addr;
+		}
+		retval = 1;
+		goto bootmem_free_done;
+	}
+
+	/* Find place in list to add block */
+	while (cur_addr && phy_addr > cur_addr) {
+		prev_addr = cur_addr;
+		cur_addr = cvmx_bootmem_phy_get_next(cur_addr);
+	}
+
+	if (!cur_addr) {
+		/*
+		 * We have reached the end of the list, add on to end,
+		 * checking to see if we need to combine with last
+		 * block
+		 */
+		if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) ==
+		    phy_addr) {
+			cvmx_bootmem_phy_set_size(prev_addr,
+						  cvmx_bootmem_phy_get_size
+						  (prev_addr) + size);
+		} else {
+			cvmx_bootmem_phy_set_next(prev_addr, phy_addr);
+			cvmx_bootmem_phy_set_size(phy_addr, size);
+			cvmx_bootmem_phy_set_next(phy_addr, 0);
+		}
+		retval = 1;
+		goto bootmem_free_done;
+	} else {
+		/*
+		 * insert between prev and cur nodes, checking for
+		 * merge with either/both.
+		 */
+		if (prev_addr + cvmx_bootmem_phy_get_size(prev_addr) ==
+		    phy_addr) {
+			/* Merge with previous */
+			cvmx_bootmem_phy_set_size(prev_addr,
+						  cvmx_bootmem_phy_get_size
+						  (prev_addr) + size);
+			if (phy_addr + size == cur_addr) {
+				/* Also merge with current */
+				cvmx_bootmem_phy_set_size(prev_addr,
+					cvmx_bootmem_phy_get_size(cur_addr) +
+					cvmx_bootmem_phy_get_size(prev_addr));
+				cvmx_bootmem_phy_set_next(prev_addr,
+					cvmx_bootmem_phy_get_next(cur_addr));
+			}
+			retval = 1;
+			goto bootmem_free_done;
+		} else if (phy_addr + size == cur_addr) {
+			/* Merge with current */
+			cvmx_bootmem_phy_set_size(phy_addr,
+						  cvmx_bootmem_phy_get_size
+						  (cur_addr) + size);
+			cvmx_bootmem_phy_set_next(phy_addr,
+						  cvmx_bootmem_phy_get_next
+						  (cur_addr));
+			cvmx_bootmem_phy_set_next(prev_addr, phy_addr);
+			retval = 1;
+			goto bootmem_free_done;
+		}
+
+		/* It is a standalone block, add in between prev and cur */
+		cvmx_bootmem_phy_set_size(phy_addr, size);
+		cvmx_bootmem_phy_set_next(phy_addr, cur_addr);
+		cvmx_bootmem_phy_set_next(prev_addr, phy_addr);
+
+	}
+	retval = 1;
+
+bootmem_free_done:
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_bootmem_unlock();
+	return retval;
+
+}
+
+struct cvmx_bootmem_named_block_desc *
+	cvmx_bootmem_phy_named_block_find(char *name, uint32_t flags)
+{
+	unsigned int i;
+	struct cvmx_bootmem_named_block_desc *named_block_array_ptr;
+
+#ifdef DEBUG
+	cvmx_dprintf("cvmx_bootmem_phy_named_block_find: %s\n", name);
+#endif
+	/*
+	 * Lock the structure to make sure that it is not being
+	 * changed while we are examining it.
+	 */
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_bootmem_lock();
+
+	/* Use XKPHYS for 64 bit linux */
+	named_block_array_ptr = (struct cvmx_bootmem_named_block_desc *)
+	    cvmx_phys_to_ptr(cvmx_bootmem_desc->named_block_array_addr);
+
+#ifdef DEBUG
+	cvmx_dprintf
+	    ("cvmx_bootmem_phy_named_block_find: named_block_array_ptr: %p\n",
+	     named_block_array_ptr);
+#endif
+	if (cvmx_bootmem_desc->major_version == 3) {
+		for (i = 0;
+		     i < cvmx_bootmem_desc->named_block_num_blocks; i++) {
+			if ((name && named_block_array_ptr[i].size
+			     && !strncmp(name, named_block_array_ptr[i].name,
+					 cvmx_bootmem_desc->named_block_name_len
+					 - 1))
+			    || (!name && !named_block_array_ptr[i].size)) {
+				if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+					cvmx_bootmem_unlock();
+
+				return &(named_block_array_ptr[i]);
+			}
+		}
+	} else {
+		cvmx_dprintf("ERROR: Incompatible bootmem descriptor "
+			     "version: %d.%d at addr: %p\n",
+			     (int)cvmx_bootmem_desc->major_version,
+			     (int)cvmx_bootmem_desc->minor_version,
+			     cvmx_bootmem_desc);
+	}
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_bootmem_unlock();
+
+	return NULL;
+}
+
+int cvmx_bootmem_phy_named_block_free(char *name, uint32_t flags)
+{
+	struct cvmx_bootmem_named_block_desc *named_block_ptr;
+
+	if (cvmx_bootmem_desc->major_version != 3) {
+		cvmx_dprintf("ERROR: Incompatible bootmem descriptor version: "
+			     "%d.%d at addr: %p\n",
+			     (int)cvmx_bootmem_desc->major_version,
+			     (int)cvmx_bootmem_desc->minor_version,
+			     cvmx_bootmem_desc);
+		return 0;
+	}
+#ifdef DEBUG
+	cvmx_dprintf("cvmx_bootmem_phy_named_block_free: %s\n", name);
+#endif
+
+	/*
+	 * Take lock here, as name lookup/block free/name free need to
+	 * be atomic.
+	 */
+	cvmx_bootmem_lock();
+
+	named_block_ptr =
+	    cvmx_bootmem_phy_named_block_find(name,
+					      CVMX_BOOTMEM_FLAG_NO_LOCKING);
+	if (named_block_ptr) {
+#ifdef DEBUG
+		cvmx_dprintf("cvmx_bootmem_phy_named_block_free: "
+			     "%s, base: 0x%llx, size: 0x%llx\n",
+			     name,
+			     (unsigned long long)named_block_ptr->base_addr,
+			     (unsigned long long)named_block_ptr->size);
+#endif
+		__cvmx_bootmem_phy_free(named_block_ptr->base_addr,
+					named_block_ptr->size,
+					CVMX_BOOTMEM_FLAG_NO_LOCKING);
+		named_block_ptr->size = 0;
+		/* Set size to zero to indicate block not used. */
+	}
+
+	cvmx_bootmem_unlock();
+	return named_block_ptr != NULL;	/* 0 on failure, 1 on success */
+}
+
+int64_t cvmx_bootmem_phy_named_block_alloc(uint64_t size, uint64_t min_addr,
+					   uint64_t max_addr,
+					   uint64_t alignment,
+					   char *name,
+					   uint32_t flags)
+{
+	int64_t addr_allocated;
+	struct cvmx_bootmem_named_block_desc *named_block_desc_ptr;
+
+#ifdef DEBUG
+	cvmx_dprintf("cvmx_bootmem_phy_named_block_alloc: size: 0x%llx, min: "
+		     "0x%llx, max: 0x%llx, align: 0x%llx, name: %s\n",
+		     (unsigned long long)size,
+		     (unsigned long long)min_addr,
+		     (unsigned long long)max_addr,
+		     (unsigned long long)alignment,
+		     name);
+#endif
+	if (cvmx_bootmem_desc->major_version != 3) {
+		cvmx_dprintf("ERROR: Incompatible bootmem descriptor version: "
+			     "%d.%d at addr: %p\n",
+			     (int)cvmx_bootmem_desc->major_version,
+			     (int)cvmx_bootmem_desc->minor_version,
+			     cvmx_bootmem_desc);
+		return -1;
+	}
+
+	/*
+	 * Take lock here, as name lookup/block alloc/name add need to
+	 * be atomic.
+	 */
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_spinlock_lock((cvmx_spinlock_t *)&(cvmx_bootmem_desc->lock));
+
+	/* Get pointer to first available named block descriptor */
+	named_block_desc_ptr =
+		cvmx_bootmem_phy_named_block_find(NULL,
+						  flags | CVMX_BOOTMEM_FLAG_NO_LOCKING);
+
+	/*
+	 * Check to see if name already in use, return error if name
+	 * not available or no more room for blocks.
+	 */
+	if (cvmx_bootmem_phy_named_block_find(name,
+					      flags | CVMX_BOOTMEM_FLAG_NO_LOCKING) || !named_block_desc_ptr) {
+		if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+			cvmx_spinlock_unlock((cvmx_spinlock_t *)&(cvmx_bootmem_desc->lock));
+		return -1;
+	}
+
+
+	/*
+	 * Round size up to mult of minimum alignment bytes We need
+	 * the actual size allocated to allow for blocks to be
+	 * coallesced when they are freed.  The alloc routine does the
+	 * same rounding up on all allocations.
+	 */
+	size = ALIGN(size, CVMX_BOOTMEM_ALIGNMENT_SIZE);
+
+	addr_allocated = cvmx_bootmem_phy_alloc(size, min_addr, max_addr,
+						alignment,
+						flags | CVMX_BOOTMEM_FLAG_NO_LOCKING);
+	if (addr_allocated >= 0) {
+		named_block_desc_ptr->base_addr = addr_allocated;
+		named_block_desc_ptr->size = size;
+		strncpy(named_block_desc_ptr->name, name,
+			cvmx_bootmem_desc->named_block_name_len);
+		named_block_desc_ptr->name[cvmx_bootmem_desc->named_block_name_len - 1] = 0;
+	}
+
+	if (!(flags & CVMX_BOOTMEM_FLAG_NO_LOCKING))
+		cvmx_spinlock_unlock((cvmx_spinlock_t *)&(cvmx_bootmem_desc->lock));
+	return addr_allocated;
+}
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-errata.c b/arch/mips/cavium-octeon/executive/cvmx-helper-errata.c
new file mode 100644
index 00000000..868659e6
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-errata.c
@@ -0,0 +1,73 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2008 Cavium Networks
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+/**
+ *
+ * Fixes and workaround for Octeon chip errata. This file
+ * contains functions called by cvmx-helper to workaround known
+ * chip errata. For the most part, code doesn't need to call
+ * these functions directly.
+ *
+ */
+#include <linux/module.h>
+
+#include <asm/octeon/octeon.h>
+
+#include <asm/octeon/cvmx-helper-jtag.h>
+
+/**
+ * Due to errata G-720, the 2nd order CDR circuit on CN52XX pass
+ * 1 doesn't work properly. The following code disables 2nd order
+ * CDR for the specified QLM.
+ *
+ * @qlm:    QLM to disable 2nd order CDR for.
+ */
+void __cvmx_helper_errata_qlm_disable_2nd_order_cdr(int qlm)
+{
+	int lane;
+	cvmx_helper_qlm_jtag_init();
+	/* We need to load all four lanes of the QLM, a total of 1072 bits */
+	for (lane = 0; lane < 4; lane++) {
+		/*
+		 * Each lane has 268 bits. We need to set
+		 * cfg_cdr_incx<67:64> = 3 and cfg_cdr_secord<77> =
+		 * 1. All other bits are zero. Bits go in LSB first,
+		 * so start off with the zeros for bits <63:0>.
+		 */
+		cvmx_helper_qlm_jtag_shift_zeros(qlm, 63 - 0 + 1);
+		/* cfg_cdr_incx<67:64>=3 */
+		cvmx_helper_qlm_jtag_shift(qlm, 67 - 64 + 1, 3);
+		/* Zeros for bits <76:68> */
+		cvmx_helper_qlm_jtag_shift_zeros(qlm, 76 - 68 + 1);
+		/* cfg_cdr_secord<77>=1 */
+		cvmx_helper_qlm_jtag_shift(qlm, 77 - 77 + 1, 1);
+		/* Zeros for bits <267:78> */
+		cvmx_helper_qlm_jtag_shift_zeros(qlm, 267 - 78 + 1);
+	}
+	cvmx_helper_qlm_jtag_update(qlm);
+}
+EXPORT_SYMBOL(__cvmx_helper_errata_qlm_disable_2nd_order_cdr);
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c b/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c
new file mode 100644
index 00000000..c1c54890
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-jtag.c
@@ -0,0 +1,144 @@
+
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2008 Cavium Networks
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+/**
+ *
+ * Helper utilities for qlm_jtag.
+ *
+ */
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-helper-jtag.h>
+
+
+/**
+ * Initialize the internal QLM JTAG logic to allow programming
+ * of the JTAG chain by the cvmx_helper_qlm_jtag_*() functions.
+ * These functions should only be used at the direction of Cavium
+ * Networks. Programming incorrect values into the JTAG chain
+ * can cause chip damage.
+ */
+void cvmx_helper_qlm_jtag_init(void)
+{
+	union cvmx_ciu_qlm_jtgc jtgc;
+	uint32_t clock_div = 0;
+	uint32_t divisor = cvmx_sysinfo_get()->cpu_clock_hz / (25 * 1000000);
+	divisor = (divisor - 1) >> 2;
+	/* Convert the divisor into a power of 2 shift */
+	while (divisor) {
+		clock_div++;
+		divisor = divisor >> 1;
+	}
+
+	/*
+	 * Clock divider for QLM JTAG operations.  eclk is divided by
+	 * 2^(CLK_DIV + 2)
+	 */
+	jtgc.u64 = 0;
+	jtgc.s.clk_div = clock_div;
+	jtgc.s.mux_sel = 0;
+	if (OCTEON_IS_MODEL(OCTEON_CN52XX))
+		jtgc.s.bypass = 0x3;
+	else
+		jtgc.s.bypass = 0xf;
+	cvmx_write_csr(CVMX_CIU_QLM_JTGC, jtgc.u64);
+	cvmx_read_csr(CVMX_CIU_QLM_JTGC);
+}
+
+/**
+ * Write up to 32bits into the QLM jtag chain. Bits are shifted
+ * into the MSB and out the LSB, so you should shift in the low
+ * order bits followed by the high order bits. The JTAG chain is
+ * 4 * 268 bits long, or 1072.
+ *
+ * @qlm:    QLM to shift value into
+ * @bits:   Number of bits to shift in (1-32).
+ * @data:   Data to shift in. Bit 0 enters the chain first, followed by
+ *               bit 1, etc.
+ *
+ * Returns The low order bits of the JTAG chain that shifted out of the
+ *         circle.
+ */
+uint32_t cvmx_helper_qlm_jtag_shift(int qlm, int bits, uint32_t data)
+{
+	union cvmx_ciu_qlm_jtgd jtgd;
+	jtgd.u64 = 0;
+	jtgd.s.shift = 1;
+	jtgd.s.shft_cnt = bits - 1;
+	jtgd.s.shft_reg = data;
+	if (!OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
+		jtgd.s.select = 1 << qlm;
+	cvmx_write_csr(CVMX_CIU_QLM_JTGD, jtgd.u64);
+	do {
+		jtgd.u64 = cvmx_read_csr(CVMX_CIU_QLM_JTGD);
+	} while (jtgd.s.shift);
+	return jtgd.s.shft_reg >> (32 - bits);
+}
+
+/**
+ * Shift long sequences of zeros into the QLM JTAG chain. It is
+ * common to need to shift more than 32 bits of zeros into the
+ * chain. This function is a convience wrapper around
+ * cvmx_helper_qlm_jtag_shift() to shift more than 32 bits of
+ * zeros at a time.
+ *
+ * @qlm:    QLM to shift zeros into
+ * @bits:
+ */
+void cvmx_helper_qlm_jtag_shift_zeros(int qlm, int bits)
+{
+	while (bits > 0) {
+		int n = bits;
+		if (n > 32)
+			n = 32;
+		cvmx_helper_qlm_jtag_shift(qlm, n, 0);
+		bits -= n;
+	}
+}
+
+/**
+ * Program the QLM JTAG chain into all lanes of the QLM. You must
+ * have already shifted in 268*4, or 1072 bits into the JTAG
+ * chain. Updating invalid values can possibly cause chip damage.
+ *
+ * @qlm:    QLM to program
+ */
+void cvmx_helper_qlm_jtag_update(int qlm)
+{
+	union cvmx_ciu_qlm_jtgd jtgd;
+
+	/* Update the new data */
+	jtgd.u64 = 0;
+	jtgd.s.update = 1;
+	if (!OCTEON_IS_MODEL(OCTEON_CN56XX_PASS1_X))
+		jtgd.s.select = 1 << qlm;
+	cvmx_write_csr(CVMX_CIU_QLM_JTGD, jtgd.u64);
+	do {
+		jtgd.u64 = cvmx_read_csr(CVMX_CIU_QLM_JTGD);
+	} while (jtgd.s.update);
+}
diff --git a/arch/mips/cavium-octeon/executive/cvmx-l2c.c b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
new file mode 100644
index 00000000..d38246e3
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/cvmx-l2c.c
@@ -0,0 +1,900 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2010 Cavium Networks
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+/*
+ * Implementation of the Level 2 Cache (L2C) control,
+ * measurement, and debugging facilities.
+ */
+
+#include <asm/octeon/cvmx.h>
+#include <asm/octeon/cvmx-l2c.h>
+#include <asm/octeon/cvmx-spinlock.h>
+
+/*
+ * This spinlock is used internally to ensure that only one core is
+ * performing certain L2 operations at a time.
+ *
+ * NOTE: This only protects calls from within a single application -
+ * if multiple applications or operating systems are running, then it
+ * is up to the user program to coordinate between them.
+ */
+cvmx_spinlock_t cvmx_l2c_spinlock;
+
+int cvmx_l2c_get_core_way_partition(uint32_t core)
+{
+	uint32_t field;
+
+	/* Validate the core number */
+	if (core >= cvmx_octeon_num_cores())
+		return -1;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return cvmx_read_csr(CVMX_L2C_WPAR_PPX(core)) & 0xffff;
+
+	/*
+	 * Use the lower two bits of the coreNumber to determine the
+	 * bit offset of the UMSK[] field in the L2C_SPAR register.
+	 */
+	field = (core & 0x3) * 8;
+
+	/*
+	 * Return the UMSK[] field from the appropriate L2C_SPAR
+	 * register based on the coreNumber.
+	 */
+
+	switch (core & 0xC) {
+	case 0x0:
+		return (cvmx_read_csr(CVMX_L2C_SPAR0) & (0xFF << field)) >> field;
+	case 0x4:
+		return (cvmx_read_csr(CVMX_L2C_SPAR1) & (0xFF << field)) >> field;
+	case 0x8:
+		return (cvmx_read_csr(CVMX_L2C_SPAR2) & (0xFF << field)) >> field;
+	case 0xC:
+		return (cvmx_read_csr(CVMX_L2C_SPAR3) & (0xFF << field)) >> field;
+	}
+	return 0;
+}
+
+int cvmx_l2c_set_core_way_partition(uint32_t core, uint32_t mask)
+{
+	uint32_t field;
+	uint32_t valid_mask;
+
+	valid_mask = (0x1 << cvmx_l2c_get_num_assoc()) - 1;
+
+	mask &= valid_mask;
+
+	/* A UMSK setting which blocks all L2C Ways is an error on some chips */
+	if (mask == valid_mask && !OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return -1;
+
+	/* Validate the core number */
+	if (core >= cvmx_octeon_num_cores())
+		return -1;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		cvmx_write_csr(CVMX_L2C_WPAR_PPX(core), mask);
+		return 0;
+	}
+
+	/*
+	 * Use the lower two bits of core to determine the bit offset of the
+	 * UMSK[] field in the L2C_SPAR register.
+	 */
+	field = (core & 0x3) * 8;
+
+	/*
+	 * Assign the new mask setting to the UMSK[] field in the appropriate
+	 * L2C_SPAR register based on the core_num.
+	 *
+	 */
+	switch (core & 0xC) {
+	case 0x0:
+		cvmx_write_csr(CVMX_L2C_SPAR0,
+			       (cvmx_read_csr(CVMX_L2C_SPAR0) & ~(0xFF << field)) |
+			       mask << field);
+		break;
+	case 0x4:
+		cvmx_write_csr(CVMX_L2C_SPAR1,
+			       (cvmx_read_csr(CVMX_L2C_SPAR1) & ~(0xFF << field)) |
+			       mask << field);
+		break;
+	case 0x8:
+		cvmx_write_csr(CVMX_L2C_SPAR2,
+			       (cvmx_read_csr(CVMX_L2C_SPAR2) & ~(0xFF << field)) |
+			       mask << field);
+		break;
+	case 0xC:
+		cvmx_write_csr(CVMX_L2C_SPAR3,
+			       (cvmx_read_csr(CVMX_L2C_SPAR3) & ~(0xFF << field)) |
+			       mask << field);
+		break;
+	}
+	return 0;
+}
+
+int cvmx_l2c_set_hw_way_partition(uint32_t mask)
+{
+	uint32_t valid_mask;
+
+	valid_mask = (0x1 << cvmx_l2c_get_num_assoc()) - 1;
+	mask &= valid_mask;
+
+	/* A UMSK setting which blocks all L2C Ways is an error on some chips */
+	if (mask == valid_mask  && !OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return -1;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		cvmx_write_csr(CVMX_L2C_WPAR_IOBX(0), mask);
+	else
+		cvmx_write_csr(CVMX_L2C_SPAR4,
+			       (cvmx_read_csr(CVMX_L2C_SPAR4) & ~0xFF) | mask);
+	return 0;
+}
+
+int cvmx_l2c_get_hw_way_partition(void)
+{
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return cvmx_read_csr(CVMX_L2C_WPAR_IOBX(0)) & 0xffff;
+	else
+		return cvmx_read_csr(CVMX_L2C_SPAR4) & (0xFF);
+}
+
+void cvmx_l2c_config_perf(uint32_t counter, enum cvmx_l2c_event event,
+			  uint32_t clear_on_read)
+{
+	if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX)) {
+		union cvmx_l2c_pfctl pfctl;
+
+		pfctl.u64 = cvmx_read_csr(CVMX_L2C_PFCTL);
+
+		switch (counter) {
+		case 0:
+			pfctl.s.cnt0sel = event;
+			pfctl.s.cnt0ena = 1;
+			pfctl.s.cnt0rdclr = clear_on_read;
+			break;
+		case 1:
+			pfctl.s.cnt1sel = event;
+			pfctl.s.cnt1ena = 1;
+			pfctl.s.cnt1rdclr = clear_on_read;
+			break;
+		case 2:
+			pfctl.s.cnt2sel = event;
+			pfctl.s.cnt2ena = 1;
+			pfctl.s.cnt2rdclr = clear_on_read;
+			break;
+		case 3:
+		default:
+			pfctl.s.cnt3sel = event;
+			pfctl.s.cnt3ena = 1;
+			pfctl.s.cnt3rdclr = clear_on_read;
+			break;
+		}
+
+		cvmx_write_csr(CVMX_L2C_PFCTL, pfctl.u64);
+	} else {
+		union cvmx_l2c_tadx_prf l2c_tadx_prf;
+		int tad;
+
+		cvmx_dprintf("L2C performance counter events are different for this chip, mapping 'event' to cvmx_l2c_tad_event_t\n");
+		if (clear_on_read)
+			cvmx_dprintf("L2C counters don't support clear on read for this chip\n");
+
+		l2c_tadx_prf.u64 = cvmx_read_csr(CVMX_L2C_TADX_PRF(0));
+
+		switch (counter) {
+		case 0:
+			l2c_tadx_prf.s.cnt0sel = event;
+			break;
+		case 1:
+			l2c_tadx_prf.s.cnt1sel = event;
+			break;
+		case 2:
+			l2c_tadx_prf.s.cnt2sel = event;
+			break;
+		default:
+		case 3:
+			l2c_tadx_prf.s.cnt3sel = event;
+			break;
+		}
+		for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+			cvmx_write_csr(CVMX_L2C_TADX_PRF(tad),
+				       l2c_tadx_prf.u64);
+	}
+}
+
+uint64_t cvmx_l2c_read_perf(uint32_t counter)
+{
+	switch (counter) {
+	case 0:
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC0);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC0(tad));
+			return counter;
+		}
+	case 1:
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC1);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC1(tad));
+			return counter;
+		}
+	case 2:
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC2);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC2(tad));
+			return counter;
+		}
+	case 3:
+	default:
+		if (OCTEON_IS_MODEL(OCTEON_CN5XXX) || OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			return cvmx_read_csr(CVMX_L2C_PFC3);
+		else {
+			uint64_t counter = 0;
+			int tad;
+			for (tad = 0; tad < CVMX_L2C_TADS; tad++)
+				counter += cvmx_read_csr(CVMX_L2C_TADX_PFC3(tad));
+			return counter;
+		}
+	}
+}
+
+/**
+ * @INTERNAL
+ * Helper function use to fault in cache lines for L2 cache locking
+ *
+ * @addr:   Address of base of memory region to read into L2 cache
+ * @len:    Length (in bytes) of region to fault in
+ */
+static void fault_in(uint64_t addr, int len)
+{
+	volatile char *ptr;
+	volatile char dummy;
+	/*
+	 * Adjust addr and length so we get all cache lines even for
+	 * small ranges spanning two cache lines.
+	 */
+	len += addr & CVMX_CACHE_LINE_MASK;
+	addr &= ~CVMX_CACHE_LINE_MASK;
+	ptr = (volatile char *)cvmx_phys_to_ptr(addr);
+	/*
+	 * Invalidate L1 cache to make sure all loads result in data
+	 * being in L2.
+	 */
+	CVMX_DCACHE_INVALIDATE;
+	while (len > 0) {
+		dummy += *ptr;
+		len -= CVMX_CACHE_LINE_SIZE;
+		ptr += CVMX_CACHE_LINE_SIZE;
+	}
+}
+
+int cvmx_l2c_lock_line(uint64_t addr)
+{
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		int shift = CVMX_L2C_TAG_ADDR_ALIAS_SHIFT;
+		uint64_t assoc = cvmx_l2c_get_num_assoc();
+		uint64_t tag = addr >> shift;
+		uint64_t index = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, cvmx_l2c_address_to_index(addr) << CVMX_L2C_IDX_ADDR_SHIFT);
+		uint64_t way;
+		union cvmx_l2c_tadx_tag l2c_tadx_tag;
+
+		CVMX_CACHE_LCKL2(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, addr), 0);
+
+		/* Make sure we were able to lock the line */
+		for (way = 0; way < assoc; way++) {
+			CVMX_CACHE_LTGL2I(index | (way << shift), 0);
+			/* make sure CVMX_L2C_TADX_TAG is updated */
+			CVMX_SYNC;
+			l2c_tadx_tag.u64 = cvmx_read_csr(CVMX_L2C_TADX_TAG(0));
+			if (l2c_tadx_tag.s.valid && l2c_tadx_tag.s.tag == tag)
+				break;
+		}
+
+		/* Check if a valid line is found */
+		if (way >= assoc) {
+			/* cvmx_dprintf("ERROR: cvmx_l2c_lock_line: line not found for locking at 0x%llx address\n", (unsigned long long)addr); */
+			return -1;
+		}
+
+		/* Check if lock bit is not set */
+		if (!l2c_tadx_tag.s.lock) {
+			/* cvmx_dprintf("ERROR: cvmx_l2c_lock_line: Not able to lock at 0x%llx address\n", (unsigned long long)addr); */
+			return -1;
+		}
+		return way;
+	} else {
+		int retval = 0;
+		union cvmx_l2c_dbg l2cdbg;
+		union cvmx_l2c_lckbase lckbase;
+		union cvmx_l2c_lckoff lckoff;
+		union cvmx_l2t_err l2t_err;
+
+		cvmx_spinlock_lock(&cvmx_l2c_spinlock);
+
+		l2cdbg.u64 = 0;
+		lckbase.u64 = 0;
+		lckoff.u64 = 0;
+
+		/* Clear l2t error bits if set */
+		l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+		l2t_err.s.lckerr = 1;
+		l2t_err.s.lckerr2 = 1;
+		cvmx_write_csr(CVMX_L2T_ERR, l2t_err.u64);
+
+		addr &= ~CVMX_CACHE_LINE_MASK;
+
+		/* Set this core as debug core */
+		l2cdbg.s.ppnum = cvmx_get_core_num();
+		CVMX_SYNC;
+		cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
+		cvmx_read_csr(CVMX_L2C_DBG);
+
+		lckoff.s.lck_offset = 0; /* Only lock 1 line at a time */
+		cvmx_write_csr(CVMX_L2C_LCKOFF, lckoff.u64);
+		cvmx_read_csr(CVMX_L2C_LCKOFF);
+
+		if (((union cvmx_l2c_cfg)(cvmx_read_csr(CVMX_L2C_CFG))).s.idxalias) {
+			int alias_shift = CVMX_L2C_IDX_ADDR_SHIFT + 2 * CVMX_L2_SET_BITS - 1;
+			uint64_t addr_tmp = addr ^ (addr & ((1 << alias_shift) - 1)) >> CVMX_L2_SET_BITS;
+			lckbase.s.lck_base = addr_tmp >> 7;
+		} else {
+			lckbase.s.lck_base = addr >> 7;
+		}
+
+		lckbase.s.lck_ena = 1;
+		cvmx_write_csr(CVMX_L2C_LCKBASE, lckbase.u64);
+		/* Make sure it gets there */
+		cvmx_read_csr(CVMX_L2C_LCKBASE);
+
+		fault_in(addr, CVMX_CACHE_LINE_SIZE);
+
+		lckbase.s.lck_ena = 0;
+		cvmx_write_csr(CVMX_L2C_LCKBASE, lckbase.u64);
+		/* Make sure it gets there */
+		cvmx_read_csr(CVMX_L2C_LCKBASE);
+
+		/* Stop being debug core */
+		cvmx_write_csr(CVMX_L2C_DBG, 0);
+		cvmx_read_csr(CVMX_L2C_DBG);
+
+		l2t_err.u64 = cvmx_read_csr(CVMX_L2T_ERR);
+		if (l2t_err.s.lckerr || l2t_err.s.lckerr2)
+			retval = 1;  /* We were unable to lock the line */
+
+		cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
+		return retval;
+	}
+}
+
+int cvmx_l2c_lock_mem_region(uint64_t start, uint64_t len)
+{
+	int retval = 0;
+
+	/* Round start/end to cache line boundaries */
+	len += start & CVMX_CACHE_LINE_MASK;
+	start &= ~CVMX_CACHE_LINE_MASK;
+	len = (len + CVMX_CACHE_LINE_MASK) & ~CVMX_CACHE_LINE_MASK;
+
+	while (len) {
+		retval += cvmx_l2c_lock_line(start);
+		start += CVMX_CACHE_LINE_SIZE;
+		len -= CVMX_CACHE_LINE_SIZE;
+	}
+	return retval;
+}
+
+void cvmx_l2c_flush(void)
+{
+	uint64_t assoc, set;
+	uint64_t n_assoc, n_set;
+
+	n_set = cvmx_l2c_get_num_sets();
+	n_assoc = cvmx_l2c_get_num_assoc();
+
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		uint64_t address;
+		/* These may look like constants, but they aren't... */
+		int assoc_shift = CVMX_L2C_TAG_ADDR_ALIAS_SHIFT;
+		int set_shift = CVMX_L2C_IDX_ADDR_SHIFT;
+		for (set = 0; set < n_set; set++) {
+			for (assoc = 0; assoc < n_assoc; assoc++) {
+				address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+						       (assoc << assoc_shift) |	(set << set_shift));
+				CVMX_CACHE_WBIL2I(address, 0);
+			}
+		}
+	} else {
+		for (set = 0; set < n_set; set++)
+			for (assoc = 0; assoc < n_assoc; assoc++)
+				cvmx_l2c_flush_line(assoc, set);
+	}
+}
+
+
+int cvmx_l2c_unlock_line(uint64_t address)
+{
+
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		int assoc;
+		union cvmx_l2c_tag tag;
+		uint32_t tag_addr;
+		uint32_t index = cvmx_l2c_address_to_index(address);
+
+		tag_addr = ((address >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) & ((1 << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) - 1));
+
+		/*
+		 * For 63XX, we can flush a line by using the physical
+		 * address directly, so finding the cache line used by
+		 * the address is only required to provide the proper
+		 * return value for the function.
+		 */
+		for (assoc = 0; assoc < CVMX_L2_ASSOC; assoc++) {
+			tag = cvmx_l2c_get_tag(assoc, index);
+
+			if (tag.s.V && (tag.s.addr == tag_addr)) {
+				CVMX_CACHE_WBIL2(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, address), 0);
+				return tag.s.L;
+			}
+		}
+	} else {
+		int assoc;
+		union cvmx_l2c_tag tag;
+		uint32_t tag_addr;
+
+		uint32_t index = cvmx_l2c_address_to_index(address);
+
+		/* Compute portion of address that is stored in tag */
+		tag_addr = ((address >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) & ((1 << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) - 1));
+		for (assoc = 0; assoc < CVMX_L2_ASSOC; assoc++) {
+			tag = cvmx_l2c_get_tag(assoc, index);
+
+			if (tag.s.V && (tag.s.addr == tag_addr)) {
+				cvmx_l2c_flush_line(assoc, index);
+				return tag.s.L;
+			}
+		}
+	}
+	return 0;
+}
+
+int cvmx_l2c_unlock_mem_region(uint64_t start, uint64_t len)
+{
+	int num_unlocked = 0;
+	/* Round start/end to cache line boundaries */
+	len += start & CVMX_CACHE_LINE_MASK;
+	start &= ~CVMX_CACHE_LINE_MASK;
+	len = (len + CVMX_CACHE_LINE_MASK) & ~CVMX_CACHE_LINE_MASK;
+	while (len > 0) {
+		num_unlocked += cvmx_l2c_unlock_line(start);
+		start += CVMX_CACHE_LINE_SIZE;
+		len -= CVMX_CACHE_LINE_SIZE;
+	}
+
+	return num_unlocked;
+}
+
+/*
+ * Internal l2c tag types.  These are converted to a generic structure
+ * that can be used on all chips.
+ */
+union __cvmx_l2c_tag {
+	uint64_t u64;
+	struct cvmx_l2c_tag_cn50xx {
+		uint64_t reserved:40;
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t addr:20;	/* Phys mem addr (33..14) */
+	} cn50xx;
+	struct cvmx_l2c_tag_cn30xx {
+		uint64_t reserved:41;
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t addr:19;	/* Phys mem addr (33..15) */
+	} cn30xx;
+	struct cvmx_l2c_tag_cn31xx {
+		uint64_t reserved:42;
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t addr:18;	/* Phys mem addr (33..16) */
+	} cn31xx;
+	struct cvmx_l2c_tag_cn38xx {
+		uint64_t reserved:43;
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t addr:17;	/* Phys mem addr (33..17) */
+	} cn38xx;
+	struct cvmx_l2c_tag_cn58xx {
+		uint64_t reserved:44;
+		uint64_t V:1;		/* Line valid */
+		uint64_t D:1;		/* Line dirty */
+		uint64_t L:1;		/* Line locked */
+		uint64_t U:1;		/* Use, LRU eviction */
+		uint64_t addr:16;	/* Phys mem addr (33..18) */
+	} cn58xx;
+	struct cvmx_l2c_tag_cn58xx cn56xx;	/* 2048 sets */
+	struct cvmx_l2c_tag_cn31xx cn52xx;	/* 512 sets */
+};
+
+
+/**
+ * @INTERNAL
+ * Function to read a L2C tag.  This code make the current core
+ * the 'debug core' for the L2.  This code must only be executed by
+ * 1 core at a time.
+ *
+ * @assoc:  Association (way) of the tag to dump
+ * @index:  Index of the cacheline
+ *
+ * Returns The Octeon model specific tag structure.  This is
+ *         translated by a wrapper function to a generic form that is
+ *         easier for applications to use.
+ */
+static union __cvmx_l2c_tag __read_l2_tag(uint64_t assoc, uint64_t index)
+{
+
+	uint64_t debug_tag_addr = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS, (index << 7) + 96);
+	uint64_t core = cvmx_get_core_num();
+	union __cvmx_l2c_tag tag_val;
+	uint64_t dbg_addr = CVMX_L2C_DBG;
+	unsigned long flags;
+
+	union cvmx_l2c_dbg debug_val;
+	debug_val.u64 = 0;
+	/*
+	 * For low core count parts, the core number is always small
+	 * enough to stay in the correct field and not set any
+	 * reserved bits.
+	 */
+	debug_val.s.ppnum = core;
+	debug_val.s.l2t = 1;
+	debug_val.s.set = assoc;
+
+	local_irq_save(flags);
+	/*
+	 * Make sure core is quiet (no prefetches, etc.) before
+	 * entering debug mode.
+	 */
+	CVMX_SYNC;
+	/* Flush L1 to make sure debug load misses L1 */
+	CVMX_DCACHE_INVALIDATE;
+
+	/*
+	 * The following must be done in assembly as when in debug
+	 * mode all data loads from L2 return special debug data, not
+	 * normal memory contents.  Also, interrupts must be disabled,
+	 * since if an interrupt occurs while in debug mode the ISR
+	 * will get debug data from all its memory * reads instead of
+	 * the contents of memory.
+	 */
+
+	asm volatile (
+		".set push\n\t"
+		".set mips64\n\t"
+		".set noreorder\n\t"
+		"sd    %[dbg_val], 0(%[dbg_addr])\n\t"   /* Enter debug mode, wait for store */
+		"ld    $0, 0(%[dbg_addr])\n\t"
+		"ld    %[tag_val], 0(%[tag_addr])\n\t"   /* Read L2C tag data */
+		"sd    $0, 0(%[dbg_addr])\n\t"          /* Exit debug mode, wait for store */
+		"ld    $0, 0(%[dbg_addr])\n\t"
+		"cache 9, 0($0)\n\t"             /* Invalidate dcache to discard debug data */
+		".set pop"
+		: [tag_val] "=r" (tag_val)
+		: [dbg_addr] "r" (dbg_addr), [dbg_val] "r" (debug_val), [tag_addr] "r" (debug_tag_addr)
+		: "memory");
+
+	local_irq_restore(flags);
+
+	return tag_val;
+}
+
+
+union cvmx_l2c_tag cvmx_l2c_get_tag(uint32_t association, uint32_t index)
+{
+	union cvmx_l2c_tag tag;
+	tag.u64 = 0;
+
+	if ((int)association >= cvmx_l2c_get_num_assoc()) {
+		cvmx_dprintf("ERROR: cvmx_l2c_get_tag association out of range\n");
+		return tag;
+	}
+	if ((int)index >= cvmx_l2c_get_num_sets()) {
+		cvmx_dprintf("ERROR: cvmx_l2c_get_tag index out of range (arg: %d, max: %d)\n",
+			     (int)index, cvmx_l2c_get_num_sets());
+		return tag;
+	}
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		union cvmx_l2c_tadx_tag l2c_tadx_tag;
+		uint64_t address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+						(association << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) |
+						(index << CVMX_L2C_IDX_ADDR_SHIFT));
+		/*
+		 * Use L2 cache Index load tag cache instruction, as
+		 * hardware loads the virtual tag for the L2 cache
+		 * block with the contents of L2C_TAD0_TAG
+		 * register.
+		 */
+		CVMX_CACHE_LTGL2I(address, 0);
+		CVMX_SYNC;   /* make sure CVMX_L2C_TADX_TAG is updated */
+		l2c_tadx_tag.u64 = cvmx_read_csr(CVMX_L2C_TADX_TAG(0));
+
+		tag.s.V     = l2c_tadx_tag.s.valid;
+		tag.s.D     = l2c_tadx_tag.s.dirty;
+		tag.s.L     = l2c_tadx_tag.s.lock;
+		tag.s.U     = l2c_tadx_tag.s.use;
+		tag.s.addr  = l2c_tadx_tag.s.tag;
+	} else {
+		union __cvmx_l2c_tag tmp_tag;
+		/* __read_l2_tag is intended for internal use only */
+		tmp_tag = __read_l2_tag(association, index);
+
+		/*
+		 * Convert all tag structure types to generic version,
+		 * as it can represent all models.
+		 */
+		if (OCTEON_IS_MODEL(OCTEON_CN58XX) || OCTEON_IS_MODEL(OCTEON_CN56XX)) {
+			tag.s.V    = tmp_tag.cn58xx.V;
+			tag.s.D    = tmp_tag.cn58xx.D;
+			tag.s.L    = tmp_tag.cn58xx.L;
+			tag.s.U    = tmp_tag.cn58xx.U;
+			tag.s.addr = tmp_tag.cn58xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN38XX)) {
+			tag.s.V    = tmp_tag.cn38xx.V;
+			tag.s.D    = tmp_tag.cn38xx.D;
+			tag.s.L    = tmp_tag.cn38xx.L;
+			tag.s.U    = tmp_tag.cn38xx.U;
+			tag.s.addr = tmp_tag.cn38xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN31XX) || OCTEON_IS_MODEL(OCTEON_CN52XX)) {
+			tag.s.V    = tmp_tag.cn31xx.V;
+			tag.s.D    = tmp_tag.cn31xx.D;
+			tag.s.L    = tmp_tag.cn31xx.L;
+			tag.s.U    = tmp_tag.cn31xx.U;
+			tag.s.addr = tmp_tag.cn31xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN30XX)) {
+			tag.s.V    = tmp_tag.cn30xx.V;
+			tag.s.D    = tmp_tag.cn30xx.D;
+			tag.s.L    = tmp_tag.cn30xx.L;
+			tag.s.U    = tmp_tag.cn30xx.U;
+			tag.s.addr = tmp_tag.cn30xx.addr;
+		} else if (OCTEON_IS_MODEL(OCTEON_CN50XX)) {
+			tag.s.V    = tmp_tag.cn50xx.V;
+			tag.s.D    = tmp_tag.cn50xx.D;
+			tag.s.L    = tmp_tag.cn50xx.L;
+			tag.s.U    = tmp_tag.cn50xx.U;
+			tag.s.addr = tmp_tag.cn50xx.addr;
+		} else {
+			cvmx_dprintf("Unsupported OCTEON Model in %s\n", __func__);
+		}
+	}
+	return tag;
+}
+
+uint32_t cvmx_l2c_address_to_index(uint64_t addr)
+{
+	uint64_t idx = addr >> CVMX_L2C_IDX_ADDR_SHIFT;
+	int indxalias = 0;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		union cvmx_l2c_ctl l2c_ctl;
+		l2c_ctl.u64 = cvmx_read_csr(CVMX_L2C_CTL);
+		indxalias = !l2c_ctl.s.disidxalias;
+	} else {
+		union cvmx_l2c_cfg l2c_cfg;
+		l2c_cfg.u64 = cvmx_read_csr(CVMX_L2C_CFG);
+		indxalias = l2c_cfg.s.idxalias;
+	}
+
+	if (indxalias) {
+		if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+			uint32_t a_14_12 = (idx / (CVMX_L2C_MEMBANK_SELECT_SIZE/(1<<CVMX_L2C_IDX_ADDR_SHIFT))) & 0x7;
+			idx ^= idx / cvmx_l2c_get_num_sets();
+			idx ^= a_14_12;
+		} else {
+			idx ^= ((addr & CVMX_L2C_ALIAS_MASK) >> CVMX_L2C_TAG_ADDR_ALIAS_SHIFT);
+		}
+	}
+	idx &= CVMX_L2C_IDX_MASK;
+	return idx;
+}
+
+int cvmx_l2c_get_cache_size_bytes(void)
+{
+	return cvmx_l2c_get_num_sets() * cvmx_l2c_get_num_assoc() *
+		CVMX_CACHE_LINE_SIZE;
+}
+
+/**
+ * Return log base 2 of the number of sets in the L2 cache
+ * Returns
+ */
+int cvmx_l2c_get_set_bits(void)
+{
+	int l2_set_bits;
+	if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN58XX))
+		l2_set_bits = 11;	/* 2048 sets */
+	else if (OCTEON_IS_MODEL(OCTEON_CN38XX) || OCTEON_IS_MODEL(OCTEON_CN63XX))
+		l2_set_bits = 10;	/* 1024 sets */
+	else if (OCTEON_IS_MODEL(OCTEON_CN31XX) || OCTEON_IS_MODEL(OCTEON_CN52XX))
+		l2_set_bits = 9;	/* 512 sets */
+	else if (OCTEON_IS_MODEL(OCTEON_CN30XX))
+		l2_set_bits = 8;	/* 256 sets */
+	else if (OCTEON_IS_MODEL(OCTEON_CN50XX))
+		l2_set_bits = 7;	/* 128 sets */
+	else {
+		cvmx_dprintf("Unsupported OCTEON Model in %s\n", __func__);
+		l2_set_bits = 11;	/* 2048 sets */
+	}
+	return l2_set_bits;
+}
+
+/* Return the number of sets in the L2 Cache */
+int cvmx_l2c_get_num_sets(void)
+{
+	return 1 << cvmx_l2c_get_set_bits();
+}
+
+/* Return the number of associations in the L2 Cache */
+int cvmx_l2c_get_num_assoc(void)
+{
+	int l2_assoc;
+	if (OCTEON_IS_MODEL(OCTEON_CN56XX) ||
+	    OCTEON_IS_MODEL(OCTEON_CN52XX) ||
+	    OCTEON_IS_MODEL(OCTEON_CN58XX) ||
+	    OCTEON_IS_MODEL(OCTEON_CN50XX) ||
+	    OCTEON_IS_MODEL(OCTEON_CN38XX))
+		l2_assoc = 8;
+	else if (OCTEON_IS_MODEL(OCTEON_CN63XX))
+		l2_assoc = 16;
+	else if (OCTEON_IS_MODEL(OCTEON_CN31XX) ||
+		 OCTEON_IS_MODEL(OCTEON_CN30XX))
+		l2_assoc = 4;
+	else {
+		cvmx_dprintf("Unsupported OCTEON Model in %s\n", __func__);
+		l2_assoc = 8;
+	}
+
+	/* Check to see if part of the cache is disabled */
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		union cvmx_mio_fus_dat3 mio_fus_dat3;
+
+		mio_fus_dat3.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT3);
+		/*
+		 * cvmx_mio_fus_dat3.s.l2c_crip fuses map as follows
+		 * <2> will be not used for 63xx
+		 * <1> disables 1/2 ways
+		 * <0> disables 1/4 ways
+		 * They are cumulative, so for 63xx:
+		 * <1> <0>
+		 * 0 0 16-way 2MB cache
+		 * 0 1 12-way 1.5MB cache
+		 * 1 0 8-way 1MB cache
+		 * 1 1 4-way 512KB cache
+		 */
+
+		if (mio_fus_dat3.s.l2c_crip == 3)
+			l2_assoc = 4;
+		else if (mio_fus_dat3.s.l2c_crip == 2)
+			l2_assoc = 8;
+		else if (mio_fus_dat3.s.l2c_crip == 1)
+			l2_assoc = 12;
+	} else {
+		union cvmx_l2d_fus3 val;
+		val.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+		/*
+		 * Using shifts here, as bit position names are
+		 * different for each model but they all mean the
+		 * same.
+		 */
+		if ((val.u64 >> 35) & 0x1)
+			l2_assoc = l2_assoc >> 2;
+		else if ((val.u64 >> 34) & 0x1)
+			l2_assoc = l2_assoc >> 1;
+	}
+	return l2_assoc;
+}
+
+/**
+ * Flush a line from the L2 cache
+ * This should only be called from one core at a time, as this routine
+ * sets the core to the 'debug' core in order to flush the line.
+ *
+ * @assoc:  Association (or way) to flush
+ * @index:  Index to flush
+ */
+void cvmx_l2c_flush_line(uint32_t assoc, uint32_t index)
+{
+	/* Check the range of the index. */
+	if (index > (uint32_t)cvmx_l2c_get_num_sets()) {
+		cvmx_dprintf("ERROR: cvmx_l2c_flush_line index out of range.\n");
+		return;
+	}
+
+	/* Check the range of association. */
+	if (assoc > (uint32_t)cvmx_l2c_get_num_assoc()) {
+		cvmx_dprintf("ERROR: cvmx_l2c_flush_line association out of range.\n");
+		return;
+	}
+
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX)) {
+		uint64_t address;
+		/* Create the address based on index and association.
+		 * Bits<20:17> select the way of the cache block involved in
+		 *             the operation
+		 * Bits<16:7> of the effect address select the index
+		 */
+		address = CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+				(assoc << CVMX_L2C_TAG_ADDR_ALIAS_SHIFT) |
+				(index << CVMX_L2C_IDX_ADDR_SHIFT));
+		CVMX_CACHE_WBIL2I(address, 0);
+	} else {
+		union cvmx_l2c_dbg l2cdbg;
+
+		l2cdbg.u64 = 0;
+		if (!OCTEON_IS_MODEL(OCTEON_CN30XX))
+			l2cdbg.s.ppnum = cvmx_get_core_num();
+		l2cdbg.s.finv = 1;
+
+		l2cdbg.s.set = assoc;
+		cvmx_spinlock_lock(&cvmx_l2c_spinlock);
+		/*
+		 * Enter debug mode, and make sure all other writes
+		 * complete before we enter debug mode
+		 */
+		CVMX_SYNC;
+		cvmx_write_csr(CVMX_L2C_DBG, l2cdbg.u64);
+		cvmx_read_csr(CVMX_L2C_DBG);
+
+		CVMX_PREPARE_FOR_STORE(CVMX_ADD_SEG(CVMX_MIPS_SPACE_XKPHYS,
+						    index * CVMX_CACHE_LINE_SIZE),
+				       0);
+		/* Exit debug mode */
+		CVMX_SYNC;
+		cvmx_write_csr(CVMX_L2C_DBG, 0);
+		cvmx_read_csr(CVMX_L2C_DBG);
+		cvmx_spinlock_unlock(&cvmx_l2c_spinlock);
+	}
+}
diff --git a/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c b/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c
new file mode 100644
index 00000000..8b18a20c
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/cvmx-sysinfo.c
@@ -0,0 +1,117 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2008 Cavium Networks
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+/*
+ * This module provides system/board/application information obtained
+ * by the bootloader.
+ */
+#include <linux/module.h>
+
+#include <asm/octeon/cvmx.h>
+#include <asm/octeon/cvmx-spinlock.h>
+#include <asm/octeon/cvmx-sysinfo.h>
+
+/**
+ * This structure defines the private state maintained by sysinfo module.
+ *
+ */
+static struct {
+	struct cvmx_sysinfo sysinfo;	   /* system information */
+	cvmx_spinlock_t lock;	   /* mutex spinlock */
+
+} state = {
+	.lock = CVMX_SPINLOCK_UNLOCKED_INITIALIZER
+};
+
+
+/*
+ * Global variables that define the min/max of the memory region set
+ * up for 32 bit userspace access.
+ */
+uint64_t linux_mem32_min;
+uint64_t linux_mem32_max;
+uint64_t linux_mem32_wired;
+uint64_t linux_mem32_offset;
+
+/**
+ * This function returns the application information as obtained
+ * by the bootloader.  This provides the core mask of the cores
+ * running the same application image, as well as the physical
+ * memory regions available to the core.
+ *
+ * Returns  Pointer to the boot information structure
+ *
+ */
+struct cvmx_sysinfo *cvmx_sysinfo_get(void)
+{
+	return &(state.sysinfo);
+}
+EXPORT_SYMBOL(cvmx_sysinfo_get);
+
+/**
+ * This function is used in non-simple executive environments (such as
+ * Linux kernel, u-boot, etc.)  to configure the minimal fields that
+ * are required to use simple executive files directly.
+ *
+ * Locking (if required) must be handled outside of this
+ * function
+ *
+ * @phy_mem_desc_ptr:
+ *                   Pointer to global physical memory descriptor
+ *                   (bootmem descriptor) @board_type: Octeon board
+ *                   type enumeration
+ *
+ * @board_rev_major:
+ *                   Board major revision
+ * @board_rev_minor:
+ *                   Board minor revision
+ * @cpu_clock_hz:
+ *                   CPU clock freqency in hertz
+ *
+ * Returns 0: Failure
+ *         1: success
+ */
+int cvmx_sysinfo_minimal_initialize(void *phy_mem_desc_ptr,
+				    uint16_t board_type,
+				    uint8_t board_rev_major,
+				    uint8_t board_rev_minor,
+				    uint32_t cpu_clock_hz)
+{
+
+	/* The sysinfo structure was already initialized */
+	if (state.sysinfo.board_type)
+		return 0;
+
+	memset(&(state.sysinfo), 0x0, sizeof(state.sysinfo));
+	state.sysinfo.phy_mem_desc_ptr = phy_mem_desc_ptr;
+	state.sysinfo.board_type = board_type;
+	state.sysinfo.board_rev_major = board_rev_major;
+	state.sysinfo.board_rev_minor = board_rev_minor;
+	state.sysinfo.cpu_clock_hz = cpu_clock_hz;
+
+	return 1;
+}
diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c
new file mode 100644
index 00000000..c8d35684
--- /dev/null
+++ b/arch/mips/cavium-octeon/executive/octeon-model.c
@@ -0,0 +1,358 @@
+/***********************license start***************
+ * Author: Cavium Networks
+ *
+ * Contact: support@caviumnetworks.com
+ * This file is part of the OCTEON SDK
+ *
+ * Copyright (c) 2003-2008 Cavium Networks
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this file; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * or visit http://www.gnu.org/licenses/.
+ *
+ * This file may also be available under a different license from Cavium.
+ * Contact Cavium Networks for more information
+ ***********************license end**************************************/
+
+/*
+ * File defining functions for working with different Octeon
+ * models.
+ */
+#include <asm/octeon/octeon.h>
+
+/**
+ * Given the chip processor ID from COP0, this function returns a
+ * string representing the chip model number. The string is of the
+ * form CNXXXXpX.X-FREQ-SUFFIX.
+ * - XXXX = The chip model number
+ * - X.X = Chip pass number
+ * - FREQ = Current frequency in Mhz
+ * - SUFFIX = NSP, EXP, SCP, SSP, or CP
+ *
+ * @chip_id: Chip ID
+ *
+ * Returns Model string
+ */
+const char *octeon_model_get_string(uint32_t chip_id)
+{
+	static char buffer[32];
+	return octeon_model_get_string_buffer(chip_id, buffer);
+}
+
+/*
+ * Version of octeon_model_get_string() that takes buffer as argument,
+ * as running early in u-boot static/global variables don't work when
+ * running from flash.
+ */
+const char *octeon_model_get_string_buffer(uint32_t chip_id, char *buffer)
+{
+	const char *family;
+	const char *core_model;
+	char pass[4];
+	int clock_mhz;
+	const char *suffix;
+	union cvmx_l2d_fus3 fus3;
+	int num_cores;
+	union cvmx_mio_fus_dat2 fus_dat2;
+	union cvmx_mio_fus_dat3 fus_dat3;
+	char fuse_model[10];
+	uint32_t fuse_data = 0;
+
+	fus3.u64 = cvmx_read_csr(CVMX_L2D_FUS3);
+	fus_dat2.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT2);
+	fus_dat3.u64 = cvmx_read_csr(CVMX_MIO_FUS_DAT3);
+
+	num_cores = cvmx_octeon_num_cores();
+
+	/* Make sure the non existent devices look disabled */
+	switch ((chip_id >> 8) & 0xff) {
+	case 6:		/* CN50XX */
+	case 2:		/* CN30XX */
+		fus_dat3.s.nodfa_dte = 1;
+		fus_dat3.s.nozip = 1;
+		break;
+	case 4:		/* CN57XX or CN56XX */
+		fus_dat3.s.nodfa_dte = 1;
+		break;
+	default:
+		break;
+	}
+
+	/* Make a guess at the suffix */
+	/* NSP = everything */
+	/* EXP = No crypto */
+	/* SCP = No DFA, No zip */
+	/* CP = No DFA, No crypto, No zip */
+	if (fus_dat3.s.nodfa_dte) {
+		if (fus_dat2.s.nocrypto)
+			suffix = "CP";
+		else
+			suffix = "SCP";
+	} else if (fus_dat2.s.nocrypto)
+		suffix = "EXP";
+	else
+		suffix = "NSP";
+
+	/*
+	 * Assume pass number is encoded using <5:3><2:0>. Exceptions
+	 * will be fixed later.
+	 */
+	sprintf(pass, "%u.%u", ((chip_id >> 3) & 7) + 1, chip_id & 7);
+
+	/*
+	 * Use the number of cores to determine the last 2 digits of
+	 * the model number. There are some exceptions that are fixed
+	 * later.
+	 */
+	switch (num_cores) {
+	case 16:
+		core_model = "60";
+		break;
+	case 15:
+		core_model = "58";
+		break;
+	case 14:
+		core_model = "55";
+		break;
+	case 13:
+		core_model = "52";
+		break;
+	case 12:
+		core_model = "50";
+		break;
+	case 11:
+		core_model = "48";
+		break;
+	case 10:
+		core_model = "45";
+		break;
+	case 9:
+		core_model = "42";
+		break;
+	case 8:
+		core_model = "40";
+		break;
+	case 7:
+		core_model = "38";
+		break;
+	case 6:
+		core_model = "34";
+		break;
+	case 5:
+		core_model = "32";
+		break;
+	case 4:
+		core_model = "30";
+		break;
+	case 3:
+		core_model = "25";
+		break;
+	case 2:
+		core_model = "20";
+		break;
+	case 1:
+		core_model = "10";
+		break;
+	default:
+		core_model = "XX";
+		break;
+	}
+
+	/* Now figure out the family, the first two digits */
+	switch ((chip_id >> 8) & 0xff) {
+	case 0:		/* CN38XX, CN37XX or CN36XX */
+		if (fus3.cn38xx.crip_512k) {
+			/*
+			 * For some unknown reason, the 16 core one is
+			 * called 37 instead of 36.
+			 */
+			if (num_cores >= 16)
+				family = "37";
+			else
+				family = "36";
+		} else
+			family = "38";
+		/*
+		 * This series of chips didn't follow the standard
+		 * pass numbering.
+		 */
+		switch (chip_id & 0xf) {
+		case 0:
+			strcpy(pass, "1.X");
+			break;
+		case 1:
+			strcpy(pass, "2.X");
+			break;
+		case 3:
+			strcpy(pass, "3.X");
+			break;
+		default:
+			strcpy(pass, "X.X");
+			break;
+		}
+		break;
+	case 1:		/* CN31XX or CN3020 */
+		if ((chip_id & 0x10) || fus3.cn31xx.crip_128k)
+			family = "30";
+		else
+			family = "31";
+		/*
+		 * This series of chips didn't follow the standard
+		 * pass numbering.
+		 */
+		switch (chip_id & 0xf) {
+		case 0:
+			strcpy(pass, "1.0");
+			break;
+		case 2:
+			strcpy(pass, "1.1");
+			break;
+		default:
+			strcpy(pass, "X.X");
+			break;
+		}
+		break;
+	case 2:		/* CN3010 or CN3005 */
+		family = "30";
+		/* A chip with half cache is an 05 */
+		if (fus3.cn30xx.crip_64k)
+			core_model = "05";
+		/*
+		 * This series of chips didn't follow the standard
+		 * pass numbering.
+		 */
+		switch (chip_id & 0xf) {
+		case 0:
+			strcpy(pass, "1.0");
+			break;
+		case 2:
+			strcpy(pass, "1.1");
+			break;
+		default:
+			strcpy(pass, "X.X");
+			break;
+		}
+		break;
+	case 3:		/* CN58XX */
+		family = "58";
+		/* Special case. 4 core, no crypto */
+		if ((num_cores == 4) && fus_dat2.cn38xx.nocrypto)
+			core_model = "29";
+
+		/* Pass 1 uses different encodings for pass numbers */
+		if ((chip_id & 0xFF) < 0x8) {
+			switch (chip_id & 0x3) {
+			case 0:
+				strcpy(pass, "1.0");
+				break;
+			case 1:
+				strcpy(pass, "1.1");
+				break;
+			case 3:
+				strcpy(pass, "1.2");
+				break;
+			default:
+				strcpy(pass, "1.X");
+				break;
+			}
+		}
+		break;
+	case 4:		/* CN57XX, CN56XX, CN55XX, CN54XX */
+		if (fus_dat2.cn56xx.raid_en) {
+			if (fus3.cn56xx.crip_1024k)
+				family = "55";
+			else
+				family = "57";
+			if (fus_dat2.cn56xx.nocrypto)
+				suffix = "SP";
+			else
+				suffix = "SSP";
+		} else {
+			if (fus_dat2.cn56xx.nocrypto)
+				suffix = "CP";
+			else {
+				suffix = "NSP";
+				if (fus_dat3.s.nozip)
+					suffix = "SCP";
+			}
+			if (fus3.cn56xx.crip_1024k)
+				family = "54";
+			else
+				family = "56";
+		}
+		break;
+	case 6:		/* CN50XX */
+		family = "50";
+		break;
+	case 7:		/* CN52XX */
+		if (fus3.cn52xx.crip_256k)
+			family = "51";
+		else
+			family = "52";
+		break;
+	default:
+		family = "XX";
+		core_model = "XX";
+		strcpy(pass, "X.X");
+		suffix = "XXX";
+		break;
+	}
+
+	clock_mhz = octeon_get_clock_rate() / 1000000;
+
+	if (family[0] != '3') {
+		/* Check for model in fuses, overrides normal decode */
+		/* This is _not_ valid for Octeon CN3XXX models */
+		fuse_data |= cvmx_fuse_read_byte(51);
+		fuse_data = fuse_data << 8;
+		fuse_data |= cvmx_fuse_read_byte(50);
+		fuse_data = fuse_data << 8;
+		fuse_data |= cvmx_fuse_read_byte(49);
+		fuse_data = fuse_data << 8;
+		fuse_data |= cvmx_fuse_read_byte(48);
+		if (fuse_data & 0x7ffff) {
+			int model = fuse_data & 0x3fff;
+			int suffix = (fuse_data >> 14) & 0x1f;
+			if (suffix && model) {
+				/*
+				 * Have both number and suffix in
+				 * fuses, so both
+				 */
+				sprintf(fuse_model, "%d%c",
+					model, 'A' + suffix - 1);
+				core_model = "";
+				family = fuse_model;
+			} else if (suffix && !model) {
+				/*
+				 * Only have suffix, so add suffix to
+				 * 'normal' model number.
+				 */
+				sprintf(fuse_model, "%s%c", core_model,
+					'A' + suffix - 1);
+				core_model = fuse_model;
+			} else {
+				/*
+				 * Don't have suffix, so just use
+				 * model from fuses.
+				 */
+				sprintf(fuse_model, "%d", model);
+				core_model = "";
+				family = fuse_model;
+			}
+		}
+	}
+	sprintf(buffer, "CN%s%sp%s-%d-%s",
+		family, core_model, pass, clock_mhz, suffix);
+	return buffer;
+}
diff --git a/arch/mips/cavium-octeon/flash_setup.c b/arch/mips/cavium-octeon/flash_setup.c
new file mode 100644
index 00000000..0ee02f5e
--- /dev/null
+++ b/arch/mips/cavium-octeon/flash_setup.c
@@ -0,0 +1,75 @@
+/*
+ *   Octeon Bootbus flash setup
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007, 2008 Cavium Networks
+ */
+#include <linux/kernel.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+
+#include <asm/octeon/octeon.h>
+
+static struct map_info flash_map;
+static struct mtd_info *mymtd;
+static int nr_parts;
+static struct mtd_partition *parts;
+static const char *part_probe_types[] = {
+	"cmdlinepart",
+#ifdef CONFIG_MTD_REDBOOT_PARTS
+	"RedBoot",
+#endif
+	NULL
+};
+
+/**
+ * Module/ driver initialization.
+ *
+ * Returns Zero on success
+ */
+static int __init flash_init(void)
+{
+	/*
+	 * Read the bootbus region 0 setup to determine the base
+	 * address of the flash.
+	 */
+	union cvmx_mio_boot_reg_cfgx region_cfg;
+	region_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(0));
+	if (region_cfg.s.en) {
+		/*
+		 * The bootloader always takes the flash and sets its
+		 * address so the entire flash fits below
+		 * 0x1fc00000. This way the flash aliases to
+		 * 0x1fc00000 for booting. Software can access the
+		 * full flash at the true address, while core boot can
+		 * access 4MB.
+		 */
+		/* Use this name so old part lines work */
+		flash_map.name = "phys_mapped_flash";
+		flash_map.phys = region_cfg.s.base << 16;
+		flash_map.size = 0x1fc00000 - flash_map.phys;
+		flash_map.bankwidth = 1;
+		flash_map.virt = ioremap(flash_map.phys, flash_map.size);
+		pr_notice("Bootbus flash: Setting flash for %luMB flash at "
+			  "0x%08llx\n", flash_map.size >> 20, flash_map.phys);
+		simple_map_init(&flash_map);
+		mymtd = do_map_probe("cfi_probe", &flash_map);
+		if (mymtd) {
+			mymtd->owner = THIS_MODULE;
+
+			nr_parts = parse_mtd_partitions(mymtd,
+							part_probe_types,
+							&parts, 0);
+			mtd_device_register(mymtd, parts, nr_parts);
+		} else {
+			pr_err("Failed to register MTD device for flash\n");
+		}
+	}
+	return 0;
+}
+
+late_initcall(flash_init);
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
new file mode 100644
index 00000000..ffd4ae66
--- /dev/null
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -0,0 +1,1044 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008, 2009, 2010, 2011 Cavium Networks
+ */
+
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/percpu.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+
+#include <asm/octeon/octeon.h>
+
+static DEFINE_RAW_SPINLOCK(octeon_irq_ciu0_lock);
+static DEFINE_RAW_SPINLOCK(octeon_irq_ciu1_lock);
+
+static DEFINE_PER_CPU(unsigned long, octeon_irq_ciu0_en_mirror);
+static DEFINE_PER_CPU(unsigned long, octeon_irq_ciu1_en_mirror);
+
+static __read_mostly u8 octeon_irq_ciu_to_irq[8][64];
+
+union octeon_ciu_chip_data {
+	void *p;
+	unsigned long l;
+	struct {
+		unsigned int line:6;
+		unsigned int bit:6;
+	} s;
+};
+
+struct octeon_core_chip_data {
+	struct mutex core_irq_mutex;
+	bool current_en;
+	bool desired_en;
+	u8 bit;
+};
+
+#define MIPS_CORE_IRQ_LINES 8
+
+static struct octeon_core_chip_data octeon_irq_core_chip_data[MIPS_CORE_IRQ_LINES];
+
+static void __init octeon_irq_set_ciu_mapping(int irq, int line, int bit,
+					      struct irq_chip *chip,
+					      irq_flow_handler_t handler)
+{
+	union octeon_ciu_chip_data cd;
+
+	irq_set_chip_and_handler(irq, chip, handler);
+
+	cd.l = 0;
+	cd.s.line = line;
+	cd.s.bit = bit;
+
+	irq_set_chip_data(irq, cd.p);
+	octeon_irq_ciu_to_irq[line][bit] = irq;
+}
+
+static int octeon_coreid_for_cpu(int cpu)
+{
+#ifdef CONFIG_SMP
+	return cpu_logical_map(cpu);
+#else
+	return cvmx_get_core_num();
+#endif
+}
+
+static int octeon_cpu_for_coreid(int coreid)
+{
+#ifdef CONFIG_SMP
+	return cpu_number_map(coreid);
+#else
+	return smp_processor_id();
+#endif
+}
+
+static void octeon_irq_core_ack(struct irq_data *data)
+{
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	unsigned int bit = cd->bit;
+
+	/*
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	clear_c0_status(0x100 << bit);
+	/* The two user interrupts must be cleared manually. */
+	if (bit < 2)
+		clear_c0_cause(0x100 << bit);
+}
+
+static void octeon_irq_core_eoi(struct irq_data *data)
+{
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	/*
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	set_c0_status(0x100 << cd->bit);
+}
+
+static void octeon_irq_core_set_enable_local(void *arg)
+{
+	struct irq_data *data = arg;
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	unsigned int mask = 0x100 << cd->bit;
+
+	/*
+	 * Interrupts are already disabled, so these are atomic.
+	 */
+	if (cd->desired_en)
+		set_c0_status(mask);
+	else
+		clear_c0_status(mask);
+
+}
+
+static void octeon_irq_core_disable(struct irq_data *data)
+{
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	cd->desired_en = false;
+}
+
+static void octeon_irq_core_enable(struct irq_data *data)
+{
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+	cd->desired_en = true;
+}
+
+static void octeon_irq_core_bus_lock(struct irq_data *data)
+{
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&cd->core_irq_mutex);
+}
+
+static void octeon_irq_core_bus_sync_unlock(struct irq_data *data)
+{
+	struct octeon_core_chip_data *cd = irq_data_get_irq_chip_data(data);
+
+	if (cd->desired_en != cd->current_en) {
+		on_each_cpu(octeon_irq_core_set_enable_local, data, 1);
+
+		cd->current_en = cd->desired_en;
+	}
+
+	mutex_unlock(&cd->core_irq_mutex);
+}
+
+static struct irq_chip octeon_irq_chip_core = {
+	.name = "Core",
+	.irq_enable = octeon_irq_core_enable,
+	.irq_disable = octeon_irq_core_disable,
+	.irq_ack = octeon_irq_core_ack,
+	.irq_eoi = octeon_irq_core_eoi,
+	.irq_bus_lock = octeon_irq_core_bus_lock,
+	.irq_bus_sync_unlock = octeon_irq_core_bus_sync_unlock,
+
+	.irq_cpu_online = octeon_irq_core_eoi,
+	.irq_cpu_offline = octeon_irq_core_ack,
+	.flags = IRQCHIP_ONOFFLINE_ENABLED,
+};
+
+static void __init octeon_irq_init_core(void)
+{
+	int i;
+	int irq;
+	struct octeon_core_chip_data *cd;
+
+	for (i = 0; i < MIPS_CORE_IRQ_LINES; i++) {
+		cd = &octeon_irq_core_chip_data[i];
+		cd->current_en = false;
+		cd->desired_en = false;
+		cd->bit = i;
+		mutex_init(&cd->core_irq_mutex);
+
+		irq = OCTEON_IRQ_SW0 + i;
+		switch (irq) {
+		case OCTEON_IRQ_TIMER:
+		case OCTEON_IRQ_SW0:
+		case OCTEON_IRQ_SW1:
+		case OCTEON_IRQ_5:
+		case OCTEON_IRQ_PERF:
+			irq_set_chip_data(irq, cd);
+			irq_set_chip_and_handler(irq, &octeon_irq_chip_core,
+						 handle_percpu_irq);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int next_cpu_for_irq(struct irq_data *data)
+{
+
+#ifdef CONFIG_SMP
+	int cpu;
+	int weight = cpumask_weight(data->affinity);
+
+	if (weight > 1) {
+		cpu = smp_processor_id();
+		for (;;) {
+			cpu = cpumask_next(cpu, data->affinity);
+			if (cpu >= nr_cpu_ids) {
+				cpu = -1;
+				continue;
+			} else if (cpumask_test_cpu(cpu, cpu_online_mask)) {
+				break;
+			}
+		}
+	} else if (weight == 1) {
+		cpu = cpumask_first(data->affinity);
+	} else {
+		cpu = smp_processor_id();
+	}
+	return cpu;
+#else
+	return smp_processor_id();
+#endif
+}
+
+static void octeon_irq_ciu_enable(struct irq_data *data)
+{
+	int cpu = next_cpu_for_irq(data);
+	int coreid = octeon_coreid_for_cpu(cpu);
+	unsigned long *pen;
+	unsigned long flags;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+
+	if (cd.s.line == 0) {
+		raw_spin_lock_irqsave(&octeon_irq_ciu0_lock, flags);
+		pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
+		set_bit(cd.s.bit, pen);
+		cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu0_lock, flags);
+	} else {
+		raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+		pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
+		set_bit(cd.s.bit, pen);
+		cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+	}
+}
+
+static void octeon_irq_ciu_enable_local(struct irq_data *data)
+{
+	unsigned long *pen;
+	unsigned long flags;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+
+	if (cd.s.line == 0) {
+		raw_spin_lock_irqsave(&octeon_irq_ciu0_lock, flags);
+		pen = &__get_cpu_var(octeon_irq_ciu0_en_mirror);
+		set_bit(cd.s.bit, pen);
+		cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen);
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu0_lock, flags);
+	} else {
+		raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+		pen = &__get_cpu_var(octeon_irq_ciu1_en_mirror);
+		set_bit(cd.s.bit, pen);
+		cvmx_write_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1), *pen);
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+	}
+}
+
+static void octeon_irq_ciu_disable_local(struct irq_data *data)
+{
+	unsigned long *pen;
+	unsigned long flags;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+
+	if (cd.s.line == 0) {
+		raw_spin_lock_irqsave(&octeon_irq_ciu0_lock, flags);
+		pen = &__get_cpu_var(octeon_irq_ciu0_en_mirror);
+		clear_bit(cd.s.bit, pen);
+		cvmx_write_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2), *pen);
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu0_lock, flags);
+	} else {
+		raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+		pen = &__get_cpu_var(octeon_irq_ciu1_en_mirror);
+		clear_bit(cd.s.bit, pen);
+		cvmx_write_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1), *pen);
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+	}
+}
+
+static void octeon_irq_ciu_disable_all(struct irq_data *data)
+{
+	unsigned long flags;
+	unsigned long *pen;
+	int cpu;
+	union octeon_ciu_chip_data cd;
+
+	wmb(); /* Make sure flag changes arrive before register updates. */
+
+	cd.p = irq_data_get_irq_chip_data(data);
+
+	if (cd.s.line == 0) {
+		raw_spin_lock_irqsave(&octeon_irq_ciu0_lock, flags);
+		for_each_online_cpu(cpu) {
+			int coreid = octeon_coreid_for_cpu(cpu);
+			pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
+			clear_bit(cd.s.bit, pen);
+			cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
+		}
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu0_lock, flags);
+	} else {
+		raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+		for_each_online_cpu(cpu) {
+			int coreid = octeon_coreid_for_cpu(cpu);
+			pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
+			clear_bit(cd.s.bit, pen);
+			cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
+		}
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+	}
+}
+
+static void octeon_irq_ciu_enable_all(struct irq_data *data)
+{
+	unsigned long flags;
+	unsigned long *pen;
+	int cpu;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+
+	if (cd.s.line == 0) {
+		raw_spin_lock_irqsave(&octeon_irq_ciu0_lock, flags);
+		for_each_online_cpu(cpu) {
+			int coreid = octeon_coreid_for_cpu(cpu);
+			pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
+			set_bit(cd.s.bit, pen);
+			cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
+		}
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu0_lock, flags);
+	} else {
+		raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+		for_each_online_cpu(cpu) {
+			int coreid = octeon_coreid_for_cpu(cpu);
+			pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
+			set_bit(cd.s.bit, pen);
+			cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
+		}
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+	}
+}
+
+/*
+ * Enable the irq on the next core in the affinity set for chips that
+ * have the EN*_W1{S,C} registers.
+ */
+static void octeon_irq_ciu_enable_v2(struct irq_data *data)
+{
+	u64 mask;
+	int cpu = next_cpu_for_irq(data);
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd.s.bit);
+
+	/*
+	 * Called under the desc lock, so these should never get out
+	 * of sync.
+	 */
+	if (cd.s.line == 0) {
+		int index = octeon_coreid_for_cpu(cpu) * 2;
+		set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
+		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
+	} else {
+		int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
+		set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
+	}
+}
+
+/*
+ * Enable the irq on the current CPU for chips that
+ * have the EN*_W1{S,C} registers.
+ */
+static void octeon_irq_ciu_enable_local_v2(struct irq_data *data)
+{
+	u64 mask;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd.s.bit);
+
+	if (cd.s.line == 0) {
+		int index = cvmx_get_core_num() * 2;
+		set_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu0_en_mirror));
+		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
+	} else {
+		int index = cvmx_get_core_num() * 2 + 1;
+		set_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu1_en_mirror));
+		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
+	}
+}
+
+static void octeon_irq_ciu_disable_local_v2(struct irq_data *data)
+{
+	u64 mask;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = irq_data_get_irq_chip_data(data);
+	mask = 1ull << (cd.s.bit);
+
+	if (cd.s.line == 0) {
+		int index = cvmx_get_core_num() * 2;
+		clear_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu0_en_mirror));
+		cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask);
+	} else {
+		int index = cvmx_get_core_num() * 2 + 1;
+		clear_bit(cd.s.bit, &__get_cpu_var(octeon_irq_ciu1_en_mirror));
+		cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask);
+	}
+}
+
+/*
+ * Write to the W1C bit in CVMX_CIU_INTX_SUM0 to clear the irq.
+ */
+static void octeon_irq_ciu_ack(struct irq_data *data)
+{
+	u64 mask;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = data->chip_data;
+	mask = 1ull << (cd.s.bit);
+
+	if (cd.s.line == 0) {
+		int index = cvmx_get_core_num() * 2;
+		cvmx_write_csr(CVMX_CIU_INTX_SUM0(index), mask);
+	} else {
+		cvmx_write_csr(CVMX_CIU_INT_SUM1, mask);
+	}
+}
+
+/*
+ * Disable the irq on the all cores for chips that have the EN*_W1{S,C}
+ * registers.
+ */
+static void octeon_irq_ciu_disable_all_v2(struct irq_data *data)
+{
+	int cpu;
+	u64 mask;
+	union octeon_ciu_chip_data cd;
+
+	wmb(); /* Make sure flag changes arrive before register updates. */
+
+	cd.p = data->chip_data;
+	mask = 1ull << (cd.s.bit);
+
+	if (cd.s.line == 0) {
+		for_each_online_cpu(cpu) {
+			int index = octeon_coreid_for_cpu(cpu) * 2;
+			clear_bit(cd.s.bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
+			cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask);
+		}
+	} else {
+		for_each_online_cpu(cpu) {
+			int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
+			clear_bit(cd.s.bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+			cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask);
+		}
+	}
+}
+
+/*
+ * Enable the irq on the all cores for chips that have the EN*_W1{S,C}
+ * registers.
+ */
+static void octeon_irq_ciu_enable_all_v2(struct irq_data *data)
+{
+	int cpu;
+	u64 mask;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = data->chip_data;
+	mask = 1ull << (cd.s.bit);
+
+	if (cd.s.line == 0) {
+		for_each_online_cpu(cpu) {
+			int index = octeon_coreid_for_cpu(cpu) * 2;
+			set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu0_en_mirror, cpu));
+			cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
+		}
+	} else {
+		for_each_online_cpu(cpu) {
+			int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
+			set_bit(cd.s.bit, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+			cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
+		}
+	}
+}
+
+#ifdef CONFIG_SMP
+
+static void octeon_irq_cpu_offline_ciu(struct irq_data *data)
+{
+	int cpu = smp_processor_id();
+	cpumask_t new_affinity;
+
+	if (!cpumask_test_cpu(cpu, data->affinity))
+		return;
+
+	if (cpumask_weight(data->affinity) > 1) {
+		/*
+		 * It has multi CPU affinity, just remove this CPU
+		 * from the affinity set.
+		 */
+		cpumask_copy(&new_affinity, data->affinity);
+		cpumask_clear_cpu(cpu, &new_affinity);
+	} else {
+		/* Otherwise, put it on lowest numbered online CPU. */
+		cpumask_clear(&new_affinity);
+		cpumask_set_cpu(cpumask_first(cpu_online_mask), &new_affinity);
+	}
+	__irq_set_affinity_locked(data, &new_affinity);
+}
+
+static int octeon_irq_ciu_set_affinity(struct irq_data *data,
+				       const struct cpumask *dest, bool force)
+{
+	int cpu;
+	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
+	unsigned long flags;
+	union octeon_ciu_chip_data cd;
+
+	cd.p = data->chip_data;
+
+	/*
+	 * For non-v2 CIU, we will allow only single CPU affinity.
+	 * This removes the need to do locking in the .ack/.eoi
+	 * functions.
+	 */
+	if (cpumask_weight(dest) != 1)
+		return -EINVAL;
+
+	if (!enable_one)
+		return 0;
+
+	if (cd.s.line == 0) {
+		raw_spin_lock_irqsave(&octeon_irq_ciu0_lock, flags);
+		for_each_online_cpu(cpu) {
+			int coreid = octeon_coreid_for_cpu(cpu);
+			unsigned long *pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
+
+			if (cpumask_test_cpu(cpu, dest) && enable_one) {
+				enable_one = false;
+				set_bit(cd.s.bit, pen);
+			} else {
+				clear_bit(cd.s.bit, pen);
+			}
+			cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), *pen);
+		}
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu0_lock, flags);
+	} else {
+		raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+		for_each_online_cpu(cpu) {
+			int coreid = octeon_coreid_for_cpu(cpu);
+			unsigned long *pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
+
+			if (cpumask_test_cpu(cpu, dest) && enable_one) {
+				enable_one = false;
+				set_bit(cd.s.bit, pen);
+			} else {
+				clear_bit(cd.s.bit, pen);
+			}
+			cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
+		}
+		raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+	}
+	return 0;
+}
+
+/*
+ * Set affinity for the irq for chips that have the EN*_W1{S,C}
+ * registers.
+ */
+static int octeon_irq_ciu_set_affinity_v2(struct irq_data *data,
+					  const struct cpumask *dest,
+					  bool force)
+{
+	int cpu;
+	bool enable_one = !irqd_irq_disabled(data) && !irqd_irq_masked(data);
+	u64 mask;
+	union octeon_ciu_chip_data cd;
+
+	if (!enable_one)
+		return 0;
+
+	cd.p = data->chip_data;
+	mask = 1ull << cd.s.bit;
+
+	if (cd.s.line == 0) {
+		for_each_online_cpu(cpu) {
+			unsigned long *pen = &per_cpu(octeon_irq_ciu0_en_mirror, cpu);
+			int index = octeon_coreid_for_cpu(cpu) * 2;
+			if (cpumask_test_cpu(cpu, dest) && enable_one) {
+				enable_one = false;
+				set_bit(cd.s.bit, pen);
+				cvmx_write_csr(CVMX_CIU_INTX_EN0_W1S(index), mask);
+			} else {
+				clear_bit(cd.s.bit, pen);
+				cvmx_write_csr(CVMX_CIU_INTX_EN0_W1C(index), mask);
+			}
+		}
+	} else {
+		for_each_online_cpu(cpu) {
+			unsigned long *pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
+			int index = octeon_coreid_for_cpu(cpu) * 2 + 1;
+			if (cpumask_test_cpu(cpu, dest) && enable_one) {
+				enable_one = false;
+				set_bit(cd.s.bit, pen);
+				cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(index), mask);
+			} else {
+				clear_bit(cd.s.bit, pen);
+				cvmx_write_csr(CVMX_CIU_INTX_EN1_W1C(index), mask);
+			}
+		}
+	}
+	return 0;
+}
+#endif
+
+/*
+ * The v1 CIU code already masks things, so supply a dummy version to
+ * the core chip code.
+ */
+static void octeon_irq_dummy_mask(struct irq_data *data)
+{
+}
+
+/*
+ * Newer octeon chips have support for lockless CIU operation.
+ */
+static struct irq_chip octeon_irq_chip_ciu_v2 = {
+	.name = "CIU",
+	.irq_enable = octeon_irq_ciu_enable_v2,
+	.irq_disable = octeon_irq_ciu_disable_all_v2,
+	.irq_mask = octeon_irq_ciu_disable_local_v2,
+	.irq_unmask = octeon_irq_ciu_enable_v2,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity_v2,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu_edge_v2 = {
+	.name = "CIU-E",
+	.irq_enable = octeon_irq_ciu_enable_v2,
+	.irq_disable = octeon_irq_ciu_disable_all_v2,
+	.irq_ack = octeon_irq_ciu_ack,
+	.irq_mask = octeon_irq_ciu_disable_local_v2,
+	.irq_unmask = octeon_irq_ciu_enable_v2,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity_v2,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu = {
+	.name = "CIU",
+	.irq_enable = octeon_irq_ciu_enable,
+	.irq_disable = octeon_irq_ciu_disable_all,
+	.irq_mask = octeon_irq_dummy_mask,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+static struct irq_chip octeon_irq_chip_ciu_edge = {
+	.name = "CIU-E",
+	.irq_enable = octeon_irq_ciu_enable,
+	.irq_disable = octeon_irq_ciu_disable_all,
+	.irq_mask = octeon_irq_dummy_mask,
+	.irq_ack = octeon_irq_ciu_ack,
+#ifdef CONFIG_SMP
+	.irq_set_affinity = octeon_irq_ciu_set_affinity,
+	.irq_cpu_offline = octeon_irq_cpu_offline_ciu,
+#endif
+};
+
+/* The mbox versions don't do any affinity or round-robin. */
+static struct irq_chip octeon_irq_chip_ciu_mbox_v2 = {
+	.name = "CIU-M",
+	.irq_enable = octeon_irq_ciu_enable_all_v2,
+	.irq_disable = octeon_irq_ciu_disable_all_v2,
+	.irq_ack = octeon_irq_ciu_disable_local_v2,
+	.irq_eoi = octeon_irq_ciu_enable_local_v2,
+
+	.irq_cpu_online = octeon_irq_ciu_enable_local_v2,
+	.irq_cpu_offline = octeon_irq_ciu_disable_local_v2,
+	.flags = IRQCHIP_ONOFFLINE_ENABLED,
+};
+
+static struct irq_chip octeon_irq_chip_ciu_mbox = {
+	.name = "CIU-M",
+	.irq_enable = octeon_irq_ciu_enable_all,
+	.irq_disable = octeon_irq_ciu_disable_all,
+
+	.irq_cpu_online = octeon_irq_ciu_enable_local,
+	.irq_cpu_offline = octeon_irq_ciu_disable_local,
+	.flags = IRQCHIP_ONOFFLINE_ENABLED,
+};
+
+/*
+ * Watchdog interrupts are special.  They are associated with a single
+ * core, so we hardwire the affinity to that core.
+ */
+static void octeon_irq_ciu_wd_enable(struct irq_data *data)
+{
+	unsigned long flags;
+	unsigned long *pen;
+	int coreid = data->irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
+	int cpu = octeon_cpu_for_coreid(coreid);
+
+	raw_spin_lock_irqsave(&octeon_irq_ciu1_lock, flags);
+	pen = &per_cpu(octeon_irq_ciu1_en_mirror, cpu);
+	set_bit(coreid, pen);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), *pen);
+	raw_spin_unlock_irqrestore(&octeon_irq_ciu1_lock, flags);
+}
+
+/*
+ * Watchdog interrupts are special.  They are associated with a single
+ * core, so we hardwire the affinity to that core.
+ */
+static void octeon_irq_ciu1_wd_enable_v2(struct irq_data *data)
+{
+	int coreid = data->irq - OCTEON_IRQ_WDOG0;
+	int cpu = octeon_cpu_for_coreid(coreid);
+
+	set_bit(coreid, &per_cpu(octeon_irq_ciu1_en_mirror, cpu));
+	cvmx_write_csr(CVMX_CIU_INTX_EN1_W1S(coreid * 2 + 1), 1ull << coreid);
+}
+
+
+static struct irq_chip octeon_irq_chip_ciu_wd_v2 = {
+	.name = "CIU-W",
+	.irq_enable = octeon_irq_ciu1_wd_enable_v2,
+	.irq_disable = octeon_irq_ciu_disable_all_v2,
+	.irq_mask = octeon_irq_ciu_disable_local_v2,
+	.irq_unmask = octeon_irq_ciu_enable_local_v2,
+};
+
+static struct irq_chip octeon_irq_chip_ciu_wd = {
+	.name = "CIU-W",
+	.irq_enable = octeon_irq_ciu_wd_enable,
+	.irq_disable = octeon_irq_ciu_disable_all,
+	.irq_mask = octeon_irq_dummy_mask,
+};
+
+static void octeon_irq_ip2_v1(void)
+{
+	const unsigned long core_id = cvmx_get_core_num();
+	u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INTX_SUM0(core_id * 2));
+
+	ciu_sum &= __get_cpu_var(octeon_irq_ciu0_en_mirror);
+	clear_c0_status(STATUSF_IP2);
+	if (likely(ciu_sum)) {
+		int bit = fls64(ciu_sum) - 1;
+		int irq = octeon_irq_ciu_to_irq[0][bit];
+		if (likely(irq))
+			do_IRQ(irq);
+		else
+			spurious_interrupt();
+	} else {
+		spurious_interrupt();
+	}
+	set_c0_status(STATUSF_IP2);
+}
+
+static void octeon_irq_ip2_v2(void)
+{
+	const unsigned long core_id = cvmx_get_core_num();
+	u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INTX_SUM0(core_id * 2));
+
+	ciu_sum &= __get_cpu_var(octeon_irq_ciu0_en_mirror);
+	if (likely(ciu_sum)) {
+		int bit = fls64(ciu_sum) - 1;
+		int irq = octeon_irq_ciu_to_irq[0][bit];
+		if (likely(irq))
+			do_IRQ(irq);
+		else
+			spurious_interrupt();
+	} else {
+		spurious_interrupt();
+	}
+}
+static void octeon_irq_ip3_v1(void)
+{
+	u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INT_SUM1);
+
+	ciu_sum &= __get_cpu_var(octeon_irq_ciu1_en_mirror);
+	clear_c0_status(STATUSF_IP3);
+	if (likely(ciu_sum)) {
+		int bit = fls64(ciu_sum) - 1;
+		int irq = octeon_irq_ciu_to_irq[1][bit];
+		if (likely(irq))
+			do_IRQ(irq);
+		else
+			spurious_interrupt();
+	} else {
+		spurious_interrupt();
+	}
+	set_c0_status(STATUSF_IP3);
+}
+
+static void octeon_irq_ip3_v2(void)
+{
+	u64 ciu_sum = cvmx_read_csr(CVMX_CIU_INT_SUM1);
+
+	ciu_sum &= __get_cpu_var(octeon_irq_ciu1_en_mirror);
+	if (likely(ciu_sum)) {
+		int bit = fls64(ciu_sum) - 1;
+		int irq = octeon_irq_ciu_to_irq[1][bit];
+		if (likely(irq))
+			do_IRQ(irq);
+		else
+			spurious_interrupt();
+	} else {
+		spurious_interrupt();
+	}
+}
+
+static void octeon_irq_ip4_mask(void)
+{
+	clear_c0_status(STATUSF_IP4);
+	spurious_interrupt();
+}
+
+static void (*octeon_irq_ip2)(void);
+static void (*octeon_irq_ip3)(void);
+static void (*octeon_irq_ip4)(void);
+
+void __cpuinitdata (*octeon_irq_setup_secondary)(void);
+
+static void __cpuinit octeon_irq_percpu_enable(void)
+{
+	irq_cpu_online();
+}
+
+static void __cpuinit octeon_irq_init_ciu_percpu(void)
+{
+	int coreid = cvmx_get_core_num();
+	/*
+	 * Disable All CIU Interrupts. The ones we need will be
+	 * enabled later.  Read the SUM register so we know the write
+	 * completed.
+	 */
+	cvmx_write_csr(CVMX_CIU_INTX_EN0((coreid * 2)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN0((coreid * 2 + 1)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1((coreid * 2)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1((coreid * 2 + 1)), 0);
+	cvmx_read_csr(CVMX_CIU_INTX_SUM0((coreid * 2)));
+}
+
+static void __cpuinit octeon_irq_setup_secondary_ciu(void)
+{
+
+	__get_cpu_var(octeon_irq_ciu0_en_mirror) = 0;
+	__get_cpu_var(octeon_irq_ciu1_en_mirror) = 0;
+
+	octeon_irq_init_ciu_percpu();
+	octeon_irq_percpu_enable();
+
+	/* Enable the CIU lines */
+	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
+	clear_c0_status(STATUSF_IP4);
+}
+
+static void __init octeon_irq_init_ciu(void)
+{
+	unsigned int i;
+	struct irq_chip *chip;
+	struct irq_chip *chip_edge;
+	struct irq_chip *chip_mbox;
+	struct irq_chip *chip_wd;
+
+	octeon_irq_init_ciu_percpu();
+	octeon_irq_setup_secondary = octeon_irq_setup_secondary_ciu;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN58XX_PASS2_X) ||
+	    OCTEON_IS_MODEL(OCTEON_CN56XX_PASS2_X) ||
+	    OCTEON_IS_MODEL(OCTEON_CN52XX_PASS2_X) ||
+	    OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		octeon_irq_ip2 = octeon_irq_ip2_v2;
+		octeon_irq_ip3 = octeon_irq_ip3_v2;
+		chip = &octeon_irq_chip_ciu_v2;
+		chip_edge = &octeon_irq_chip_ciu_edge_v2;
+		chip_mbox = &octeon_irq_chip_ciu_mbox_v2;
+		chip_wd = &octeon_irq_chip_ciu_wd_v2;
+	} else {
+		octeon_irq_ip2 = octeon_irq_ip2_v1;
+		octeon_irq_ip3 = octeon_irq_ip3_v1;
+		chip = &octeon_irq_chip_ciu;
+		chip_edge = &octeon_irq_chip_ciu_edge;
+		chip_mbox = &octeon_irq_chip_ciu_mbox;
+		chip_wd = &octeon_irq_chip_ciu_wd;
+	}
+	octeon_irq_ip4 = octeon_irq_ip4_mask;
+
+	/* Mips internal */
+	octeon_irq_init_core();
+
+	/* CIU_0 */
+	for (i = 0; i < 16; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WORKQ0, 0, i + 0, chip, handle_level_irq);
+	for (i = 0; i < 16; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_GPIO0, 0, i + 16, chip, handle_level_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX0, 0, 32, chip_mbox, handle_percpu_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX1, 0, 33, chip_mbox, handle_percpu_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_UART0, 0, 34, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_UART1, 0, 35, chip, handle_level_irq);
+
+	for (i = 0; i < 4; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_PCI_INT0, 0, i + 36, chip, handle_level_irq);
+	for (i = 0; i < 4; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_PCI_MSI0, 0, i + 40, chip, handle_level_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_TWSI, 0, 45, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_RML, 0, 46, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_TRACE0, 0, 47, chip, handle_level_irq);
+
+	for (i = 0; i < 2; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_GMX_DRP0, 0, i + 48, chip_edge, handle_edge_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_IPD_DRP, 0, 50, chip_edge, handle_edge_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_KEY_ZERO, 0, 51, chip_edge, handle_edge_irq);
+
+	for (i = 0; i < 4; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_TIMER0, 0, i + 52, chip_edge, handle_edge_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_USB0, 0, 56, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_PCM, 0, 57, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MPI, 0, 58, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_TWSI2, 0, 59, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_POWIQ, 0, 60, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_IPDPPTHR, 0, 61, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MII0, 0, 62, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_BOOTDMA, 0, 63, chip, handle_level_irq);
+
+	/* CIU_1 */
+	for (i = 0; i < 16; i++)
+		octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WDOG0, 1, i + 0, chip_wd, handle_level_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_UART2, 1, 16, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_USB1, 1, 17, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MII1, 1, 18, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_NAND, 1, 19, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_MIO, 1, 20, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_IOB, 1, 21, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_FPA, 1, 22, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_POW, 1, 23, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_L2C, 1, 24, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_IPD, 1, 25, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_PIP, 1, 26, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_PKO, 1, 27, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_ZIP, 1, 28, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_TIM, 1, 29, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_RAD, 1, 30, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_KEY, 1, 31, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_DFA, 1, 32, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_USBCTL, 1, 33, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_SLI, 1, 34, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_DPI, 1, 35, chip, handle_level_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_AGX0, 1, 36, chip, handle_level_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_AGL, 1, 46, chip, handle_level_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_PTP, 1, 47, chip_edge, handle_edge_irq);
+
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_PEM0, 1, 48, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_PEM1, 1, 49, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_SRIO0, 1, 50, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_SRIO1, 1, 51, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_LMC0, 1, 52, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_DFM, 1, 56, chip, handle_level_irq);
+	octeon_irq_set_ciu_mapping(OCTEON_IRQ_RST, 1, 63, chip, handle_level_irq);
+
+	/* Enable the CIU lines */
+	set_c0_status(STATUSF_IP3 | STATUSF_IP2);
+	clear_c0_status(STATUSF_IP4);
+}
+
+void __init arch_init_irq(void)
+{
+#ifdef CONFIG_SMP
+	/* Set the default affinity to the boot cpu. */
+	cpumask_clear(irq_default_affinity);
+	cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+#endif
+	octeon_irq_init_ciu();
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	unsigned long cop0_cause;
+	unsigned long cop0_status;
+
+	while (1) {
+		cop0_cause = read_c0_cause();
+		cop0_status = read_c0_status();
+		cop0_cause &= cop0_status;
+		cop0_cause &= ST0_IM;
+
+		if (unlikely(cop0_cause & STATUSF_IP2))
+			octeon_irq_ip2();
+		else if (unlikely(cop0_cause & STATUSF_IP3))
+			octeon_irq_ip3();
+		else if (unlikely(cop0_cause & STATUSF_IP4))
+			octeon_irq_ip4();
+		else if (likely(cop0_cause))
+			do_IRQ(fls(cop0_cause) - 9 + MIPS_CPU_IRQ_BASE);
+		else
+			break;
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+void fixup_irqs(void)
+{
+	irq_cpu_offline();
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
new file mode 100644
index 00000000..88e0cddc
--- /dev/null
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@@ -0,0 +1,521 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Unified implementation of memcpy, memmove and the __copy_user backend.
+ *
+ * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
+ * Copyright (C) 2002 Broadcom, Inc.
+ *   memcpy/copy_user author: Mark Vandevoorde
+ *
+ * Mnemonic names for arguments to memcpy/__copy_user
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+
+#define dst a0
+#define src a1
+#define len a2
+
+/*
+ * Spec
+ *
+ * memcpy copies len bytes from src to dst and sets v0 to dst.
+ * It assumes that
+ *   - src and dst don't overlap
+ *   - src is readable
+ *   - dst is writable
+ * memcpy uses the standard calling convention
+ *
+ * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
+ * the number of uncopied bytes due to an exception caused by a read or write.
+ * __copy_user assumes that src and dst don't overlap, and that the call is
+ * implementing one of the following:
+ *   copy_to_user
+ *     - src is readable  (no exceptions when reading src)
+ *   copy_from_user
+ *     - dst is writable  (no exceptions when writing dst)
+ * __copy_user uses a non-standard calling convention; see
+ * arch/mips/include/asm/uaccess.h
+ *
+ * When an exception happens on a load, the handler must
+ # ensure that all of the destination buffer is overwritten to prevent
+ * leaking information to user mode programs.
+ */
+
+/*
+ * Implementation
+ */
+
+/*
+ * The exception handler for loads requires that:
+ *  1- AT contain the address of the byte just past the end of the source
+ *     of the copy,
+ *  2- src_entry <= src < AT, and
+ *  3- (dst - src) == (dst_entry - src_entry),
+ * The _entry suffix denotes values when __copy_user was called.
+ *
+ * (1) is set up up by uaccess.h and maintained by not writing AT in copy_user
+ * (2) is met by incrementing src by the number of bytes copied
+ * (3) is met by not doing loads between a pair of increments of dst and src
+ *
+ * The exception handlers for stores adjust len (if necessary) and return.
+ * These handlers do not need to overwrite any data.
+ *
+ * For __rmemcpy and memmove an exception is always a kernel bug, therefore
+ * they're not protected.
+ */
+
+#define EXC(inst_reg,addr,handler)		\
+9:	inst_reg, addr;				\
+	.section __ex_table,"a";		\
+	PTR	9b, handler;			\
+	.previous
+
+/*
+ * Only on the 64-bit kernel we can made use of 64-bit registers.
+ */
+#ifdef CONFIG_64BIT
+#define USE_DOUBLE
+#endif
+
+#ifdef USE_DOUBLE
+
+#define LOAD   ld
+#define LOADL  ldl
+#define LOADR  ldr
+#define STOREL sdl
+#define STORER sdr
+#define STORE  sd
+#define ADD    daddu
+#define SUB    dsubu
+#define SRL    dsrl
+#define SRA    dsra
+#define SLL    dsll
+#define SLLV   dsllv
+#define SRLV   dsrlv
+#define NBYTES 8
+#define LOG_NBYTES 3
+
+/*
+ * As we are sharing code base with the mips32 tree (which use the o32 ABI
+ * register definitions). We need to redefine the register definitions from
+ * the n64 ABI register naming to the o32 ABI register naming.
+ */
+#undef t0
+#undef t1
+#undef t2
+#undef t3
+#define t0	$8
+#define t1	$9
+#define t2	$10
+#define t3	$11
+#define t4	$12
+#define t5	$13
+#define t6	$14
+#define t7	$15
+
+#else
+
+#define LOAD   lw
+#define LOADL  lwl
+#define LOADR  lwr
+#define STOREL swl
+#define STORER swr
+#define STORE  sw
+#define ADD    addu
+#define SUB    subu
+#define SRL    srl
+#define SLL    sll
+#define SRA    sra
+#define SLLV   sllv
+#define SRLV   srlv
+#define NBYTES 4
+#define LOG_NBYTES 2
+
+#endif /* USE_DOUBLE */
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define LDFIRST LOADR
+#define LDREST  LOADL
+#define STFIRST STORER
+#define STREST  STOREL
+#define SHIFT_DISCARD SLLV
+#else
+#define LDFIRST LOADL
+#define LDREST  LOADR
+#define STFIRST STOREL
+#define STREST  STORER
+#define SHIFT_DISCARD SRLV
+#endif
+
+#define FIRST(unit) ((unit)*NBYTES)
+#define REST(unit)  (FIRST(unit)+NBYTES-1)
+#define UNIT(unit)  FIRST(unit)
+
+#define ADDRMASK (NBYTES-1)
+
+	.text
+	.set	noreorder
+	.set	noat
+
+/*
+ * A combined memcpy/__copy_user
+ * __copy_user sets len to 0 for success; else to an upper bound of
+ * the number of uncopied bytes.
+ * memcpy sets v0 to dst.
+ */
+	.align	5
+LEAF(memcpy)					/* a0=dst a1=src a2=len */
+	move	v0, dst				/* return value */
+__memcpy:
+FEXPORT(__copy_user)
+	/*
+	 * Note: dst & src may be unaligned, len may be 0
+	 * Temps
+	 */
+	#
+	# Octeon doesn't care if the destination is unaligned. The hardware
+	# can fix it faster than we can special case the assembly.
+	#
+	pref	0, 0(src)
+	sltu	t0, len, NBYTES		# Check if < 1 word
+	bnez	t0, copy_bytes_checklen
+	 and	t0, src, ADDRMASK	# Check if src unaligned
+	bnez	t0, src_unaligned
+	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
+	bnez	t0, less_than_4units
+	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
+	bnez	t0, less_than_8units
+	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
+	bnez	t0, cleanup_both_aligned
+	 sltu	t0, len, 128+1		# Check if len < 129
+	bnez	t0, 1f			# Skip prefetch if len is too short
+	 sltu	t0, len, 256+1		# Check if len < 257
+	bnez	t0, 1f			# Skip prefetch if len is too short
+	 pref	0, 128(src)		# We must not prefetch invalid addresses
+	#
+	# This is where we loop if there is more than 128 bytes left
+2:	pref	0, 256(src)		# We must not prefetch invalid addresses
+	#
+	# This is where we loop if we can't prefetch anymore
+1:
+EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+	SUB	len, len, 16*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
+EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
+EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
+EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
+	ADD	src, src, 16*NBYTES
+EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
+	ADD	dst, dst, 16*NBYTES
+EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
+EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
+EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
+EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
+EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
+	sltu	t0, len, 256+1		# See if we can prefetch more
+	beqz	t0, 2b
+	 sltu	t0, len, 128		# See if we can loop more time
+	beqz	t0, 1b
+	 nop
+	#
+	# Jump here if there are less than 16*NBYTES left.
+	#
+cleanup_both_aligned:
+	beqz	len, done
+	 sltu	t0, len, 8*NBYTES
+	bnez	t0, less_than_8units
+	 nop
+EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+	SUB	len, len, 8*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
+EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
+	ADD	src, src, 8*NBYTES
+	beqz	len, done
+	 ADD	dst, dst, 8*NBYTES
+	#
+	# Jump here if there are less than 8*NBYTES left.
+	#
+less_than_8units:
+	sltu	t0, len, 4*NBYTES
+	bnez	t0, less_than_4units
+	 nop
+EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+	SUB	len, len, 4*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+	ADD	src, src, 4*NBYTES
+	beqz	len, done
+	 ADD	dst, dst, 4*NBYTES
+	#
+	# Jump here if there are less than 4*NBYTES left. This means
+	# we may need to copy up to 3 NBYTES words.
+	#
+less_than_4units:
+	sltu	t0, len, 1*NBYTES
+	bnez	t0, copy_bytes_checklen
+	 nop
+	#
+	# 1) Copy NBYTES, then check length again
+	#
+EXC(	LOAD	t0, 0(src),		l_exc)
+	SUB	len, len, NBYTES
+	sltu	t1, len, 8
+EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	ADD	src, src, NBYTES
+	bnez	t1, copy_bytes_checklen
+	 ADD	dst, dst, NBYTES
+	#
+	# 2) Copy NBYTES, then check length again
+	#
+EXC(	LOAD	t0, 0(src),		l_exc)
+	SUB	len, len, NBYTES
+	sltu	t1, len, 8
+EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	ADD	src, src, NBYTES
+	bnez	t1, copy_bytes_checklen
+	 ADD	dst, dst, NBYTES
+	#
+	# 3) Copy NBYTES, then check length again
+	#
+EXC(	LOAD	t0, 0(src),		l_exc)
+	SUB	len, len, NBYTES
+	ADD	src, src, NBYTES
+	ADD	dst, dst, NBYTES
+	b copy_bytes_checklen
+EXC(	 STORE	t0, -8(dst),		s_exc_p1u)
+
+src_unaligned:
+#define rem t8
+	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
+	beqz	t0, cleanup_src_unaligned
+	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
+1:
+/*
+ * Avoid consecutive LD*'s to the same register since some mips
+ * implementations can't issue them in the same cycle.
+ * It's OK to load FIRST(N+1) before REST(N) because the two addresses
+ * are to the same unit (unless src is aligned, but it's not).
+ */
+EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
+	SUB     len, len, 4*NBYTES
+EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
+EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
+EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
+	ADD	src, src, 4*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+	bne	len, rem, 1b
+	 ADD	dst, dst, 4*NBYTES
+
+cleanup_src_unaligned:
+	beqz	len, done
+	 and	rem, len, NBYTES-1  # rem = len % NBYTES
+	beq	rem, len, copy_bytes
+	 nop
+1:
+EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
+EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+	SUB	len, len, NBYTES
+EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	ADD	src, src, NBYTES
+	bne	len, rem, 1b
+	 ADD	dst, dst, NBYTES
+
+copy_bytes_checklen:
+	beqz	len, done
+	 nop
+copy_bytes:
+	/* 0 < len < NBYTES  */
+#define COPY_BYTE(N)			\
+EXC(	lb	t0, N(src), l_exc);	\
+	SUB	len, len, 1;		\
+	beqz	len, done;		\
+EXC(	 sb	t0, N(dst), s_exc_p1)
+
+	COPY_BYTE(0)
+	COPY_BYTE(1)
+#ifdef USE_DOUBLE
+	COPY_BYTE(2)
+	COPY_BYTE(3)
+	COPY_BYTE(4)
+	COPY_BYTE(5)
+#endif
+EXC(	lb	t0, NBYTES-2(src), l_exc)
+	SUB	len, len, 1
+	jr	ra
+EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
+done:
+	jr	ra
+	 nop
+	END(memcpy)
+
+l_exc_copy:
+	/*
+	 * Copy bytes from src until faulting load address (or until a
+	 * lb faults)
+	 *
+	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
+	 * may be more than a byte beyond the last address.
+	 * Hence, the lb below may get an exception.
+	 *
+	 * Assumes src < THREAD_BUADDR($28)
+	 */
+	LOAD	t0, TI_TASK($28)
+	 nop
+	LOAD	t0, THREAD_BUADDR(t0)
+1:
+EXC(	lb	t1, 0(src),	l_exc)
+	ADD	src, src, 1
+	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
+	bne	src, t0, 1b
+	 ADD	dst, dst, 1
+l_exc:
+	LOAD	t0, TI_TASK($28)
+	 nop
+	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
+	 nop
+	SUB	len, AT, t0		# len number of uncopied bytes
+	/*
+	 * Here's where we rely on src and dst being incremented in tandem,
+	 *   See (3) above.
+	 * dst += (fault addr - src) to put dst at first byte to clear
+	 */
+	ADD	dst, t0			# compute start address in a1
+	SUB	dst, src
+	/*
+	 * Clear len bytes starting at dst.  Can't call __bzero because it
+	 * might modify len.  An inefficient loop for these rare times...
+	 */
+	beqz	len, done
+	 SUB	src, len, 1
+1:	sb	zero, 0(dst)
+	ADD	dst, dst, 1
+	bnez	src, 1b
+	 SUB	src, src, 1
+	jr	ra
+	 nop
+
+
+#define SEXC(n)				\
+s_exc_p ## n ## u:			\
+	jr	ra;			\
+	 ADD	len, len, n*NBYTES
+
+SEXC(16)
+SEXC(15)
+SEXC(14)
+SEXC(13)
+SEXC(12)
+SEXC(11)
+SEXC(10)
+SEXC(9)
+SEXC(8)
+SEXC(7)
+SEXC(6)
+SEXC(5)
+SEXC(4)
+SEXC(3)
+SEXC(2)
+SEXC(1)
+
+s_exc_p1:
+	jr	ra
+	 ADD	len, len, 1
+s_exc:
+	jr	ra
+	 nop
+
+	.align	5
+LEAF(memmove)
+	ADD	t0, a0, a2
+	ADD	t1, a1, a2
+	sltu	t0, a1, t0			# dst + len <= src -> memcpy
+	sltu	t1, a0, t1			# dst >= src + len -> memcpy
+	and	t0, t1
+	beqz	t0, __memcpy
+	 move	v0, a0				/* return value */
+	beqz	a2, r_out
+	END(memmove)
+
+	/* fall through to __rmemcpy */
+LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
+	 sltu	t0, a1, a0
+	beqz	t0, r_end_bytes_up		# src >= dst
+	 nop
+	ADD	a0, a2				# dst = dst + len
+	ADD	a1, a2				# src = src + len
+
+r_end_bytes:
+	lb	t0, -1(a1)
+	SUB	a2, a2, 0x1
+	sb	t0, -1(a0)
+	SUB	a1, a1, 0x1
+	bnez	a2, r_end_bytes
+	 SUB	a0, a0, 0x1
+
+r_out:
+	jr	ra
+	 move	a2, zero
+
+r_end_bytes_up:
+	lb	t0, (a1)
+	SUB	a2, a2, 0x1
+	sb	t0, (a0)
+	ADD	a1, a1, 0x1
+	bnez	a2, r_end_bytes_up
+	 ADD	a0, a0, 0x1
+
+	jr	ra
+	 move	a2, zero
+	END(__rmemcpy)
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
new file mode 100644
index 00000000..cd61d728
--- /dev/null
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -0,0 +1,445 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2010 Cavium Networks
+ * Copyright (C) 2008 Wind River Systems
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/i2c.h>
+#include <linux/usb.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-rnm-defs.h>
+
+static struct octeon_cf_data octeon_cf_data;
+
+static int __init octeon_cf_device_init(void)
+{
+	union cvmx_mio_boot_reg_cfgx mio_boot_reg_cfg;
+	unsigned long base_ptr, region_base, region_size;
+	struct platform_device *pd;
+	struct resource cf_resources[3];
+	unsigned int num_resources;
+	int i;
+	int ret = 0;
+
+	/* Setup octeon-cf platform device if present. */
+	base_ptr = 0;
+	if (octeon_bootinfo->major_version == 1
+		&& octeon_bootinfo->minor_version >= 1) {
+		if (octeon_bootinfo->compact_flash_common_base_addr)
+			base_ptr =
+				octeon_bootinfo->compact_flash_common_base_addr;
+	} else {
+		base_ptr = 0x1d000800;
+	}
+
+	if (!base_ptr)
+		return ret;
+
+	/* Find CS0 region. */
+	for (i = 0; i < 8; i++) {
+		mio_boot_reg_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(i));
+		region_base = mio_boot_reg_cfg.s.base << 16;
+		region_size = (mio_boot_reg_cfg.s.size + 1) << 16;
+		if (mio_boot_reg_cfg.s.en && base_ptr >= region_base
+		    && base_ptr < region_base + region_size)
+			break;
+	}
+	if (i >= 7) {
+		/* i and i + 1 are CS0 and CS1, both must be less than 8. */
+		goto out;
+	}
+	octeon_cf_data.base_region = i;
+	octeon_cf_data.is16bit = mio_boot_reg_cfg.s.width;
+	octeon_cf_data.base_region_bias = base_ptr - region_base;
+	memset(cf_resources, 0, sizeof(cf_resources));
+	num_resources = 0;
+	cf_resources[num_resources].flags	= IORESOURCE_MEM;
+	cf_resources[num_resources].start	= region_base;
+	cf_resources[num_resources].end	= region_base + region_size - 1;
+	num_resources++;
+
+
+	if (!(base_ptr & 0xfffful)) {
+		/*
+		 * Boot loader signals availability of DMA (true_ide
+		 * mode) by setting low order bits of base_ptr to
+		 * zero.
+		 */
+
+		/* Assume that CS1 immediately follows. */
+		mio_boot_reg_cfg.u64 =
+			cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(i + 1));
+		region_base = mio_boot_reg_cfg.s.base << 16;
+		region_size = (mio_boot_reg_cfg.s.size + 1) << 16;
+		if (!mio_boot_reg_cfg.s.en)
+			goto out;
+
+		cf_resources[num_resources].flags	= IORESOURCE_MEM;
+		cf_resources[num_resources].start	= region_base;
+		cf_resources[num_resources].end	= region_base + region_size - 1;
+		num_resources++;
+
+		octeon_cf_data.dma_engine = 0;
+		cf_resources[num_resources].flags	= IORESOURCE_IRQ;
+		cf_resources[num_resources].start	= OCTEON_IRQ_BOOTDMA;
+		cf_resources[num_resources].end	= OCTEON_IRQ_BOOTDMA;
+		num_resources++;
+	} else {
+		octeon_cf_data.dma_engine = -1;
+	}
+
+	pd = platform_device_alloc("pata_octeon_cf", -1);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	pd->dev.platform_data = &octeon_cf_data;
+
+	ret = platform_device_add_resources(pd, cf_resources, num_resources);
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_cf_device_init);
+
+/* Octeon Random Number Generator.  */
+static int __init octeon_rng_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	struct resource rng_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+			.start	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS),
+			.end	= XKPHYS_TO_PHYS(CVMX_RNM_CTL_STATUS) + 0xf
+		}, {
+			.flags	= IORESOURCE_MEM,
+			.start	= cvmx_build_io_address(8, 0),
+			.end	= cvmx_build_io_address(8, 0) + 0x7
+		}
+	};
+
+	pd = platform_device_alloc("octeon_rng", -1);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = platform_device_add_resources(pd, rng_resources,
+					    ARRAY_SIZE(rng_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+
+out:
+	return ret;
+}
+device_initcall(octeon_rng_device_init);
+
+static struct i2c_board_info __initdata octeon_i2c_devices[] = {
+	{
+		I2C_BOARD_INFO("ds1337", 0x68),
+	},
+};
+
+static int __init octeon_i2c_devices_init(void)
+{
+	return i2c_register_board_info(0, octeon_i2c_devices,
+				       ARRAY_SIZE(octeon_i2c_devices));
+}
+arch_initcall(octeon_i2c_devices_init);
+
+#define OCTEON_I2C_IO_BASE 0x1180000001000ull
+#define OCTEON_I2C_IO_UNIT_OFFSET 0x200
+
+static struct octeon_i2c_data octeon_i2c_data[2];
+
+static int __init octeon_i2c_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+	int port, num_ports;
+
+	struct resource i2c_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+		}, {
+			.flags	= IORESOURCE_IRQ,
+		}
+	};
+
+	if (OCTEON_IS_MODEL(OCTEON_CN56XX) || OCTEON_IS_MODEL(OCTEON_CN52XX))
+		num_ports = 2;
+	else
+		num_ports = 1;
+
+	for (port = 0; port < num_ports; port++) {
+		octeon_i2c_data[port].sys_freq = octeon_get_io_clock_rate();
+		/*FIXME: should be examined. At the moment is set for 100Khz */
+		octeon_i2c_data[port].i2c_freq = 100000;
+
+		pd = platform_device_alloc("i2c-octeon", port);
+		if (!pd) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		pd->dev.platform_data = octeon_i2c_data + port;
+
+		i2c_resources[0].start =
+			OCTEON_I2C_IO_BASE + (port * OCTEON_I2C_IO_UNIT_OFFSET);
+		i2c_resources[0].end = i2c_resources[0].start + 0x1f;
+		switch (port) {
+		case 0:
+			i2c_resources[1].start = OCTEON_IRQ_TWSI;
+			i2c_resources[1].end = OCTEON_IRQ_TWSI;
+			break;
+		case 1:
+			i2c_resources[1].start = OCTEON_IRQ_TWSI2;
+			i2c_resources[1].end = OCTEON_IRQ_TWSI2;
+			break;
+		default:
+			BUG();
+		}
+
+		ret = platform_device_add_resources(pd,
+						    i2c_resources,
+						    ARRAY_SIZE(i2c_resources));
+		if (ret)
+			goto fail;
+
+		ret = platform_device_add(pd);
+		if (ret)
+			goto fail;
+	}
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_i2c_device_init);
+
+/* Octeon SMI/MDIO interface.  */
+static int __init octeon_mdiobus_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	if (octeon_is_simulation())
+		return 0; /* No mdio in the simulator. */
+
+	/* The bus number is the platform_device id.  */
+	pd = platform_device_alloc("mdio-octeon", 0);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+
+out:
+	return ret;
+
+}
+device_initcall(octeon_mdiobus_device_init);
+
+/* Octeon mgmt port Ethernet interface.  */
+static int __init octeon_mgmt_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+	int port, num_ports;
+
+	struct resource mgmt_port_resource = {
+		.flags	= IORESOURCE_IRQ,
+		.start	= -1,
+		.end	= -1
+	};
+
+	if (!OCTEON_IS_MODEL(OCTEON_CN56XX) && !OCTEON_IS_MODEL(OCTEON_CN52XX))
+		return 0;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN56XX))
+		num_ports = 1;
+	else
+		num_ports = 2;
+
+	for (port = 0; port < num_ports; port++) {
+		pd = platform_device_alloc("octeon_mgmt", port);
+		if (!pd) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		/* No DMA restrictions */
+		pd->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+		pd->dev.dma_mask = &pd->dev.coherent_dma_mask;
+
+		switch (port) {
+		case 0:
+			mgmt_port_resource.start = OCTEON_IRQ_MII0;
+			break;
+		case 1:
+			mgmt_port_resource.start = OCTEON_IRQ_MII1;
+			break;
+		default:
+			BUG();
+		}
+		mgmt_port_resource.end = mgmt_port_resource.start;
+
+		ret = platform_device_add_resources(pd, &mgmt_port_resource, 1);
+
+		if (ret)
+			goto fail;
+
+		ret = platform_device_add(pd);
+		if (ret)
+			goto fail;
+	}
+	return ret;
+fail:
+	platform_device_put(pd);
+
+out:
+	return ret;
+
+}
+device_initcall(octeon_mgmt_device_init);
+
+#ifdef CONFIG_USB
+
+static int __init octeon_ehci_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	struct resource usb_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+		}, {
+			.flags	= IORESOURCE_IRQ,
+		}
+	};
+
+	/* Only Octeon2 has ehci/ohci */
+	if (!OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return 0;
+
+	if (octeon_is_simulation() || usb_disabled())
+		return 0; /* No USB in the simulator. */
+
+	pd = platform_device_alloc("octeon-ehci", 0);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	usb_resources[0].start = 0x00016F0000000000ULL;
+	usb_resources[0].end = usb_resources[0].start + 0x100;
+
+	usb_resources[1].start = OCTEON_IRQ_USB0;
+	usb_resources[1].end = OCTEON_IRQ_USB0;
+
+	ret = platform_device_add_resources(pd, usb_resources,
+					    ARRAY_SIZE(usb_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_ehci_device_init);
+
+static int __init octeon_ohci_device_init(void)
+{
+	struct platform_device *pd;
+	int ret = 0;
+
+	struct resource usb_resources[] = {
+		{
+			.flags	= IORESOURCE_MEM,
+		}, {
+			.flags	= IORESOURCE_IRQ,
+		}
+	};
+
+	/* Only Octeon2 has ehci/ohci */
+	if (!OCTEON_IS_MODEL(OCTEON_CN63XX))
+		return 0;
+
+	if (octeon_is_simulation() || usb_disabled())
+		return 0; /* No USB in the simulator. */
+
+	pd = platform_device_alloc("octeon-ohci", 0);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	usb_resources[0].start = 0x00016F0000000400ULL;
+	usb_resources[0].end = usb_resources[0].start + 0x100;
+
+	usb_resources[1].start = OCTEON_IRQ_USB0;
+	usb_resources[1].end = OCTEON_IRQ_USB0;
+
+	ret = platform_device_add_resources(pd, usb_resources,
+					    ARRAY_SIZE(usb_resources));
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_ohci_device_init);
+
+#endif /* CONFIG_USB */
+
+MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Platform driver for Octeon SOC");
diff --git a/arch/mips/cavium-octeon/octeon_boot.h b/arch/mips/cavium-octeon/octeon_boot.h
new file mode 100644
index 00000000..428864b2
--- /dev/null
+++ b/arch/mips/cavium-octeon/octeon_boot.h
@@ -0,0 +1,72 @@
+/*
+ * (C) Copyright 2004, 2005 Cavium Networks
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#ifndef __OCTEON_BOOT_H__
+#define __OCTEON_BOOT_H__
+
+#include <linux/types.h>
+
+struct boot_init_vector {
+	/* First stage address - in ram instead of flash */
+	uint64_t code_addr;
+	/* Setup code for application, NOT application entry point */
+	uint32_t app_start_func_addr;
+	/* k0 is used for global data - needs to be passed to other cores */
+	uint32_t k0_val;
+	/* Address of boot info block structure */
+	uint64_t boot_info_addr;
+	uint32_t flags;         /* flags */
+	uint32_t pad;
+};
+
+/* similar to bootloader's linux_app_boot_info but without global data */
+struct linux_app_boot_info {
+	uint32_t labi_signature;
+	uint32_t start_core0_addr;
+	uint32_t avail_coremask;
+	uint32_t pci_console_active;
+	uint32_t icache_prefetch_disable;
+	uint64_t InitTLBStart_addr;
+	uint32_t start_app_addr;
+	uint32_t cur_exception_base;
+	uint32_t no_mark_private_data;
+	uint32_t compact_flash_common_base_addr;
+	uint32_t compact_flash_attribute_base_addr;
+	uint32_t led_display_base_addr;
+};
+
+/* If not to copy a lot of bootloader's structures
+   here is only offset of requested member */
+#define AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK    0x765c
+
+/* hardcoded in bootloader */
+#define  LABI_ADDR_IN_BOOTLOADER                         0x700
+
+#define LINUX_APP_BOOT_BLOCK_NAME "linux-app-boot"
+
+#define LABI_SIGNATURE 0xAABBCC01
+
+/*  from uboot-headers/octeon_mem_map.h */
+#define EXCEPTION_BASE_INCR     (4 * 1024)
+			       /* Increment size for exception base addresses (4k minimum) */
+#define EXCEPTION_BASE_BASE     0
+#define BOOTLOADER_PRIV_DATA_BASE       (EXCEPTION_BASE_BASE + 0x800)
+#define BOOTLOADER_BOOT_VECTOR          (BOOTLOADER_PRIV_DATA_BASE)
+
+#endif /* __OCTEON_BOOT_H__ */
diff --git a/arch/mips/cavium-octeon/serial.c b/arch/mips/cavium-octeon/serial.c
new file mode 100644
index 00000000..057f0ae8
--- /dev/null
+++ b/arch/mips/cavium-octeon/serial.c
@@ -0,0 +1,137 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2007 Cavium Networks
+ */
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_reg.h>
+#include <linux/tty.h>
+#include <linux/irq.h>
+
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+
+#define DEBUG_UART 1
+
+unsigned int octeon_serial_in(struct uart_port *up, int offset)
+{
+	int rv = cvmx_read_csr((uint64_t)(up->membase + (offset << 3)));
+	if (offset == UART_IIR && (rv & 0xf) == 7) {
+		/* Busy interrupt, read the USR (39) and try again. */
+		cvmx_read_csr((uint64_t)(up->membase + (39 << 3)));
+		rv = cvmx_read_csr((uint64_t)(up->membase + (offset << 3)));
+	}
+	return rv;
+}
+
+void octeon_serial_out(struct uart_port *up, int offset, int value)
+{
+	/*
+	 * If bits 6 or 7 of the OCTEON UART's LCR are set, it quits
+	 * working.
+	 */
+	if (offset == UART_LCR)
+		value &= 0x9f;
+	cvmx_write_csr((uint64_t)(up->membase + (offset << 3)), (u8)value);
+}
+
+/*
+ * Allocated in .bss, so it is all zeroed.
+ */
+#define OCTEON_MAX_UARTS 3
+static struct plat_serial8250_port octeon_uart8250_data[OCTEON_MAX_UARTS + 1];
+static struct platform_device octeon_uart8250_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev			= {
+		.platform_data	= octeon_uart8250_data,
+	},
+};
+
+static void __init octeon_uart_set_common(struct plat_serial8250_port *p)
+{
+	p->flags = ASYNC_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE;
+	p->type = PORT_OCTEON;
+	p->iotype = UPIO_MEM;
+	p->regshift = 3;	/* I/O addresses are every 8 bytes */
+	if (octeon_is_simulation())
+		/* Make simulator output fast*/
+		p->uartclk = 115200 * 16;
+	else
+		p->uartclk = octeon_get_io_clock_rate();
+	p->serial_in = octeon_serial_in;
+	p->serial_out = octeon_serial_out;
+}
+
+static int __init octeon_serial_init(void)
+{
+	int enable_uart0;
+	int enable_uart1;
+	int enable_uart2;
+	struct plat_serial8250_port *p;
+
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+	/*
+	 * If we are configured to run as the second of two kernels,
+	 * disable uart0 and enable uart1. Uart0 is owned by the first
+	 * kernel
+	 */
+	enable_uart0 = 0;
+	enable_uart1 = 1;
+#else
+	/*
+	 * We are configured for the first kernel. We'll enable uart0
+	 * if the bootloader told us to use 0, otherwise will enable
+	 * uart 1.
+	 */
+	enable_uart0 = (octeon_get_boot_uart() == 0);
+	enable_uart1 = (octeon_get_boot_uart() == 1);
+#ifdef CONFIG_KGDB
+	enable_uart1 = 1;
+#endif
+#endif
+
+	/* Right now CN52XX is the only chip with a third uart */
+	enable_uart2 = OCTEON_IS_MODEL(OCTEON_CN52XX);
+
+	p = octeon_uart8250_data;
+	if (enable_uart0) {
+		/* Add a ttyS device for hardware uart 0 */
+		octeon_uart_set_common(p);
+		p->membase = (void *) CVMX_MIO_UARTX_RBR(0);
+		p->mapbase = CVMX_MIO_UARTX_RBR(0) & ((1ull << 49) - 1);
+		p->irq = OCTEON_IRQ_UART0;
+		p++;
+	}
+
+	if (enable_uart1) {
+		/* Add a ttyS device for hardware uart 1 */
+		octeon_uart_set_common(p);
+		p->membase = (void *) CVMX_MIO_UARTX_RBR(1);
+		p->mapbase = CVMX_MIO_UARTX_RBR(1) & ((1ull << 49) - 1);
+		p->irq = OCTEON_IRQ_UART1;
+		p++;
+	}
+	if (enable_uart2) {
+		/* Add a ttyS device for hardware uart 2 */
+		octeon_uart_set_common(p);
+		p->membase = (void *) CVMX_MIO_UART2_RBR;
+		p->mapbase = CVMX_MIO_UART2_RBR & ((1ull << 49) - 1);
+		p->irq = OCTEON_IRQ_UART2;
+		p++;
+	}
+
+	BUG_ON(p > &octeon_uart8250_data[OCTEON_MAX_UARTS]);
+
+	return platform_device_register(&octeon_uart8250_device);
+}
+
+device_initcall(octeon_serial_init);
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
new file mode 100644
index 00000000..2d9028f1
--- /dev/null
+++ b/arch/mips/cavium-octeon/setup.c
@@ -0,0 +1,785 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2007 Cavium Networks
+ * Copyright (C) 2008 Wind River Systems
+ */
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/serial.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/string.h>	/* for memset */
+#include <linux/tty.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+#include <linux/serial_core.h>
+#include <linux/serial_8250.h>
+
+#include <asm/processor.h>
+#include <asm/reboot.h>
+#include <asm/smp-ops.h>
+#include <asm/system.h>
+#include <asm/irq_cpu.h>
+#include <asm/mipsregs.h>
+#include <asm/bootinfo.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/pci-octeon.h>
+#include <asm/octeon/cvmx-mio-defs.h>
+
+#ifdef CONFIG_CAVIUM_DECODE_RSL
+extern void cvmx_interrupt_rsl_decode(void);
+extern int __cvmx_interrupt_ecc_report_single_bit_errors;
+extern void cvmx_interrupt_rsl_enable(void);
+#endif
+
+extern struct plat_smp_ops octeon_smp_ops;
+
+#ifdef CONFIG_PCI
+extern void pci_console_init(const char *arg);
+#endif
+
+static unsigned long long MAX_MEMORY = 512ull << 20;
+
+struct octeon_boot_descriptor *octeon_boot_desc_ptr;
+
+struct cvmx_bootinfo *octeon_bootinfo;
+EXPORT_SYMBOL(octeon_bootinfo);
+
+#ifdef CONFIG_CAVIUM_RESERVE32
+uint64_t octeon_reserve32_memory;
+EXPORT_SYMBOL(octeon_reserve32_memory);
+#endif
+
+static int octeon_uart;
+
+extern asmlinkage void handle_int(void);
+extern asmlinkage void plat_irq_dispatch(void);
+
+/**
+ * Return non zero if we are currently running in the Octeon simulator
+ *
+ * Returns
+ */
+int octeon_is_simulation(void)
+{
+	return octeon_bootinfo->board_type == CVMX_BOARD_TYPE_SIM;
+}
+EXPORT_SYMBOL(octeon_is_simulation);
+
+/**
+ * Return true if Octeon is in PCI Host mode. This means
+ * Linux can control the PCI bus.
+ *
+ * Returns Non zero if Octeon in host mode.
+ */
+int octeon_is_pci_host(void)
+{
+#ifdef CONFIG_PCI
+	return octeon_bootinfo->config_flags & CVMX_BOOTINFO_CFG_FLAG_PCI_HOST;
+#else
+	return 0;
+#endif
+}
+
+/**
+ * Get the clock rate of Octeon
+ *
+ * Returns Clock rate in HZ
+ */
+uint64_t octeon_get_clock_rate(void)
+{
+	struct cvmx_sysinfo *sysinfo = cvmx_sysinfo_get();
+
+	return sysinfo->cpu_clock_hz;
+}
+EXPORT_SYMBOL(octeon_get_clock_rate);
+
+static u64 octeon_io_clock_rate;
+
+u64 octeon_get_io_clock_rate(void)
+{
+	return octeon_io_clock_rate;
+}
+EXPORT_SYMBOL(octeon_get_io_clock_rate);
+
+
+/**
+ * Write to the LCD display connected to the bootbus. This display
+ * exists on most Cavium evaluation boards. If it doesn't exist, then
+ * this function doesn't do anything.
+ *
+ * @s:      String to write
+ */
+void octeon_write_lcd(const char *s)
+{
+	if (octeon_bootinfo->led_display_base_addr) {
+		void __iomem *lcd_address =
+			ioremap_nocache(octeon_bootinfo->led_display_base_addr,
+					8);
+		int i;
+		for (i = 0; i < 8; i++, s++) {
+			if (*s)
+				iowrite8(*s, lcd_address + i);
+			else
+				iowrite8(' ', lcd_address + i);
+		}
+		iounmap(lcd_address);
+	}
+}
+
+/**
+ * Return the console uart passed by the bootloader
+ *
+ * Returns uart   (0 or 1)
+ */
+int octeon_get_boot_uart(void)
+{
+	int uart;
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+	uart = 1;
+#else
+	uart = (octeon_boot_desc_ptr->flags & OCTEON_BL_FLAG_CONSOLE_UART1) ?
+		1 : 0;
+#endif
+	return uart;
+}
+
+/**
+ * Get the coremask Linux was booted on.
+ *
+ * Returns Core mask
+ */
+int octeon_get_boot_coremask(void)
+{
+	return octeon_boot_desc_ptr->core_mask;
+}
+
+/**
+ * Check the hardware BIST results for a CPU
+ */
+void octeon_check_cpu_bist(void)
+{
+	const int coreid = cvmx_get_core_num();
+	unsigned long long mask;
+	unsigned long long bist_val;
+
+	/* Check BIST results for COP0 registers */
+	mask = 0x1f00000000ull;
+	bist_val = read_octeon_c0_icacheerr();
+	if (bist_val & mask)
+		pr_err("Core%d BIST Failure: CacheErr(icache) = 0x%llx\n",
+		       coreid, bist_val);
+
+	bist_val = read_octeon_c0_dcacheerr();
+	if (bist_val & 1)
+		pr_err("Core%d L1 Dcache parity error: "
+		       "CacheErr(dcache) = 0x%llx\n",
+		       coreid, bist_val);
+
+	mask = 0xfc00000000000000ull;
+	bist_val = read_c0_cvmmemctl();
+	if (bist_val & mask)
+		pr_err("Core%d BIST Failure: COP0_CVM_MEM_CTL = 0x%llx\n",
+		       coreid, bist_val);
+
+	write_octeon_c0_dcacheerr(0);
+}
+
+/**
+ * Reboot Octeon
+ *
+ * @command: Command to pass to the bootloader. Currently ignored.
+ */
+static void octeon_restart(char *command)
+{
+	/* Disable all watchdogs before soft reset. They don't get cleared */
+#ifdef CONFIG_SMP
+	int cpu;
+	for_each_online_cpu(cpu)
+		cvmx_write_csr(CVMX_CIU_WDOGX(cpu_logical_map(cpu)), 0);
+#else
+	cvmx_write_csr(CVMX_CIU_WDOGX(cvmx_get_core_num()), 0);
+#endif
+
+	mb();
+	while (1)
+		cvmx_write_csr(CVMX_CIU_SOFT_RST, 1);
+}
+
+
+/**
+ * Permanently stop a core.
+ *
+ * @arg: Ignored.
+ */
+static void octeon_kill_core(void *arg)
+{
+	mb();
+	if (octeon_is_simulation()) {
+		/* The simulator needs the watchdog to stop for dead cores */
+		cvmx_write_csr(CVMX_CIU_WDOGX(cvmx_get_core_num()), 0);
+		/* A break instruction causes the simulator stop a core */
+		asm volatile ("sync\nbreak");
+	}
+}
+
+
+/**
+ * Halt the system
+ */
+static void octeon_halt(void)
+{
+	smp_call_function(octeon_kill_core, NULL, 0);
+
+	switch (octeon_bootinfo->board_type) {
+	case CVMX_BOARD_TYPE_NAO38:
+		/* Driving a 1 to GPIO 12 shuts off this board */
+		cvmx_write_csr(CVMX_GPIO_BIT_CFGX(12), 1);
+		cvmx_write_csr(CVMX_GPIO_TX_SET, 0x1000);
+		break;
+	default:
+		octeon_write_lcd("PowerOff");
+		break;
+	}
+
+	octeon_kill_core(NULL);
+}
+
+/**
+ * Handle all the error condition interrupts that might occur.
+ *
+ */
+#ifdef CONFIG_CAVIUM_DECODE_RSL
+static irqreturn_t octeon_rlm_interrupt(int cpl, void *dev_id)
+{
+	cvmx_interrupt_rsl_decode();
+	return IRQ_HANDLED;
+}
+#endif
+
+/**
+ * Return a string representing the system type
+ *
+ * Returns
+ */
+const char *octeon_board_type_string(void)
+{
+	static char name[80];
+	sprintf(name, "%s (%s)",
+		cvmx_board_type_to_string(octeon_bootinfo->board_type),
+		octeon_model_get_string(read_c0_prid()));
+	return name;
+}
+
+const char *get_system_type(void)
+	__attribute__ ((alias("octeon_board_type_string")));
+
+void octeon_user_io_init(void)
+{
+	union octeon_cvmemctl cvmmemctl;
+	union cvmx_iob_fau_timeout fau_timeout;
+	union cvmx_pow_nw_tim nm_tim;
+
+	/* Get the current settings for CP0_CVMMEMCTL_REG */
+	cvmmemctl.u64 = read_c0_cvmmemctl();
+	/* R/W If set, marked write-buffer entries time out the same
+	 * as as other entries; if clear, marked write-buffer entries
+	 * use the maximum timeout. */
+	cvmmemctl.s.dismarkwblongto = 1;
+	/* R/W If set, a merged store does not clear the write-buffer
+	 * entry timeout state. */
+	cvmmemctl.s.dismrgclrwbto = 0;
+	/* R/W Two bits that are the MSBs of the resultant CVMSEG LM
+	 * word location for an IOBDMA. The other 8 bits come from the
+	 * SCRADDR field of the IOBDMA. */
+	cvmmemctl.s.iobdmascrmsb = 0;
+	/* R/W If set, SYNCWS and SYNCS only order marked stores; if
+	 * clear, SYNCWS and SYNCS only order unmarked
+	 * stores. SYNCWSMARKED has no effect when DISSYNCWS is
+	 * set. */
+	cvmmemctl.s.syncwsmarked = 0;
+	/* R/W If set, SYNCWS acts as SYNCW and SYNCS acts as SYNC. */
+	cvmmemctl.s.dissyncws = 0;
+	/* R/W If set, no stall happens on write buffer full. */
+	if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2))
+		cvmmemctl.s.diswbfst = 1;
+	else
+		cvmmemctl.s.diswbfst = 0;
+	/* R/W If set (and SX set), supervisor-level loads/stores can
+	 * use XKPHYS addresses with <48>==0 */
+	cvmmemctl.s.xkmemenas = 0;
+
+	/* R/W If set (and UX set), user-level loads/stores can use
+	 * XKPHYS addresses with VA<48>==0 */
+	cvmmemctl.s.xkmemenau = 0;
+
+	/* R/W If set (and SX set), supervisor-level loads/stores can
+	 * use XKPHYS addresses with VA<48>==1 */
+	cvmmemctl.s.xkioenas = 0;
+
+	/* R/W If set (and UX set), user-level loads/stores can use
+	 * XKPHYS addresses with VA<48>==1 */
+	cvmmemctl.s.xkioenau = 0;
+
+	/* R/W If set, all stores act as SYNCW (NOMERGE must be set
+	 * when this is set) RW, reset to 0. */
+	cvmmemctl.s.allsyncw = 0;
+
+	/* R/W If set, no stores merge, and all stores reach the
+	 * coherent bus in order. */
+	cvmmemctl.s.nomerge = 0;
+	/* R/W Selects the bit in the counter used for DID time-outs 0
+	 * = 231, 1 = 230, 2 = 229, 3 = 214. Actual time-out is
+	 * between 1x and 2x this interval. For example, with
+	 * DIDTTO=3, expiration interval is between 16K and 32K. */
+	cvmmemctl.s.didtto = 0;
+	/* R/W If set, the (mem) CSR clock never turns off. */
+	cvmmemctl.s.csrckalwys = 0;
+	/* R/W If set, mclk never turns off. */
+	cvmmemctl.s.mclkalwys = 0;
+	/* R/W Selects the bit in the counter used for write buffer
+	 * flush time-outs (WBFLT+11) is the bit position in an
+	 * internal counter used to determine expiration. The write
+	 * buffer expires between 1x and 2x this interval. For
+	 * example, with WBFLT = 0, a write buffer expires between 2K
+	 * and 4K cycles after the write buffer entry is allocated. */
+	cvmmemctl.s.wbfltime = 0;
+	/* R/W If set, do not put Istream in the L2 cache. */
+	cvmmemctl.s.istrnol2 = 0;
+
+	/*
+	 * R/W The write buffer threshold. As per erratum Core-14752
+	 * for CN63XX, a sc/scd might fail if the write buffer is
+	 * full.  Lowering WBTHRESH greatly lowers the chances of the
+	 * write buffer ever being full and triggering the erratum.
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X))
+		cvmmemctl.s.wbthresh = 4;
+	else
+		cvmmemctl.s.wbthresh = 10;
+
+	/* R/W If set, CVMSEG is available for loads/stores in
+	 * kernel/debug mode. */
+#if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
+	cvmmemctl.s.cvmsegenak = 1;
+#else
+	cvmmemctl.s.cvmsegenak = 0;
+#endif
+	/* R/W If set, CVMSEG is available for loads/stores in
+	 * supervisor mode. */
+	cvmmemctl.s.cvmsegenas = 0;
+	/* R/W If set, CVMSEG is available for loads/stores in user
+	 * mode. */
+	cvmmemctl.s.cvmsegenau = 0;
+	/* R/W Size of local memory in cache blocks, 54 (6912 bytes)
+	 * is max legal value. */
+	cvmmemctl.s.lmemsz = CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE;
+
+	write_c0_cvmmemctl(cvmmemctl.u64);
+
+	if (smp_processor_id() == 0)
+		pr_notice("CVMSEG size: %d cache lines (%d bytes)\n",
+			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
+			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
+
+	/* Set a default for the hardware timeouts */
+	fau_timeout.u64 = 0;
+	fau_timeout.s.tout_val = 0xfff;
+	/* Disable tagwait FAU timeout */
+	fau_timeout.s.tout_enb = 0;
+	cvmx_write_csr(CVMX_IOB_FAU_TIMEOUT, fau_timeout.u64);
+
+	nm_tim.u64 = 0;
+	/* 4096 cycles */
+	nm_tim.s.nw_tim = 3;
+	cvmx_write_csr(CVMX_POW_NW_TIM, nm_tim.u64);
+
+	write_octeon_c0_icacheerr(0);
+	write_c0_derraddr1(0);
+}
+
+/**
+ * Early entry point for arch setup
+ */
+void __init prom_init(void)
+{
+	struct cvmx_sysinfo *sysinfo;
+	int i;
+	int argc;
+#ifdef CONFIG_CAVIUM_RESERVE32
+	int64_t addr = -1;
+#endif
+	/*
+	 * The bootloader passes a pointer to the boot descriptor in
+	 * $a3, this is available as fw_arg3.
+	 */
+	octeon_boot_desc_ptr = (struct octeon_boot_descriptor *)fw_arg3;
+	octeon_bootinfo =
+		cvmx_phys_to_ptr(octeon_boot_desc_ptr->cvmx_desc_vaddr);
+	cvmx_bootmem_init(cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr));
+
+	sysinfo = cvmx_sysinfo_get();
+	memset(sysinfo, 0, sizeof(*sysinfo));
+	sysinfo->system_dram_size = octeon_bootinfo->dram_size << 20;
+	sysinfo->phy_mem_desc_ptr =
+		cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr);
+	sysinfo->core_mask = octeon_bootinfo->core_mask;
+	sysinfo->exception_base_addr = octeon_bootinfo->exception_base_addr;
+	sysinfo->cpu_clock_hz = octeon_bootinfo->eclock_hz;
+	sysinfo->dram_data_rate_hz = octeon_bootinfo->dclock_hz * 2;
+	sysinfo->board_type = octeon_bootinfo->board_type;
+	sysinfo->board_rev_major = octeon_bootinfo->board_rev_major;
+	sysinfo->board_rev_minor = octeon_bootinfo->board_rev_minor;
+	memcpy(sysinfo->mac_addr_base, octeon_bootinfo->mac_addr_base,
+	       sizeof(sysinfo->mac_addr_base));
+	sysinfo->mac_addr_count = octeon_bootinfo->mac_addr_count;
+	memcpy(sysinfo->board_serial_number,
+	       octeon_bootinfo->board_serial_number,
+	       sizeof(sysinfo->board_serial_number));
+	sysinfo->compact_flash_common_base_addr =
+		octeon_bootinfo->compact_flash_common_base_addr;
+	sysinfo->compact_flash_attribute_base_addr =
+		octeon_bootinfo->compact_flash_attribute_base_addr;
+	sysinfo->led_display_base_addr = octeon_bootinfo->led_display_base_addr;
+	sysinfo->dfa_ref_clock_hz = octeon_bootinfo->dfa_ref_clock_hz;
+	sysinfo->bootloader_config_flags = octeon_bootinfo->config_flags;
+
+	if (OCTEON_IS_MODEL(OCTEON_CN6XXX)) {
+		/* I/O clock runs at a different rate than the CPU. */
+		union cvmx_mio_rst_boot rst_boot;
+		rst_boot.u64 = cvmx_read_csr(CVMX_MIO_RST_BOOT);
+		octeon_io_clock_rate = 50000000 * rst_boot.s.pnr_mul;
+	} else {
+		octeon_io_clock_rate = sysinfo->cpu_clock_hz;
+	}
+
+	/*
+	 * Only enable the LED controller if we're running on a CN38XX, CN58XX,
+	 * or CN56XX. The CN30XX and CN31XX don't have an LED controller.
+	 */
+	if (!octeon_is_simulation() &&
+	    octeon_has_feature(OCTEON_FEATURE_LED_CONTROLLER)) {
+		cvmx_write_csr(CVMX_LED_EN, 0);
+		cvmx_write_csr(CVMX_LED_PRT, 0);
+		cvmx_write_csr(CVMX_LED_DBG, 0);
+		cvmx_write_csr(CVMX_LED_PRT_FMT, 0);
+		cvmx_write_csr(CVMX_LED_UDD_CNTX(0), 32);
+		cvmx_write_csr(CVMX_LED_UDD_CNTX(1), 32);
+		cvmx_write_csr(CVMX_LED_UDD_DATX(0), 0);
+		cvmx_write_csr(CVMX_LED_UDD_DATX(1), 0);
+		cvmx_write_csr(CVMX_LED_EN, 1);
+	}
+#ifdef CONFIG_CAVIUM_RESERVE32
+	/*
+	 * We need to temporarily allocate all memory in the reserve32
+	 * region. This makes sure the kernel doesn't allocate this
+	 * memory when it is getting memory from the
+	 * bootloader. Later, after the memory allocations are
+	 * complete, the reserve32 will be freed.
+	 *
+	 * Allocate memory for RESERVED32 aligned on 2MB boundary. This
+	 * is in case we later use hugetlb entries with it.
+	 */
+	addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20,
+						0, 0, 2 << 20,
+						"CAVIUM_RESERVE32", 0);
+	if (addr < 0)
+		pr_err("Failed to allocate CAVIUM_RESERVE32 memory area\n");
+	else
+		octeon_reserve32_memory = addr;
+#endif
+
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2
+	if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) {
+		pr_info("Skipping L2 locking due to reduced L2 cache size\n");
+	} else {
+		uint32_t ebase = read_c0_ebase() & 0x3ffff000;
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_TLB
+		/* TLB refill */
+		cvmx_l2c_lock_mem_region(ebase, 0x100);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_EXCEPTION
+		/* General exception */
+		cvmx_l2c_lock_mem_region(ebase + 0x180, 0x80);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT
+		/* Interrupt handler */
+		cvmx_l2c_lock_mem_region(ebase + 0x200, 0x80);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_INTERRUPT
+		cvmx_l2c_lock_mem_region(__pa_symbol(handle_int), 0x100);
+		cvmx_l2c_lock_mem_region(__pa_symbol(plat_irq_dispatch), 0x80);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_MEMCPY
+		cvmx_l2c_lock_mem_region(__pa_symbol(memcpy), 0x480);
+#endif
+	}
+#endif
+
+	octeon_check_cpu_bist();
+
+	octeon_uart = octeon_get_boot_uart();
+
+#ifdef CONFIG_SMP
+	octeon_write_lcd("LinuxSMP");
+#else
+	octeon_write_lcd("Linux");
+#endif
+
+#ifdef CONFIG_CAVIUM_GDB
+	/*
+	 * When debugging the linux kernel, force the cores to enter
+	 * the debug exception handler to break in.
+	 */
+	if (octeon_get_boot_debug_flag()) {
+		cvmx_write_csr(CVMX_CIU_DINT, 1 << cvmx_get_core_num());
+		cvmx_read_csr(CVMX_CIU_DINT);
+	}
+#endif
+
+	/*
+	 * BIST should always be enabled when doing a soft reset. L2
+	 * Cache locking for instance is not cleared unless BIST is
+	 * enabled.  Unfortunately due to a chip errata G-200 for
+	 * Cn38XX and CN31XX, BIST msut be disabled on these parts.
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2) ||
+	    OCTEON_IS_MODEL(OCTEON_CN31XX))
+		cvmx_write_csr(CVMX_CIU_SOFT_BIST, 0);
+	else
+		cvmx_write_csr(CVMX_CIU_SOFT_BIST, 1);
+
+	/* Default to 64MB in the simulator to speed things up */
+	if (octeon_is_simulation())
+		MAX_MEMORY = 64ull << 20;
+
+	arcs_cmdline[0] = 0;
+	argc = octeon_boot_desc_ptr->argc;
+	for (i = 0; i < argc; i++) {
+		const char *arg =
+			cvmx_phys_to_ptr(octeon_boot_desc_ptr->argv[i]);
+		if ((strncmp(arg, "MEM=", 4) == 0) ||
+		    (strncmp(arg, "mem=", 4) == 0)) {
+			sscanf(arg + 4, "%llu", &MAX_MEMORY);
+			MAX_MEMORY <<= 20;
+			if (MAX_MEMORY == 0)
+				MAX_MEMORY = 32ull << 30;
+		} else if (strcmp(arg, "ecc_verbose") == 0) {
+#ifdef CONFIG_CAVIUM_REPORT_SINGLE_BIT_ECC
+			__cvmx_interrupt_ecc_report_single_bit_errors = 1;
+			pr_notice("Reporting of single bit ECC errors is "
+				  "turned on\n");
+#endif
+		} else if (strlen(arcs_cmdline) + strlen(arg) + 1 <
+			   sizeof(arcs_cmdline) - 1) {
+			strcat(arcs_cmdline, " ");
+			strcat(arcs_cmdline, arg);
+		}
+	}
+
+	if (strstr(arcs_cmdline, "console=") == NULL) {
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+		strcat(arcs_cmdline, " console=ttyS0,115200");
+#else
+		if (octeon_uart == 1)
+			strcat(arcs_cmdline, " console=ttyS1,115200");
+		else
+			strcat(arcs_cmdline, " console=ttyS0,115200");
+#endif
+	}
+
+	if (octeon_is_simulation()) {
+		/*
+		 * The simulator uses a mtdram device pre filled with
+		 * the filesystem. Also specify the calibration delay
+		 * to avoid calculating it every time.
+		 */
+		strcat(arcs_cmdline, " rw root=1f00 slram=root,0x40000000,+1073741824");
+	}
+
+	mips_hpt_frequency = octeon_get_clock_rate();
+
+	octeon_init_cvmcount();
+	octeon_setup_delays();
+
+	_machine_restart = octeon_restart;
+	_machine_halt = octeon_halt;
+
+	octeon_user_io_init();
+	register_smp_ops(&octeon_smp_ops);
+}
+
+/* Exclude a single page from the regions obtained in plat_mem_setup. */
+static __init void memory_exclude_page(u64 addr, u64 *mem, u64 *size)
+{
+	if (addr > *mem && addr < *mem + *size) {
+		u64 inc = addr - *mem;
+		add_memory_region(*mem, inc, BOOT_MEM_RAM);
+		*mem += inc;
+		*size -= inc;
+	}
+
+	if (addr == *mem && *size > PAGE_SIZE) {
+		*mem += PAGE_SIZE;
+		*size -= PAGE_SIZE;
+	}
+}
+
+void __init plat_mem_setup(void)
+{
+	uint64_t mem_alloc_size;
+	uint64_t total;
+	int64_t memory;
+
+	total = 0;
+
+	/* First add the init memory we will be returning.  */
+	memory = __pa_symbol(&__init_begin) & PAGE_MASK;
+	mem_alloc_size = (__pa_symbol(&__init_end) & PAGE_MASK) - memory;
+	if (mem_alloc_size > 0) {
+		add_memory_region(memory, mem_alloc_size, BOOT_MEM_RAM);
+		total += mem_alloc_size;
+	}
+
+	/*
+	 * The Mips memory init uses the first memory location for
+	 * some memory vectors. When SPARSEMEM is in use, it doesn't
+	 * verify that the size is big enough for the final
+	 * vectors. Making the smallest chuck 4MB seems to be enough
+	 * to consistently work.
+	 */
+	mem_alloc_size = 4 << 20;
+	if (mem_alloc_size > MAX_MEMORY)
+		mem_alloc_size = MAX_MEMORY;
+
+	/*
+	 * When allocating memory, we want incrementing addresses from
+	 * bootmem_alloc so the code in add_memory_region can merge
+	 * regions next to each other.
+	 */
+	cvmx_bootmem_lock();
+	while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX)
+		&& (total < MAX_MEMORY)) {
+#if defined(CONFIG_64BIT) || defined(CONFIG_64BIT_PHYS_ADDR)
+		memory = cvmx_bootmem_phy_alloc(mem_alloc_size,
+						__pa_symbol(&__init_end), -1,
+						0x100000,
+						CVMX_BOOTMEM_FLAG_NO_LOCKING);
+#elif defined(CONFIG_HIGHMEM)
+		memory = cvmx_bootmem_phy_alloc(mem_alloc_size, 0, 1ull << 31,
+						0x100000,
+						CVMX_BOOTMEM_FLAG_NO_LOCKING);
+#else
+		memory = cvmx_bootmem_phy_alloc(mem_alloc_size, 0, 512 << 20,
+						0x100000,
+						CVMX_BOOTMEM_FLAG_NO_LOCKING);
+#endif
+		if (memory >= 0) {
+			u64 size = mem_alloc_size;
+
+			/*
+			 * exclude a page at the beginning and end of
+			 * the 256MB PCIe 'hole' so the kernel will not
+			 * try to allocate multi-page buffers that
+			 * span the discontinuity.
+			 */
+			memory_exclude_page(CVMX_PCIE_BAR1_PHYS_BASE,
+					    &memory, &size);
+			memory_exclude_page(CVMX_PCIE_BAR1_PHYS_BASE +
+					    CVMX_PCIE_BAR1_PHYS_SIZE,
+					    &memory, &size);
+
+			/*
+			 * This function automatically merges address
+			 * regions next to each other if they are
+			 * received in incrementing order.
+			 */
+			if (size)
+				add_memory_region(memory, size, BOOT_MEM_RAM);
+			total += mem_alloc_size;
+		} else {
+			break;
+		}
+	}
+	cvmx_bootmem_unlock();
+
+#ifdef CONFIG_CAVIUM_RESERVE32
+	/*
+	 * Now that we've allocated the kernel memory it is safe to
+	 * free the reserved region. We free it here so that builtin
+	 * drivers can use the memory.
+	 */
+	if (octeon_reserve32_memory)
+		cvmx_bootmem_free_named("CAVIUM_RESERVE32");
+#endif /* CONFIG_CAVIUM_RESERVE32 */
+
+	if (total == 0)
+		panic("Unable to allocate memory from "
+		      "cvmx_bootmem_phy_alloc\n");
+}
+
+/*
+ * Emit one character to the boot UART.  Exported for use by the
+ * watchdog timer.
+ */
+int prom_putchar(char c)
+{
+	uint64_t lsrval;
+
+	/* Spin until there is room */
+	do {
+		lsrval = cvmx_read_csr(CVMX_MIO_UARTX_LSR(octeon_uart));
+	} while ((lsrval & 0x20) == 0);
+
+	/* Write the byte */
+	cvmx_write_csr(CVMX_MIO_UARTX_THR(octeon_uart), c & 0xffull);
+	return 1;
+}
+EXPORT_SYMBOL(prom_putchar);
+
+void prom_free_prom_memory(void)
+{
+	if (OCTEON_IS_MODEL(OCTEON_CN63XX_PASS1_X)) {
+		/* Check for presence of Core-14449 fix.  */
+		u32 insn;
+		u32 *foo;
+
+		foo = &insn;
+
+		asm volatile("# before" : : : "memory");
+		prefetch(foo);
+		asm volatile(
+			".set push\n\t"
+			".set noreorder\n\t"
+			"bal 1f\n\t"
+			"nop\n"
+			"1:\tlw %0,-12($31)\n\t"
+			".set pop\n\t"
+			: "=r" (insn) : : "$31", "memory");
+
+		if ((insn >> 26) != 0x33)
+			panic("No PREF instruction at Core-14449 probe point.\n");
+
+		if (((insn >> 16) & 0x1f) != 28)
+			panic("Core-14449 WAR not in place (%04x).\n"
+			      "Please build kernel with proper options (CONFIG_CAVIUM_CN63XXP1).\n", insn);
+	}
+#ifdef CONFIG_CAVIUM_DECODE_RSL
+	cvmx_interrupt_rsl_enable();
+
+	/* Add an interrupt handler for general failures. */
+	if (request_irq(OCTEON_IRQ_RML, octeon_rlm_interrupt, IRQF_SHARED,
+			"RML/RSL", octeon_rlm_interrupt)) {
+		panic("Unable to request_irq(OCTEON_IRQ_RML)\n");
+	}
+#endif
+}
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
new file mode 100644
index 00000000..8b606423
--- /dev/null
+++ b/arch/mips/cavium-octeon/smp.c
@@ -0,0 +1,424 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008, 2009, 2010 Cavium Networks
+ */
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+
+#include "octeon_boot.h"
+
+volatile unsigned long octeon_processor_boot = 0xff;
+volatile unsigned long octeon_processor_sp;
+volatile unsigned long octeon_processor_gp;
+
+#ifdef CONFIG_HOTPLUG_CPU
+uint64_t octeon_bootloader_entry_addr;
+EXPORT_SYMBOL(octeon_bootloader_entry_addr);
+#endif
+
+static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
+{
+	const int coreid = cvmx_get_core_num();
+	uint64_t action;
+
+	/* Load the mailbox register to figure out what we're supposed to do */
+	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid)) & 0xffff;
+
+	/* Clear the mailbox to clear the interrupt */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
+
+	if (action & SMP_CALL_FUNCTION)
+		smp_call_function_interrupt();
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
+
+	/* Check if we've been told to flush the icache */
+	if (action & SMP_ICACHE_FLUSH)
+		asm volatile ("synci 0($0)\n");
+	return IRQ_HANDLED;
+}
+
+/**
+ * Cause the function described by call_data to be executed on the passed
+ * cpu.  When the function has finished, increment the finished field of
+ * call_data.
+ */
+void octeon_send_ipi_single(int cpu, unsigned int action)
+{
+	int coreid = cpu_logical_map(cpu);
+	/*
+	pr_info("SMP: Mailbox send cpu=%d, coreid=%d, action=%u\n", cpu,
+	       coreid, action);
+	*/
+	cvmx_write_csr(CVMX_CIU_MBOX_SETX(coreid), action);
+}
+
+static inline void octeon_send_ipi_mask(const struct cpumask *mask,
+					unsigned int action)
+{
+	unsigned int i;
+
+	for_each_cpu_mask(i, *mask)
+		octeon_send_ipi_single(i, action);
+}
+
+/**
+ * Detect available CPUs, populate cpu_possible_map
+ */
+static void octeon_smp_hotplug_setup(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	struct linux_app_boot_info *labi;
+
+	labi = (struct linux_app_boot_info *)PHYS_TO_XKSEG_CACHED(LABI_ADDR_IN_BOOTLOADER);
+	if (labi->labi_signature != LABI_SIGNATURE)
+		panic("The bootloader version on this board is incorrect.");
+
+	octeon_bootloader_entry_addr = labi->InitTLBStart_addr;
+#endif
+}
+
+static void octeon_smp_setup(void)
+{
+	const int coreid = cvmx_get_core_num();
+	int cpus;
+	int id;
+	int core_mask = octeon_get_boot_coremask();
+#ifdef CONFIG_HOTPLUG_CPU
+	unsigned int num_cores = cvmx_octeon_num_cores();
+#endif
+
+	/* The present CPUs are initially just the boot cpu (CPU 0). */
+	for (id = 0; id < NR_CPUS; id++) {
+		set_cpu_possible(id, id == 0);
+		set_cpu_present(id, id == 0);
+	}
+
+	__cpu_number_map[coreid] = 0;
+	__cpu_logical_map[0] = coreid;
+
+	/* The present CPUs get the lowest CPU numbers. */
+	cpus = 1;
+	for (id = 0; id < NR_CPUS; id++) {
+		if ((id != coreid) && (core_mask & (1 << id))) {
+			set_cpu_possible(cpus, true);
+			set_cpu_present(cpus, true);
+			__cpu_number_map[id] = cpus;
+			__cpu_logical_map[cpus] = id;
+			cpus++;
+		}
+	}
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/*
+	 * The possible CPUs are all those present on the chip.  We
+	 * will assign CPU numbers for possible cores as well.  Cores
+	 * are always consecutively numberd from 0.
+	 */
+	for (id = 0; id < num_cores && id < NR_CPUS; id++) {
+		if (!(core_mask & (1 << id))) {
+			set_cpu_possible(cpus, true);
+			__cpu_number_map[id] = cpus;
+			__cpu_logical_map[cpus] = id;
+			cpus++;
+		}
+	}
+#endif
+
+	octeon_smp_hotplug_setup();
+}
+
+/**
+ * Firmware CPU startup hook
+ *
+ */
+static void octeon_boot_secondary(int cpu, struct task_struct *idle)
+{
+	int count;
+
+	pr_info("SMP: Booting CPU%02d (CoreId %2d)...\n", cpu,
+		cpu_logical_map(cpu));
+
+	octeon_processor_sp = __KSTK_TOS(idle);
+	octeon_processor_gp = (unsigned long)(task_thread_info(idle));
+	octeon_processor_boot = cpu_logical_map(cpu);
+	mb();
+
+	count = 10000;
+	while (octeon_processor_sp && count) {
+		/* Waiting for processor to get the SP and GP */
+		udelay(1);
+		count--;
+	}
+	if (count == 0)
+		pr_err("Secondary boot timeout\n");
+}
+
+/**
+ * After we've done initial boot, this function is called to allow the
+ * board code to clean up state, if needed
+ */
+static void __cpuinit octeon_init_secondary(void)
+{
+	unsigned int sr;
+
+	sr = set_c0_status(ST0_BEV);
+	write_c0_ebase((u32)ebase);
+	write_c0_status(sr);
+
+	octeon_check_cpu_bist();
+	octeon_init_cvmcount();
+
+	octeon_irq_setup_secondary();
+	raw_local_irq_enable();
+}
+
+/**
+ * Callout to firmware before smp_init
+ *
+ */
+void octeon_prepare_cpus(unsigned int max_cpus)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	struct linux_app_boot_info *labi;
+
+	labi = (struct linux_app_boot_info *)PHYS_TO_XKSEG_CACHED(LABI_ADDR_IN_BOOTLOADER);
+
+	if (labi->labi_signature != LABI_SIGNATURE)
+		panic("The bootloader version on this board is incorrect.");
+#endif
+	/*
+	 * Only the low order mailbox bits are used for IPIs, leave
+	 * the other bits alone.
+	 */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffff);
+	if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_DISABLED,
+			"SMP-IPI", mailbox_interrupt)) {
+		panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n");
+	}
+}
+
+/**
+ * Last chance for the board code to finish SMP initialization before
+ * the CPU is "online".
+ */
+static void octeon_smp_finish(void)
+{
+#ifdef CONFIG_CAVIUM_GDB
+	unsigned long tmp;
+	/* Pulse MCD0 signal on Ctrl-C to stop all the cores. Also set the MCD0
+	   to be not masked by this core so we know the signal is received by
+	   someone */
+	asm volatile ("dmfc0 %0, $22\n"
+		      "ori   %0, %0, 0x9100\n" "dmtc0 %0, $22\n" : "=r" (tmp));
+#endif
+
+	octeon_user_io_init();
+
+	/* to generate the first CPU timer interrupt */
+	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+}
+
+/**
+ * Hook for after all CPUs are online
+ */
+static void octeon_cpus_done(void)
+{
+#ifdef CONFIG_CAVIUM_GDB
+	unsigned long tmp;
+	/* Pulse MCD0 signal on Ctrl-C to stop all the cores. Also set the MCD0
+	   to be not masked by this core so we know the signal is received by
+	   someone */
+	asm volatile ("dmfc0 %0, $22\n"
+		      "ori   %0, %0, 0x9100\n" "dmtc0 %0, $22\n" : "=r" (tmp));
+#endif
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state);
+
+extern void fixup_irqs(void);
+
+static DEFINE_SPINLOCK(smp_reserve_lock);
+
+static int octeon_cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (cpu == 0)
+		return -EBUSY;
+
+	spin_lock(&smp_reserve_lock);
+
+	cpu_clear(cpu, cpu_online_map);
+	cpu_clear(cpu, cpu_callin_map);
+	local_irq_disable();
+	fixup_irqs();
+	local_irq_enable();
+
+	flush_cache_all();
+	local_flush_tlb_all();
+
+	spin_unlock(&smp_reserve_lock);
+
+	return 0;
+}
+
+static void octeon_cpu_die(unsigned int cpu)
+{
+	int coreid = cpu_logical_map(cpu);
+	uint32_t mask, new_mask;
+	const struct cvmx_bootmem_named_block_desc *block_desc;
+
+	while (per_cpu(cpu_state, cpu) != CPU_DEAD)
+		cpu_relax();
+
+	/*
+	 * This is a bit complicated strategics of getting/settig available
+	 * cores mask, copied from bootloader
+	 */
+
+	mask = 1 << coreid;
+	/* LINUX_APP_BOOT_BLOCK is initialized in bootoct binary */
+	block_desc = cvmx_bootmem_find_named_block(LINUX_APP_BOOT_BLOCK_NAME);
+
+	if (!block_desc) {
+		struct linux_app_boot_info *labi;
+
+		labi = (struct linux_app_boot_info *)PHYS_TO_XKSEG_CACHED(LABI_ADDR_IN_BOOTLOADER);
+
+		labi->avail_coremask |= mask;
+		new_mask = labi->avail_coremask;
+	} else {		       /* alternative, already initialized */
+		uint32_t *p = (uint32_t *)PHYS_TO_XKSEG_CACHED(block_desc->base_addr +
+							       AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK);
+		*p |= mask;
+		new_mask = *p;
+	}
+
+	pr_info("Reset core %d. Available Coremask = 0x%x \n", coreid, new_mask);
+	mb();
+	cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid);
+	cvmx_write_csr(CVMX_CIU_PP_RST, 0);
+}
+
+void play_dead(void)
+{
+	int cpu = cpu_number_map(cvmx_get_core_num());
+
+	idle_task_exit();
+	octeon_processor_boot = 0xff;
+	per_cpu(cpu_state, cpu) = CPU_DEAD;
+
+	mb();
+
+	while (1)	/* core will be reset here */
+		;
+}
+
+extern void kernel_entry(unsigned long arg1, ...);
+
+static void start_after_reset(void)
+{
+	kernel_entry(0, 0, 0);  /* set a2 = 0 for secondary core */
+}
+
+static int octeon_update_boot_vector(unsigned int cpu)
+{
+
+	int coreid = cpu_logical_map(cpu);
+	uint32_t avail_coremask;
+	const struct cvmx_bootmem_named_block_desc *block_desc;
+	struct boot_init_vector *boot_vect =
+		(struct boot_init_vector *)PHYS_TO_XKSEG_CACHED(BOOTLOADER_BOOT_VECTOR);
+
+	block_desc = cvmx_bootmem_find_named_block(LINUX_APP_BOOT_BLOCK_NAME);
+
+	if (!block_desc) {
+		struct linux_app_boot_info *labi;
+
+		labi = (struct linux_app_boot_info *)PHYS_TO_XKSEG_CACHED(LABI_ADDR_IN_BOOTLOADER);
+
+		avail_coremask = labi->avail_coremask;
+		labi->avail_coremask &= ~(1 << coreid);
+	} else {		       /* alternative, already initialized */
+		avail_coremask = *(uint32_t *)PHYS_TO_XKSEG_CACHED(
+			block_desc->base_addr + AVAIL_COREMASK_OFFSET_IN_LINUX_APP_BOOT_BLOCK);
+	}
+
+	if (!(avail_coremask & (1 << coreid))) {
+		/* core not available, assume, that catched by simple-executive */
+		cvmx_write_csr(CVMX_CIU_PP_RST, 1 << coreid);
+		cvmx_write_csr(CVMX_CIU_PP_RST, 0);
+	}
+
+	boot_vect[coreid].app_start_func_addr =
+		(uint32_t) (unsigned long) start_after_reset;
+	boot_vect[coreid].code_addr = octeon_bootloader_entry_addr;
+
+	mb();
+
+	cvmx_write_csr(CVMX_CIU_NMI, (1 << coreid) & avail_coremask);
+
+	return 0;
+}
+
+static int __cpuinit octeon_cpu_callback(struct notifier_block *nfb,
+	unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		octeon_update_boot_vector(cpu);
+		break;
+	case CPU_ONLINE:
+		pr_info("Cpu %d online\n", cpu);
+		break;
+	case CPU_DEAD:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int __cpuinit register_cavium_notifier(void)
+{
+	hotcpu_notifier(octeon_cpu_callback, 0);
+	return 0;
+}
+late_initcall(register_cavium_notifier);
+
+#endif  /* CONFIG_HOTPLUG_CPU */
+
+struct plat_smp_ops octeon_smp_ops = {
+	.send_ipi_single	= octeon_send_ipi_single,
+	.send_ipi_mask		= octeon_send_ipi_mask,
+	.init_secondary		= octeon_init_secondary,
+	.smp_finish		= octeon_smp_finish,
+	.cpus_done		= octeon_cpus_done,
+	.boot_secondary		= octeon_boot_secondary,
+	.smp_setup		= octeon_smp_setup,
+	.prepare_cpus		= octeon_prepare_cpus,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= octeon_cpu_disable,
+	.cpu_die		= octeon_cpu_die,
+#endif
+};
author	root <root@artemis.panaceas.org>	2015-12-25 04:40:36 +0000
committer	root <root@artemis.panaceas.org>	2015-12-25 04:40:36 +0000
commit	849369d6c66d3054688672f97d31fceb8e8230fb (patch)
tree	6135abc790ca67dedbe07c39806591e70eda81ce /arch/mips/cavium-octeon
download	linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.gz linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.tar.bz2 linux-3.0.35-kobo-849369d6c66d3054688672f97d31fceb8e8230fb.zip