author    Yangbo Lu <yangbo.lu@nxp.com>  2020-04-10 10:47:05 +0800
committer Petr Štetiar <ynezz@true.cz>  2020-05-07 12:53:06 +0200
commit    cddd4591404fb4c53dc0b3c0b15b942cdbed4356 (patch)
tree      392c1179de46b0f804e3789edca19069b64e6b44 /target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch
parent    d1d2c0b5579ea4f69a42246c9318539d61ba1999 (diff)
layerscape: add patches-5.4

Add patches for linux-5.4. The patches are from the NXP LSDK-20.04 release,
which was tagged LSDK-20.04-V5.4:
https://source.codeaurora.org/external/qoriq/qoriq-components/linux/

For the LS1021A-IOT and Traverse-LS1043 boards, which are not covered by
LSDK, port the dts patches from 4.14.

The patches are sorted into the following categories:

  301-arch-xxxx
  302-dts-xxxx
  303-core-xxxx
  701-net-xxxx
  801-audio-xxxx
  802-can-xxxx
  803-clock-xxxx
  804-crypto-xxxx
  805-display-xxxx
  806-dma-xxxx
  807-gpio-xxxx
  808-i2c-xxxx
  809-jailhouse-xxxx
  810-keys-xxxx
  811-kvm-xxxx
  812-pcie-xxxx
  813-pm-xxxx
  814-qe-xxxx
  815-sata-xxxx
  816-sdhc-xxxx
  817-spi-xxxx
  818-thermal-xxxx
  819-uart-xxxx
  820-usb-xxxx
  821-vfio-xxxx

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Diffstat (limited to 'target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch')
-rw-r--r--  target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch  649
1 file changed, 649 insertions(+), 0 deletions(-)
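Before reading the diff, it helps to have the ring bookkeeping it introduces in mind: ring-mode enqueue tracks a producer index (pi) and a consumer index (ci) that run over twice the ring size, so a full ring and an empty ring stay distinguishable, and the number of free EQCR slots is a cyclic difference between the two. The sketch below lifts the patch's qm_cyc_diff() helper into a standalone program; the main() driver and the sample index values are illustrative additions for this page, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Cyclic distance from 'first' (included) to 'last' (excluded) on a
 * ring whose indices wrap at 2 * ringsize, as in the patch below. */
static uint8_t qm_cyc_diff(uint8_t ringsize, uint8_t first, uint8_t last)
{
	if (first <= last)
		return last - first;
	else
		return (2 * ringsize) - (first - last);
}

int main(void)
{
	/* An 8-entry EQCR: indices run modulo 16, twice the ring size. */
	printf("%u\n", (unsigned)qm_cyc_diff(8, 3, 7));  /* no wrap: 4 slots */
	printf("%u\n", (unsigned)qm_cyc_diff(8, 14, 2)); /* wrap: 16 - 12 = 4 */
	printf("%u\n", (unsigned)qm_cyc_diff(8, 5, 13)); /* full ring apart: 8 */
	return 0;
}

In the patch, this distance refreshes s->eqcr.available from the cached consumer index, and the producer side toggles the valid bit (QB_VALID_BIT) each time the producer index wraps the ring.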
diff --git a/target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch b/target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch
new file mode 100644
index 0000000000..f287cab97f
--- /dev/null
+++ b/target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch
@@ -0,0 +1,649 @@
+From 0b8c6bbb0a561f15598f6701089a992bdea3963c Mon Sep 17 00:00:00 2001
+From: Youri Querry <youri.querry_1@nxp.com>
+Date: Mon, 4 Nov 2019 11:03:09 -0500
+Subject: [PATCH] soc: fsl: dpio: Replace QMAN array mode by ring mode enqueue.
+
+This change of algorithm will enable faster bulk enqueue.
+This will greatly benefit XDP bulk enqueue.
+
+Signed-off-by: Youri Querry <youri.querry_1@nxp.com>
+---
+ drivers/soc/fsl/dpio/qbman-portal.c | 420 +++++++++++++++++++++++++++---------
+ drivers/soc/fsl/dpio/qbman-portal.h | 13 ++
+ 2 files changed, 335 insertions(+), 98 deletions(-)
+
+--- a/drivers/soc/fsl/dpio/qbman-portal.c
++++ b/drivers/soc/fsl/dpio/qbman-portal.c
+@@ -8,6 +8,7 @@
+ #include <asm/cacheflush.h>
+ #include <linux/io.h>
+ #include <linux/slab.h>
++#include <linux/spinlock.h>
+ #include <soc/fsl/dpaa2-global.h>
+
+ #include "qbman-portal.h"
+@@ -22,6 +23,7 @@
+
+ /* CINH register offsets */
+ #define QBMAN_CINH_SWP_EQCR_PI 0x800
++#define QBMAN_CINH_SWP_EQCR_CI 0x840
+ #define QBMAN_CINH_SWP_EQAR 0x8c0
+ #define QBMAN_CINH_SWP_CR_RT 0x900
+ #define QBMAN_CINH_SWP_VDQCR_RT 0x940
+@@ -45,6 +47,8 @@
+ #define QBMAN_CENA_SWP_CR 0x600
+ #define QBMAN_CENA_SWP_RR(vb) (0x700 + ((u32)(vb) >> 1))
+ #define QBMAN_CENA_SWP_VDQCR 0x780
++#define QBMAN_CENA_SWP_EQCR_CI 0x840
++#define QBMAN_CENA_SWP_EQCR_CI_MEMBACK 0x1840
+
+ /* CENA register offsets in memory-backed mode */
+ #define QBMAN_CENA_SWP_DQRR_MEM(n) (0x800 + ((u32)(n) << 6))
+@@ -72,6 +76,12 @@
+ /* opaque token for static dequeues */
+ #define QMAN_SDQCR_TOKEN 0xbb
+
++#define QBMAN_EQCR_DCA_IDXMASK 0x0f
++#define QBMAN_ENQUEUE_FLAG_DCA (1ULL << 31)
++
++#define EQ_DESC_SIZE_WITHOUT_FD 29
++#define EQ_DESC_SIZE_FD_START 32
++
+ enum qbman_sdqcr_dct {
+ qbman_sdqcr_dct_null = 0,
+ qbman_sdqcr_dct_prio_ics,
+@@ -224,6 +234,15 @@ static inline u32 qbman_set_swp_cfg(u8 m
+
+ #define QMAN_RT_MODE 0x00000100
+
++static inline u8 qm_cyc_diff(u8 ringsize, u8 first, u8 last)
++{
++ /* 'first' is included, 'last' is excluded */
++ if (first <= last)
++ return last - first;
++ else
++ return (2 * ringsize) - (first - last);
++}
++
+ /**
+ * qbman_swp_init() - Create a functional object representing the given
+ * QBMan portal descriptor.
+@@ -236,6 +255,10 @@ struct qbman_swp *qbman_swp_init(const s
+ {
+ struct qbman_swp *p = kzalloc(sizeof(*p), GFP_KERNEL);
+ u32 reg;
++ u32 mask_size;
++ u32 eqcr_pi;
+
+ if (!p)
+ return NULL;
++
++ spin_lock_init(&p->access_spinlock);
+@@ -264,25 +287,38 @@ struct qbman_swp *qbman_swp_init(const s
+ p->addr_cena = d->cena_bar;
+ p->addr_cinh = d->cinh_bar;
+
+- if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+- memset(p->addr_cena, 0, 64 * 1024);
++ if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
+
+- reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
+- 0, /* Writes cacheable */
+- 0, /* EQCR_CI stashing threshold */
+- 3, /* RPM: Valid bit mode, RCR in array mode */
+- 2, /* DCM: Discrete consumption ack mode */
+- 3, /* EPM: Valid bit mode, EQCR in array mode */
+- 1, /* mem stashing drop enable == TRUE */
+- 1, /* mem stashing priority == TRUE */
+- 1, /* mem stashing enable == TRUE */
+- 1, /* dequeue stashing priority == TRUE */
+- 0, /* dequeue stashing enable == FALSE */
+- 0); /* EQCR_CI stashing priority == FALSE */
+- if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
++ reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
++ 0, /* Writes Non-cacheable */
++ 0, /* EQCR_CI stashing threshold */
++ 3, /* RPM: RCR in array mode */
++ 2, /* DCM: Discrete consumption ack */
++ 2, /* EPM: EQCR in ring mode */
++ 1, /* mem stashing drop enable */
++ 1, /* mem stashing priority enable */
++ 1, /* mem stashing enable */
++ 1, /* dequeue stashing priority enable */
++ 0, /* dequeue stashing enable */
++ 0); /* EQCR_CI stashing priority enable */
++ } else {
++ memset(p->addr_cena, 0, 64 * 1024);
++ reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
++ 0, /* Writes Non-cacheable */
++ 1, /* EQCR_CI stashing threshold */
++ 3, /* RPM: RCR in array mode */
++ 2, /* DCM: Discrete consumption ack */
++ 0, /* EPM: EQCR in ring mode */
++ 1, /* mem stashing drop enable */
++ 1, /* mem stashing priority enable */
++ 1, /* mem stashing enable */
++ 1, /* dequeue stashing priority enable */
++ 0, /* dequeue stashing enable */
++ 0); /* EQCR_CI stashing priority enable */
+ reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+ 1 << SWP_CFG_VPM_SHIFT | /* VDQCR read triggered mode */
+ 1 << SWP_CFG_CPM_SHIFT; /* CR read triggered mode */
++ }
+
+ qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg);
+ reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG);
+@@ -304,7 +340,9 @@ struct qbman_swp *qbman_swp_init(const s
+ */
+ qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0);
+
++ p->eqcr.pi_ring_size = 8;
+ if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
++ p->eqcr.pi_ring_size = 32;
+ qbman_swp_enqueue_ptr =
+ qbman_swp_enqueue_mem_back;
+ qbman_swp_enqueue_multiple_ptr =
+@@ -316,6 +354,15 @@ struct qbman_swp *qbman_swp_init(const s
+ qbman_swp_release_ptr = qbman_swp_release_mem_back;
+ }
+
++ for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
++ p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask << 1) + 1;
++ eqcr_pi = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_PI);
++ p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask;
++ p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT;
++ p->eqcr.ci = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_CI)
++ & p->eqcr.pi_ci_mask;
++ p->eqcr.available = p->eqcr.pi_ring_size;
++
+ return p;
+ }
+
+@@ -468,8 +515,9 @@ enum qb_enqueue_commands {
+ enqueue_rejects_to_fq = 2
+ };
+
+-#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2
+-#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4
++#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2
++#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4
++#define QB_ENQUEUE_CMD_DCA_EN_SHIFT 7
+
+ /**
+ * qbman_eq_desc_clear() - Clear the contents of a descriptor to
+@@ -582,6 +630,7 @@ static inline void qbman_write_eqcr_am_r
+ QMAN_RT_MODE);
+ }
+
++#define QB_RT_BIT ((u32)0x100)
+ /**
+ * qbman_swp_enqueue_direct() - Issue an enqueue command
+ * @s: the software portal used for enqueue
+@@ -593,35 +642,19 @@ static inline void qbman_write_eqcr_am_r
+ *
+ * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
+ */
+-int qbman_swp_enqueue_direct(struct qbman_swp *s, const struct qbman_eq_desc *d,
+- const struct dpaa2_fd *fd)
++static
++int qbman_swp_enqueue_direct(struct qbman_swp *s,
++ const struct qbman_eq_desc *d,
++ const struct dpaa2_fd *fd)
+ {
+- struct qbman_eq_desc_with_fd *p;
+- u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
+-
+- if (!EQAR_SUCCESS(eqar))
+- return -EBUSY;
++ u32 flags = 0;
++ int ret = qbman_swp_enqueue_multiple_direct(s, d, fd, &flags, 1);
+
+- p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+- /* This is mapped as DEVICE type memory, writes are
+- * with address alignment:
+- * desc.dca address alignment = 1
+- * desc.seqnum address alignment = 2
+- * desc.orpid address alignment = 4
+- * desc.tgtid address alignment = 8
+- */
+- p->desc.dca = d->dca;
+- p->desc.seqnum = d->seqnum;
+- p->desc.orpid = d->orpid;
+- memcpy(&p->desc.tgtid, &d->tgtid, 24);
+- memcpy(&p->fd, fd, sizeof(*fd));
+-
+- /* Set the verb byte, have to substitute in the valid-bit */
+- dma_wmb();
+- p->desc.verb = d->verb | EQAR_VB(eqar);
+- dccvac(p);
+-
+- return 0;
++ if (ret >= 0)
++ ret = 0;
++ else
++ ret = -EBUSY;
++ return ret;
+ }
+
+ /**
+@@ -635,35 +668,19 @@ int qbman_swp_enqueue_direct(struct qbma
+ *
+ * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready.
+ */
++static
+ int qbman_swp_enqueue_mem_back(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct dpaa2_fd *fd)
+ {
+- struct qbman_eq_desc_with_fd *p;
+- u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR);
+-
+- if (!EQAR_SUCCESS(eqar))
+- return -EBUSY;
+-
+- p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+- /* This is mapped as DEVICE type memory, writes are
+- * with address alignment:
+- * desc.dca address alignment = 1
+- * desc.seqnum address alignment = 2
+- * desc.orpid address alignment = 4
+- * desc.tgtid address alignment = 8
+- */
+- p->desc.dca = d->dca;
+- p->desc.seqnum = d->seqnum;
+- p->desc.orpid = d->orpid;
+- memcpy(&p->desc.tgtid, &d->tgtid, 24);
+- memcpy(&p->fd, fd, sizeof(*fd));
+-
+- p->desc.verb = d->verb | EQAR_VB(eqar);
+- dma_wmb();
+- qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar));
++ u32 flags = 0;
++ int ret = qbman_swp_enqueue_multiple_mem_back(s, d, fd, &flags, 1);
+
+- return 0;
++ if (ret >= 0)
++ ret = 0;
++ else
++ ret = -EBUSY;
++ return ret;
+ }
+
+ /**
+@@ -672,26 +689,84 @@ int qbman_swp_enqueue_mem_back(struct qb
+ * @s: the software portal used for enqueue
+ * @d: the enqueue descriptor
+ * @fd: table pointer of frame descriptor table to be enqueued
+- * @flags: table pointer of flags, not used for the moment
++ * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL
+ * @num_frames: number of fd to be enqueued
+ *
+ * Return the number of fd enqueued, or a negative error number.
+ */
++static
+ int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct dpaa2_fd *fd,
+ uint32_t *flags,
+ int num_frames)
+ {
+- int count = 0;
++ uint32_t *p = NULL;
++ const uint32_t *cl = (uint32_t *)d;
++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
++ int i, num_enqueued = 0;
++ uint64_t addr_cena;
++
++ spin_lock(&s->access_spinlock);
++ half_mask = (s->eqcr.pi_ci_mask>>1);
++ full_mask = s->eqcr.pi_ci_mask;
++
++ if (!s->eqcr.available) {
++ eqcr_ci = s->eqcr.ci;
++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
++ s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
++
++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
++ eqcr_ci, s->eqcr.ci);
++ if (!s->eqcr.available) {
++ spin_unlock(&s->access_spinlock);
++ return 0;
++ }
++ }
+
+- while (count < num_frames) {
+- if (qbman_swp_enqueue_direct(s, d, fd) != 0)
+- break;
+- count++;
++ eqcr_pi = s->eqcr.pi;
++ num_enqueued = (s->eqcr.available < num_frames) ?
++ s->eqcr.available : num_frames;
++ s->eqcr.available -= num_enqueued;
++ /* Fill in the EQCR ring */
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ /* Skip copying the verb */
++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
++ &fd[i], sizeof(*fd));
++ eqcr_pi++;
+ }
+
+- return count;
++ dma_wmb();
++
++ /* Set the verb byte, have to substitute in the valid-bit */
++ eqcr_pi = s->eqcr.pi;
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ p[0] = cl[0] | s->eqcr.pi_vb;
++ if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
++ struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
++
++ d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
++ ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
++ }
++ eqcr_pi++;
++ if (!(eqcr_pi & half_mask))
++ s->eqcr.pi_vb ^= QB_VALID_BIT;
++ }
++
++ /* Flush all the cacheline without load/store in between */
++ eqcr_pi = s->eqcr.pi;
++ addr_cena = (size_t)s->addr_cena;
++ for (i = 0; i < num_enqueued; i++) {
++ dccvac((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
++ eqcr_pi++;
++ }
++ s->eqcr.pi = eqcr_pi & full_mask;
++ spin_unlock(&s->access_spinlock);
++
++ return num_enqueued;
+ }
+
+ /**
+@@ -700,26 +775,80 @@ int qbman_swp_enqueue_multiple_direct(st
+ * @s: the software portal used for enqueue
+ * @d: the enqueue descriptor
+ * @fd: table pointer of frame descriptor table to be enqueued
+- * @flags: table pointer of flags, not used for the moment
++ * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL
+ * @num_frames: number of fd to be enqueued
+ *
+ * Return the number of fd enqueued, or a negative error number.
+ */
++static
+ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+- const struct qbman_eq_desc *d,
+- const struct dpaa2_fd *fd,
+- uint32_t *flags,
+- int num_frames)
+-{
+- int count = 0;
++ const struct qbman_eq_desc *d,
++ const struct dpaa2_fd *fd,
++ uint32_t *flags,
++ int num_frames)
++{
++ uint32_t *p = NULL;
++ const uint32_t *cl = (uint32_t *)(d);
++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
++ int i, num_enqueued = 0;
++ unsigned long irq_flags;
++
++ spin_lock(&s->access_spinlock);
++ local_irq_save(irq_flags);
++
++ half_mask = (s->eqcr.pi_ci_mask>>1);
++ full_mask = s->eqcr.pi_ci_mask;
++ if (!s->eqcr.available) {
++ eqcr_ci = s->eqcr.ci;
++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
++ s->eqcr.ci = __raw_readl(p) & full_mask;
++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
++ eqcr_ci, s->eqcr.ci);
++ if (!s->eqcr.available) {
++ local_irq_restore(irq_flags);
++ spin_unlock(&s->access_spinlock);
++ return 0;
++ }
++ }
++
++ eqcr_pi = s->eqcr.pi;
++ num_enqueued = (s->eqcr.available < num_frames) ?
++ s->eqcr.available : num_frames;
++ s->eqcr.available -= num_enqueued;
++ /* Fill in the EQCR ring */
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ /* Skip copying the verb */
++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
++ &fd[i], sizeof(*fd));
++ eqcr_pi++;
++ }
+
+- while (count < num_frames) {
+- if (qbman_swp_enqueue_mem_back(s, d, fd) != 0)
+- break;
+- count++;
++ /* Set the verb byte, have to substitute in the valid-bit */
++ eqcr_pi = s->eqcr.pi;
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ p[0] = cl[0] | s->eqcr.pi_vb;
++ if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
++ struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
++
++ d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
++ ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
++ }
++ eqcr_pi++;
++ if (!(eqcr_pi & half_mask))
++ s->eqcr.pi_vb ^= QB_VALID_BIT;
+ }
++ s->eqcr.pi = eqcr_pi & full_mask;
++
++ dma_wmb();
++ qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
++ (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
++ local_irq_restore(irq_flags);
++ spin_unlock(&s->access_spinlock);
+
+- return count;
++ return num_enqueued;
+ }
+
+ /**
+@@ -732,20 +861,69 @@ int qbman_swp_enqueue_multiple_mem_back(
+ *
+ * Return the number of fd enqueued, or a negative error number.
+ */
++static
+ int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct dpaa2_fd *fd,
+ int num_frames)
+ {
+- int count = 0;
++ uint32_t *p;
++ const uint32_t *cl;
++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
++ int i, num_enqueued = 0;
++ uint64_t addr_cena;
++
++ half_mask = (s->eqcr.pi_ci_mask>>1);
++ full_mask = s->eqcr.pi_ci_mask;
++ if (!s->eqcr.available) {
++ eqcr_ci = s->eqcr.ci;
++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
++ s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
++ eqcr_ci, s->eqcr.ci);
++ if (!s->eqcr.available)
++ return 0;
++ }
+
+- while (count < num_frames) {
+- if (qbman_swp_enqueue_direct(s, &(d[count]), fd) != 0)
+- break;
+- count++;
++ eqcr_pi = s->eqcr.pi;
++ num_enqueued = (s->eqcr.available < num_frames) ?
++ s->eqcr.available : num_frames;
++ s->eqcr.available -= num_enqueued;
++ /* Fill in the EQCR ring */
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ cl = (uint32_t *)(&d[i]);
++ /* Skip copying the verb */
++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
++ &fd[i], sizeof(*fd));
++ eqcr_pi++;
+ }
+
+- return count;
++ dma_wmb();
++
++ /* Set the verb byte, have to substitute in the valid-bit */
++ eqcr_pi = s->eqcr.pi;
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ cl = (uint32_t *)(&d[i]);
++ p[0] = cl[0] | s->eqcr.pi_vb;
++ eqcr_pi++;
++ if (!(eqcr_pi & half_mask))
++ s->eqcr.pi_vb ^= QB_VALID_BIT;
++ }
++
++ /* Flush all the cacheline without load/store in between */
++ eqcr_pi = s->eqcr.pi;
++ addr_cena = (uint64_t)s->addr_cena;
++ for (i = 0; i < num_enqueued; i++) {
++ dccvac((uint64_t *)(addr_cena +
++ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
++ eqcr_pi++;
++ }
++ s->eqcr.pi = eqcr_pi & full_mask;
++
++ return num_enqueued;
+ }
+
+ /**
+@@ -758,20 +936,62 @@ int qbman_swp_enqueue_multiple_desc_dire
+ *
+ * Return the number of fd enqueued, or a negative error number.
+ */
++static
+ int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
+ const struct qbman_eq_desc *d,
+ const struct dpaa2_fd *fd,
+ int num_frames)
+ {
+- int count = 0;
++ uint32_t *p;
++ const uint32_t *cl;
++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
++ int i, num_enqueued = 0;
++
++ half_mask = (s->eqcr.pi_ci_mask>>1);
++ full_mask = s->eqcr.pi_ci_mask;
++ if (!s->eqcr.available) {
++ eqcr_ci = s->eqcr.ci;
++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK;
++ s->eqcr.ci = __raw_readl(p) & full_mask;
++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
++ eqcr_ci, s->eqcr.ci);
++ if (!s->eqcr.available)
++ return 0;
++ }
+
+- while (count < num_frames) {
+- if (qbman_swp_enqueue_mem_back(s, &(d[count]), fd) != 0)
+- break;
+- count++;
++ eqcr_pi = s->eqcr.pi;
++ num_enqueued = (s->eqcr.available < num_frames) ?
++ s->eqcr.available : num_frames;
++ s->eqcr.available -= num_enqueued;
++ /* Fill in the EQCR ring */
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ cl = (uint32_t *)(&d[i]);
++ /* Skip copying the verb */
++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1);
++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)],
++ &fd[i], sizeof(*fd));
++ eqcr_pi++;
+ }
+
+- return count;
++ /* Set the verb byte, have to substitute in the valid-bit */
++ eqcr_pi = s->eqcr.pi;
++ for (i = 0; i < num_enqueued; i++) {
++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
++ cl = (uint32_t *)(&d[i]);
++ p[0] = cl[0] | s->eqcr.pi_vb;
++ eqcr_pi++;
++ if (!(eqcr_pi & half_mask))
++ s->eqcr.pi_vb ^= QB_VALID_BIT;
++ }
++
++ s->eqcr.pi = eqcr_pi & full_mask;
++
++ dma_wmb();
++ qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI,
++ (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
++
++ return num_enqueued;
+ }
+
+ /* Static (push) dequeue */
+@@ -937,6 +1157,7 @@ void qbman_pull_desc_set_channel(struct
+ * Return 0 for success, and -EBUSY if the software portal is not ready
+ * to do pull dequeue.
+ */
++static
+ int qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d)
+ {
+ struct qbman_pull_desc *p;
+@@ -973,6 +1194,7 @@ int qbman_swp_pull_direct(struct qbman_s
+ * Return 0 for success, and -EBUSY if the software portal is not ready
+ * to do pull dequeue.
+ */
++static
+ int qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d)
+ {
+ struct qbman_pull_desc *p;
+@@ -991,6 +1213,8 @@ int qbman_swp_pull_mem_back(struct qbman
+ p->dq_src = d->dq_src;
+ p->rsp_addr = d->rsp_addr;
+ p->rsp_addr_virt = d->rsp_addr_virt;
++
++ /* Set the verb byte, have to substitute in the valid-bit */
+ p->verb = d->verb | s->vdq.valid_bit;
+ s->vdq.valid_bit ^= QB_VALID_BIT;
+ dma_wmb();
+--- a/drivers/soc/fsl/dpio/qbman-portal.h
++++ b/drivers/soc/fsl/dpio/qbman-portal.h
+@@ -143,6 +143,19 @@ struct qbman_swp {
+ u8 dqrr_size;
+ int reset_bug; /* indicates dqrr reset workaround is needed */
+ } dqrr;
++
++ struct {
++ u32 pi;
++ u32 pi_vb;
++ u32 pi_ring_size;
++ u32 pi_ci_mask;
++ u32 ci;
++ int available;
++ u32 pend;
++ u32 no_pfdr;
++ } eqcr;
++
++ spinlock_t access_spinlock;
+ };
+
+ /* Function pointers */
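For orientation, here is a hypothetical caller-side sketch (not part of the patch) of why the multiple-enqueue entry points return a count rather than succeed-or-fail: with ring-mode EQCR management the portal may accept only part of a batch, so a caller resumes from the first unaccepted frame. It assumes the dpio portal header and the qbman_swp_enqueue_multiple() wrapper that dispatches to the _direct/_mem_back variants above; the enqueue_batch() name and the retry policy are illustrative.

#include "qbman-portal.h"	/* struct qbman_swp, qbman_eq_desc, dpaa2_fd */

/* Hypothetical helper: enqueue 'num' frames against one descriptor,
 * resuming from the first frame the portal did not accept. Returns
 * the number of frames enqueued, or a negative error code. */
static int enqueue_batch(struct qbman_swp *swp,
			 const struct qbman_eq_desc *desc,
			 const struct dpaa2_fd *fds, int num)
{
	int done = 0;

	while (done < num) {
		/* NULL flags: no per-frame DCA requested */
		int ret = qbman_swp_enqueue_multiple(swp, desc, &fds[done],
						     NULL, num - done);
		if (ret < 0)
			return ret;	/* hard error from the portal */
		if (ret == 0)
			break;		/* EQCR full: report partial progress */
		done += ret;
	}
	return done;
}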