diff options
author | Yangbo Lu <yangbo.lu@nxp.com> | 2020-04-10 10:47:05 +0800 |
---|---|---|
committer | Petr Štetiar <ynezz@true.cz> | 2020-05-07 12:53:06 +0200 |
commit | cddd4591404fb4c53dc0b3c0b15b942cdbed4356 (patch) | |
tree | 392c1179de46b0f804e3789edca19069b64e6b44 /target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch | |
parent | d1d2c0b5579ea4f69a42246c9318539d61ba1999 (diff) | |
download | upstream-cddd4591404fb4c53dc0b3c0b15b942cdbed4356.tar.gz upstream-cddd4591404fb4c53dc0b3c0b15b942cdbed4356.tar.bz2 upstream-cddd4591404fb4c53dc0b3c0b15b942cdbed4356.zip |
layerscape: add patches-5.4
Add patches for linux-5.4. The patches are from the NXP LSDK-20.04 release,
which was tagged LSDK-20.04-V5.4.
https://source.codeaurora.org/external/qoriq/qoriq-components/linux/
For the LS1021A-IOT and Traverse-LS1043 boards, which are not covered by
LSDK, port the dts patches from 4.14.
The patches are sorted into the following categories:
301-arch-xxxx
302-dts-xxxx
303-core-xxxx
701-net-xxxx
801-audio-xxxx
802-can-xxxx
803-clock-xxxx
804-crypto-xxxx
805-display-xxxx
806-dma-xxxx
807-gpio-xxxx
808-i2c-xxxx
809-jailhouse-xxxx
810-keys-xxxx
811-kvm-xxxx
812-pcie-xxxx
813-pm-xxxx
814-qe-xxxx
815-sata-xxxx
816-sdhc-xxxx
817-spi-xxxx
818-thermal-xxxx
819-uart-xxxx
820-usb-xxxx
821-vfio-xxxx
Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Diffstat (limited to 'target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch')
-rw-r--r-- | target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch | 649 |
1 files changed, 649 insertions, 0 deletions
diff --git a/target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch b/target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch new file mode 100644 index 0000000000..f287cab97f --- /dev/null +++ b/target/linux/layerscape/patches-5.4/701-net-0223-soc-fsl-dpio-Replace-QMAN-array-mode-by-ring-mode-en.patch @@ -0,0 +1,649 @@ +From 0b8c6bbb0a561f15598f6701089a992bdea3963c Mon Sep 17 00:00:00 2001 +From: Youri Querry <youri.querry_1@nxp.com> +Date: Mon, 4 Nov 2019 11:03:09 -0500 +Subject: [PATCH] soc: fsl: dpio: Replace QMAN array mode by ring mode enqueue. + +This change of algorithm will enable faster bulk enqueue. +This will greatly benefit XDP bulk enqueue. + +Signed-off-by: Youri Querry <youri.querry_1@nxp.com> +--- + drivers/soc/fsl/dpio/qbman-portal.c | 420 +++++++++++++++++++++++++++--------- + drivers/soc/fsl/dpio/qbman-portal.h | 13 ++ + 2 files changed, 335 insertions(+), 98 deletions(-) + +--- a/drivers/soc/fsl/dpio/qbman-portal.c ++++ b/drivers/soc/fsl/dpio/qbman-portal.c +@@ -8,6 +8,7 @@ + #include <asm/cacheflush.h> + #include <linux/io.h> + #include <linux/slab.h> ++#include <linux/spinlock.h> + #include <soc/fsl/dpaa2-global.h> + + #include "qbman-portal.h" +@@ -22,6 +23,7 @@ + + /* CINH register offsets */ + #define QBMAN_CINH_SWP_EQCR_PI 0x800 ++#define QBMAN_CINH_SWP_EQCR_CI 0x840 + #define QBMAN_CINH_SWP_EQAR 0x8c0 + #define QBMAN_CINH_SWP_CR_RT 0x900 + #define QBMAN_CINH_SWP_VDQCR_RT 0x940 +@@ -45,6 +47,8 @@ + #define QBMAN_CENA_SWP_CR 0x600 + #define QBMAN_CENA_SWP_RR(vb) (0x700 + ((u32)(vb) >> 1)) + #define QBMAN_CENA_SWP_VDQCR 0x780 ++#define QBMAN_CENA_SWP_EQCR_CI 0x840 ++#define QBMAN_CENA_SWP_EQCR_CI_MEMBACK 0x1840 + + /* CENA register offsets in memory-backed mode */ + #define QBMAN_CENA_SWP_DQRR_MEM(n) (0x800 + ((u32)(n) << 6)) +@@ -72,6 +76,12 @@ + /* opaque token for static dequeues */ + #define QMAN_SDQCR_TOKEN 0xbb + ++#define 
QBMAN_EQCR_DCA_IDXMASK 0x0f ++#define QBMAN_ENQUEUE_FLAG_DCA (1ULL << 31) ++ ++#define EQ_DESC_SIZE_WITHOUT_FD 29 ++#define EQ_DESC_SIZE_FD_START 32 ++ + enum qbman_sdqcr_dct { + qbman_sdqcr_dct_null = 0, + qbman_sdqcr_dct_prio_ics, +@@ -224,6 +234,15 @@ static inline u32 qbman_set_swp_cfg(u8 m + + #define QMAN_RT_MODE 0x00000100 + ++static inline u8 qm_cyc_diff(u8 ringsize, u8 first, u8 last) ++{ ++ /* 'first' is included, 'last' is excluded */ ++ if (first <= last) ++ return last - first; ++ else ++ return (2 * ringsize) - (first - last); ++} ++ + /** + * qbman_swp_init() - Create a functional object representing the given + * QBMan portal descriptor. +@@ -236,6 +255,10 @@ struct qbman_swp *qbman_swp_init(const s + { + struct qbman_swp *p = kzalloc(sizeof(*p), GFP_KERNEL); + u32 reg; ++ u32 mask_size; ++ u32 eqcr_pi; ++ ++ spin_lock_init(&p->access_spinlock); + + if (!p) + return NULL; +@@ -264,25 +287,38 @@ struct qbman_swp *qbman_swp_init(const s + p->addr_cena = d->cena_bar; + p->addr_cinh = d->cinh_bar; + +- if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) +- memset(p->addr_cena, 0, 64 * 1024); ++ if ((p->desc->qman_version & QMAN_REV_MASK) < QMAN_REV_5000) { + +- reg = qbman_set_swp_cfg(p->dqrr.dqrr_size, +- 0, /* Writes cacheable */ +- 0, /* EQCR_CI stashing threshold */ +- 3, /* RPM: Valid bit mode, RCR in array mode */ +- 2, /* DCM: Discrete consumption ack mode */ +- 3, /* EPM: Valid bit mode, EQCR in array mode */ +- 1, /* mem stashing drop enable == TRUE */ +- 1, /* mem stashing priority == TRUE */ +- 1, /* mem stashing enable == TRUE */ +- 1, /* dequeue stashing priority == TRUE */ +- 0, /* dequeue stashing enable == FALSE */ +- 0); /* EQCR_CI stashing priority == FALSE */ +- if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) ++ reg = qbman_set_swp_cfg(p->dqrr.dqrr_size, ++ 0, /* Writes Non-cacheable */ ++ 0, /* EQCR_CI stashing threshold */ ++ 3, /* RPM: RCR in array mode */ ++ 2, /* DCM: Discrete consumption ack */ ++ 2, /* 
EPM: EQCR in ring mode */ ++ 1, /* mem stashing drop enable enable */ ++ 1, /* mem stashing priority enable */ ++ 1, /* mem stashing enable */ ++ 1, /* dequeue stashing priority enable */ ++ 0, /* dequeue stashing enable enable */ ++ 0); /* EQCR_CI stashing priority enable */ ++ } else { ++ memset(p->addr_cena, 0, 64 * 1024); ++ reg = qbman_set_swp_cfg(p->dqrr.dqrr_size, ++ 0, /* Writes Non-cacheable */ ++ 1, /* EQCR_CI stashing threshold */ ++ 3, /* RPM: RCR in array mode */ ++ 2, /* DCM: Discrete consumption ack */ ++ 0, /* EPM: EQCR in ring mode */ ++ 1, /* mem stashing drop enable */ ++ 1, /* mem stashing priority enable */ ++ 1, /* mem stashing enable */ ++ 1, /* dequeue stashing priority enable */ ++ 0, /* dequeue stashing enable */ ++ 0); /* EQCR_CI stashing priority enable */ + reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */ + 1 << SWP_CFG_VPM_SHIFT | /* VDQCR read triggered mode */ + 1 << SWP_CFG_CPM_SHIFT; /* CR read triggered mode */ ++ } + + qbman_write_register(p, QBMAN_CINH_SWP_CFG, reg); + reg = qbman_read_register(p, QBMAN_CINH_SWP_CFG); +@@ -304,7 +340,9 @@ struct qbman_swp *qbman_swp_init(const s + */ + qbman_write_register(p, QBMAN_CINH_SWP_SDQCR, 0); + ++ p->eqcr.pi_ring_size = 8; + if ((p->desc->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) { ++ p->eqcr.pi_ring_size = 32; + qbman_swp_enqueue_ptr = + qbman_swp_enqueue_mem_back; + qbman_swp_enqueue_multiple_ptr = +@@ -316,6 +354,15 @@ struct qbman_swp *qbman_swp_init(const s + qbman_swp_release_ptr = qbman_swp_release_mem_back; + } + ++ for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1) ++ p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask << 1) + 1; ++ eqcr_pi = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_PI); ++ p->eqcr.pi = eqcr_pi & p->eqcr.pi_ci_mask; ++ p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT; ++ p->eqcr.ci = qbman_read_register(p, QBMAN_CINH_SWP_EQCR_CI) ++ & p->eqcr.pi_ci_mask; ++ p->eqcr.available = p->eqcr.pi_ring_size; ++ + return p; + } + +@@ -468,8 +515,9 @@ enum 
qb_enqueue_commands { + enqueue_rejects_to_fq = 2 + }; + +-#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2 +-#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4 ++#define QB_ENQUEUE_CMD_ORP_ENABLE_SHIFT 2 ++#define QB_ENQUEUE_CMD_TARGET_TYPE_SHIFT 4 ++#define QB_ENQUEUE_CMD_DCA_EN_SHIFT 7 + + /** + * qbman_eq_desc_clear() - Clear the contents of a descriptor to +@@ -582,6 +630,7 @@ static inline void qbman_write_eqcr_am_r + QMAN_RT_MODE); + } + ++#define QB_RT_BIT ((u32)0x100) + /** + * qbman_swp_enqueue_direct() - Issue an enqueue command + * @s: the software portal used for enqueue +@@ -593,35 +642,19 @@ static inline void qbman_write_eqcr_am_r + * + * Return 0 for successful enqueue, -EBUSY if the EQCR is not ready. + */ +-int qbman_swp_enqueue_direct(struct qbman_swp *s, const struct qbman_eq_desc *d, +- const struct dpaa2_fd *fd) ++static ++int qbman_swp_enqueue_direct(struct qbman_swp *s, ++ const struct qbman_eq_desc *d, ++ const struct dpaa2_fd *fd) + { +- struct qbman_eq_desc_with_fd *p; +- u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR); +- +- if (!EQAR_SUCCESS(eqar)) +- return -EBUSY; ++ int flags = 0; ++ int ret = qbman_swp_enqueue_multiple_direct(s, d, fd, &flags, 1); + +- p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); +- /* This is mapped as DEVICE type memory, writes are +- * with address alignment: +- * desc.dca address alignment = 1 +- * desc.seqnum address alignment = 2 +- * desc.orpid address alignment = 4 +- * desc.tgtid address alignment = 8 +- */ +- p->desc.dca = d->dca; +- p->desc.seqnum = d->seqnum; +- p->desc.orpid = d->orpid; +- memcpy(&p->desc.tgtid, &d->tgtid, 24); +- memcpy(&p->fd, fd, sizeof(*fd)); +- +- /* Set the verb byte, have to substitute in the valid-bit */ +- dma_wmb(); +- p->desc.verb = d->verb | EQAR_VB(eqar); +- dccvac(p); +- +- return 0; ++ if (ret >= 0) ++ ret = 0; ++ else ++ ret = -EBUSY; ++ return ret; + } + + /** +@@ -635,35 +668,19 @@ int qbman_swp_enqueue_direct(struct qbma + * + * Return 0 for successful enqueue, 
-EBUSY if the EQCR is not ready. + */ ++static + int qbman_swp_enqueue_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd) + { +- struct qbman_eq_desc_with_fd *p; +- u32 eqar = qbman_read_register(s, QBMAN_CINH_SWP_EQAR); +- +- if (!EQAR_SUCCESS(eqar)) +- return -EBUSY; +- +- p = qbman_get_cmd(s, QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar))); +- /* This is mapped as DEVICE type memory, writes are +- * with address alignment: +- * desc.dca address alignment = 1 +- * desc.seqnum address alignment = 2 +- * desc.orpid address alignment = 4 +- * desc.tgtid address alignment = 8 +- */ +- p->desc.dca = d->dca; +- p->desc.seqnum = d->seqnum; +- p->desc.orpid = d->orpid; +- memcpy(&p->desc.tgtid, &d->tgtid, 24); +- memcpy(&p->fd, fd, sizeof(*fd)); +- +- p->desc.verb = d->verb | EQAR_VB(eqar); +- dma_wmb(); +- qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar)); ++ int flags = 0; ++ int ret = qbman_swp_enqueue_multiple_mem_back(s, d, fd, &flags, 1); + +- return 0; ++ if (ret >= 0) ++ ret = 0; ++ else ++ ret = -EBUSY; ++ return ret; + } + + /** +@@ -672,26 +689,84 @@ int qbman_swp_enqueue_mem_back(struct qb + * @s: the software portal used for enqueue + * @d: the enqueue descriptor + * @fd: table pointer of frame descriptor table to be enqueued +- * @flags: table pointer of flags, not used for the moment ++ * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL + * @num_frames: number of fd to be enqueued + * + * Return the number of fd enqueued, or a negative error number. 
+ */ ++static + int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + uint32_t *flags, + int num_frames) + { +- int count = 0; ++ uint32_t *p = NULL; ++ const uint32_t *cl = (uint32_t *)d; ++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; ++ int i, num_enqueued = 0; ++ uint64_t addr_cena; ++ ++ spin_lock(&s->access_spinlock); ++ half_mask = (s->eqcr.pi_ci_mask>>1); ++ full_mask = s->eqcr.pi_ci_mask; ++ ++ if (!s->eqcr.available) { ++ eqcr_ci = s->eqcr.ci; ++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI; ++ s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); ++ ++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, ++ eqcr_ci, s->eqcr.ci); ++ if (!s->eqcr.available) { ++ spin_unlock(&s->access_spinlock); ++ return 0; ++ } ++ } + +- while (count < num_frames) { +- if (qbman_swp_enqueue_direct(s, d, fd) != 0) +- break; +- count++; ++ eqcr_pi = s->eqcr.pi; ++ num_enqueued = (s->eqcr.available < num_frames) ? ++ s->eqcr.available : num_frames; ++ s->eqcr.available -= num_enqueued; ++ /* Fill in the EQCR ring */ ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ /* Skip copying the verb */ ++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); ++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], ++ &fd[i], sizeof(*fd)); ++ eqcr_pi++; + } + +- return count; ++ dma_wmb(); ++ ++ /* Set the verb byte, have to substitute in the valid-bit */ ++ eqcr_pi = s->eqcr.pi; ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ p[0] = cl[0] | s->eqcr.pi_vb; ++ if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { ++ struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; ++ ++ d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | ++ ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); ++ } ++ eqcr_pi++; ++ if (!(eqcr_pi & half_mask)) ++ s->eqcr.pi_vb ^= QB_VALID_BIT; ++ } ++ ++ /* Flush all the cacheline without load/store 
in between */ ++ eqcr_pi = s->eqcr.pi; ++ addr_cena = (size_t)s->addr_cena; ++ for (i = 0; i < num_enqueued; i++) { ++ dccvac((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask))); ++ eqcr_pi++; ++ } ++ s->eqcr.pi = eqcr_pi & full_mask; ++ spin_unlock(&s->access_spinlock); ++ ++ return num_enqueued; + } + + /** +@@ -700,26 +775,80 @@ int qbman_swp_enqueue_multiple_direct(st + * @s: the software portal used for enqueue + * @d: the enqueue descriptor + * @fd: table pointer of frame descriptor table to be enqueued +- * @flags: table pointer of flags, not used for the moment ++ * @flags: table pointer of QBMAN_ENQUEUE_FLAG_DCA flags, not used if NULL + * @num_frames: number of fd to be enqueued + * + * Return the number of fd enqueued, or a negative error number. + */ ++static + int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, +- const struct qbman_eq_desc *d, +- const struct dpaa2_fd *fd, +- uint32_t *flags, +- int num_frames) +-{ +- int count = 0; ++ const struct qbman_eq_desc *d, ++ const struct dpaa2_fd *fd, ++ uint32_t *flags, ++ int num_frames) ++{ ++ uint32_t *p = NULL; ++ const uint32_t *cl = (uint32_t *)(d); ++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; ++ int i, num_enqueued = 0; ++ unsigned long irq_flags; ++ ++ spin_lock(&s->access_spinlock); ++ local_irq_save(irq_flags); ++ ++ half_mask = (s->eqcr.pi_ci_mask>>1); ++ full_mask = s->eqcr.pi_ci_mask; ++ if (!s->eqcr.available) { ++ eqcr_ci = s->eqcr.ci; ++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; ++ s->eqcr.ci = __raw_readl(p) & full_mask; ++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, ++ eqcr_ci, s->eqcr.ci); ++ if (!s->eqcr.available) { ++ local_irq_restore(irq_flags); ++ spin_unlock(&s->access_spinlock); ++ return 0; ++ } ++ } ++ ++ eqcr_pi = s->eqcr.pi; ++ num_enqueued = (s->eqcr.available < num_frames) ? 
++ s->eqcr.available : num_frames; ++ s->eqcr.available -= num_enqueued; ++ /* Fill in the EQCR ring */ ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ /* Skip copying the verb */ ++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); ++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], ++ &fd[i], sizeof(*fd)); ++ eqcr_pi++; ++ } + +- while (count < num_frames) { +- if (qbman_swp_enqueue_mem_back(s, d, fd) != 0) +- break; +- count++; ++ /* Set the verb byte, have to substitute in the valid-bit */ ++ eqcr_pi = s->eqcr.pi; ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ p[0] = cl[0] | s->eqcr.pi_vb; ++ if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) { ++ struct qbman_eq_desc *d = (struct qbman_eq_desc *)p; ++ ++ d->dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) | ++ ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK); ++ } ++ eqcr_pi++; ++ if (!(eqcr_pi & half_mask)) ++ s->eqcr.pi_vb ^= QB_VALID_BIT; + } ++ s->eqcr.pi = eqcr_pi & full_mask; ++ ++ dma_wmb(); ++ qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI, ++ (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); ++ local_irq_restore(irq_flags); ++ spin_unlock(&s->access_spinlock); + +- return count; ++ return num_enqueued; + } + + /** +@@ -732,20 +861,69 @@ int qbman_swp_enqueue_multiple_mem_back( + * + * Return the number of fd enqueued, or a negative error number. 
+ */ ++static + int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames) + { +- int count = 0; ++ uint32_t *p; ++ const uint32_t *cl; ++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; ++ int i, num_enqueued = 0; ++ uint64_t addr_cena; ++ ++ half_mask = (s->eqcr.pi_ci_mask>>1); ++ full_mask = s->eqcr.pi_ci_mask; ++ if (!s->eqcr.available) { ++ eqcr_ci = s->eqcr.ci; ++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI; ++ s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI); ++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, ++ eqcr_ci, s->eqcr.ci); ++ if (!s->eqcr.available) ++ return 0; ++ } + +- while (count < num_frames) { +- if (qbman_swp_enqueue_direct(s, &(d[count]), fd) != 0) +- break; +- count++; ++ eqcr_pi = s->eqcr.pi; ++ num_enqueued = (s->eqcr.available < num_frames) ? ++ s->eqcr.available : num_frames; ++ s->eqcr.available -= num_enqueued; ++ /* Fill in the EQCR ring */ ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ cl = (uint32_t *)(&d[i]); ++ /* Skip copying the verb */ ++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); ++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], ++ &fd[i], sizeof(*fd)); ++ eqcr_pi++; + } + +- return count; ++ dma_wmb(); ++ ++ /* Set the verb byte, have to substitute in the valid-bit */ ++ eqcr_pi = s->eqcr.pi; ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ cl = (uint32_t *)(&d[i]); ++ p[0] = cl[0] | s->eqcr.pi_vb; ++ eqcr_pi++; ++ if (!(eqcr_pi & half_mask)) ++ s->eqcr.pi_vb ^= QB_VALID_BIT; ++ } ++ ++ /* Flush all the cacheline without load/store in between */ ++ eqcr_pi = s->eqcr.pi; ++ addr_cena = (uint64_t)s->addr_cena; ++ for (i = 0; i < num_enqueued; i++) { ++ dccvac((uint64_t *)(addr_cena + ++ QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask))); ++ eqcr_pi++; ++ } ++ s->eqcr.pi = eqcr_pi & full_mask; ++ ++ 
return num_enqueued; + } + + /** +@@ -758,20 +936,62 @@ int qbman_swp_enqueue_multiple_desc_dire + * + * Return the number of fd enqueued, or a negative error number. + */ ++static + int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s, + const struct qbman_eq_desc *d, + const struct dpaa2_fd *fd, + int num_frames) + { +- int count = 0; ++ uint32_t *p; ++ const uint32_t *cl; ++ uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask; ++ int i, num_enqueued = 0; ++ ++ half_mask = (s->eqcr.pi_ci_mask>>1); ++ full_mask = s->eqcr.pi_ci_mask; ++ if (!s->eqcr.available) { ++ eqcr_ci = s->eqcr.ci; ++ p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI_MEMBACK; ++ s->eqcr.ci = __raw_readl(p) & full_mask; ++ s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, ++ eqcr_ci, s->eqcr.ci); ++ if (!s->eqcr.available) ++ return 0; ++ } + +- while (count < num_frames) { +- if (qbman_swp_enqueue_mem_back(s, &(d[count]), fd) != 0) +- break; +- count++; ++ eqcr_pi = s->eqcr.pi; ++ num_enqueued = (s->eqcr.available < num_frames) ? 
++ s->eqcr.available : num_frames; ++ s->eqcr.available -= num_enqueued; ++ /* Fill in the EQCR ring */ ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ cl = (uint32_t *)(&d[i]); ++ /* Skip copying the verb */ ++ memcpy(&p[1], &cl[1], EQ_DESC_SIZE_WITHOUT_FD - 1); ++ memcpy(&p[EQ_DESC_SIZE_FD_START/sizeof(uint32_t)], ++ &fd[i], sizeof(*fd)); ++ eqcr_pi++; + } + +- return count; ++ /* Set the verb byte, have to substitute in the valid-bit */ ++ eqcr_pi = s->eqcr.pi; ++ for (i = 0; i < num_enqueued; i++) { ++ p = (s->addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)); ++ cl = (uint32_t *)(&d[i]); ++ p[0] = cl[0] | s->eqcr.pi_vb; ++ eqcr_pi++; ++ if (!(eqcr_pi & half_mask)) ++ s->eqcr.pi_vb ^= QB_VALID_BIT; ++ } ++ ++ s->eqcr.pi = eqcr_pi & full_mask; ++ ++ dma_wmb(); ++ qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI, ++ (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); ++ ++ return num_enqueued; + } + + /* Static (push) dequeue */ +@@ -937,6 +1157,7 @@ void qbman_pull_desc_set_channel(struct + * Return 0 for success, and -EBUSY if the software portal is not ready + * to do pull dequeue. + */ ++static + int qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d) + { + struct qbman_pull_desc *p; +@@ -973,6 +1194,7 @@ int qbman_swp_pull_direct(struct qbman_s + * Return 0 for success, and -EBUSY if the software portal is not ready + * to do pull dequeue. 
+ */ ++static + int qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d) + { + struct qbman_pull_desc *p; +@@ -991,6 +1213,8 @@ int qbman_swp_pull_mem_back(struct qbman + p->dq_src = d->dq_src; + p->rsp_addr = d->rsp_addr; + p->rsp_addr_virt = d->rsp_addr_virt; ++ ++ /* Set the verb byte, have to substitute in the valid-bit */ + p->verb = d->verb | s->vdq.valid_bit; + s->vdq.valid_bit ^= QB_VALID_BIT; + dma_wmb(); +--- a/drivers/soc/fsl/dpio/qbman-portal.h ++++ b/drivers/soc/fsl/dpio/qbman-portal.h +@@ -143,6 +143,19 @@ struct qbman_swp { + u8 dqrr_size; + int reset_bug; /* indicates dqrr reset workaround is needed */ + } dqrr; ++ ++ struct { ++ u32 pi; ++ u32 pi_vb; ++ u32 pi_ring_size; ++ u32 pi_ci_mask; ++ u32 ci; ++ int available; ++ u32 pend; ++ u32 no_pfdr; ++ } eqcr; ++ ++ spinlock_t access_spinlock; + }; + + /* Function pointers */ |