From b5d425af237dc03327078d6b9be178a38b5f8723 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 27 Aug 2020 06:39:48 +0200 Subject: mediatek/ramips: unify ethernet driver fixes and add performance optimizations Increase DMA burst size and tx ring size and optimize tx processing Signed-off-by: Felix Fietkau --- ...ethernet-mtk_eth_soc-use-napi_consume_skb.patch | 72 ++++++ ...t-mtk_eth_soc-significantly-reduce-mdio-b.patch | 26 ++ ...-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch | 31 +++ ...t-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch | 50 ++++ ...t-mtk_eth_soc-use-larger-burst-size-for-q.patch | 32 +++ ...ernet-mtk_eth_soc-increase-DMA-ring-sizes.patch | 21 ++ ...t-mtk_eth_soc-implement-dynamic-interrupt.patch | 281 +++++++++++++++++++++ ...t-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch | 34 +++ ...t-mtk_eth_soc-cache-hardware-pointer-of-l.patch | 67 +++++ ...t-mtk_eth_soc-only-read-the-full-rx-descr.patch | 44 ++++ ...t-mtk_eth_soc-unmap-rx-data-before-callin.patch | 44 ++++ 11 files changed, 702 insertions(+) create mode 100644 target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch create mode 100644 target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch create mode 100644 target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch create mode 100644 target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch create mode 100644 target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch create mode 100644 target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch create mode 100644 target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch create mode 100644 target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch create mode 100644 target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch create mode 100644 target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch create mode 100644 target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch (limited to 'target/linux/generic/pending-5.4') diff --git a/target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch b/target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch new file mode 100644 index 0000000000..9909c4c963 --- /dev/null +++ b/target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch @@ -0,0 +1,72 @@ +From: Felix Fietkau +Date: Mon, 8 Jun 2020 17:01:12 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: use napi_consume_skb + +Should improve performance, since it can use bulk free + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -853,7 +853,8 @@ static int txd_to_idx(struct mtk_tx_ring + return ((void *)dma - (void *)ring->dma) / sizeof(*dma); + } + +-static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf) ++static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf, ++ bool napi) + { + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { + if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { +@@ -885,8 +886,12 @@ static void mtk_tx_unmap(struct mtk_eth + + tx_buf->flags = 0; + if (tx_buf->skb && +- (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) +- dev_kfree_skb_any(tx_buf->skb); ++ (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) { ++ if (napi) ++ napi_consume_skb(tx_buf->skb, napi); ++ else ++ dev_kfree_skb_any(tx_buf->skb); ++ } + tx_buf->skb = NULL; + } + +@@ -1064,7 +1069,7 @@ err_dma: + tx_buf = mtk_desc_to_tx_buf(ring, itxd); + + /* unmap dma */ +- mtk_tx_unmap(eth, tx_buf); ++ mtk_tx_unmap(eth, tx_buf, false); + + itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) +@@ -1382,7 +1387,7 @@ static int mtk_poll_tx_qdma(struct mtk_e + done[mac]++; + budget--; + } +- mtk_tx_unmap(eth, tx_buf); ++ mtk_tx_unmap(eth, tx_buf, true); + + ring->last_free = desc; + atomic_inc(&ring->free_count); +@@ -1419,7 +1424,7 @@ static int mtk_poll_tx_pdma(struct mtk_e + budget--; + } + +- mtk_tx_unmap(eth, tx_buf); ++ mtk_tx_unmap(eth, tx_buf, true); + + desc = &ring->dma[cpu]; + ring->last_free = desc; +@@ -1621,7 +1626,7 @@ static void mtk_tx_clean(struct mtk_eth + + if (ring->buf) { + for (i = 0; i < MTK_DMA_SIZE; i++) +- mtk_tx_unmap(eth, &ring->buf[i]); ++ mtk_tx_unmap(eth, &ring->buf[i], false); + kfree(ring->buf); + ring->buf = NULL; + } diff --git a/target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch b/target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch new file mode 100644 index 0000000000..922c35a3e4 --- /dev/null +++ b/target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch @@ -0,0 +1,26 @@ +From: Felix Fietkau +Date: Mon, 8 Jun 2020 17:02:39 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: significantly reduce mdio bus + access latency + +usleep_range often ends up sleeping much longer than the 10-20us provided +as a range here. This causes significant latency in mdio bus acceses, +which easily adds multiple seconds to the boot time on MT7621 when polling +DSA slave ports. +Use cond_resched instead of usleep_range, since the MDIO access does not +take much time + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -85,7 +85,7 @@ static int mtk_mdio_busy_wait(struct mtk + return 0; + if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT)) + break; +- usleep_range(10, 20); ++ cond_resched(); + } + + dev_err(eth->dev, "mdio: MDIO timeout\n"); diff --git a/target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch b/target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch new file mode 100644 index 0000000000..899bc41c93 --- /dev/null +++ b/target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch @@ -0,0 +1,31 @@ +From: Felix Fietkau +Date: Wed, 26 Aug 2020 16:52:12 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: fix rx vlan offload + +The VLAN ID in the rx descriptor is only valid if the RX_DMA_VID bit is set +Fixes frames wrongly marked with VLAN tags + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1320,7 +1320,7 @@ static int mtk_poll_rx(struct napi_struc + skb->protocol = eth_type_trans(skb, netdev); + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX && +- RX_DMA_VID(trxd.rxd3)) ++ (trxd.rxd2 & RX_DMA_VTAG)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + RX_DMA_VID(trxd.rxd3)); + skb_record_rx_queue(skb, 0); +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -293,6 +293,7 @@ + #define RX_DMA_LSO BIT(30) + #define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16) + #define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff) ++#define RX_DMA_VTAG BIT(15) + + /* QDMA descriptor rxd3 */ + #define RX_DMA_VID(_x) ((_x) & 0xfff) diff --git a/target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch b/target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch new file mode 100644 index 0000000000..fd28bb8692 --- /dev/null +++ b/target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch @@ -0,0 +1,50 @@ +From: Felix Fietkau +Date: Wed, 26 Aug 2020 16:55:54 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: fix unnecessary tx queue + stops + +When running short on descriptors, only stop the queue for the netdev that tx +was attempted for. By the time the something tries to send on the other netdev, +the ring might have some more room already + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1126,17 +1126,6 @@ static void mtk_wake_queue(struct mtk_et + } + } + +-static void mtk_stop_queue(struct mtk_eth *eth) +-{ +- int i; +- +- for (i = 0; i < MTK_MAC_COUNT; i++) { +- if (!eth->netdev[i]) +- continue; +- netif_stop_queue(eth->netdev[i]); +- } +-} +- + static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct mtk_mac *mac = netdev_priv(dev); +@@ -1157,7 +1146,7 @@ static int mtk_start_xmit(struct sk_buff + + tx_num = mtk_cal_txd_req(skb); + if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { +- mtk_stop_queue(eth); ++ netif_stop_queue(dev); + netif_err(eth, tx_queued, dev, + "Tx Ring full when queue awake!\n"); + spin_unlock(ð->page_lock); +@@ -1183,7 +1172,7 @@ static int mtk_start_xmit(struct sk_buff + goto drop; + + if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) +- mtk_stop_queue(eth); ++ netif_stop_queue(dev); + + spin_unlock(ð->page_lock); + diff --git a/target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch b/target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch new file mode 100644 index 0000000000..b066d35d2e --- /dev/null +++ b/target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch @@ -0,0 +1,32 @@ +From: Felix Fietkau +Date: Wed, 26 Aug 2020 16:58:55 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: use larger burst size for + qdma tx + +Improves tx performance + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -2187,7 +2187,7 @@ static int mtk_start_dma(struct mtk_eth + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { + mtk_w32(eth, + MTK_TX_WB_DDONE | MTK_TX_DMA_EN | +- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO | ++ MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO | + MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | + MTK_RX_BT_32DWORDS, + MTK_QDMA_GLO_CFG); +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -195,7 +195,7 @@ + #define MTK_RX_BT_32DWORDS (3 << 11) + #define MTK_NDP_CO_PRO BIT(10) + #define MTK_TX_WB_DDONE BIT(6) +-#define MTK_DMA_SIZE_16DWORDS (2 << 4) ++#define MTK_TX_BT_32DWORDS (3 << 4) + #define MTK_RX_DMA_BUSY BIT(3) + #define MTK_TX_DMA_BUSY BIT(1) + #define MTK_RX_DMA_EN BIT(2) diff --git a/target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch b/target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch new file mode 100644 index 0000000000..f68cbc333c --- /dev/null +++ b/target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch @@ -0,0 +1,21 @@ +From: Felix Fietkau +Date: Wed, 26 Aug 2020 16:59:41 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: increase DMA ring sizes + +256 descriptors is not enough for multi-gigabit traffic under load on MT7622. +Bump it to 512 to improve performance + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -19,7 +19,7 @@ + #define MTK_QDMA_PAGE_SIZE 2048 + #define MTK_MAX_RX_LENGTH 1536 + #define MTK_TX_DMA_BUF_LEN 0x3fff +-#define MTK_DMA_SIZE 256 ++#define MTK_DMA_SIZE 512 + #define MTK_NAPI_WEIGHT 64 + #define MTK_MAC_COUNT 2 + #define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) diff --git a/target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch b/target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch new file mode 100644 index 0000000000..4372029423 --- /dev/null +++ b/target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch @@ -0,0 +1,281 @@ +From: Felix Fietkau +Date: Wed, 26 Aug 2020 17:02:30 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: implement dynamic interrupt + moderation + +Reduces the number of interrupts under load + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/Kconfig ++++ b/drivers/net/ethernet/mediatek/Kconfig +@@ -10,6 +10,7 @@ if NET_VENDOR_MEDIATEK + config NET_MEDIATEK_SOC + tristate "MediaTek SoC Gigabit Ethernet support" + select PHYLINK ++ select DIMLIB + ---help--- + This driver supports the gigabit ethernet MACs in the + MediaTek SoC family. +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1228,12 +1228,13 @@ static void mtk_update_rx_cpu_idx(struct + static int mtk_poll_rx(struct napi_struct *napi, int budget, + struct mtk_eth *eth) + { ++ struct dim_sample dim_sample = {}; + struct mtk_rx_ring *ring; + int idx; + struct sk_buff *skb; + u8 *data, *new_data; + struct mtk_rx_dma *rxd, trxd; +- int done = 0; ++ int done = 0, bytes = 0; + + while (done < budget) { + struct net_device *netdev; +@@ -1307,6 +1308,7 @@ static int mtk_poll_rx(struct napi_struc + else + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, netdev); ++ bytes += pktlen; + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX && + (trxd.rxd2 & RX_DMA_VTAG)) +@@ -1338,6 +1340,12 @@ rx_done: + mtk_update_rx_cpu_idx(eth); + } + ++ eth->rx_packets += done; ++ eth->rx_bytes += bytes; ++ dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes, ++ &dim_sample); ++ net_dim(ð->rx_dim, dim_sample); ++ + return done; + } + +@@ -1430,6 +1438,7 @@ static int mtk_poll_tx_pdma(struct mtk_e + static int mtk_poll_tx(struct mtk_eth *eth, int budget) + { + struct mtk_tx_ring *ring = ð->tx_ring; ++ struct dim_sample dim_sample = {}; + unsigned int done[MTK_MAX_DEVS]; + unsigned int bytes[MTK_MAX_DEVS]; + int total = 0, i; +@@ -1447,8 +1456,14 @@ static int mtk_poll_tx(struct mtk_eth *e + continue; + netdev_completed_queue(eth->netdev[i], done[i], bytes[i]); + total += done[i]; ++ eth->tx_packets += done[i]; ++ eth->tx_bytes += bytes[i]; + } + ++ dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes, ++ &dim_sample); ++ net_dim(ð->tx_dim, dim_sample); ++ + if (mtk_queue_stopped(eth) && + (atomic_read(&ring->free_count) > ring->thresh)) + mtk_wake_queue(eth); +@@ -2123,6 +2138,7 @@ static irqreturn_t mtk_handle_irq_rx(int + { + struct mtk_eth *eth = _eth; + ++ eth->rx_events++; + if (likely(napi_schedule_prep(ð->rx_napi))) { + __napi_schedule(ð->rx_napi); + mtk_rx_irq_disable(eth, MTK_RX_DONE_INT); +@@ -2135,6 +2151,7 @@ static irqreturn_t mtk_handle_irq_tx(int + { + struct mtk_eth *eth = _eth; + ++ eth->tx_events++; + if (likely(napi_schedule_prep(ð->tx_napi))) { + __napi_schedule(ð->tx_napi); + mtk_tx_irq_disable(eth, MTK_TX_DONE_INT); +@@ -2282,6 +2299,9 @@ static int mtk_stop(struct net_device *d + napi_disable(ð->tx_napi); + napi_disable(ð->rx_napi); + ++ cancel_work_sync(ð->rx_dim.work); ++ cancel_work_sync(ð->tx_dim.work); ++ + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) + mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); + mtk_stop_dma(eth, MTK_PDMA_GLO_CFG); +@@ -2331,6 +2351,64 @@ err_disable_clks: + return ret; + } + ++static void mtk_dim_rx(struct work_struct *work) ++{ ++ struct dim *dim = container_of(work, struct dim, work); ++ struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim); ++ struct dim_cq_moder cur_profile; ++ u32 val, cur; ++ ++ cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode, ++ dim->profile_ix); ++ spin_lock_bh(ð->dim_lock); ++ ++ val = mtk_r32(eth, MTK_PDMA_DELAY_INT); ++ val &= MTK_PDMA_DELAY_TX_MASK; ++ val |= MTK_PDMA_DELAY_RX_EN; ++ ++ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK); ++ val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT; ++ ++ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK); ++ val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT; ++ ++ mtk_w32(eth, val, MTK_PDMA_DELAY_INT); ++ mtk_w32(eth, val, MTK_QDMA_DELAY_INT); ++ ++ spin_unlock_bh(ð->dim_lock); ++ ++ dim->state = DIM_START_MEASURE; ++} ++ ++static void mtk_dim_tx(struct work_struct *work) ++{ ++ struct dim *dim = container_of(work, struct dim, work); ++ struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim); ++ struct dim_cq_moder cur_profile; ++ u32 val, cur; ++ ++ cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode, ++ dim->profile_ix); ++ spin_lock_bh(ð->dim_lock); ++ ++ val = mtk_r32(eth, MTK_PDMA_DELAY_INT); ++ val &= MTK_PDMA_DELAY_RX_MASK; ++ val |= MTK_PDMA_DELAY_TX_EN; ++ ++ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK); ++ val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT; ++ ++ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK); ++ val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT; ++ ++ mtk_w32(eth, val, MTK_PDMA_DELAY_INT); ++ mtk_w32(eth, val, MTK_QDMA_DELAY_INT); ++ ++ spin_unlock_bh(ð->dim_lock); ++ ++ dim->state = DIM_START_MEASURE; ++} ++ + static int mtk_hw_init(struct mtk_eth *eth) + { + int i, val, ret; +@@ -2352,9 +2430,6 @@ static int mtk_hw_init(struct mtk_eth *e + goto err_disable_pm; + } + +- /* enable interrupt delay for RX */ +- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT); +- + /* disable delay and normal interrupt */ + mtk_tx_irq_disable(eth, ~0); + mtk_rx_irq_disable(eth, ~0); +@@ -2393,11 +2468,10 @@ static int mtk_hw_init(struct mtk_eth *e + /* Enable RX VLan Offloading */ + mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); + +- /* enable interrupt delay for RX */ +- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT); ++ mtk_dim_rx(ð->rx_dim.work); ++ mtk_dim_tx(ð->tx_dim.work); + + /* disable delay and normal interrupt */ +- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); + mtk_tx_irq_disable(eth, ~0); + mtk_rx_irq_disable(eth, ~0); + mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); +@@ -2916,6 +2990,13 @@ static int mtk_probe(struct platform_dev + spin_lock_init(ð->page_lock); + spin_lock_init(ð->tx_irq_lock); + spin_lock_init(ð->rx_irq_lock); ++ spin_lock_init(ð->dim_lock); ++ ++ eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; ++ INIT_WORK(ð->rx_dim.work, mtk_dim_rx); ++ ++ eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; ++ INIT_WORK(ð->tx_dim.work, mtk_dim_tx); + + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { + eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #define MTK_QDMA_PAGE_SIZE 2048 + #define MTK_MAX_RX_LENGTH 1536 +@@ -129,13 +130,18 @@ + + /* PDMA Delay Interrupt Register */ + #define MTK_PDMA_DELAY_INT 0xa0c ++#define MTK_PDMA_DELAY_RX_MASK GENMASK(15, 0) + #define MTK_PDMA_DELAY_RX_EN BIT(15) +-#define MTK_PDMA_DELAY_RX_PINT 4 + #define MTK_PDMA_DELAY_RX_PINT_SHIFT 8 +-#define MTK_PDMA_DELAY_RX_PTIME 4 +-#define MTK_PDMA_DELAY_RX_DELAY \ +- (MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \ +- (MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT)) ++#define MTK_PDMA_DELAY_RX_PTIME_SHIFT 0 ++ ++#define MTK_PDMA_DELAY_TX_MASK GENMASK(31, 16) ++#define MTK_PDMA_DELAY_TX_EN BIT(31) ++#define MTK_PDMA_DELAY_TX_PINT_SHIFT 24 ++#define MTK_PDMA_DELAY_TX_PTIME_SHIFT 16 ++ ++#define MTK_PDMA_DELAY_PINT_MASK 0x7f ++#define MTK_PDMA_DELAY_PTIME_MASK 0xff + + /* PDMA Interrupt Status Register */ + #define MTK_PDMA_INT_STATUS 0xa20 +@@ -217,6 +223,7 @@ + /* QDMA Interrupt Status Register */ + #define MTK_QDMA_INT_STATUS 0x1A18 + #define MTK_RX_DONE_DLY BIT(30) ++#define MTK_TX_DONE_DLY BIT(28) + #define MTK_RX_DONE_INT3 BIT(19) + #define MTK_RX_DONE_INT2 BIT(18) + #define MTK_RX_DONE_INT1 BIT(17) +@@ -226,8 +233,7 @@ + #define MTK_TX_DONE_INT1 BIT(1) + #define MTK_TX_DONE_INT0 BIT(0) + #define MTK_RX_DONE_INT MTK_RX_DONE_DLY +-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \ +- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3) ++#define MTK_TX_DONE_INT MTK_TX_DONE_DLY + + /* QDMA Interrupt grouping registers */ + #define MTK_QDMA_INT_GRP1 0x1a20 +@@ -890,6 +896,18 @@ struct mtk_eth { + + const struct mtk_soc_data *soc; + ++ spinlock_t dim_lock; ++ ++ u32 rx_events; ++ u32 rx_packets; ++ u32 rx_bytes; ++ struct dim rx_dim; ++ ++ u32 tx_events; ++ u32 tx_packets; ++ u32 tx_bytes; ++ struct dim tx_dim; ++ + u32 tx_int_mask_reg; + u32 tx_int_status_reg; + u32 rx_dma_l4_valid; diff --git a/target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch b/target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch new file mode 100644 index 0000000000..0a9dfd68a1 --- /dev/null +++ b/target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch @@ -0,0 +1,34 @@ +From: Felix Fietkau +Date: Wed, 26 Aug 2020 17:48:14 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: drop descriptor cpu-own bit check + in qdma tx cleanup + +mtk_poll_tx_qdma already checks the MTK_QTX_DRX_PTR register, which points +at the last completed descriptor. +To slightly improve performance, also remove the register bit which forces +the hardware to write back this bit earlier. + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1368,9 +1368,6 @@ static int mtk_poll_tx_qdma(struct mtk_e + int mac = 0; + + desc = mtk_qdma_phys_to_virt(ring, desc->txd2); +- if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0) +- break; +- + tx_buf = mtk_desc_to_tx_buf(ring, desc); + if (tx_buf->flags & MTK_TX_FLAGS_FPORT1) + mac = 1; +@@ -2203,7 +2200,7 @@ static int mtk_start_dma(struct mtk_eth + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) { + mtk_w32(eth, +- MTK_TX_WB_DDONE | MTK_TX_DMA_EN | ++ MTK_TX_DMA_EN | + MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO | + MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | + MTK_RX_BT_32DWORDS, diff --git a/target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch b/target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch new file mode 100644 index 0000000000..ddbd751ead --- /dev/null +++ b/target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch @@ -0,0 +1,67 @@ +From: Felix Fietkau +Date: Thu, 27 Aug 2020 06:32:03 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: cache hardware pointer of last + freed tx descriptor + +The value is only updated by the CPU, so it is cheaper to access from the ring +data structure than from a hardware register + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1358,7 +1358,7 @@ static int mtk_poll_tx_qdma(struct mtk_e + struct mtk_tx_buf *tx_buf; + u32 cpu, dma; + +- cpu = mtk_r32(eth, MTK_QTX_CRX_PTR); ++ cpu = ring->last_free_ptr; + dma = mtk_r32(eth, MTK_QTX_DRX_PTR); + + desc = mtk_qdma_phys_to_virt(ring, cpu); +@@ -1389,6 +1389,7 @@ static int mtk_poll_tx_qdma(struct mtk_e + cpu = next_cpu; + } + ++ ring->last_free_ptr = cpu; + mtk_w32(eth, cpu, MTK_QTX_CRX_PTR); + + return budget; +@@ -1589,6 +1590,7 @@ static int mtk_tx_alloc(struct mtk_eth * + atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); + ring->next_free = &ring->dma[0]; + ring->last_free = &ring->dma[MTK_DMA_SIZE - 1]; ++ ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz)); + ring->thresh = MAX_SKB_FRAGS; + + /* make sure that all changes to the dma ring are flushed before we +@@ -1602,9 +1604,7 @@ static int mtk_tx_alloc(struct mtk_eth * + mtk_w32(eth, + ring->phys + ((MTK_DMA_SIZE - 1) * sz), + MTK_QTX_CRX_PTR); +- mtk_w32(eth, +- ring->phys + ((MTK_DMA_SIZE - 1) * sz), +- MTK_QTX_DRX_PTR); ++ mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR); + mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, + MTK_QTX_CFG(0)); + } else { +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -622,6 +622,7 @@ struct mtk_tx_buf { + * @phys: The physical addr of tx_buf + * @next_free: Pointer to the next free descriptor + * @last_free: Pointer to the last free descriptor ++ * @last_free_ptr: Hardware pointer value of the last free descriptor + * @thresh: The threshold of minimum amount of free descriptors + * @free_count: QDMA uses a linked list. Track how many free descriptors + * are present +@@ -632,6 +633,7 @@ struct mtk_tx_ring { + dma_addr_t phys; + struct mtk_tx_dma *next_free; + struct mtk_tx_dma *last_free; ++ u32 last_free_ptr; + u16 thresh; + atomic_t free_count; + int dma_size; diff --git a/target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch b/target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch new file mode 100644 index 0000000000..1b7a0a9aca --- /dev/null +++ b/target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch @@ -0,0 +1,44 @@ +From: Felix Fietkau +Date: Thu, 27 Aug 2020 09:24:25 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: only read the full rx + descriptor if DMA is done + +Uncached memory access is expensive, and there is no need to access all +descriptor words if we can't process them anyway + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -772,13 +772,18 @@ static inline int mtk_max_buf_size(int f + return buf_size; + } + +-static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, ++static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd, + struct mtk_rx_dma *dma_rxd) + { +- rxd->rxd1 = READ_ONCE(dma_rxd->rxd1); + rxd->rxd2 = READ_ONCE(dma_rxd->rxd2); ++ if (!(rxd->rxd2 & RX_DMA_DONE)) ++ return false; ++ ++ rxd->rxd1 = READ_ONCE(dma_rxd->rxd1); + rxd->rxd3 = READ_ONCE(dma_rxd->rxd3); + rxd->rxd4 = READ_ONCE(dma_rxd->rxd4); ++ ++ return true; + } + + /* the qdma core needs scratch memory to be setup */ +@@ -1250,8 +1255,7 @@ static int mtk_poll_rx(struct napi_struc + rxd = &ring->dma[idx]; + data = ring->data[idx]; + +- mtk_rx_get_desc(&trxd, rxd); +- if (!(trxd.rxd2 & RX_DMA_DONE)) ++ if (!mtk_rx_get_desc(&trxd, rxd)) + break; + + /* find out which mac the packet come from. values start at 1 */ diff --git a/target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch b/target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch new file mode 100644 index 0000000000..2e1d2bb79d --- /dev/null +++ b/target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch @@ -0,0 +1,44 @@ +From: Felix Fietkau +Date: Thu, 27 Aug 2020 09:44:43 +0200 +Subject: [PATCH] net: ethernet: mtk_eth_soc: unmap rx data before calling + build_skb + +Since build_skb accesses the data area (for initializing shinfo), dma unmap +needs to happen before that call + +Signed-off-by: Felix Fietkau +--- + +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -1293,17 +1293,18 @@ static int mtk_poll_rx(struct napi_struc + goto release_desc; + } + ++ dma_unmap_single(eth->dev, trxd.rxd1, ++ ring->buf_size, DMA_FROM_DEVICE); ++ + /* receive data */ + skb = build_skb(data, ring->frag_size); + if (unlikely(!skb)) { +- skb_free_frag(new_data); ++ skb_free_frag(data); + netdev->stats.rx_dropped++; +- goto release_desc; ++ goto skip_rx; + } + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); + +- dma_unmap_single(eth->dev, trxd.rxd1, +- ring->buf_size, DMA_FROM_DEVICE); + pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); + skb->dev = netdev; + skb_put(skb, pktlen); +@@ -1321,6 +1322,7 @@ static int mtk_poll_rx(struct napi_struc + skb_record_rx_queue(skb, 0); + napi_gro_receive(napi, skb); + ++skip_rx: + ring->data[idx] = new_data; + rxd->rxd1 = (unsigned int)dma_addr; + -- cgit v1.2.3