author    Felix Fietkau <nbd@nbd.name>    2020-08-27 06:39:48 +0200
committer Felix Fietkau <nbd@nbd.name>    2020-09-01 17:01:56 +0200
commit    b5d425af237dc03327078d6b9be178a38b5f8723 (patch)
tree      42448318e13eef9ff603b01ebc3088a32ce617ec /target/linux/generic/pending-5.4
parent    6541028598b4a1079ee6182c5b50d6bcd9e21002 (diff)
mediatek/ramips: unify ethernet driver fixes and add performance optimizations
Increase DMA burst size and tx ring size and optimize tx processing.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Diffstat (limited to 'target/linux/generic/pending-5.4')
-rw-r--r--  target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch            |  72
-rw-r--r--  target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch     |  26
-rw-r--r--  target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch             |  31
-rw-r--r--  target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch     |  50
-rw-r--r--  target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch     |  32
-rw-r--r--  target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch         |  21
-rw-r--r--  target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch     | 281
-rw-r--r--  target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch     |  34
-rw-r--r--  target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch     |  67
-rw-r--r--  target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch     |  44
-rw-r--r--  target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch     |  44
11 files changed, 702 insertions(+), 0 deletions(-)
diff --git a/target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch b/target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch
new file mode 100644
index 0000000000..9909c4c963
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-00-net-ethernet-mtk_eth_soc-use-napi_consume_skb.patch
@@ -0,0 +1,72 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 8 Jun 2020 17:01:12 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: use napi_consume_skb
+
+Should improve performance, since napi_consume_skb can free skbs in bulk.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -853,7 +853,8 @@ static int txd_to_idx(struct mtk_tx_ring
+ return ((void *)dma - (void *)ring->dma) / sizeof(*dma);
+ }
+
+-static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
++static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
++ bool napi)
+ {
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+@@ -885,8 +886,12 @@ static void mtk_tx_unmap(struct mtk_eth
+
+ tx_buf->flags = 0;
+ if (tx_buf->skb &&
+- (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
+- dev_kfree_skb_any(tx_buf->skb);
++ (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC)) {
++ if (napi)
++ napi_consume_skb(tx_buf->skb, napi);
++ else
++ dev_kfree_skb_any(tx_buf->skb);
++ }
+ tx_buf->skb = NULL;
+ }
+
+@@ -1064,7 +1069,7 @@ err_dma:
+ tx_buf = mtk_desc_to_tx_buf(ring, itxd);
+
+ /* unmap dma */
+- mtk_tx_unmap(eth, tx_buf);
++ mtk_tx_unmap(eth, tx_buf, false);
+
+ itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+@@ -1382,7 +1387,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
+ done[mac]++;
+ budget--;
+ }
+- mtk_tx_unmap(eth, tx_buf);
++ mtk_tx_unmap(eth, tx_buf, true);
+
+ ring->last_free = desc;
+ atomic_inc(&ring->free_count);
+@@ -1419,7 +1424,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
+ budget--;
+ }
+
+- mtk_tx_unmap(eth, tx_buf);
++ mtk_tx_unmap(eth, tx_buf, true);
+
+ desc = &ring->dma[cpu];
+ ring->last_free = desc;
+@@ -1621,7 +1626,7 @@ static void mtk_tx_clean(struct mtk_eth
+
+ if (ring->buf) {
+ for (i = 0; i < MTK_DMA_SIZE; i++)
+- mtk_tx_unmap(eth, &ring->buf[i]);
++ mtk_tx_unmap(eth, &ring->buf[i], false);
+ kfree(ring->buf);
+ ring->buf = NULL;
+ }
diff --git a/target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch b/target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch
new file mode 100644
index 0000000000..922c35a3e4
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-01-net-ethernet-mtk_eth_soc-significantly-reduce-mdio-b.patch
@@ -0,0 +1,26 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 8 Jun 2020 17:02:39 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: significantly reduce mdio bus
+ access latency
+
+usleep_range often ends up sleeping much longer than the 10-20us provided
+as a range here. This causes significant latency in mdio bus accesses,
+which easily adds multiple seconds to the boot time on MT7621 when polling
+DSA slave ports.
+Use cond_resched instead of usleep_range, since the MDIO access does not
+take much time
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -85,7 +85,7 @@ static int mtk_mdio_busy_wait(struct mtk
+ return 0;
+ if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
+ break;
+- usleep_range(10, 20);
++ cond_resched();
+ }
+
+ dev_err(eth->dev, "mdio: MDIO timeout\n");
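For reference, the pattern the patch above switches to is a bounded busy-wait that yields the CPU between polls instead of sleeping. The sketch below illustrates it under assumed names: mdio_access_done() and MDIO_POLL_TIMEOUT are hypothetical stand-ins for the driver's PHY_IAC completion check and timeout, not part of mtk_eth_soc.

#include <linux/jiffies.h>
#include <linux/sched.h>
#include <linux/errno.h>

#define MDIO_POLL_TIMEOUT	(HZ / 10)	/* hypothetical upper bound */

static bool mdio_access_done(void);	/* hypothetical: reads the controller's busy flag */

/* Illustrative sketch only: poll until the MDIO access completes, yielding
 * with cond_resched() (valid because the caller runs in process context)
 * rather than sleeping 10-20us per iteration via usleep_range(). */
static int mdio_busy_wait_sketch(void)
{
	unsigned long t_start = jiffies;

	while (!mdio_access_done()) {
		if (time_after(jiffies, t_start + MDIO_POLL_TIMEOUT))
			return -ETIMEDOUT;
		cond_resched();
	}

	return 0;
}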
diff --git a/target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch b/target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch
new file mode 100644
index 0000000000..899bc41c93
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-02-net-ethernet-mtk_eth_soc-fix-rx-vlan-offload.patch
@@ -0,0 +1,31 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 26 Aug 2020 16:52:12 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: fix rx vlan offload
+
+The VLAN ID in the rx descriptor is only valid if the RX_DMA_VTAG bit is set.
+This fixes frames being wrongly marked with VLAN tags.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1320,7 +1320,7 @@ static int mtk_poll_rx(struct napi_struc
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
+- RX_DMA_VID(trxd.rxd3))
++ (trxd.rxd2 & RX_DMA_VTAG))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ RX_DMA_VID(trxd.rxd3));
+ skb_record_rx_queue(skb, 0);
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -293,6 +293,7 @@
+ #define RX_DMA_LSO BIT(30)
+ #define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16)
+ #define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff)
++#define RX_DMA_VTAG BIT(15)
+
+ /* QDMA descriptor rxd3 */
+ #define RX_DMA_VID(_x) ((_x) & 0xfff)
diff --git a/target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch b/target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch
new file mode 100644
index 0000000000..fd28bb8692
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-03-net-ethernet-mtk_eth_soc-fix-unnecessary-tx-queue-st.patch
@@ -0,0 +1,50 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 26 Aug 2020 16:55:54 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: fix unnecessary tx queue
+ stops
+
+When running short on descriptors, only stop the queue for the netdev that tx
+was attempted for. By the time something tries to send on the other netdev,
+the ring might have some more room already
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1126,17 +1126,6 @@ static void mtk_wake_queue(struct mtk_et
+ }
+ }
+
+-static void mtk_stop_queue(struct mtk_eth *eth)
+-{
+- int i;
+-
+- for (i = 0; i < MTK_MAC_COUNT; i++) {
+- if (!eth->netdev[i])
+- continue;
+- netif_stop_queue(eth->netdev[i]);
+- }
+-}
+-
+ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+@@ -1157,7 +1146,7 @@ static int mtk_start_xmit(struct sk_buff
+
+ tx_num = mtk_cal_txd_req(skb);
+ if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
+- mtk_stop_queue(eth);
++ netif_stop_queue(dev);
+ netif_err(eth, tx_queued, dev,
+ "Tx Ring full when queue awake!\n");
+ spin_unlock(&eth->page_lock);
+@@ -1183,7 +1172,7 @@ static int mtk_start_xmit(struct sk_buff
+ goto drop;
+
+ if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
+- mtk_stop_queue(eth);
++ netif_stop_queue(dev);
+
+ spin_unlock(&eth->page_lock);
+
diff --git a/target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch b/target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch
new file mode 100644
index 0000000000..b066d35d2e
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-04-net-ethernet-mtk_eth_soc-use-larger-burst-size-for-q.patch
@@ -0,0 +1,32 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 26 Aug 2020 16:58:55 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: use larger burst size for
+ qdma tx
+
+Improves tx performance
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -2187,7 +2187,7 @@ static int mtk_start_dma(struct mtk_eth
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ mtk_w32(eth,
+ MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+- MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO |
++ MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
+ MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+ MTK_RX_BT_32DWORDS,
+ MTK_QDMA_GLO_CFG);
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -195,7 +195,7 @@
+ #define MTK_RX_BT_32DWORDS (3 << 11)
+ #define MTK_NDP_CO_PRO BIT(10)
+ #define MTK_TX_WB_DDONE BIT(6)
+-#define MTK_DMA_SIZE_16DWORDS (2 << 4)
++#define MTK_TX_BT_32DWORDS (3 << 4)
+ #define MTK_RX_DMA_BUSY BIT(3)
+ #define MTK_TX_DMA_BUSY BIT(1)
+ #define MTK_RX_DMA_EN BIT(2)
diff --git a/target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch b/target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch
new file mode 100644
index 0000000000..f68cbc333c
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-05-net-ethernet-mtk_eth_soc-increase-DMA-ring-sizes.patch
@@ -0,0 +1,21 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 26 Aug 2020 16:59:41 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: increase DMA ring sizes
+
+256 descriptors is not enough for multi-gigabit traffic under load on MT7622.
+Bump it to 512 to improve performance
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -19,7 +19,7 @@
+ #define MTK_QDMA_PAGE_SIZE 2048
+ #define MTK_MAX_RX_LENGTH 1536
+ #define MTK_TX_DMA_BUF_LEN 0x3fff
+-#define MTK_DMA_SIZE 256
++#define MTK_DMA_SIZE 512
+ #define MTK_NAPI_WEIGHT 64
+ #define MTK_MAC_COUNT 2
+ #define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
diff --git a/target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch b/target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch
new file mode 100644
index 0000000000..4372029423
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-06-net-ethernet-mtk_eth_soc-implement-dynamic-interrupt.patch
@@ -0,0 +1,281 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 26 Aug 2020 17:02:30 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: implement dynamic interrupt
+ moderation
+
+Reduces the number of interrupts under load
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/Kconfig
++++ b/drivers/net/ethernet/mediatek/Kconfig
+@@ -10,6 +10,7 @@ if NET_VENDOR_MEDIATEK
+ config NET_MEDIATEK_SOC
+ tristate "MediaTek SoC Gigabit Ethernet support"
+ select PHYLINK
++ select DIMLIB
+ ---help---
+ This driver supports the gigabit ethernet MACs in the
+ MediaTek SoC family.
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1228,12 +1228,13 @@ static void mtk_update_rx_cpu_idx(struct
+ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ struct mtk_eth *eth)
+ {
++ struct dim_sample dim_sample = {};
+ struct mtk_rx_ring *ring;
+ int idx;
+ struct sk_buff *skb;
+ u8 *data, *new_data;
+ struct mtk_rx_dma *rxd, trxd;
+- int done = 0;
++ int done = 0, bytes = 0;
+
+ while (done < budget) {
+ struct net_device *netdev;
+@@ -1307,6 +1308,7 @@ static int mtk_poll_rx(struct napi_struc
+ else
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, netdev);
++ bytes += pktlen;
+
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
+ (trxd.rxd2 & RX_DMA_VTAG))
+@@ -1338,6 +1340,12 @@ rx_done:
+ mtk_update_rx_cpu_idx(eth);
+ }
+
++ eth->rx_packets += done;
++ eth->rx_bytes += bytes;
++ dim_update_sample(eth->rx_events, eth->rx_packets, eth->rx_bytes,
++ &dim_sample);
++ net_dim(&eth->rx_dim, dim_sample);
++
+ return done;
+ }
+
+@@ -1430,6 +1438,7 @@ static int mtk_poll_tx_pdma(struct mtk_e
+ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
+ {
+ struct mtk_tx_ring *ring = &eth->tx_ring;
++ struct dim_sample dim_sample = {};
+ unsigned int done[MTK_MAX_DEVS];
+ unsigned int bytes[MTK_MAX_DEVS];
+ int total = 0, i;
+@@ -1447,8 +1456,14 @@ static int mtk_poll_tx(struct mtk_eth *e
+ continue;
+ netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
+ total += done[i];
++ eth->tx_packets += done[i];
++ eth->tx_bytes += bytes[i];
+ }
+
++ dim_update_sample(eth->tx_events, eth->tx_packets, eth->tx_bytes,
++ &dim_sample);
++ net_dim(&eth->tx_dim, dim_sample);
++
+ if (mtk_queue_stopped(eth) &&
+ (atomic_read(&ring->free_count) > ring->thresh))
+ mtk_wake_queue(eth);
+@@ -2123,6 +2138,7 @@ static irqreturn_t mtk_handle_irq_rx(int
+ {
+ struct mtk_eth *eth = _eth;
+
++ eth->rx_events++;
+ if (likely(napi_schedule_prep(&eth->rx_napi))) {
+ __napi_schedule(&eth->rx_napi);
+ mtk_rx_irq_disable(eth, MTK_RX_DONE_INT);
+@@ -2135,6 +2151,7 @@ static irqreturn_t mtk_handle_irq_tx(int
+ {
+ struct mtk_eth *eth = _eth;
+
++ eth->tx_events++;
+ if (likely(napi_schedule_prep(&eth->tx_napi))) {
+ __napi_schedule(&eth->tx_napi);
+ mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
+@@ -2282,6 +2299,9 @@ static int mtk_stop(struct net_device *d
+ napi_disable(&eth->tx_napi);
+ napi_disable(&eth->rx_napi);
+
++ cancel_work_sync(&eth->rx_dim.work);
++ cancel_work_sync(&eth->tx_dim.work);
++
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+ mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
+ mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
+@@ -2331,6 +2351,64 @@ err_disable_clks:
+ return ret;
+ }
+
++static void mtk_dim_rx(struct work_struct *work)
++{
++ struct dim *dim = container_of(work, struct dim, work);
++ struct mtk_eth *eth = container_of(dim, struct mtk_eth, rx_dim);
++ struct dim_cq_moder cur_profile;
++ u32 val, cur;
++
++ cur_profile = net_dim_get_rx_moderation(eth->rx_dim.mode,
++ dim->profile_ix);
++ spin_lock_bh(&eth->dim_lock);
++
++ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
++ val &= MTK_PDMA_DELAY_TX_MASK;
++ val |= MTK_PDMA_DELAY_RX_EN;
++
++ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
++ val |= cur << MTK_PDMA_DELAY_RX_PTIME_SHIFT;
++
++ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
++ val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
++
++ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
++ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
++
++ spin_unlock_bh(&eth->dim_lock);
++
++ dim->state = DIM_START_MEASURE;
++}
++
++static void mtk_dim_tx(struct work_struct *work)
++{
++ struct dim *dim = container_of(work, struct dim, work);
++ struct mtk_eth *eth = container_of(dim, struct mtk_eth, tx_dim);
++ struct dim_cq_moder cur_profile;
++ u32 val, cur;
++
++ cur_profile = net_dim_get_tx_moderation(eth->tx_dim.mode,
++ dim->profile_ix);
++ spin_lock_bh(&eth->dim_lock);
++
++ val = mtk_r32(eth, MTK_PDMA_DELAY_INT);
++ val &= MTK_PDMA_DELAY_RX_MASK;
++ val |= MTK_PDMA_DELAY_TX_EN;
++
++ cur = min_t(u32, DIV_ROUND_UP(cur_profile.usec, 20), MTK_PDMA_DELAY_PTIME_MASK);
++ val |= cur << MTK_PDMA_DELAY_TX_PTIME_SHIFT;
++
++ cur = min_t(u32, cur_profile.pkts, MTK_PDMA_DELAY_PINT_MASK);
++ val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
++
++ mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
++ mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
++
++ spin_unlock_bh(&eth->dim_lock);
++
++ dim->state = DIM_START_MEASURE;
++}
++
+ static int mtk_hw_init(struct mtk_eth *eth)
+ {
+ int i, val, ret;
+@@ -2352,9 +2430,6 @@ static int mtk_hw_init(struct mtk_eth *e
+ goto err_disable_pm;
+ }
+
+- /* enable interrupt delay for RX */
+- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
+-
+ /* disable delay and normal interrupt */
+ mtk_tx_irq_disable(eth, ~0);
+ mtk_rx_irq_disable(eth, ~0);
+@@ -2393,11 +2468,10 @@ static int mtk_hw_init(struct mtk_eth *e
+ /* Enable RX VLan Offloading */
+ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
+
+- /* enable interrupt delay for RX */
+- mtk_w32(eth, MTK_PDMA_DELAY_RX_DELAY, MTK_PDMA_DELAY_INT);
++ mtk_dim_rx(&eth->rx_dim.work);
++ mtk_dim_tx(&eth->tx_dim.work);
+
+ /* disable delay and normal interrupt */
+- mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
+ mtk_tx_irq_disable(eth, ~0);
+ mtk_rx_irq_disable(eth, ~0);
+ mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
+@@ -2916,6 +2990,13 @@ static int mtk_probe(struct platform_dev
+ spin_lock_init(&eth->page_lock);
+ spin_lock_init(&eth->tx_irq_lock);
+ spin_lock_init(&eth->rx_irq_lock);
++ spin_lock_init(&eth->dim_lock);
++
++ eth->rx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
++ INIT_WORK(&eth->rx_dim.work, mtk_dim_rx);
++
++ eth->tx_dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
++ INIT_WORK(&eth->tx_dim.work, mtk_dim_tx);
+
+ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -15,6 +15,7 @@
+ #include <linux/u64_stats_sync.h>
+ #include <linux/refcount.h>
+ #include <linux/phylink.h>
++#include <linux/dim.h>
+
+ #define MTK_QDMA_PAGE_SIZE 2048
+ #define MTK_MAX_RX_LENGTH 1536
+@@ -129,13 +130,18 @@
+
+ /* PDMA Delay Interrupt Register */
+ #define MTK_PDMA_DELAY_INT 0xa0c
++#define MTK_PDMA_DELAY_RX_MASK GENMASK(15, 0)
+ #define MTK_PDMA_DELAY_RX_EN BIT(15)
+-#define MTK_PDMA_DELAY_RX_PINT 4
+ #define MTK_PDMA_DELAY_RX_PINT_SHIFT 8
+-#define MTK_PDMA_DELAY_RX_PTIME 4
+-#define MTK_PDMA_DELAY_RX_DELAY \
+- (MTK_PDMA_DELAY_RX_EN | MTK_PDMA_DELAY_RX_PTIME | \
+- (MTK_PDMA_DELAY_RX_PINT << MTK_PDMA_DELAY_RX_PINT_SHIFT))
++#define MTK_PDMA_DELAY_RX_PTIME_SHIFT 0
++
++#define MTK_PDMA_DELAY_TX_MASK GENMASK(31, 16)
++#define MTK_PDMA_DELAY_TX_EN BIT(31)
++#define MTK_PDMA_DELAY_TX_PINT_SHIFT 24
++#define MTK_PDMA_DELAY_TX_PTIME_SHIFT 16
++
++#define MTK_PDMA_DELAY_PINT_MASK 0x7f
++#define MTK_PDMA_DELAY_PTIME_MASK 0xff
+
+ /* PDMA Interrupt Status Register */
+ #define MTK_PDMA_INT_STATUS 0xa20
+@@ -217,6 +223,7 @@
+ /* QDMA Interrupt Status Register */
+ #define MTK_QDMA_INT_STATUS 0x1A18
+ #define MTK_RX_DONE_DLY BIT(30)
++#define MTK_TX_DONE_DLY BIT(28)
+ #define MTK_RX_DONE_INT3 BIT(19)
+ #define MTK_RX_DONE_INT2 BIT(18)
+ #define MTK_RX_DONE_INT1 BIT(17)
+@@ -226,8 +233,7 @@
+ #define MTK_TX_DONE_INT1 BIT(1)
+ #define MTK_TX_DONE_INT0 BIT(0)
+ #define MTK_RX_DONE_INT MTK_RX_DONE_DLY
+-#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
+- MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
++#define MTK_TX_DONE_INT MTK_TX_DONE_DLY
+
+ /* QDMA Interrupt grouping registers */
+ #define MTK_QDMA_INT_GRP1 0x1a20
+@@ -890,6 +896,18 @@ struct mtk_eth {
+
+ const struct mtk_soc_data *soc;
+
++ spinlock_t dim_lock;
++
++ u32 rx_events;
++ u32 rx_packets;
++ u32 rx_bytes;
++ struct dim rx_dim;
++
++ u32 tx_events;
++ u32 tx_packets;
++ u32 tx_bytes;
++ struct dim tx_dim;
++
+ u32 tx_int_mask_reg;
+ u32 tx_int_status_reg;
+ u32 rx_dma_l4_valid;
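The DIM integration above follows the usual pattern of the kernel's dim library: the NAPI poll path feeds packet and byte counters into net_dim(), and a deferred work item applies whichever moderation profile DIM selects. The condensed sketch below uses hypothetical names (my_ring, my_hw_set_coalesce(), the counter fields) rather than the driver's actual structures.

#include <linux/dim.h>
#include <linux/workqueue.h>

struct my_ring {			/* hypothetical per-ring bookkeeping */
	struct dim dim;
	u16 events;			/* incremented in the IRQ handler */
	u64 packets;
	u64 bytes;
};

static void my_hw_set_coalesce(struct my_ring *ring, u16 usec, u16 pkts);	/* hypothetical */

/* work handler: program the moderation profile DIM picked */
static void my_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct my_ring *ring = container_of(dim, struct my_ring, dim);
	struct dim_cq_moder m;

	m = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
	my_hw_set_coalesce(ring, m.usec, m.pkts);
	dim->state = DIM_START_MEASURE;
}

/* called at the end of the NAPI poll routine */
static void my_poll_update_dim(struct my_ring *ring, int done, int bytes)
{
	struct dim_sample sample = {};

	ring->packets += done;
	ring->bytes += bytes;
	dim_update_sample(ring->events, ring->packets, ring->bytes, &sample);
	net_dim(&ring->dim, sample);	/* may queue my_dim_work() */
}

/* setup, e.g. at probe time:
 *	ring->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 *	INIT_WORK(&ring->dim.work, my_dim_work);
 */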
diff --git a/target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch b/target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch
new file mode 100644
index 0000000000..0a9dfd68a1
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-07-net-ethernet-mtk_eth_soc-drop-descriptor-cpu-own-bit.patch
@@ -0,0 +1,34 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 26 Aug 2020 17:48:14 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: drop descriptor cpu-own bit check
+ in qdma tx cleanup
+
+mtk_poll_tx_qdma already checks the MTK_QTX_DRX_PTR register, which points
+at the last completed descriptor.
+To slightly improve performance, also drop the MTK_TX_WB_DDONE register bit, which forces
+the hardware to write the descriptor done bit back early.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1368,9 +1368,6 @@ static int mtk_poll_tx_qdma(struct mtk_e
+ int mac = 0;
+
+ desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
+- if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
+- break;
+-
+ tx_buf = mtk_desc_to_tx_buf(ring, desc);
+ if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
+ mac = 1;
+@@ -2203,7 +2200,7 @@ static int mtk_start_dma(struct mtk_eth
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ mtk_w32(eth,
+- MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
++ MTK_TX_DMA_EN |
+ MTK_TX_BT_32DWORDS | MTK_NDP_CO_PRO |
+ MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+ MTK_RX_BT_32DWORDS,
diff --git a/target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch b/target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch
new file mode 100644
index 0000000000..ddbd751ead
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-08-net-ethernet-mtk_eth_soc-cache-hardware-pointer-of-l.patch
@@ -0,0 +1,67 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Aug 2020 06:32:03 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: cache hardware pointer of last
+ freed tx descriptor
+
+The value is only updated by the CPU, so it is cheaper to access from the ring
+data structure than from a hardware register
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1358,7 +1358,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
+ struct mtk_tx_buf *tx_buf;
+ u32 cpu, dma;
+
+- cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
++ cpu = ring->last_free_ptr;
+ dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
+
+ desc = mtk_qdma_phys_to_virt(ring, cpu);
+@@ -1389,6 +1389,7 @@ static int mtk_poll_tx_qdma(struct mtk_e
+ cpu = next_cpu;
+ }
+
++ ring->last_free_ptr = cpu;
+ mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
+
+ return budget;
+@@ -1589,6 +1590,7 @@ static int mtk_tx_alloc(struct mtk_eth *
+ atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
+ ring->next_free = &ring->dma[0];
+ ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
++ ring->last_free_ptr = (u32)(ring->phys + ((MTK_DMA_SIZE - 1) * sz));
+ ring->thresh = MAX_SKB_FRAGS;
+
+ /* make sure that all changes to the dma ring are flushed before we
+@@ -1602,9 +1604,7 @@ static int mtk_tx_alloc(struct mtk_eth *
+ mtk_w32(eth,
+ ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+ MTK_QTX_CRX_PTR);
+- mtk_w32(eth,
+- ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+- MTK_QTX_DRX_PTR);
++ mtk_w32(eth, ring->last_free_ptr, MTK_QTX_DRX_PTR);
+ mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
+ MTK_QTX_CFG(0));
+ } else {
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -622,6 +622,7 @@ struct mtk_tx_buf {
+ * @phys: The physical addr of tx_buf
+ * @next_free: Pointer to the next free descriptor
+ * @last_free: Pointer to the last free descriptor
++ * @last_free_ptr: Hardware pointer value of the last free descriptor
+ * @thresh: The threshold of minimum amount of free descriptors
+ * @free_count: QDMA uses a linked list. Track how many free descriptors
+ * are present
+@@ -632,6 +633,7 @@ struct mtk_tx_ring {
+ dma_addr_t phys;
+ struct mtk_tx_dma *next_free;
+ struct mtk_tx_dma *last_free;
++ u32 last_free_ptr;
+ u16 thresh;
+ atomic_t free_count;
+ int dma_size;
diff --git a/target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch b/target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch
new file mode 100644
index 0000000000..1b7a0a9aca
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-09-net-ethernet-mtk_eth_soc-only-read-the-full-rx-descr.patch
@@ -0,0 +1,44 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Aug 2020 09:24:25 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: only read the full rx
+ descriptor if DMA is done
+
+Uncached memory access is expensive, and there is no need to access all
+descriptor words if we can't process them anyway
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -772,13 +772,18 @@ static inline int mtk_max_buf_size(int f
+ return buf_size;
+ }
+
+-static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
++static inline bool mtk_rx_get_desc(struct mtk_rx_dma *rxd,
+ struct mtk_rx_dma *dma_rxd)
+ {
+- rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
+ rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
++ if (!(rxd->rxd2 & RX_DMA_DONE))
++ return false;
++
++ rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
+ rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
+ rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
++
++ return true;
+ }
+
+ /* the qdma core needs scratch memory to be setup */
+@@ -1250,8 +1255,7 @@ static int mtk_poll_rx(struct napi_struc
+ rxd = &ring->dma[idx];
+ data = ring->data[idx];
+
+- mtk_rx_get_desc(&trxd, rxd);
+- if (!(trxd.rxd2 & RX_DMA_DONE))
++ if (!mtk_rx_get_desc(&trxd, rxd))
+ break;
+
+ /* find out which mac the packet come from. values start at 1 */
diff --git a/target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch b/target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch
new file mode 100644
index 0000000000..2e1d2bb79d
--- /dev/null
+++ b/target/linux/generic/pending-5.4/770-10-net-ethernet-mtk_eth_soc-unmap-rx-data-before-callin.patch
@@ -0,0 +1,44 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Thu, 27 Aug 2020 09:44:43 +0200
+Subject: [PATCH] net: ethernet: mtk_eth_soc: unmap rx data before calling
+ build_skb
+
+Since build_skb accesses the data area (for initializing shinfo), dma unmap
+needs to happen before that call
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1293,17 +1293,18 @@ static int mtk_poll_rx(struct napi_struc
+ goto release_desc;
+ }
+
++ dma_unmap_single(eth->dev, trxd.rxd1,
++ ring->buf_size, DMA_FROM_DEVICE);
++
+ /* receive data */
+ skb = build_skb(data, ring->frag_size);
+ if (unlikely(!skb)) {
+- skb_free_frag(new_data);
++ skb_free_frag(data);
+ netdev->stats.rx_dropped++;
+- goto release_desc;
++ goto skip_rx;
+ }
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+- dma_unmap_single(eth->dev, trxd.rxd1,
+- ring->buf_size, DMA_FROM_DEVICE);
+ pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
+ skb->dev = netdev;
+ skb_put(skb, pktlen);
+@@ -1321,6 +1322,7 @@ static int mtk_poll_rx(struct napi_struc
+ skb_record_rx_queue(skb, 0);
+ napi_gro_receive(napi, skb);
+
++skip_rx:
+ ring->data[idx] = new_data;
+ rxd->rxd1 = (unsigned int)dma_addr;
+