--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -32,6 +33,13 @@ struct mtk_ioctl_reg {
 #define REG_HQOS_MAX			0x3FFF
 #define RAETH_QDMA_REG_READ		0x89F8
 #define RAETH_QDMA_REG_WRITE		0x89F9
+#define RAETH_QDMA_QUEUE_MAPPING        0x89FA
+
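+/* mark-to-queue (M2Q) mapping table, configured from user space via
+ * the RAETH_QDMA_QUEUE_MAPPING ioctl below
+ */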
+unsigned int M2Q_table[16];
+EXPORT_SYMBOL_GPL(M2Q_table);
+
+unsigned int lan_wan_separate;
+
 #endif
 
 #if defined(CONFIG_NET_MEDIATEK_HNAT) || defined(CONFIG_NET_MEDIATEK_HNAT_MODULE)
@@ -225,7 +233,7 @@ static void mtk_phy_link_adjust(struct n
 		if (flowctrl & FLOW_CTRL_RX)
 			mcr |= MAC_MCR_FORCE_RX_FC;
 
-		netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n",
+		netif_info(mac->hw, link, dev, "rx pause %s, tx pause %s\n",
 			  flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled",
 			  flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled");
 	}
@@ -690,6 +698,7 @@ static int mtk_tx_map(struct sk_buff *sk
 	txd3 |= skb->mark & 0x7;
 	if (mac->id)
 		txd3 += 8;
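+	/* override the mark-based queue selection above and force
+	 * queue 0 for now
+	 */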
+	txd3 = 0;
 #endif
 
 	mapped_addr = dma_map_single(eth->dev, skb->data,
@@ -760,16 +769,7 @@ static int mtk_tx_map(struct sk_buff *sk
 	WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
 				(!nr_frags * TX_DMA_LS0)));
 
-	/* we have a single DMA ring so BQL needs to be updated for all devices
-	 * sitting on this ring
-	 */
-	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->netdev[i])
-			continue;
-
-		netdev_sent_queue(eth->netdev[i], skb->len);
-	}
-
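+	/* BQL is now tracked per netdev; mtk_poll_tx() credits the
+	 * completed work back to the mac that queued it
+	 */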
+	netdev_sent_queue(dev, skb->len);
 	skb_tx_timestamp(skb);
 
 	ring->next_free = mtk_qdma_phys_to_virt(ring, txd->txd2);
@@ -980,20 +980,9 @@ static int mtk_poll_rx(struct napi_struc
 		if (!(trxd.rxd2 & RX_DMA_DONE))
 			break;
 
-		/* find out which mac the packet comes from. If the special tag is
-		 * we can assume that the traffic is coming from the builtin mt7530
-		 * and the DSA driver has loaded. FPORT will be the physical switch
-		 * port in this case rather than the FE forward port id. */
-		if (!(trxd.rxd4 & RX_DMA_SP_TAG)) {
-			/* values start at 1 */
-			mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
-			      RX_DMA_FPORT_MASK;
-			mac--;
-		}
-
-		if (unlikely(mac < 0 || mac >= MTK_MAC_COUNT ||
-			     !eth->netdev[mac]))
-			goto release_desc;
+		/* find out which mac the packet comes from. If the
+		 * special tag is present, the traffic came in through
+		 * the built-in mt7530 switch behind GMAC1 (mac 0);
+		 * everything else is from GMAC2 (mac 1).
+		 */
+		mac = !(trxd.rxd4 & RX_DMA_SP_TAG);
+
+		if (unlikely(!eth->netdev[mac]))
+			goto release_desc;
 
 		netdev = eth->netdev[mac];
 
@@ -1076,18 +1068,21 @@ static int mtk_poll_tx(struct mtk_eth *e
 	struct mtk_tx_dma *desc;
 	struct sk_buff *skb;
 	struct mtk_tx_buf *tx_buf;
-	int total = 0, done = 0;
-	unsigned int bytes = 0;
+	unsigned int done[MTK_MAX_DEVS];
+	unsigned int bytes[MTK_MAX_DEVS];
 	u32 cpu, dma;
 	static int condition;
-	int i;
+	int total = 0, i;
+
+	memset(done, 0, sizeof(done));
+	memset(bytes, 0, sizeof(bytes));
 
 	cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
 	dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
 
 	desc = mtk_qdma_phys_to_virt(ring, cpu);
 
-	while ((cpu != dma) && done < budget) {
+	while ((cpu != dma) && budget) {
 		u32 next_cpu = desc->txd2;
 		int mac = 0;
 
@@ -1106,8 +1101,9 @@ static int mtk_poll_tx(struct mtk_eth *e
 		}
 
 		if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
-			bytes += skb->len;
-			done++;
+			bytes[mac] += skb->len;
+			done[mac]++;
+			budget--;
 		}
 		mtk_tx_unmap(eth, tx_buf);
 
@@ -1119,13 +1115,11 @@ static int mtk_poll_tx(struct mtk_eth *e
 
 	mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
 
-	/* we have a single DMA ring so BQL needs to be updated for all devices
-	 * sitting on this ring
-	 */
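+	/* BQL completions are credited per mac, mirroring the
+	 * per-device netdev_sent_queue() in mtk_tx_map()
+	 */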
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
-		if (!eth->netdev[i])
+		if (!eth->netdev[i] || !done[i])
 			continue;
-		netdev_completed_queue(eth->netdev[i], done, bytes);
+		netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
+		total += done[i];
 	}
 
 	if (mtk_queue_stopped(eth) &&
@@ -1286,21 +1280,11 @@ static void mtk_tx_clean(struct mtk_eth
 
 static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 {
-	struct mtk_rx_ring *ring;
+	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
 	int rx_data_len, rx_dma_size;
 	int i;
-	u32 offset = 0;
-
-	if (rx_flag & MTK_RX_FLAGS_QDMA) {
-		if (ring_no)
-			return -EINVAL;
-		ring = &eth->rx_ring_qdma;
-		offset = 0x1000;
-	} else {
-		ring = &eth->rx_ring[ring_no];
-	}
 
-	if (rx_flag & MTK_RX_FLAGS_HWLRO) {
+	if (rx_flag == MTK_RX_FLAGS_HWLRO) {
 		rx_data_len = MTK_MAX_LRO_RX_LENGTH;
 		rx_dma_size = MTK_HW_LRO_DMA_SIZE;
 	} else {
@@ -1348,16 +1332,104 @@ static int mtk_rx_alloc(struct mtk_eth *
 	 */
 	wmb();
 
-	mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no) + offset);
-	mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no) + offset);
-	mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg + offset);
-	mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX + offset);
+	mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no));
+	mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no));
+	mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+	mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX);
 
 	return 0;
 }
 
-static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring)
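+/* Allocate the RX ring that lives on the QDMA block. The layout
+ * mirrors mtk_rx_alloc(), but the ring is programmed through the
+ * QRX_* register set instead of the PDMA PRX_* ones.
+ */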
+static int mtk_rx_alloc_qdma(struct mtk_eth *eth)
 {
+	struct mtk_rx_ring *ring = &eth->rx_ring_qdma;
+	int rx_data_len, rx_dma_size;
+	int i;
+
+	rx_data_len = ETH_DATA_LEN;
+	rx_dma_size = MTK_DMA_SIZE;
+
+	ring->frag_size = mtk_max_frag_size(rx_data_len);
+	ring->buf_size = mtk_max_buf_size(ring->frag_size);
+	ring->data = kcalloc(rx_dma_size, sizeof(*ring->data),
+			     GFP_KERNEL);
+	if (!ring->data)
+		return -ENOMEM;
+
+	for (i = 0; i < rx_dma_size; i++) {
+		ring->data[i] = netdev_alloc_frag(ring->frag_size);
+		if (!ring->data[i])
+			return -ENOMEM;
+	}
+
+	ring->dma = dma_alloc_coherent(eth->dev,
+				       rx_dma_size * sizeof(*ring->dma),
+				       &ring->phys,
+				       GFP_ATOMIC | __GFP_ZERO);
+	if (!ring->dma)
+		return -ENOMEM;
+
+	for (i = 0; i < rx_dma_size; i++) {
+		dma_addr_t dma_addr = dma_map_single(eth->dev,
+				ring->data[i] + NET_SKB_PAD,
+				ring->buf_size,
+				DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+			return -ENOMEM;
+		ring->dma[i].rxd1 = (unsigned int)dma_addr;
+
+		ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
+	}
+	ring->dma_size = rx_dma_size;
+	ring->calc_idx_update = false;
+	ring->calc_idx = rx_dma_size - 1;
+	ring->crx_idx_reg = MTK_QRX_CRX_IDX_CFG(0);
+	/* make sure that all changes to the dma ring are flushed before we
+	 * continue
+	 */
+	wmb();
+
+	mtk_w32(eth, ring->phys, MTK_QRX_BASE_PTR_CFG(0));
+	mtk_w32(eth, rx_dma_size, MTK_QRX_MAX_CNT_CFG(0));
+	mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+	mtk_w32(eth, MTK_PST_DRX_IDX_CFG(0), MTK_QDMA_RST_IDX);
+
+	return 0;
+}
+
+static void mtk_rx_clean(struct mtk_eth *eth, int ring_no)
+{
+	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
+	int i;
+
+	if (ring->data && ring->dma) {
+		for (i = 0; i < ring->dma_size; i++) {
+			if (!ring->data[i])
+				continue;
+			if (!ring->dma[i].rxd1)
+				continue;
+			dma_unmap_single(eth->dev,
+					 ring->dma[i].rxd1,
+					 ring->buf_size,
+					 DMA_FROM_DEVICE);
+			skb_free_frag(ring->data[i]);
+		}
+		kfree(ring->data);
+		ring->data = NULL;
+	}
+
+	if (ring->dma) {
+		dma_free_coherent(eth->dev,
+				  ring->dma_size * sizeof(*ring->dma),
+				  ring->dma,
+				  ring->phys);
+		ring->dma = NULL;
+	}
+}
+
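+/* Free the QDMA RX ring set up by mtk_rx_alloc_qdma() */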
+static void mtk_rx_clean_qdma(struct mtk_eth *eth)
+{
+	struct mtk_rx_ring *ring = &eth->rx_ring_qdma;
 	int i;
 
 	if (ring->data && ring->dma) {
@@ -1683,7 +1755,7 @@ static int mtk_dma_init(struct mtk_eth *
 	if (err)
 		return err;
 
-	err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
+	err = mtk_rx_alloc_qdma(eth);
 	if (err)
 		return err;
 
@@ -1726,13 +1799,13 @@ static void mtk_dma_free(struct mtk_eth
 		eth->phy_scratch_ring = 0;
 	}
 	mtk_tx_clean(eth);
-	mtk_rx_clean(eth, &eth->rx_ring[0]);
-	mtk_rx_clean(eth, &eth->rx_ring_qdma);
+	mtk_rx_clean(eth, 0);
+	mtk_rx_clean_qdma(eth);
 
 	if (eth->hwlro) {
 		mtk_hwlro_rx_uninit(eth);
 		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
-			mtk_rx_clean(eth, &eth->rx_ring[i]);
+			mtk_rx_clean(eth, i);
 	}
 
 	kfree(eth->scratch_head);
@@ -1947,20 +2020,14 @@ static int mtk_hw_init(struct mtk_eth *e
 	val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
 	mtk_w32(eth, val | MTK_CDMQ_STAG_EN, MTK_CDMQ_IG_CTRL);
 
-	/* Indicates CDM to parse the MTK special tag from CPU
-	 * which also is working out for untag packets.
-	 */
-	val = mtk_r32(eth, MTK_CDMQ_IG_CTRL);
-	mtk_w32(eth, val | MTK_CDMQ_STAG_EN, MTK_CDMQ_IG_CTRL);
-	val = mtk_r32(eth, MTK_CDMP_IG_CTRL);
-	mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
-
 	/* Enable RX VLan Offloading */
 	if (MTK_HW_FEATURES & NETIF_F_HW_VLAN_CTAG_RX)
 		mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
 	else
 		mtk_w32(eth, 0, MTK_CDMP_EG_CTRL);
 
+	/* keep parsing the MTK special tag on CDMP ingress (bit 0,
+	 * MTK_CDMP_STAG_EN); bits 24 and 31 are undocumented here
+	 */
+	mtk_w32(eth, BIT(31) | BIT(24) | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
+
 	/* disable delay and normal interrupt */
 #ifdef MTK_IRQ_DLY
 	mtk_w32(eth, 0x84048404, MTK_PDMA_DELAY_INT);
@@ -1990,6 +2057,9 @@ static int mtk_hw_init(struct mtk_eth *e
 		/* Enable RX checksum */
 		val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
 
+		/* undocumented: bit 24 of the GDMA forward config is
+		 * set only on the first GMAC
+		 */
+		if (!i)
+			val |= BIT(24);
+
 		/* setup the mac dma */
 		mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
 	}
@@ -2069,7 +2139,18 @@ static int mtk_do_ioctl(struct net_devic
 		if (reg.off > REG_HQOS_MAX)
 			return -EINVAL;
 		mtk_w32(eth, reg.val, 0x1800 + reg.off);
-//		printk("write reg off:%x val:%x\n", reg.off, reg.val);
+		pr_info("write reg off:%x val:%x\n", reg.off, reg.val);
+		return 0;
+
+	case RAETH_QDMA_QUEUE_MAPPING:
+		if (copy_from_user(&reg, ifr->ifr_data, sizeof(reg)))
+			return -EFAULT;
+		/* bit 8 of the offset doubles as the lan/wan separation
+		 * switch; the low bits index M2Q_table
+		 */
+		if (reg.off & 0x100) {
+			lan_wan_separate = 1;
+			reg.off &= 0xff;
+		} else {
+			lan_wan_separate = 0;
+		}
+		if (reg.off >= ARRAY_SIZE(M2Q_table))
+			return -EINVAL;
+		M2Q_table[reg.off] = reg.val;
+		return 0;
 #endif
 	case SIOCGMIIPHY:
@@ -2443,7 +2524,7 @@ static int mtk_add_mac(struct mtk_eth *e
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
-	eth->netdev[id]->watchdog_timeo = 30 * HZ;
+	eth->netdev[id]->watchdog_timeo = 15 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -91,12 +90,27 @@
 #define MTK_GDMA_TCS_EN		BIT(21)
 #define MTK_GDMA_UCS_EN		BIT(20)
 
+/* GDMA Ingress Control Register */
+#define MTK_GDMA1_IG_CTRL(x)	(0x500 + (x * 0x1000))
+
 /* Unicast Filter MAC Address Register - Low */
 #define MTK_GDMA_MAC_ADRL(x)	(0x508 + (x * 0x1000))
 
 /* Unicast Filter MAC Address Register - High */
 #define MTK_GDMA_MAC_ADRH(x)	(0x50C + (x * 0x1000))
 
+/* QDMA RX Base Pointer Register */
+#define MTK_QRX_BASE_PTR0	0x1900
+#define MTK_QRX_BASE_PTR_CFG(x)	(MTK_QRX_BASE_PTR0 + (x * 0x10))
+
+/* QDMA RX Maximum Count Register */
+#define MTK_QRX_MAX_CNT0	0x1904
+#define MTK_QRX_MAX_CNT_CFG(x)	(MTK_QRX_MAX_CNT0 + (x * 0x10))
+
+/* QDMA RX CPU Pointer Register */
+#define MTK_QRX_CRX_IDX0	0x1908
+#define MTK_QRX_CRX_IDX_CFG(x)	(MTK_QRX_CRX_IDX0 + (x * 0x10))
+
 /* PDMA RX Base Pointer Register */
 #define MTK_PRX_BASE_PTR0	0x900
 #define MTK_PRX_BASE_PTR_CFG(x)	(MTK_PRX_BASE_PTR0 + (x * 0x10))
@@ -240,7 +254,10 @@
 #define MTK_QDMA_INT_MASK	0x1A1C
 
 /* QDMA Interrupt Mask Register */
+#define MTK_QDMA_HRED1		0x1A40
 #define MTK_QDMA_HRED2		0x1A44
+#define MTK_QDMA_SRED1		0x1A48
+#define MTK_QDMA_SRED2		0x1A4C
 
 /* QDMA TX Forward CPU Pointer Register */
 #define MTK_QTX_CTX_PTR		0x1B00
@@ -275,6 +292,7 @@
 #define TX_DMA_TSO		BIT(28)
 #define TX_DMA_FPORT_SHIFT	25
 #define TX_DMA_FPORT_MASK	0x7
+#define TX_DMA_VQID0		BIT(17)
 #define TX_DMA_INS_VLAN		BIT(16)
 
 /* QDMA descriptor txd3 */
@@ -310,6 +327,7 @@
 
 /* Mac control registers */
 #define MTK_MAC_MCR(x)		(0x10100 + (x * 0x100))
+#define MTK_MAC_MSR(x)		(0x10108 + (x * 0x100))
 #define MAC_MCR_MAX_RX_1536	BIT(24)
 #define MAC_MCR_IPG_CFG		(BIT(18) | BIT(16))
 #define MAC_MCR_FORCE_MODE	BIT(15)
@@ -495,7 +513,6 @@ struct mtk_tx_ring {
 enum mtk_rx_flags {
 	MTK_RX_FLAGS_NORMAL = 0,
 	MTK_RX_FLAGS_HWLRO,
-	MTK_RX_FLAGS_QDMA,
 };
 
 /* struct mtk_rx_ring -	This struct holds info describing a RX ring
@@ -563,6 +580,7 @@ struct mtk_eth {
 	struct net_device		*netdev[MTK_MAX_DEVS];
 	struct mtk_mac			*mac[MTK_MAX_DEVS];
 	int				irq[3];
+	cpumask_t			affinity_mask[3];
 	u32				msg_enable;
 	unsigned long			sysclk;
 	struct regmap			*ethsys;
@@ -615,4 +633,6 @@ void mtk_stats_update_mac(struct mtk_mac
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg);
 u32 mtk_r32(struct mtk_eth *eth, unsigned reg);
 
+extern unsigned int M2Q_table[16];
+
 #endif /* MTK_ETH_H */