1 files changed, 0 insertions, 301 deletions
diff --git a/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch b/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch
deleted file mode 100644
index 9b1bd6e976..0000000000
--- a/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch
+++ /dev/null
@@ -1,301 +0,0 @@
-From: Felix Fietkau <nbd@nbd.name>
-Date: Sun, 26 Jul 2020 14:03:21 +0200
-Subject: [PATCH] net: add support for threaded NAPI polling
-
-For some drivers (especially 802.11 drivers), doing a lot of work in the NAPI
-poll function does not perform well. Since NAPI poll is bound to the CPU it
-was scheduled from, we can easily end up with a few very busy CPUs spending
-most of their time in softirq/ksoftirqd and some idle ones.
-
-Introduce workqueue-based threaded NAPI for such drivers. The API is the
-same except for using netif_threaded_napi_add instead of netif_napi_add.
-
-In my tests with mt76 on MT7621, using threaded NAPI + a thread for tx scheduling
-improves LAN->WLAN bridging throughput by 10-50%. Throughput without threaded
-NAPI is wildly inconsistent, depending on the CPU that runs the tx scheduling
-thread.
-
-With threaded NAPI, throughput seems stable and consistent (and higher than
-the best results I got without it).
-
-Based on a patch by Hillf Danton
-
-Cc: Hillf Danton <hdanton@sina.com>
-Signed-off-by: Felix Fietkau <nbd@nbd.name>
----
-
---- a/include/linux/netdevice.h
-+++ b/include/linux/netdevice.h
-@@ -347,6 +347,7 @@ struct napi_struct {
- 	struct list_head	dev_list;
- 	struct hlist_node	napi_hash_node;
- 	unsigned int		napi_id;
-+	struct work_struct	work;
- };
- 
- enum {
-@@ -357,6 +358,7 @@ enum {
- 	NAPI_STATE_LISTED,	/* NAPI added to system lists */
- 	NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
- 	NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
-+	NAPI_STATE_THREADED,	/* Use threaded NAPI */
- };
- 
- enum {
-@@ -367,6 +369,7 @@ enum {
- 	NAPIF_STATE_LISTED	 = BIT(NAPI_STATE_LISTED),
- 	NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
- 	NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
-+	NAPIF_STATE_THREADED	 = BIT(NAPI_STATE_THREADED),
- };
- 
- enum gro_result {
-@@ -2211,6 +2214,7 @@ struct net_device {
- 	struct lock_class_key	*qdisc_running_key;
- 	bool			proto_down;
- 	unsigned		wol_enabled:1;
-+	unsigned		threaded:1;
- 
- 	struct list_head	net_notifier_list;
- 
-@@ -2413,6 +2417,26 @@ void netif_napi_add(struct net_device *d
- 		    int (*poll)(struct napi_struct *, int), int weight);
- 
- /**
-+ *	netif_threaded_napi_add - initialize a threaded NAPI context
-+ *	@dev:  network device
-+ *	@napi: NAPI context
-+ *	@poll: polling function
-+ *	@weight: default weight
-+ *
-+ * This variant of netif_napi_add() should be used by drivers whose NAPI
-+ * poll functions are CPU intensive. It sets NAPI_STATE_THREADED on @napi,
-+ * so polling is scheduled on a high priority workqueue.
-+ */
-+static inline void netif_threaded_napi_add(struct net_device *dev,
-+					   struct napi_struct *napi,
-+					   int (*poll)(struct napi_struct *, int),
-+					   int weight)
-+{
-+	set_bit(NAPI_STATE_THREADED, &napi->state);
-+	netif_napi_add(dev, napi, poll, weight);
-+}
-+
-+/**
-  *	netif_tx_napi_add - initialize a NAPI context
-  *	@dev:  network device
-  *	@napi: NAPI context
---- a/net/core/dev.c
-+++ b/net/core/dev.c
-@@ -159,6 +159,7 @@ static DEFINE_SPINLOCK(offload_lock);
- struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
- struct list_head ptype_all __read_mostly;	/* Taps */
- static struct list_head offload_base __read_mostly;
-+static struct workqueue_struct *napi_workq __read_mostly;
- 
- static int netif_rx_internal(struct sk_buff *skb);
- static int call_netdevice_notifiers_info(unsigned long val,
-@@ -6407,6 +6408,11 @@ void __napi_schedule(struct napi_struct
- {
- 	unsigned long flags;
- 
-+	if (test_bit(NAPI_STATE_THREADED, &n->state)) {
-+		queue_work(napi_workq, &n->work);
-+		return;
-+	}
-+
- 	local_irq_save(flags);
- 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
- 	local_irq_restore(flags);
-@@ -6454,6 +6460,11 @@ EXPORT_SYMBOL(napi_schedule_prep);
-  */
- void __napi_schedule_irqoff(struct napi_struct *n)
- {
-+	if (test_bit(NAPI_STATE_THREADED, &n->state)) {
-+		queue_work(napi_workq, &n->work);
-+		return;
-+	}
-+
- 	____napi_schedule(this_cpu_ptr(&softnet_data), n);
- }
- EXPORT_SYMBOL(__napi_schedule_irqoff);
-@@ -6715,12 +6726,94 @@ static void init_gro_hash(struct napi_st
- 	napi->gro_bitmask = 0;
- }
- 
-+static int __napi_poll(struct napi_struct *n, bool *repoll)
-+{
-+	int work, weight;
-+
-+	weight = n->weight;
-+
-+	/* This NAPI_STATE_SCHED test is for avoiding a race
-+	 * with netpoll's poll_napi().  Only the entity which
-+	 * obtains the lock and sees NAPI_STATE_SCHED set will
-+	 * actually make the ->poll() call.  Therefore we avoid
-+	 * accidentally calling ->poll() when NAPI is not scheduled.
-+	 */
-+	work = 0;
-+	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
-+		work = n->poll(n, weight);
-+		trace_napi_poll(n, work, weight);
-+	}
-+
-+	if (unlikely(work > weight))
-+		pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
-+			    n->poll, work, weight);
-+
-+	if (likely(work < weight))
-+		return work;
-+
-+	/* Drivers must not modify the NAPI state if they
-+	 * consume the entire weight.  In such cases this code
-+	 * still "owns" the NAPI instance, so unless a disable is
-+	 * pending it sets *repoll so the caller can poll it again.
-+	 */
-+	if (unlikely(napi_disable_pending(n))) {
-+		napi_complete(n);
-+		return work;
-+	}
-+
-+	if (n->gro_bitmask) {
-+		/* Flush packets that have been held for too long.
-+		 * If HZ < 1000, flush all packets.
-+		 */
-+		napi_gro_flush(n, HZ >= 1000);
-+	}
-+
-+	gro_normal_list(n);
-+
-+	*repoll = true;
-+
-+	return work;
-+}
-+
-+static void napi_workfn(struct work_struct *work)
-+{
-+	struct napi_struct *n = container_of(work, struct napi_struct, work);
-+	void *have;
-+
-+	for (;;) {
-+		bool repoll = false;
-+
-+		local_bh_disable();
-+
-+		have = netpoll_poll_lock(n);
-+		__napi_poll(n, &repoll);
-+		netpoll_poll_unlock(have);
-+
-+		local_bh_enable();
-+
-+		if (!repoll)
-+			return;
-+
-+		if (!need_resched())
-+			continue;
-+
-+		/*
-+		 * A reschedule is due while more polling is pending: requeue
-+		 * the work and return, accepting the task-switch latency.
-+		 */
-+		queue_work(napi_workq, work);
-+		return;
-+	}
-+}
-+
- void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
- 		    int (*poll)(struct napi_struct *, int), int weight)
- {
- 	if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
- 		return;
- 
-+	if (dev->threaded)
-+		set_bit(NAPI_STATE_THREADED, &napi->state);
- 	INIT_LIST_HEAD(&napi->poll_list);
- 	INIT_HLIST_NODE(&napi->napi_hash_node);
- 	hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
-@@ -6738,6 +6831,7 @@ void netif_napi_add(struct net_device *d
- #ifdef CONFIG_NETPOLL
- 	napi->poll_owner = -1;
- #endif
-+	INIT_WORK(&napi->work, napi_workfn);
- 	set_bit(NAPI_STATE_SCHED, &napi->state);
- 	set_bit(NAPI_STATE_NPSVC, &napi->state);
- 	list_add_rcu(&napi->dev_list, &dev->napi_list);
-@@ -6780,6 +6874,7 @@ void __netif_napi_del(struct napi_struct
- 	if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
- 		return;
- 
-+	cancel_work_sync(&napi->work);
- 	napi_hash_del(napi);
- 	list_del_rcu(&napi->dev_list);
- 	napi_free_frags(napi);
-@@ -6791,53 +6886,19 @@ EXPORT_SYMBOL(__netif_napi_del);
- 
- static int napi_poll(struct napi_struct *n, struct list_head *repoll)
- {
-+	bool do_repoll = false;
- 	void *have;
--	int work, weight;
-+	int work;
- 
- 	list_del_init(&n->poll_list);
- 
- 	have = netpoll_poll_lock(n);
- 
--	weight = n->weight;
-+	work = __napi_poll(n, &do_repoll);
- 
--	/* This NAPI_STATE_SCHED test is for avoiding a race
--	 * with netpoll's poll_napi().  Only the entity which
--	 * obtains the lock and sees NAPI_STATE_SCHED set will
--	 * actually make the ->poll() call.  Therefore we avoid
--	 * accidentally calling ->poll() when NAPI is not scheduled.
--	 */
--	work = 0;
--	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
--		work = n->poll(n, weight);
--		trace_napi_poll(n, work, weight);
--	}
--
--	if (unlikely(work > weight))
--		pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
--			    n->poll, work, weight);
--
--	if (likely(work < weight))
-+	if (!do_repoll)
- 		goto out_unlock;
- 
--	/* Drivers must not modify the NAPI state if they
--	 * consume the entire weight.  In such cases this code
--	 * still "owns" the NAPI instance and therefore can
--	 * move the instance around on the list at-will.
--	 */
--	if (unlikely(napi_disable_pending(n))) {
--		napi_complete(n);
--		goto out_unlock;
--	}
--
--	if (n->gro_bitmask) {
--		/* flush too old packets
--		 * If HZ < 1000, flush all packets.
--		 */
--		napi_gro_flush(n, HZ >= 1000);
--	}
--
--	gro_normal_list(n);
--
- 	/* Some drivers may have called napi_schedule
- 	 * prior to exhausting their budget.
- 	 */
-@@ -11333,6 +11394,10 @@ static int __init net_dev_init(void)
- 		sd->backlog.weight = weight_p;
- 	}
- 
-+	napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI,
-+				     WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS);
-+	BUG_ON(!napi_workq);
-+
- 	dev_boot_phase = 0;
- 
- 	/* The loopback device is special if any other network devices