aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch')
-rw-r--r--target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch284
1 files changed, 284 insertions, 0 deletions
diff --git a/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch b/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch
new file mode 100644
index 0000000000..79b7832f2a
--- /dev/null
+++ b/target/linux/generic/pending-5.10/690-net-add-support-for-threaded-NAPI-polling.patch
@@ -0,0 +1,284 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 26 Jul 2020 14:03:21 +0200
+Subject: [PATCH] net: add support for threaded NAPI polling
+
+For some drivers (especially 802.11 drivers), doing a lot of work in the NAPI
+poll function does not perform well. Since NAPI poll is bound to the CPU it
+was scheduled from, we can easily end up with a few very busy CPUs spending
+most of their time in softirq/ksoftirqd and some idle ones.
+
+Introduce workqueue-based threaded NAPI for such drivers. The API is the same
+except for using netif_threaded_napi_add() instead of netif_napi_add().
+
+In my tests with mt76 on MT7621, using threaded NAPI + a thread for tx
+scheduling improves LAN->WLAN bridging throughput by 10-50%. Throughput
+without threaded NAPI is wildly inconsistent, depending on the CPU that runs
+the tx scheduling thread.
+
+With threaded NAPI, throughput seems stable and consistent (and higher than
+the best results I got without it).
+
+Based on a patch by Hillf Danton
+
+Cc: Hillf Danton <hdanton@sina.com>
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -347,6 +347,7 @@ struct napi_struct {
+ struct list_head dev_list;
+ struct hlist_node napi_hash_node;
+ unsigned int napi_id;
++ struct work_struct work;
+ };
+
+ enum {
+@@ -357,6 +358,7 @@ enum {
+ NAPI_STATE_LISTED, /* NAPI added to system lists */
+ NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+ NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
++ NAPI_STATE_THREADED, /* Use threaded NAPI */
+ };
+
+ enum {
+@@ -367,6 +369,7 @@ enum {
+ NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED),
+ NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
+ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
++ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED),
+ };
+
+ enum gro_result {
+@@ -2411,6 +2414,26 @@ void netif_napi_add(struct net_device *d
+ int (*poll)(struct napi_struct *, int), int weight);
+
+ /**
++ * netif_threaded_napi_add - initialize a threaded NAPI context
++ * @dev: network device
++ * @napi: NAPI context
++ * @poll: polling function
++ * @weight: default weight
++ *
++ * This variant of netif_napi_add() should be used by drivers whose NAPI poll
++ * functions are CPU intensive. It sets NAPI_STATE_THREADED, so polling is
++ * scheduled from a high priority workqueue.
++ */
++static inline void netif_threaded_napi_add(struct net_device *dev,
++ struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int),
++ int weight)
++{
++ set_bit(NAPI_STATE_THREADED, &napi->state);
++ netif_napi_add(dev, napi, poll, weight);
++}
++
++/**
+ * netif_tx_napi_add - initialize a NAPI context
+ * @dev: network device
+ * @napi: NAPI context
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -159,6 +159,7 @@ static DEFINE_SPINLOCK(offload_lock);
+ struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
+ struct list_head ptype_all __read_mostly; /* Taps */
+ static struct list_head offload_base __read_mostly;
++static struct workqueue_struct *napi_workq __read_mostly;
+
+ static int netif_rx_internal(struct sk_buff *skb);
+ static int call_netdevice_notifiers_info(unsigned long val,
+@@ -6404,6 +6405,11 @@ void __napi_schedule(struct napi_struct
+ {
+ unsigned long flags;
+
++ if (test_bit(NAPI_STATE_THREADED, &n->state)) {
++ queue_work(napi_workq, &n->work);
++ return;
++ }
++
+ local_irq_save(flags);
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ local_irq_restore(flags);
+@@ -6451,6 +6457,11 @@ EXPORT_SYMBOL(napi_schedule_prep);
+ */
+ void __napi_schedule_irqoff(struct napi_struct *n)
+ {
++ if (test_bit(NAPI_STATE_THREADED, &n->state)) {
++ queue_work(napi_workq, &n->work); /* polled by napi_workfn() */
++ return;
++ }
++
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ }
+ EXPORT_SYMBOL(__napi_schedule_irqoff);
+@@ -6712,6 +6723,86 @@ static void init_gro_hash(struct napi_st
+ napi->gro_bitmask = 0;
+ }
+
++static int __napi_poll(struct napi_struct *n, bool *repoll)
++{
++ int work, weight;
++
++ weight = n->weight;
++
++ /* This NAPI_STATE_SCHED test is for avoiding a race
++ * with netpoll's poll_napi(). Only the entity which
++ * obtains the lock and sees NAPI_STATE_SCHED set will
++ * actually make the ->poll() call. Therefore we avoid
++ * accidentally calling ->poll() when NAPI is not scheduled.
++ */
++ work = 0;
++ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
++ work = n->poll(n, weight);
++ trace_napi_poll(n, work, weight);
++ }
++
++ if (unlikely(work > weight))
++ pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
++ n->poll, work, weight);
++
++ if (likely(work < weight))
++ return work; /* budget not exhausted: *repoll is left untouched */
++
++ /* Drivers must not modify the NAPI state if they
++ * consume the entire weight. In such cases this code
++ * still "owns" the NAPI instance and therefore can
++ * move the instance around on the list at-will.
++ */
++ if (unlikely(napi_disable_pending(n))) {
++ napi_complete(n);
++ return work; /* disable pending: complete instead of repolling */
++ }
++
++ if (n->gro_bitmask) {
++ /* flush too old packets
++ * If HZ < 1000, flush all packets.
++ */
++ napi_gro_flush(n, HZ >= 1000);
++ }
++
++ gro_normal_list(n);
++
++ *repoll = true; /* full budget consumed: tell the caller to poll again */
++
++ return work;
++}
++
++static void napi_workfn(struct work_struct *work)
++{
++ struct napi_struct *n = container_of(work, struct napi_struct, work);
++ void *have;
++
++ for (;;) {
++ bool repoll = false;
++
++ local_bh_disable();
++
++ have = netpoll_poll_lock(n);
++ __napi_poll(n, &repoll);
++ netpoll_poll_unlock(have);
++
++ local_bh_enable();
++
++ if (!repoll)
++ return; /* __napi_poll() did not request another pass */
++
++ if (!need_resched())
++ continue; /* keep polling from this work item */
++
++ /*
++ * need_resched() is set: requeue the work and return instead of
++ * looping, paying the task switch latency although NAPI is scheduled
++ */
++ queue_work(napi_workq, work);
++ return;
++ }
++}
++
+ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
+ {
+@@ -6735,6 +6826,7 @@ void netif_napi_add(struct net_device *d
+ #ifdef CONFIG_NETPOLL
+ napi->poll_owner = -1;
+ #endif
++ INIT_WORK(&napi->work, napi_workfn);
+ set_bit(NAPI_STATE_SCHED, &napi->state);
+ set_bit(NAPI_STATE_NPSVC, &napi->state);
+ list_add_rcu(&napi->dev_list, &dev->napi_list);
+@@ -6777,6 +6869,7 @@ void __netif_napi_del(struct napi_struct
+ if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
+ return;
+
++ cancel_work_sync(&napi->work);
+ napi_hash_del(napi);
+ list_del_rcu(&napi->dev_list);
+ napi_free_frags(napi);
+@@ -6788,52 +6881,18 @@ EXPORT_SYMBOL(__netif_napi_del);
+
+ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+ {
++ bool do_repoll = false;
+ void *have;
+- int work, weight;
++ int work;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+- weight = n->weight;
+-
+- /* This NAPI_STATE_SCHED test is for avoiding a race
+- * with netpoll's poll_napi(). Only the entity which
+- * obtains the lock and sees NAPI_STATE_SCHED set will
+- * actually make the ->poll() call. Therefore we avoid
+- * accidentally calling ->poll() when NAPI is not scheduled.
+- */
+- work = 0;
+- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+- work = n->poll(n, weight);
+- trace_napi_poll(n, work, weight);
+- }
+-
+- if (unlikely(work > weight))
+- pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
+- n->poll, work, weight);
+-
+- if (likely(work < weight))
+- goto out_unlock;
++ work = __napi_poll(n, &do_repoll);
+
+- /* Drivers must not modify the NAPI state if they
+- * consume the entire weight. In such cases this code
+- * still "owns" the NAPI instance and therefore can
+- * move the instance around on the list at-will.
+- */
+- if (unlikely(napi_disable_pending(n))) {
+- napi_complete(n);
++ if (!do_repoll)
+ goto out_unlock;
+- }
+-
+- if (n->gro_bitmask) {
+- /* flush too old packets
+- * If HZ < 1000, flush all packets.
+- */
+- napi_gro_flush(n, HZ >= 1000);
+- }
+-
+- gro_normal_list(n);
+
+ /* Some drivers may have called napi_schedule
+ * prior to exhausting their budget.
+@@ -11288,6 +11347,10 @@ static int __init net_dev_init(void)
+ sd->backlog.weight = weight_p;
+ }
+
++ napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI |
++ WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
++ BUG_ON(!napi_workq);
++
+ dev_boot_phase = 0;
+
+ /* The loopback device is special if any other network devices