diff options
author | Felix Fietkau <nbd@nbd.name> | 2021-12-13 18:15:07 +0100 |
---|---|---|
committer | Felix Fietkau <nbd@nbd.name> | 2021-12-21 12:56:22 +0100 |
commit | 5414aa88aead04f1c54b4654f2e7e94384369527 (patch) | |
tree | b5b6a28da85cc1fbac5d84c69449c4e3405716e1 /target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch | |
parent | 4b52d895315e8416ab33e9276198428d7c0a1ba6 (diff) | |
download | upstream-5414aa88aead04f1c54b4654f2e7e94384369527.tar.gz upstream-5414aa88aead04f1c54b4654f2e7e94384369527.tar.bz2 upstream-5414aa88aead04f1c54b4654f2e7e94384369527.zip |
kernel: backport the upstream implementation of threaded NAPI to 5.4
The workqueue-based implementation has a few corner cases and typically
performs worse than the upstream one.
Signed-off-by: Felix Fietkau <nbd@nbd.name>
(cherry-picked from commit 01bebc070c35d87c24a594fff7ee1911965759aa)
Diffstat (limited to 'target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch')
-rw-r--r-- | target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch new file mode 100644 index 0000000000..c9bd4abb53 --- /dev/null +++ b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch @@ -0,0 +1,261 @@ +From: Wei Wang <weiwan@google.com> +Date: Mon, 8 Feb 2021 11:34:09 -0800 +Subject: [PATCH] net: implement threaded-able napi poll loop support + +This patch allows running each napi poll loop inside its own +kernel thread. +The kthread is created during netif_napi_add() if dev->threaded +is set. And threaded mode is enabled in napi_enable(). We will +provide a way to set dev->threaded and enable threaded mode +without a device up/down in the following patch. + +Once threaded mode is enabled and the kthread is +started, napi_schedule() will wake up that thread instead +of scheduling the softirq. + +The threaded poll loop behaves much like net_rx_action, +but it does not have to manipulate local irqs and uses +an explicit scheduling point based on netdev_budget. + +Co-developed-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> +Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> +Co-developed-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Wei Wang <weiwan@google.com> +Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +--- + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -340,6 +340,7 @@ struct napi_struct { + struct list_head dev_list; + struct hlist_node napi_hash_node; + unsigned int napi_id; ++ struct task_struct *thread; + }; + + enum { +@@ -350,6 +351,7 @@ enum { + NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ ++ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ + }; + + enum { +@@ -360,6 +362,7 @@ enum { + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), ++ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), + }; + + enum gro_result { +@@ -504,20 +507,7 @@ bool napi_hash_del(struct napi_struct *n + */ + void napi_disable(struct napi_struct *n); + +-/** +- * napi_enable - enable NAPI scheduling +- * @n: NAPI context +- * +- * Resume NAPI from being scheduled on this context. +- * Must be paired with napi_disable. +- */ +-static inline void napi_enable(struct napi_struct *n) +-{ +- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); +- smp_mb__before_atomic(); +- clear_bit(NAPI_STATE_SCHED, &n->state); +- clear_bit(NAPI_STATE_NPSVC, &n->state); +-} ++void napi_enable(struct napi_struct *n); + + /** + * napi_synchronize - wait until NAPI is not running +@@ -1783,6 +1773,8 @@ enum netdev_ml_priv_type { + * + * @wol_enabled: Wake-on-LAN is enabled + * ++ * @threaded: napi threaded mode is enabled ++ * + * FIXME: cleanup struct net_device such that network protocol info + * moves out. 
+ */ +@@ -2075,6 +2067,7 @@ struct net_device { + struct lock_class_key addr_list_lock_key; + bool proto_down; + unsigned wol_enabled:1; ++ unsigned threaded:1; + }; + #define to_net_dev(d) container_of(d, struct net_device, dev) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -91,6 +91,7 @@ + #include <linux/etherdevice.h> + #include <linux/ethtool.h> + #include <linux/skbuff.h> ++#include <linux/kthread.h> + #include <linux/bpf.h> + #include <linux/bpf_trace.h> + #include <net/net_namespace.h> +@@ -1289,6 +1290,27 @@ void netdev_notify_peers(struct net_devi + } + EXPORT_SYMBOL(netdev_notify_peers); + ++static int napi_threaded_poll(void *data); ++ ++static int napi_kthread_create(struct napi_struct *n) ++{ ++ int err = 0; ++ ++ /* Create and wake up the kthread once to put it in ++ * TASK_INTERRUPTIBLE mode to avoid the blocked task ++ * warning and work with loadavg. ++ */ ++ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", ++ n->dev->name, n->napi_id); ++ if (IS_ERR(n->thread)) { ++ err = PTR_ERR(n->thread); ++ pr_err("kthread_run failed with err %d\n", err); ++ n->thread = NULL; ++ } ++ ++ return err; ++} ++ + static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) + { + const struct net_device_ops *ops = dev->netdev_ops; +@@ -3885,6 +3907,21 @@ int gro_normal_batch __read_mostly = 8; + static inline void ____napi_schedule(struct softnet_data *sd, + struct napi_struct *napi) + { ++ struct task_struct *thread; ++ ++ if (test_bit(NAPI_STATE_THREADED, &napi->state)) { ++ /* Paired with smp_mb__before_atomic() in ++ * napi_enable(). Use READ_ONCE() to guarantee ++ * a complete read on napi->thread. Only call ++ * wake_up_process() when it's not NULL. 
++ */ ++ thread = READ_ONCE(napi->thread); ++ if (thread) { ++ wake_up_process(thread); ++ return; ++ } ++ } ++ + list_add_tail(&napi->poll_list, &sd->poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + } +@@ -6276,6 +6313,12 @@ void netif_napi_add(struct net_device *d + set_bit(NAPI_STATE_NPSVC, &napi->state); + list_add_rcu(&napi->dev_list, &dev->napi_list); + napi_hash_add(napi); ++ /* Create kthread for this napi if dev->threaded is set. ++ * Clear dev->threaded if kthread creation failed so that ++ * threaded mode will not be enabled in napi_enable(). ++ */ ++ if (dev->threaded && napi_kthread_create(napi)) ++ dev->threaded = 0; + } + EXPORT_SYMBOL(netif_napi_add); + +@@ -6292,9 +6335,28 @@ void napi_disable(struct napi_struct *n) + hrtimer_cancel(&n->timer); + + clear_bit(NAPI_STATE_DISABLE, &n->state); ++ clear_bit(NAPI_STATE_THREADED, &n->state); + } + EXPORT_SYMBOL(napi_disable); + ++/** ++ * napi_enable - enable NAPI scheduling ++ * @n: NAPI context ++ * ++ * Resume NAPI from being scheduled on this context. ++ * Must be paired with napi_disable. 
++ */ ++void napi_enable(struct napi_struct *n) ++{ ++ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); ++ smp_mb__before_atomic(); ++ clear_bit(NAPI_STATE_SCHED, &n->state); ++ clear_bit(NAPI_STATE_NPSVC, &n->state); ++ if (n->dev->threaded && n->thread) ++ set_bit(NAPI_STATE_THREADED, &n->state); ++} ++EXPORT_SYMBOL(napi_enable); ++ + static void flush_gro_hash(struct napi_struct *napi) + { + int i; +@@ -6319,6 +6381,11 @@ void netif_napi_del(struct napi_struct * + + flush_gro_hash(napi); + napi->gro_bitmask = 0; ++ ++ if (napi->thread) { ++ kthread_stop(napi->thread); ++ napi->thread = NULL; ++ } + } + EXPORT_SYMBOL(netif_napi_del); + +@@ -6398,6 +6465,51 @@ static int napi_poll(struct napi_struct + return work; + } + ++static int napi_thread_wait(struct napi_struct *napi) ++{ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ while (!kthread_should_stop() && !napi_disable_pending(napi)) { ++ if (test_bit(NAPI_STATE_SCHED, &napi->state)) { ++ WARN_ON(!list_empty(&napi->poll_list)); ++ __set_current_state(TASK_RUNNING); ++ return 0; ++ } ++ ++ schedule(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ __set_current_state(TASK_RUNNING); ++ return -1; ++} ++ ++static int napi_threaded_poll(void *data) ++{ ++ struct napi_struct *napi = data; ++ void *have; ++ ++ while (!napi_thread_wait(napi)) { ++ for (;;) { ++ bool repoll = false; ++ ++ local_bh_disable(); ++ ++ have = netpoll_poll_lock(napi); ++ __napi_poll(napi, &repoll); ++ netpoll_poll_unlock(have); ++ ++ __kfree_skb_flush(); ++ local_bh_enable(); ++ ++ if (!repoll) ++ break; ++ ++ cond_resched(); ++ } ++ } ++ return 0; ++} ++ + static __latent_entropy void net_rx_action(struct softirq_action *h) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); |