diff options
author | Felix Fietkau <nbd@nbd.name> | 2021-12-13 18:15:07 +0100 |
---|---|---|
committer | Felix Fietkau <nbd@nbd.name> | 2021-12-21 12:56:22 +0100 |
commit | 5414aa88aead04f1c54b4654f2e7e94384369527 (patch) | |
tree | b5b6a28da85cc1fbac5d84c69449c4e3405716e1 /target/linux/generic/backport-5.4 | |
parent | 4b52d895315e8416ab33e9276198428d7c0a1ba6 (diff) | |
download | upstream-5414aa88aead04f1c54b4654f2e7e94384369527.tar.gz upstream-5414aa88aead04f1c54b4654f2e7e94384369527.tar.bz2 upstream-5414aa88aead04f1c54b4654f2e7e94384369527.zip |
kernel: backport the upstream implementation of threaded NAPI to 5.4
The workqueue based implementation has a few corner cases and typically lower
performance than the upstream one
Signed-off-by: Felix Fietkau <nbd@nbd.name>
(cherry-picked from commit 01bebc070c35d87c24a594fff7ee1911965759aa)
Diffstat (limited to 'target/linux/generic/backport-5.4')
6 files changed, 673 insertions, 1 deletions
diff --git a/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch b/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch new file mode 100644 index 0000000000..961140aabb --- /dev/null +++ b/target/linux/generic/backport-5.4/600-v5.12-net-extract-napi-poll-functionality-to-__napi_poll.patch @@ -0,0 +1,88 @@ +From: Felix Fietkau <nbd@nbd.name> +Date: Mon, 8 Feb 2021 11:34:08 -0800 +Subject: [PATCH] net: extract napi poll functionality to __napi_poll() + +This commit introduces a new function __napi_poll() which does the main +logic of the existing napi_poll() function, and will be called by other +functions in later commits. +This idea and implementation is done by Felix Fietkau <nbd@nbd.name> and +is proposed as part of the patch to move napi work to work_queue +context. +This commit by itself is a code restructure. + +Signed-off-by: Felix Fietkau <nbd@nbd.name> +Signed-off-by: Wei Wang <weiwan@google.com> +Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -6322,15 +6322,10 @@ void netif_napi_del(struct napi_struct * + } + EXPORT_SYMBOL(netif_napi_del); + +-static int napi_poll(struct napi_struct *n, struct list_head *repoll) ++static int __napi_poll(struct napi_struct *n, bool *repoll) + { +- void *have; + int work, weight; + +- list_del_init(&n->poll_list); +- +- have = netpoll_poll_lock(n); +- + weight = n->weight; + + /* This NAPI_STATE_SCHED test is for avoiding a race +@@ -6348,7 +6343,7 @@ static int napi_poll(struct napi_struct + WARN_ON_ONCE(work > weight); + + if (likely(work < weight)) +- goto out_unlock; ++ return work; + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code +@@ -6357,7 +6352,7 @@ static int napi_poll(struct napi_struct + */ + if (unlikely(napi_disable_pending(n))) { + napi_complete(n); +- goto out_unlock; ++ return work; + } + + if (n->gro_bitmask) { +@@ -6375,12 +6370,29 @@ static int napi_poll(struct napi_struct + if (unlikely(!list_empty(&n->poll_list))) { + pr_warn_once("%s: Budget exhausted after napi rescheduled\n", + n->dev ? n->dev->name : "backlog"); +- goto out_unlock; ++ return work; + } + +- list_add_tail(&n->poll_list, repoll); ++ *repoll = true; ++ ++ return work; ++} ++ ++static int napi_poll(struct napi_struct *n, struct list_head *repoll) ++{ ++ bool do_repoll = false; ++ void *have; ++ int work; ++ ++ list_del_init(&n->poll_list); ++ ++ have = netpoll_poll_lock(n); ++ ++ work = __napi_poll(n, &do_repoll); ++ ++ if (do_repoll) ++ list_add_tail(&n->poll_list, repoll); + +-out_unlock: + netpoll_poll_unlock(have); + + return work; diff --git a/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch new file mode 100644 index 0000000000..c9bd4abb53 --- /dev/null +++ b/target/linux/generic/backport-5.4/601-v5.12-net-implement-threaded-able-napi-poll-loop-support.patch @@ -0,0 +1,261 @@ +From: Wei Wang <weiwan@google.com> +Date: Mon, 8 Feb 2021 11:34:09 -0800 +Subject: [PATCH] net: implement threaded-able napi poll loop support + +This patch allows running each napi poll loop inside its own +kernel thread. +The kthread is created during netif_napi_add() if dev->threaded +is set. And threaded mode is enabled in napi_enable(). We will +provide a way to set dev->threaded and enable threaded mode +without a device up/down in the following patch. + +Once that threaded mode is enabled and the kthread is +started, napi_schedule() will wake-up such thread instead +of scheduling the softirq. + +The threaded poll loop behaves quite likely the net_rx_action, +but it does not have to manipulate local irqs and uses +an explicit scheduling point based on netdev_budget. + +Co-developed-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> +Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> +Co-developed-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Wei Wang <weiwan@google.com> +Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -340,6 +340,7 @@ struct napi_struct { + struct list_head dev_list; + struct hlist_node napi_hash_node; + unsigned int napi_id; ++ struct task_struct *thread; + }; + + enum { +@@ -350,6 +351,7 @@ enum { + NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ + NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ ++ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ + }; + + enum { +@@ -360,6 +362,7 @@ enum { + NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), ++ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), + }; + + enum gro_result { +@@ -504,20 +507,7 @@ bool napi_hash_del(struct napi_struct *n + */ + void napi_disable(struct napi_struct *n); + +-/** +- * napi_enable - enable NAPI scheduling +- * @n: NAPI context +- * +- * Resume NAPI from being scheduled on this context. +- * Must be paired with napi_disable. +- */ +-static inline void napi_enable(struct napi_struct *n) +-{ +- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); +- smp_mb__before_atomic(); +- clear_bit(NAPI_STATE_SCHED, &n->state); +- clear_bit(NAPI_STATE_NPSVC, &n->state); +-} ++void napi_enable(struct napi_struct *n); + + /** + * napi_synchronize - wait until NAPI is not running +@@ -1783,6 +1773,8 @@ enum netdev_ml_priv_type { + * + * @wol_enabled: Wake-on-LAN is enabled + * ++ * @threaded: napi threaded mode is enabled ++ * + * FIXME: cleanup struct net_device such that network protocol info + * moves out. + */ +@@ -2075,6 +2067,7 @@ struct net_device { + struct lock_class_key addr_list_lock_key; + bool proto_down; + unsigned wol_enabled:1; ++ unsigned threaded:1; + }; + #define to_net_dev(d) container_of(d, struct net_device, dev) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -91,6 +91,7 @@ + #include <linux/etherdevice.h> + #include <linux/ethtool.h> + #include <linux/skbuff.h> ++#include <linux/kthread.h> + #include <linux/bpf.h> + #include <linux/bpf_trace.h> + #include <net/net_namespace.h> +@@ -1289,6 +1290,27 @@ void netdev_notify_peers(struct net_devi + } + EXPORT_SYMBOL(netdev_notify_peers); + ++static int napi_threaded_poll(void *data); ++ ++static int napi_kthread_create(struct napi_struct *n) ++{ ++ int err = 0; ++ ++ /* Create and wake up the kthread once to put it in ++ * TASK_INTERRUPTIBLE mode to avoid the blocked task ++ * warning and work with loadavg. ++ */ ++ n->thread = kthread_run(napi_threaded_poll, n, "napi/%s-%d", ++ n->dev->name, n->napi_id); ++ if (IS_ERR(n->thread)) { ++ err = PTR_ERR(n->thread); ++ pr_err("kthread_run failed with err %d\n", err); ++ n->thread = NULL; ++ } ++ ++ return err; ++} ++ + static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack) + { + const struct net_device_ops *ops = dev->netdev_ops; +@@ -3885,6 +3907,21 @@ int gro_normal_batch __read_mostly = 8; + static inline void ____napi_schedule(struct softnet_data *sd, + struct napi_struct *napi) + { ++ struct task_struct *thread; ++ ++ if (test_bit(NAPI_STATE_THREADED, &napi->state)) { ++ /* Paired with smp_mb__before_atomic() in ++ * napi_enable(). Use READ_ONCE() to guarantee ++ * a complete read on napi->thread. Only call ++ * wake_up_process() when it's not NULL. ++ */ ++ thread = READ_ONCE(napi->thread); ++ if (thread) { ++ wake_up_process(thread); ++ return; ++ } ++ } ++ + list_add_tail(&napi->poll_list, &sd->poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + } +@@ -6276,6 +6313,12 @@ void netif_napi_add(struct net_device *d + set_bit(NAPI_STATE_NPSVC, &napi->state); + list_add_rcu(&napi->dev_list, &dev->napi_list); + napi_hash_add(napi); ++ /* Create kthread for this napi if dev->threaded is set. ++ * Clear dev->threaded if kthread creation failed so that ++ * threaded mode will not be enabled in napi_enable(). ++ */ ++ if (dev->threaded && napi_kthread_create(napi)) ++ dev->threaded = 0; + } + EXPORT_SYMBOL(netif_napi_add); + +@@ -6292,9 +6335,28 @@ void napi_disable(struct napi_struct *n) + hrtimer_cancel(&n->timer); + + clear_bit(NAPI_STATE_DISABLE, &n->state); ++ clear_bit(NAPI_STATE_THREADED, &n->state); + } + EXPORT_SYMBOL(napi_disable); + ++/** ++ * napi_enable - enable NAPI scheduling ++ * @n: NAPI context ++ * ++ * Resume NAPI from being scheduled on this context. ++ * Must be paired with napi_disable. ++ */ ++void napi_enable(struct napi_struct *n) ++{ ++ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); ++ smp_mb__before_atomic(); ++ clear_bit(NAPI_STATE_SCHED, &n->state); ++ clear_bit(NAPI_STATE_NPSVC, &n->state); ++ if (n->dev->threaded && n->thread) ++ set_bit(NAPI_STATE_THREADED, &n->state); ++} ++EXPORT_SYMBOL(napi_enable); ++ + static void flush_gro_hash(struct napi_struct *napi) + { + int i; +@@ -6319,6 +6381,11 @@ void netif_napi_del(struct napi_struct * + + flush_gro_hash(napi); + napi->gro_bitmask = 0; ++ ++ if (napi->thread) { ++ kthread_stop(napi->thread); ++ napi->thread = NULL; ++ } + } + EXPORT_SYMBOL(netif_napi_del); + +@@ -6398,6 +6465,51 @@ static int napi_poll(struct napi_struct + return work; + } + ++static int napi_thread_wait(struct napi_struct *napi) ++{ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ while (!kthread_should_stop() && !napi_disable_pending(napi)) { ++ if (test_bit(NAPI_STATE_SCHED, &napi->state)) { ++ WARN_ON(!list_empty(&napi->poll_list)); ++ __set_current_state(TASK_RUNNING); ++ return 0; ++ } ++ ++ schedule(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ __set_current_state(TASK_RUNNING); ++ return -1; ++} ++ ++static int napi_threaded_poll(void *data) ++{ ++ struct napi_struct *napi = data; ++ void *have; ++ ++ while (!napi_thread_wait(napi)) { ++ for (;;) { ++ bool repoll = false; ++ ++ local_bh_disable(); ++ ++ have = netpoll_poll_lock(napi); ++ __napi_poll(napi, &repoll); ++ netpoll_poll_unlock(have); ++ ++ __kfree_skb_flush(); ++ local_bh_enable(); ++ ++ if (!repoll) ++ break; ++ ++ cond_resched(); ++ } ++ } ++ return 0; ++} ++ + static __latent_entropy void net_rx_action(struct softirq_action *h) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); diff --git a/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch b/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch new file mode 100644 index 0000000000..d8b932978c --- /dev/null +++ b/target/linux/generic/backport-5.4/602-v5.12-net-add-sysfs-attribute-to-control-napi-threaded-mod.patch @@ -0,0 +1,177 @@ +From: Wei Wang <weiwan@google.com> +Date: Mon, 8 Feb 2021 11:34:10 -0800 +Subject: [PATCH] net: add sysfs attribute to control napi threaded mode + +This patch adds a new sysfs attribute to the network device class. +Said attribute provides a per-device control to enable/disable the +threaded mode for all the napi instances of the given network device, +without the need for a device up/down. +User sets it to 1 or 0 to enable or disable threaded mode. +Note: when switching between threaded and the current softirq based mode +for a napi instance, it will not immediately take effect if the napi is +currently being polled. The mode switch will happen for the next time +napi_schedule() is called. + +Co-developed-by: Paolo Abeni <pabeni@redhat.com> +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Co-developed-by: Hannes Frederic Sowa <hannes@stressinduktion.org> +Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org> +Co-developed-by: Felix Fietkau <nbd@nbd.name> +Signed-off-by: Felix Fietkau <nbd@nbd.name> +Signed-off-by: Wei Wang <weiwan@google.com> +Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +--- + +--- a/Documentation/ABI/testing/sysfs-class-net ++++ b/Documentation/ABI/testing/sysfs-class-net +@@ -301,3 +301,18 @@ Contact: netdev@vger.kernel.org + Description: + 32-bit unsigned integer counting the number of times the link has + been down ++ ++What: /sys/class/net/<iface>/threaded ++Date: Jan 2021 ++KernelVersion: 5.12 ++Contact: netdev@vger.kernel.org ++Description: ++ Boolean value to control the threaded mode per device. User could ++ set this value to enable/disable threaded mode for all napi ++ belonging to this device, without the need to do device up/down. ++ ++ Possible values: ++ == ================================== ++ 0 threaded mode disabled for this dev ++ 1 threaded mode enabled for this dev ++ == ================================== +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -498,6 +498,8 @@ static inline bool napi_complete(struct + */ + bool napi_hash_del(struct napi_struct *napi); + ++int dev_set_threaded(struct net_device *dev, bool threaded); ++ + /** + * napi_disable - prevent NAPI from scheduling + * @n: NAPI context +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3911,8 +3911,9 @@ static inline void ____napi_schedule(str + + if (test_bit(NAPI_STATE_THREADED, &napi->state)) { + /* Paired with smp_mb__before_atomic() in +- * napi_enable(). Use READ_ONCE() to guarantee +- * a complete read on napi->thread. Only call ++ * napi_enable()/dev_set_threaded(). ++ * Use READ_ONCE() to guarantee a complete ++ * read on napi->thread. Only call + * wake_up_process() when it's not NULL. + */ + thread = READ_ONCE(napi->thread); +@@ -6290,6 +6291,49 @@ static void init_gro_hash(struct napi_st + napi->gro_bitmask = 0; + } + ++int dev_set_threaded(struct net_device *dev, bool threaded) ++{ ++ struct napi_struct *napi; ++ int err = 0; ++ ++ if (dev->threaded == threaded) ++ return 0; ++ ++ if (threaded) { ++ list_for_each_entry(napi, &dev->napi_list, dev_list) { ++ if (!napi->thread) { ++ err = napi_kthread_create(napi); ++ if (err) { ++ threaded = false; ++ break; ++ } ++ } ++ } ++ } ++ ++ dev->threaded = threaded; ++ ++ /* Make sure kthread is created before THREADED bit ++ * is set. ++ */ ++ smp_mb__before_atomic(); ++ ++ /* Setting/unsetting threaded mode on a napi might not immediately ++ * take effect, if the current napi instance is actively being ++ * polled. In this case, the switch between threaded mode and ++ * softirq mode will happen in the next round of napi_schedule(). ++ * This should not cause hiccups/stalls to the live traffic. ++ */ ++ list_for_each_entry(napi, &dev->napi_list, dev_list) { ++ if (threaded) ++ set_bit(NAPI_STATE_THREADED, &napi->state); ++ else ++ clear_bit(NAPI_STATE_THREADED, &napi->state); ++ } ++ ++ return err; ++} ++ + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) + { +--- a/net/core/net-sysfs.c ++++ b/net/core/net-sysfs.c +@@ -557,6 +557,45 @@ static ssize_t phys_switch_id_show(struc + } + static DEVICE_ATTR_RO(phys_switch_id); + ++static ssize_t threaded_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct net_device *netdev = to_net_dev(dev); ++ ssize_t ret = -EINVAL; ++ ++ if (!rtnl_trylock()) ++ return restart_syscall(); ++ ++ if (dev_isalive(netdev)) ++ ret = sprintf(buf, fmt_dec, netdev->threaded); ++ ++ rtnl_unlock(); ++ return ret; ++} ++ ++static int modify_napi_threaded(struct net_device *dev, unsigned long val) ++{ ++ int ret; ++ ++ if (list_empty(&dev->napi_list)) ++ return -EOPNOTSUPP; ++ ++ if (val != 0 && val != 1) ++ return -EOPNOTSUPP; ++ ++ ret = dev_set_threaded(dev, val); ++ ++ return ret; ++} ++ ++static ssize_t threaded_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t len) ++{ ++ return netdev_store(dev, attr, buf, len, modify_napi_threaded); ++} ++static DEVICE_ATTR_RW(threaded); ++ + static struct attribute *net_class_attrs[] __ro_after_init = { + &dev_attr_netdev_group.attr, + &dev_attr_type.attr, +@@ -587,6 +626,7 @@ static struct attribute *net_class_attrs + &dev_attr_proto_down.attr, + &dev_attr_carrier_up_count.attr, + &dev_attr_carrier_down_count.attr, ++ &dev_attr_threaded.attr, + NULL, + }; + ATTRIBUTE_GROUPS(net_class); diff --git a/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch b/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch new file mode 100644 index 0000000000..19c5a53a27 --- /dev/null +++ b/target/linux/generic/backport-5.4/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch @@ -0,0 +1,93 @@ +From: Wei Wang <weiwan@google.com> +Date: Mon, 1 Mar 2021 17:21:13 -0800 +Subject: [PATCH] net: fix race between napi kthread mode and busy poll + +Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to +determine if the kthread owns this napi and could call napi->poll() on +it. However, if socket busy poll is enabled, it is possible that the +busy poll thread grabs this SCHED bit (after the previous napi->poll() +invokes napi_complete_done() and clears SCHED bit) and tries to poll +on the same napi. napi_disable() could grab the SCHED bit as well. +This patch tries to fix this race by adding a new bit +NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in +____napi_schedule() if the threaded mode is enabled, and gets cleared +in napi_complete_done(), and we only poll the napi in kthread if this +bit is set. This helps distinguish the ownership of the napi between +kthread and other scenarios and fixes the race issue. + +Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") +Reported-by: Martin Zaharinov <micron10@gmail.com> +Suggested-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Wei Wang <weiwan@google.com> +Cc: Alexander Duyck <alexanderduyck@fb.com> +Cc: Eric Dumazet <edumazet@google.com> +Cc: Paolo Abeni <pabeni@redhat.com> +Cc: Hannes Frederic Sowa <hannes@stressinduktion.org> +--- + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -352,6 +352,7 @@ enum { + NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ + NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ + NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ ++ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ + }; + + enum { +@@ -363,6 +364,7 @@ enum { + NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), + NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), + NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), ++ NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), + }; + + enum gro_result { +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3918,6 +3918,8 @@ static inline void ____napi_schedule(str + */ + thread = READ_ONCE(napi->thread); + if (thread) { ++ if (thread->state != TASK_INTERRUPTIBLE) ++ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); + wake_up_process(thread); + return; + } +@@ -6078,7 +6080,8 @@ bool napi_complete_done(struct napi_stru + + WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); + +- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); ++ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | ++ NAPIF_STATE_SCHED_THREADED); + + /* If STATE_MISSED was set, leave STATE_SCHED set, + * because we will call napi->poll() one more time. +@@ -6511,16 +6514,25 @@ static int napi_poll(struct napi_struct + + static int napi_thread_wait(struct napi_struct *napi) + { ++ bool woken = false; ++ + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop() && !napi_disable_pending(napi)) { +- if (test_bit(NAPI_STATE_SCHED, &napi->state)) { ++ /* Testing SCHED_THREADED bit here to make sure the current ++ * kthread owns this napi and could poll on this napi. ++ * Testing SCHED bit is not enough because SCHED bit might be ++ * set by some other busy poll thread or by napi_disable(). ++ */ ++ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { + WARN_ON(!list_empty(&napi->poll_list)); + __set_current_state(TASK_RUNNING); + return 0; + } + + schedule(); ++ /* woken being true indicates this thread owns this napi. */ ++ woken = true; + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); diff --git a/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch b/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch new file mode 100644 index 0000000000..108cf809f8 --- /dev/null +++ b/target/linux/generic/backport-5.4/604-v5.12-net-fix-hangup-on-napi_disable-for-threaded-napi.patch @@ -0,0 +1,53 @@ +From: Paolo Abeni <pabeni@redhat.com> +Date: Fri, 9 Apr 2021 17:24:17 +0200 +Subject: [PATCH] net: fix hangup on napi_disable for threaded napi + +napi_disable() is subject to an hangup, when the threaded +mode is enabled and the napi is under heavy traffic. + +If the relevant napi has been scheduled and the napi_disable() +kicks in before the next napi_threaded_wait() completes - so +that the latter quits due to the napi_disable_pending() condition, +the existing code leaves the NAPI_STATE_SCHED bit set and the +napi_disable() loop waiting for such bit will hang. + +This patch addresses the issue by dropping the NAPI_STATE_DISABLE +bit test in napi_thread_wait(). The later napi_threaded_poll() +iteration will take care of clearing the NAPI_STATE_SCHED. + +This also addresses a related problem reported by Jakub: +before this patch a napi_disable()/napi_enable() pair killed +the napi thread, effectively disabling the threaded mode. +On the patched kernel napi_disable() simply stops scheduling +the relevant thread. + +v1 -> v2: + - let the main napi_thread_poll() loop clear the SCHED bit + +Reported-by: Jakub Kicinski <kuba@kernel.org> +Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support") +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://lore.kernel.org/r/883923fa22745a9589e8610962b7dc59df09fb1f.1617981844.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +--- + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -6518,7 +6518,7 @@ static int napi_thread_wait(struct napi_ + + set_current_state(TASK_INTERRUPTIBLE); + +- while (!kthread_should_stop() && !napi_disable_pending(napi)) { ++ while (!kthread_should_stop()) { + /* Testing SCHED_THREADED bit here to make sure the current + * kthread owns this napi and could poll on this napi. + * Testing SCHED bit is not enough because SCHED bit might be +@@ -6536,6 +6536,7 @@ static int napi_thread_wait(struct napi_ + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); ++ + return -1; + } + diff --git a/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch b/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch index a8181bca94..34ad5a1f4e 100644 --- a/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch +++ b/target/linux/generic/backport-5.4/700-v5.5-net-core-allow-fast-GRO-for-skbs-with-Ethernet-heade.patch @@ -66,7 +66,7 @@ Signed-off-by: David S. Miller <davem@davemloft.net> --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5432,8 +5432,7 @@ static inline void skb_gro_reset_offset( +@@ -5472,8 +5472,7 @@ static inline void skb_gro_reset_offset( NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; |