From 01bebc070c35d87c24a594fff7ee1911965759aa Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 13 Dec 2021 18:15:07 +0100 Subject: kernel: backport the upstream implementation of threaded NAPI to 5.4 The workqueue based implementation has a few corner cases and typically lower performance than the upstream one Signed-off-by: Felix Fietkau --- ...f_flow_table-add-hardware-offload-support.patch | 6 +- ...f_flow_table-support-hw-offload-through-v.patch | 6 +- ...80-NET-skip-GRO-for-foreign-MAC-addresses.patch | 12 +- ...net-add-support-for-threaded-NAPI-polling.patch | 355 --------------------- 4 files changed, 12 insertions(+), 367 deletions(-) delete mode 100644 target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch (limited to 'target/linux/generic/pending-5.4') diff --git a/target/linux/generic/pending-5.4/640-netfilter-nf_flow_table-add-hardware-offload-support.patch b/target/linux/generic/pending-5.4/640-netfilter-nf_flow_table-add-hardware-offload-support.patch index 02600ebed0..f5d9dcde99 100644 --- a/target/linux/generic/pending-5.4/640-netfilter-nf_flow_table-add-hardware-offload-support.patch +++ b/target/linux/generic/pending-5.4/640-netfilter-nf_flow_table-add-hardware-offload-support.patch @@ -23,7 +23,7 @@ Signed-off-by: Pablo Neira Ayuso --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -928,6 +928,13 @@ struct devlink; +@@ -922,6 +922,13 @@ struct devlink; struct tlsdev_ops; @@ -37,7 +37,7 @@ Signed-off-by: Pablo Neira Ayuso /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are -@@ -1160,6 +1167,10 @@ struct tlsdev_ops; +@@ -1154,6 +1161,10 @@ struct tlsdev_ops; * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * @@ -48,7 +48,7 @@ Signed-off-by: Pablo Neira Ayuso * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. Soft-devices (like dummy, team, etc) * which do not represent real hardware may define this to allow their -@@ -1407,6 +1418,8 @@ struct net_device_ops { +@@ -1401,6 +1412,8 @@ struct net_device_ops { int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); diff --git a/target/linux/generic/pending-5.4/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch b/target/linux/generic/pending-5.4/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch index 9f113c7108..b808c0257d 100644 --- a/target/linux/generic/pending-5.4/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch +++ b/target/linux/generic/pending-5.4/641-netfilter-nf_flow_table-support-hw-offload-through-v.patch @@ -15,7 +15,7 @@ Signed-off-by: Felix Fietkau --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -929,6 +929,7 @@ struct tlsdev_ops; +@@ -923,6 +923,7 @@ struct tlsdev_ops; struct flow_offload; @@ -23,7 +23,7 @@ Signed-off-by: Felix Fietkau enum flow_offload_type { FLOW_OFFLOAD_ADD = 0, -@@ -1167,8 +1168,15 @@ enum flow_offload_type { +@@ -1161,8 +1162,15 @@ enum flow_offload_type { * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * @@ -40,7 +40,7 @@ Signed-off-by: Felix Fietkau * Adds/deletes flow entry to/from net device flowtable. * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); -@@ -1418,8 +1426,11 @@ struct net_device_ops { +@@ -1412,8 +1420,11 @@ struct net_device_ops { int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); diff --git a/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch b/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch index b1ac7ffca0..45f643b650 100644 --- a/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch +++ b/target/linux/generic/pending-5.4/680-NET-skip-GRO-for-foreign-MAC-addresses.patch @@ -11,7 +11,7 @@ Signed-off-by: Felix Fietkau --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -1931,6 +1931,8 @@ struct net_device { +@@ -1927,6 +1927,8 @@ struct net_device { struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; @@ -32,7 +32,7 @@ Signed-off-by: Felix Fietkau __u16 tc_index; /* traffic control index */ --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -5498,6 +5498,9 @@ static enum gro_result dev_gro_receive(s +@@ -5538,6 +5538,9 @@ static enum gro_result dev_gro_receive(s int same_flow; int grow; @@ -42,7 +42,7 @@ Signed-off-by: Felix Fietkau if (netif_elide_gro(skb->dev)) goto normal; -@@ -7300,6 +7303,48 @@ static void __netdev_adjacent_dev_unlink +@@ -7481,6 +7484,48 @@ static void __netdev_adjacent_dev_unlink &upper_dev->adj_list.lower); } @@ -91,7 +91,7 @@ Signed-off-by: Felix Fietkau static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, -@@ -7350,6 +7395,7 @@ static int __netdev_upper_dev_link(struc +@@ -7531,6 +7576,7 @@ static int __netdev_upper_dev_link(struc if (ret) return ret; @@ -99,7 +99,7 @@ Signed-off-by: Felix Fietkau ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); -@@ -7443,6 +7489,7 @@ void netdev_upper_dev_unlink(struct net_ +@@ -7624,6 +7670,7 @@ void netdev_upper_dev_unlink(struct net_ __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); @@ -107,7 +107,7 @@ Signed-off-by: Felix Fietkau call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); -@@ -8173,6 +8220,7 @@ int dev_set_mac_address(struct net_devic +@@ -8354,6 +8401,7 @@ int dev_set_mac_address(struct net_devic if (err) return err; dev->addr_assign_type = NET_ADDR_SET; diff --git a/target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch b/target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch deleted file mode 100644 index 0e97f2140c..0000000000 --- a/target/linux/generic/pending-5.4/690-net-add-support-for-threaded-NAPI-polling.patch +++ /dev/null @@ -1,355 +0,0 @@ -From: Felix Fietkau -Date: Sun, 26 Jul 2020 14:03:21 +0200 -Subject: [PATCH] net: add support for threaded NAPI polling - -For some drivers (especially 802.11 drivers), doing a lot of work in the NAPI -poll function does not perform well. Since NAPI poll is bound to the CPU it -was scheduled from, we can easily end up with a few very busy CPUs spending -most of their time in softirq/ksoftirqd and some idle ones. - -Introduce threaded NAPI for such drivers based on a workqueue. The API is the -same except for using netif_threaded_napi_add instead of netif_napi_add. - -In my tests with mt76 on MT7621 using threaded NAPI + a thread for tx scheduling -improves LAN->WLAN bridging throughput by 10-50%. Throughput without threaded -NAPI is wildly inconsistent, depending on the CPU that runs the tx scheduling -thread. - -With threaded NAPI it seems stable and consistent (and higher than the best -results I got without it). - -Based on a patch by Hillf Danton - -Cc: Hillf Danton -Signed-off-by: Felix Fietkau ---- - ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -340,6 +340,7 @@ struct napi_struct { - struct list_head dev_list; - struct hlist_node napi_hash_node; - unsigned int napi_id; -+ struct work_struct work; - }; - - enum { -@@ -350,6 +351,7 @@ enum { - NAPI_STATE_HASHED, /* In NAPI hash (busy polling possible) */ - NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ - NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ -+ NAPI_STATE_THREADED, /* Use threaded NAPI */ - }; - - enum { -@@ -360,6 +362,7 @@ enum { - NAPIF_STATE_HASHED = BIT(NAPI_STATE_HASHED), - NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), - NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), -+ NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), - }; - - enum gro_result { -@@ -2101,6 +2104,7 @@ struct net_device { - struct lock_class_key addr_list_lock_key; - bool proto_down; - unsigned wol_enabled:1; -+ unsigned threaded:1; - }; - #define to_net_dev(d) container_of(d, struct net_device, dev) - -@@ -2281,6 +2285,26 @@ void netif_napi_add(struct net_device *d - int (*poll)(struct napi_struct *, int), int weight); - - /** -+ * netif_threaded_napi_add - initialize a NAPI context -+ * @dev: network device -+ * @napi: NAPI context -+ * @poll: polling function -+ * @weight: default weight -+ * -+ * This variant of netif_napi_add() should be used from drivers using NAPI -+ * with CPU intensive poll functions. -+ * This will schedule polling from a high priority workqueue -+ */ -+static inline void netif_threaded_napi_add(struct net_device *dev, -+ struct napi_struct *napi, -+ int (*poll)(struct napi_struct *, int), -+ int weight) -+{ -+ set_bit(NAPI_STATE_THREADED, &napi->state); -+ netif_napi_add(dev, napi, poll, weight); -+} -+ -+/** - * netif_tx_napi_add - initialize a NAPI context - * @dev: network device - * @napi: NAPI context ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -156,6 +156,7 @@ static DEFINE_SPINLOCK(offload_lock); - struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; - struct list_head ptype_all __read_mostly; /* Taps */ - static struct list_head offload_base __read_mostly; -+static struct workqueue_struct *napi_workq __read_mostly; - - static int netif_rx_internal(struct sk_buff *skb); - static int call_netdevice_notifiers_info(unsigned long val, -@@ -5940,6 +5941,11 @@ void __napi_schedule(struct napi_struct - { - unsigned long flags; - -+ if (test_bit(NAPI_STATE_THREADED, &n->state)) { -+ queue_work(napi_workq, &n->work); -+ return; -+ } -+ - local_irq_save(flags); - ____napi_schedule(this_cpu_ptr(&softnet_data), n); - local_irq_restore(flags); -@@ -5991,6 +5997,10 @@ EXPORT_SYMBOL(napi_schedule_prep); - */ - void __napi_schedule_irqoff(struct napi_struct *n) - { -+ if (test_bit(NAPI_STATE_THREADED, &n->state)) { -+ queue_work(napi_workq, &n->work); -+ return; -+ } - if (!IS_ENABLED(CONFIG_PREEMPT_RT)) - ____napi_schedule(this_cpu_ptr(&softnet_data), n); - else -@@ -6255,9 +6265,89 @@ static void init_gro_hash(struct napi_st - napi->gro_bitmask = 0; - } - -+static int __napi_poll(struct napi_struct *n, bool *repoll) -+{ -+ int work, weight; -+ -+ weight = n->weight; -+ -+ /* This NAPI_STATE_SCHED test is for avoiding a race -+ * with netpoll's poll_napi(). Only the entity which -+ * obtains the lock and sees NAPI_STATE_SCHED set will -+ * actually make the ->poll() call. Therefore we avoid -+ * accidentally calling ->poll() when NAPI is not scheduled. -+ */ -+ work = 0; -+ if (test_bit(NAPI_STATE_SCHED, &n->state)) { -+ work = n->poll(n, weight); -+ trace_napi_poll(n, work, weight); -+ } -+ -+ WARN_ON_ONCE(work > weight); -+ -+ if (likely(work < weight)) -+ return work; -+ -+ /* Drivers must not modify the NAPI state if they -+ * consume the entire weight. In such cases this code -+ * still "owns" the NAPI instance and therefore can -+ * move the instance around on the list at-will. -+ */ -+ if (unlikely(napi_disable_pending(n))) { -+ napi_complete(n); -+ return work; -+ } -+ -+ if (n->gro_bitmask) { -+ /* flush too old packets -+ * If HZ < 1000, flush all packets. -+ */ -+ napi_gro_flush(n, HZ >= 1000); -+ } -+ -+ gro_normal_list(n); -+ -+ *repoll = true; -+ -+ return work; -+} -+ -+static void napi_workfn(struct work_struct *work) -+{ -+ struct napi_struct *n = container_of(work, struct napi_struct, work); -+ void *have; -+ -+ for (;;) { -+ bool repoll = false; -+ -+ local_bh_disable(); -+ -+ have = netpoll_poll_lock(n); -+ __napi_poll(n, &repoll); -+ netpoll_poll_unlock(have); -+ -+ local_bh_enable(); -+ -+ if (!repoll) -+ return; -+ -+ if (!need_resched()) -+ continue; -+ -+ /* -+ * have to pay for the latency of task switch even if -+ * napi is scheduled -+ */ -+ queue_work(napi_workq, work); -+ return; -+ } -+} -+ - void netif_napi_add(struct net_device *dev, struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), int weight) - { -+ if (dev->threaded) -+ set_bit(NAPI_STATE_THREADED, &napi->state); - INIT_LIST_HEAD(&napi->poll_list); - hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); - napi->timer.function = napi_watchdog; -@@ -6274,6 +6364,7 @@ void netif_napi_add(struct net_device *d - #ifdef CONFIG_NETPOLL - napi->poll_owner = -1; - #endif -+ INIT_WORK(&napi->work, napi_workfn); - set_bit(NAPI_STATE_SCHED, &napi->state); - set_bit(NAPI_STATE_NPSVC, &napi->state); - list_add_rcu(&napi->dev_list, &dev->napi_list); -@@ -6314,6 +6405,7 @@ static void flush_gro_hash(struct napi_s - void netif_napi_del(struct napi_struct *napi) - { - might_sleep(); -+ cancel_work_sync(&napi->work); - if (napi_hash_del(napi)) - synchronize_net(); - list_del_init(&napi->dev_list); -@@ -6326,50 +6418,18 @@ EXPORT_SYMBOL(netif_napi_del); - - static int napi_poll(struct napi_struct *n, struct list_head *repoll) - { -+ bool do_repoll = false; - void *have; -- int work, weight; -+ int work; - - list_del_init(&n->poll_list); - - have = netpoll_poll_lock(n); - -- weight = n->weight; -- -- /* This NAPI_STATE_SCHED test is for avoiding a race -- * with netpoll's poll_napi(). Only the entity which -- * obtains the lock and sees NAPI_STATE_SCHED set will -- * actually make the ->poll() call. Therefore we avoid -- * accidentally calling ->poll() when NAPI is not scheduled. -- */ -- work = 0; -- if (test_bit(NAPI_STATE_SCHED, &n->state)) { -- work = n->poll(n, weight); -- trace_napi_poll(n, work, weight); -- } -- -- WARN_ON_ONCE(work > weight); -+ work = __napi_poll(n, &do_repoll); - -- if (likely(work < weight)) -- goto out_unlock; -- -- /* Drivers must not modify the NAPI state if they -- * consume the entire weight. In such cases this code -- * still "owns" the NAPI instance and therefore can -- * move the instance around on the list at-will. -- */ -- if (unlikely(napi_disable_pending(n))) { -- napi_complete(n); -+ if (!do_repoll) - goto out_unlock; -- } -- -- if (n->gro_bitmask) { -- /* flush too old packets -- * If HZ < 1000, flush all packets. -- */ -- napi_gro_flush(n, HZ >= 1000); -- } -- -- gro_normal_list(n); - - /* Some drivers may have called napi_schedule - * prior to exhausting their budget. -@@ -10349,6 +10409,10 @@ static int __init net_dev_init(void) - sd->backlog.weight = weight_p; - } - -+ napi_workq = alloc_workqueue("napi_workq", WQ_UNBOUND | WQ_HIGHPRI, -+ WQ_UNBOUND_MAX_ACTIVE | WQ_SYSFS); -+ BUG_ON(!napi_workq); -+ - dev_boot_phase = 0; - - /* The loopback device is special if any other network devices ---- a/net/core/net-sysfs.c -+++ b/net/core/net-sysfs.c -@@ -470,6 +470,52 @@ static ssize_t proto_down_store(struct d - } - NETDEVICE_SHOW_RW(proto_down, fmt_dec); - -+static int change_napi_threaded(struct net_device *dev, unsigned long val) -+{ -+ struct napi_struct *napi; -+ -+ if (list_empty(&dev->napi_list)) -+ return -EOPNOTSUPP; -+ -+ list_for_each_entry(napi, &dev->napi_list, dev_list) { -+ if (val) -+ set_bit(NAPI_STATE_THREADED, &napi->state); -+ else -+ clear_bit(NAPI_STATE_THREADED, &napi->state); -+ } -+ -+ return 0; -+} -+ -+static ssize_t napi_threaded_store(struct device *dev, -+ struct device_attribute *attr, -+ const char *buf, size_t len) -+{ -+ return netdev_store(dev, attr, buf, len, change_napi_threaded); -+} -+ -+static ssize_t napi_threaded_show(struct device *dev, -+ struct device_attribute *attr, -+ char *buf) -+{ -+ struct net_device *netdev = to_net_dev(dev); -+ struct napi_struct *napi; -+ bool enabled = false; -+ -+ if (!rtnl_trylock()) -+ return restart_syscall(); -+ -+ list_for_each_entry(napi, &netdev->napi_list, dev_list) { -+ if (test_bit(NAPI_STATE_THREADED, &napi->state)) -+ enabled = true; -+ } -+ -+ rtnl_unlock(); -+ -+ return sprintf(buf, fmt_dec, enabled); -+} -+static DEVICE_ATTR_RW(napi_threaded); -+ - static ssize_t phys_port_id_show(struct device *dev, - struct device_attribute *attr, char *buf) - { -@@ -581,6 +627,7 @@ static struct attribute *net_class_attrs - &dev_attr_flags.attr, - &dev_attr_tx_queue_len.attr, - &dev_attr_gro_flush_timeout.attr, -+ &dev_attr_napi_threaded.attr, - &dev_attr_phys_port_id.attr, - &dev_attr_phys_port_name.attr, - &dev_attr_phys_switch_id.attr, -- cgit v1.2.3