From 268210cec8662ab6d6f8744a888c3a41bb27227c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 25 Nov 2020 19:51:31 +0100 Subject: mac80211: add fq performace improvements Improves performance under load Signed-off-by: Felix Fietkau --- ...-do-not-maintain-a-backlog-sorted-list-of.patch | 314 +++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch (limited to 'package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch') diff --git a/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch b/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch new file mode 100644 index 0000000000..0e923991db --- /dev/null +++ b/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch @@ -0,0 +1,314 @@ +From: Felix Fietkau +Date: Wed, 25 Nov 2020 18:10:34 +0100 +Subject: [PATCH] net/fq_impl: do not maintain a backlog-sorted list of + flows + +A sorted flow list is only needed to drop packets in the biggest flow when +hitting the overmemory condition. +By scanning flows only when needed, we can avoid paying the cost of +maintaining the list under normal conditions +In order to avoid scanning lots of empty flows and touching too many cold +cache lines, a bitmap of flows with backlog is maintained + +Signed-off-by: Felix Fietkau +--- + +--- a/include/net/fq.h ++++ b/include/net/fq.h +@@ -19,8 +19,6 @@ struct fq_tin; + * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++ + * (deficit round robin) based round robin queuing similar to the one + * found in net/sched/sch_fq_codel.c +- * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of +- * fat flows and efficient head-dropping if packet limit is reached + * @queue: sk_buff queue to hold packets + * @backlog: number of bytes pending in the queue. The number of packets can be + * found in @queue.qlen +@@ -29,7 +27,6 @@ struct fq_tin; + struct fq_flow { + struct fq_tin *tin; + struct list_head flowchain; +- struct list_head backlogchain; + struct sk_buff_head queue; + u32 backlog; + int deficit; +@@ -47,6 +44,7 @@ struct fq_flow { + struct fq_tin { + struct list_head new_flows; + struct list_head old_flows; ++ struct list_head tin_list; + struct fq_flow default_flow; + u32 backlog_bytes; + u32 backlog_packets; +@@ -60,14 +58,14 @@ struct fq_tin { + /** + * struct fq - main container for fair queuing purposes + * +- * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient +- * head-dropping when @backlog reaches @limit + * @limit: max number of packets that can be queued across all flows + * @backlog: number of packets queued across all flows + */ + struct fq { + struct fq_flow *flows; +- struct list_head backlogs; ++ unsigned long *flows_bitmap; ++ ++ struct list_head tin_backlog; + spinlock_t lock; + u32 flows_cnt; + u32 limit; +--- a/include/net/fq_impl.h ++++ b/include/net/fq_impl.h +@@ -17,12 +17,24 @@ __fq_adjust_removal(struct fq *fq, struc + unsigned int bytes, unsigned int truesize) + { + struct fq_tin *tin = flow->tin; ++ int idx; + + tin->backlog_bytes -= bytes; + tin->backlog_packets -= packets; + flow->backlog -= bytes; + fq->backlog -= packets; + fq->memory_usage -= truesize; ++ ++ if (flow->backlog) ++ return; ++ ++ if (flow == &tin->default_flow) { ++ list_del_init(&tin->tin_list); ++ return; ++ } ++ ++ idx = flow - fq->flows; ++ __clear_bit(idx, fq->flows_bitmap); + } + + static void fq_adjust_removal(struct fq *fq, +@@ -32,24 +44,6 @@ static void fq_adjust_removal(struct fq + __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize); + } + +-static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow) +-{ +- struct fq_flow *i; +- +- if (flow->backlog == 0) { +- list_del_init(&flow->backlogchain); +- } else { +- i = flow; +- +- list_for_each_entry_continue(i, &fq->backlogs, backlogchain) +- if (i->backlog < flow->backlog) +- break; +- +- list_move_tail(&flow->backlogchain, +- &i->backlogchain); +- } +-} +- + static struct sk_buff *fq_flow_dequeue(struct fq *fq, + struct fq_flow *flow) + { +@@ -62,7 +56,6 @@ static struct sk_buff *fq_flow_dequeue(s + return NULL; + + fq_adjust_removal(fq, flow, skb); +- fq_rejigger_backlog(fq, flow); + + return skb; + } +@@ -90,7 +83,6 @@ static int fq_flow_drop(struct fq *fq, s + } while (packets < pending); + + __fq_adjust_removal(fq, flow, packets, bytes, truesize); +- fq_rejigger_backlog(fq, flow); + + return packets; + } +@@ -170,22 +162,36 @@ static struct fq_flow *fq_flow_classify( + return flow; + } + +-static void fq_recalc_backlog(struct fq *fq, +- struct fq_tin *tin, +- struct fq_flow *flow) ++static struct fq_flow *fq_find_fattest_flow(struct fq *fq) + { +- struct fq_flow *i; ++ struct fq_tin *tin; ++ struct fq_flow *flow = NULL; ++ u32 len = 0; ++ int i; + +- if (list_empty(&flow->backlogchain)) +- list_add_tail(&flow->backlogchain, &fq->backlogs); ++ for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) { ++ struct fq_flow *cur = &fq->flows[i]; ++ unsigned int cur_len; + +- i = flow; +- list_for_each_entry_continue_reverse(i, &fq->backlogs, +- backlogchain) +- if (i->backlog > flow->backlog) +- break; ++ cur_len = cur->backlog; ++ if (cur_len <= len) ++ continue; + +- list_move(&flow->backlogchain, &i->backlogchain); ++ flow = cur; ++ len = cur_len; ++ } ++ ++ list_for_each_entry(tin, &fq->tin_backlog, tin_list) { ++ unsigned int cur_len = tin->default_flow.backlog; ++ ++ if (cur_len <= len) ++ continue; ++ ++ flow = &tin->default_flow; ++ len = cur_len; ++ } ++ ++ return flow; + } + + static void fq_tin_enqueue(struct fq *fq, +@@ -200,6 +206,13 @@ static void fq_tin_enqueue(struct fq *fq + + flow = fq_flow_classify(fq, tin, idx, skb); + ++ if (!flow->backlog) { ++ if (flow != &tin->default_flow) ++ __set_bit(idx, fq->flows_bitmap); ++ else if (list_empty(&tin->tin_list)) ++ list_add(&tin->tin_list, &fq->tin_backlog); ++ } ++ + flow->tin = tin; + flow->backlog += skb->len; + tin->backlog_bytes += skb->len; +@@ -207,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq + fq->memory_usage += skb->truesize; + fq->backlog++; + +- fq_recalc_backlog(fq, tin, flow); +- + if (list_empty(&flow->flowchain)) { + flow->deficit = fq->quantum; + list_add_tail(&flow->flowchain, +@@ -218,9 +229,7 @@ static void fq_tin_enqueue(struct fq *fq + __skb_queue_tail(&flow->queue, skb); + oom = (fq->memory_usage > fq->memory_limit); + while (fq->backlog > fq->limit || oom) { +- flow = list_first_entry_or_null(&fq->backlogs, +- struct fq_flow, +- backlogchain); ++ flow = fq_find_fattest_flow(fq); + if (!flow) + return; + +@@ -255,8 +264,6 @@ static void fq_flow_filter(struct fq *fq + fq_adjust_removal(fq, flow, skb); + free_func(fq, tin, flow, skb); + } +- +- fq_rejigger_backlog(fq, flow); + } + + static void fq_tin_filter(struct fq *fq, +@@ -279,16 +286,18 @@ static void fq_flow_reset(struct fq *fq, + struct fq_flow *flow, + fq_skb_free_t free_func) + { ++ struct fq_tin *tin = flow->tin; + struct sk_buff *skb; + + while ((skb = fq_flow_dequeue(fq, flow))) +- free_func(fq, flow->tin, flow, skb); ++ free_func(fq, tin, flow, skb); + +- if (!list_empty(&flow->flowchain)) ++ if (!list_empty(&flow->flowchain)) { + list_del_init(&flow->flowchain); +- +- if (!list_empty(&flow->backlogchain)) +- list_del_init(&flow->backlogchain); ++ if (list_empty(&tin->new_flows) && ++ list_empty(&tin->old_flows)) ++ list_del_init(&tin->tin_list); ++ } + + flow->tin = NULL; + +@@ -314,6 +323,7 @@ static void fq_tin_reset(struct fq *fq, + fq_flow_reset(fq, flow, free_func); + } + ++ WARN_ON_ONCE(!list_empty(&tin->tin_list)); + WARN_ON_ONCE(tin->backlog_bytes); + WARN_ON_ONCE(tin->backlog_packets); + } +@@ -321,7 +331,6 @@ static void fq_tin_reset(struct fq *fq, + static void fq_flow_init(struct fq_flow *flow) + { + INIT_LIST_HEAD(&flow->flowchain); +- INIT_LIST_HEAD(&flow->backlogchain); + __skb_queue_head_init(&flow->queue); + } + +@@ -329,6 +338,7 @@ static void fq_tin_init(struct fq_tin *t + { + INIT_LIST_HEAD(&tin->new_flows); + INIT_LIST_HEAD(&tin->old_flows); ++ INIT_LIST_HEAD(&tin->tin_list); + fq_flow_init(&tin->default_flow); + } + +@@ -337,8 +347,8 @@ static int fq_init(struct fq *fq, int fl + int i; + + memset(fq, 0, sizeof(fq[0])); +- INIT_LIST_HEAD(&fq->backlogs); + spin_lock_init(&fq->lock); ++ INIT_LIST_HEAD(&fq->tin_backlog); + fq->flows_cnt = max_t(u32, flows_cnt, 1); + fq->quantum = 300; + fq->limit = 8192; +@@ -348,6 +358,14 @@ static int fq_init(struct fq *fq, int fl + if (!fq->flows) + return -ENOMEM; + ++ fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long), ++ GFP_KERNEL); ++ if (!fq->flows_bitmap) { ++ kvfree(fq->flows); ++ fq->flows = NULL; ++ return -ENOMEM; ++ } ++ + for (i = 0; i < fq->flows_cnt; i++) + fq_flow_init(&fq->flows[i]); + +@@ -364,6 +382,9 @@ static void fq_reset(struct fq *fq, + + kvfree(fq->flows); + fq->flows = NULL; ++ ++ kfree(fq->flows_bitmap); ++ fq->flows_bitmap = NULL; + } + + #endif +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -3364,8 +3364,6 @@ out_recalc: + if (head->len != orig_len) { + flow->backlog += head->len - orig_len; + tin->backlog_bytes += head->len - orig_len; +- +- fq_recalc_backlog(fq, tin, flow); + } + out: + spin_unlock_bh(&fq->lock); -- cgit v1.2.3