about | summary | refs | log | tree | commit | diff | stats
path: root/package
diff options
context:
space:
mode:
author: Felix Fietkau <nbd@nbd.name> 2019-03-16 19:50:44 +0100
committer: Felix Fietkau <nbd@nbd.name> 2019-03-16 19:59:02 +0100
commit: 4cf1394f51ba4b28edab016e36fb0f7a1c208f30 (patch)
tree: b4e35b4e35f2e72e37fcf5bf27e93517423d1b28 /package
parent: 1f68aac9d7b15013407afca581c5da09b45b9afa (diff)
download: upstream-4cf1394f51ba4b28edab016e36fb0f7a1c208f30.tar.gz
          upstream-4cf1394f51ba4b28edab016e36fb0f7a1c208f30.tar.bz2
          upstream-4cf1394f51ba4b28edab016e36fb0f7a1c208f30.zip
mac80211: add a few performance improvement patches
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Diffstat (limited to 'package')
-rw-r--r-- package/kernel/mac80211/patches/subsys/353-mac80211-mesh-drop-redundant-rcu_read_lock-unlock-ca.patch | 96
-rw-r--r-- package/kernel/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch | 124
-rw-r--r-- package/kernel/mac80211/patches/subsys/355-mac80211-run-late-dequeue-late-tx-handlers-without-h.patch | 55
-rw-r--r-- package/kernel/mac80211/patches/subsys/356-mac80211-set-NETIF_F_LLTX-when-using-intermediate-tx.patch | 22
4 files changed, 297 insertions, 0 deletions
diff --git a/package/kernel/mac80211/patches/subsys/353-mac80211-mesh-drop-redundant-rcu_read_lock-unlock-ca.patch b/package/kernel/mac80211/patches/subsys/353-mac80211-mesh-drop-redundant-rcu_read_lock-unlock-ca.patch
new file mode 100644
index 0000000000..ae5be18170
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/353-mac80211-mesh-drop-redundant-rcu_read_lock-unlock-ca.patch
@@ -0,0 +1,96 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 16 Mar 2019 17:43:58 +0100
+Subject: [PATCH] mac80211: mesh: drop redundant rcu_read_lock/unlock calls
+
+The callers of these functions are all within RCU locked sections
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/mac80211/mesh_hwmp.c
++++ b/net/mac80211/mesh_hwmp.c
+@@ -1112,16 +1112,13 @@ int mesh_nexthop_resolve(struct ieee8021
+ struct mesh_path *mpath;
+ struct sk_buff *skb_to_free = NULL;
+ u8 *target_addr = hdr->addr3;
+- int err = 0;
+
+ /* Nulls are only sent to peers for PS and should be pre-addressed */
+ if (ieee80211_is_qos_nullfunc(hdr->frame_control))
+ return 0;
+
+- rcu_read_lock();
+- err = mesh_nexthop_lookup(sdata, skb);
+- if (!err)
+- goto endlookup;
++ if (!mesh_nexthop_lookup(sdata, skb))
++ return 0;
+
+ /* no nexthop found, start resolving */
+ mpath = mesh_path_lookup(sdata, target_addr);
+@@ -1129,8 +1126,7 @@ int mesh_nexthop_resolve(struct ieee8021
+ mpath = mesh_path_add(sdata, target_addr);
+ if (IS_ERR(mpath)) {
+ mesh_path_discard_frame(sdata, skb);
+- err = PTR_ERR(mpath);
+- goto endlookup;
++ return PTR_ERR(mpath);
+ }
+ }
+
+@@ -1143,13 +1139,10 @@ int mesh_nexthop_resolve(struct ieee8021
+ info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
+ ieee80211_set_qos_hdr(sdata, skb);
+ skb_queue_tail(&mpath->frame_queue, skb);
+- err = -ENOENT;
+ if (skb_to_free)
+ mesh_path_discard_frame(sdata, skb_to_free);
+
+-endlookup:
+- rcu_read_unlock();
+- return err;
++ return -ENOENT;
+ }
+
+ /**
+@@ -1169,13 +1162,10 @@ int mesh_nexthop_lookup(struct ieee80211
+ struct sta_info *next_hop;
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
+ u8 *target_addr = hdr->addr3;
+- int err = -ENOENT;
+
+- rcu_read_lock();
+ mpath = mesh_path_lookup(sdata, target_addr);
+-
+ if (!mpath || !(mpath->flags & MESH_PATH_ACTIVE))
+- goto endlookup;
++ return -ENOENT;
+
+ if (time_after(jiffies,
+ mpath->exp_time -
+@@ -1190,12 +1180,10 @@ int mesh_nexthop_lookup(struct ieee80211
+ memcpy(hdr->addr1, next_hop->sta.addr, ETH_ALEN);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ ieee80211_mps_set_frame_flags(sdata, next_hop, hdr);
+- err = 0;
++ return 0;
+ }
+
+-endlookup:
+- rcu_read_unlock();
+- return err;
++ return -ENOENT;
+ }
+
+ void mesh_path_timer(struct timer_list *t)
+--- a/net/mac80211/mesh_pathtbl.c
++++ b/net/mac80211/mesh_pathtbl.c
+@@ -217,7 +217,7 @@ static struct mesh_path *mpath_lookup(st
+ {
+ struct mesh_path *mpath;
+
+- mpath = rhashtable_lookup_fast(&tbl->rhead, dst, mesh_rht_params);
++ mpath = rhashtable_lookup(&tbl->rhead, dst, mesh_rht_params);
+
+ if (mpath && mpath_expired(mpath)) {
+ spin_lock_bh(&mpath->state_lock);
diff --git a/package/kernel/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch b/package/kernel/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch
new file mode 100644
index 0000000000..2b6d8ab525
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch
@@ -0,0 +1,124 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 16 Mar 2019 17:57:38 +0100
+Subject: [PATCH] mac80211: calculate hash for fq without holding fq->lock
+ in itxq enqueue
+
+Reduces lock contention on enqueue/dequeue of iTXQ packets
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -107,21 +107,23 @@ begin:
+ return skb;
+ }
+
++static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
++{
++ u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
++
++ return reciprocal_scale(hash, fq->flows_cnt);
++}
++
+ static struct fq_flow *fq_flow_classify(struct fq *fq,
+- struct fq_tin *tin,
++ struct fq_tin *tin, u32 idx,
+ struct sk_buff *skb,
+ fq_flow_get_default_t get_default_func)
+ {
+ struct fq_flow *flow;
+- u32 hash;
+- u32 idx;
+
+ lockdep_assert_held(&fq->lock);
+
+- hash = skb_get_hash_perturb(skb, fq->perturbation);
+- idx = reciprocal_scale(hash, fq->flows_cnt);
+ flow = &fq->flows[idx];
+-
+ if (flow->tin && flow->tin != tin) {
+ flow = get_default_func(fq, tin, idx, skb);
+ tin->collisions++;
+@@ -153,7 +155,7 @@ static void fq_recalc_backlog(struct fq
+ }
+
+ static void fq_tin_enqueue(struct fq *fq,
+- struct fq_tin *tin,
++ struct fq_tin *tin, u32 idx,
+ struct sk_buff *skb,
+ fq_skb_free_t free_func,
+ fq_flow_get_default_t get_default_func)
+@@ -163,7 +165,7 @@ static void fq_tin_enqueue(struct fq *fq
+
+ lockdep_assert_held(&fq->lock);
+
+- flow = fq_flow_classify(fq, tin, skb, get_default_func);
++ flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
+
+ flow->tin = tin;
+ flow->backlog += skb->len;
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -1390,11 +1390,15 @@ static void ieee80211_txq_enqueue(struct
+ {
+ struct fq *fq = &local->fq;
+ struct fq_tin *tin = &txqi->tin;
++ u32 flow_idx = fq_flow_idx(fq, skb);
+
+ ieee80211_set_skb_enqueue_time(skb);
+- fq_tin_enqueue(fq, tin, skb,
++
++ spin_lock_bh(&fq->lock);
++ fq_tin_enqueue(fq, tin, flow_idx, skb,
+ fq_skb_free_func,
+ fq_flow_get_default_func);
++ spin_unlock_bh(&fq->lock);
+ }
+
+ static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
+@@ -1564,7 +1568,6 @@ static bool ieee80211_queue_skb(struct i
+ struct sta_info *sta,
+ struct sk_buff *skb)
+ {
+- struct fq *fq = &local->fq;
+ struct ieee80211_vif *vif;
+ struct txq_info *txqi;
+
+@@ -1582,9 +1585,7 @@ static bool ieee80211_queue_skb(struct i
+ if (!txqi)
+ return false;
+
+- spin_lock_bh(&fq->lock);
+ ieee80211_txq_enqueue(local, txqi, skb);
+- spin_unlock_bh(&fq->lock);
+
+ schedule_and_wake_txq(local, txqi);
+
+@@ -3198,6 +3199,7 @@ static bool ieee80211_amsdu_aggregate(st
+ u8 max_subframes = sta->sta.max_amsdu_subframes;
+ int max_frags = local->hw.max_tx_fragments;
+ int max_amsdu_len = sta->sta.max_amsdu_len;
++ u32 flow_idx;
+ int orig_truesize;
+ __be16 len;
+ void *data;
+@@ -3220,6 +3222,8 @@ static bool ieee80211_amsdu_aggregate(st
+ max_amsdu_len = min_t(int, max_amsdu_len,
+ sta->sta.max_rc_amsdu_len);
+
++ flow_idx = fq_flow_idx(fq, skb);
++
+ spin_lock_bh(&fq->lock);
+
+ /* TODO: Ideally aggregation should be done on dequeue to remain
+@@ -3227,7 +3231,8 @@ static bool ieee80211_amsdu_aggregate(st
+ */
+
+ tin = &txqi->tin;
+- flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
++ flow = fq_flow_classify(fq, tin, flow_idx, skb,
++ fq_flow_get_default_func);
+ head = skb_peek_tail(&flow->queue);
+ if (!head)
+ goto unlock;
diff --git a/package/kernel/mac80211/patches/subsys/355-mac80211-run-late-dequeue-late-tx-handlers-without-h.patch b/package/kernel/mac80211/patches/subsys/355-mac80211-run-late-dequeue-late-tx-handlers-without-h.patch
new file mode 100644
index 0000000000..3127c86822
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/355-mac80211-run-late-dequeue-late-tx-handlers-without-h.patch
@@ -0,0 +1,55 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 16 Mar 2019 18:00:12 +0100
+Subject: [PATCH] mac80211: run late dequeue late tx handlers without
+ holding fq->lock
+
+Reduces lock contention on enqueue/dequeue of iTXQ packets
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3507,6 +3507,7 @@ struct sk_buff *ieee80211_tx_dequeue(str
+ ieee80211_tx_result r;
+ struct ieee80211_vif *vif = txq->vif;
+
++begin:
+ spin_lock_bh(&fq->lock);
+
+ if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
+@@ -3523,11 +3524,12 @@ struct sk_buff *ieee80211_tx_dequeue(str
+ if (skb)
+ goto out;
+
+-begin:
+ skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
+ if (!skb)
+ goto out;
+
++ spin_unlock_bh(&fq->lock);
++
+ hdr = (struct ieee80211_hdr *)skb->data;
+ info = IEEE80211_SKB_CB(skb);
+
+@@ -3573,8 +3575,11 @@ begin:
+
+ skb = __skb_dequeue(&tx.skbs);
+
+- if (!skb_queue_empty(&tx.skbs))
++ if (!skb_queue_empty(&tx.skbs)) {
++ spin_lock_bh(&fq->lock);
+ skb_queue_splice_tail(&tx.skbs, &txqi->frags);
++ spin_unlock_bh(&fq->lock);
++ }
+ }
+
+ if (skb && skb_has_frag_list(skb) &&
+@@ -3613,6 +3618,7 @@ begin:
+ }
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
++ return skb;
+
+ out:
+ spin_unlock_bh(&fq->lock);
diff --git a/package/kernel/mac80211/patches/subsys/356-mac80211-set-NETIF_F_LLTX-when-using-intermediate-tx.patch b/package/kernel/mac80211/patches/subsys/356-mac80211-set-NETIF_F_LLTX-when-using-intermediate-tx.patch
new file mode 100644
index 0000000000..95ab3ab9fb
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/356-mac80211-set-NETIF_F_LLTX-when-using-intermediate-tx.patch
@@ -0,0 +1,22 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 16 Mar 2019 18:01:53 +0100
+Subject: [PATCH] mac80211: set NETIF_F_LLTX when using intermediate tx
+ queues
+
+When using iTXQ, tx sequence number allocation and statistics are run at
+dequeue time. Because of that, it is safe to enable NETIF_F_LLTX, which
+allows tx handlers to run on multiple CPUs in parallel.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/mac80211/iface.c
++++ b/net/mac80211/iface.c
+@@ -1301,6 +1301,7 @@ static void ieee80211_if_setup(struct ne
+ static void ieee80211_if_setup_no_queue(struct net_device *dev)
+ {
+ ieee80211_if_setup(dev);
++ dev->features |= NETIF_F_LLTX;
+ #if LINUX_VERSION_IS_GEQ(4,3,0)
+ dev->priv_flags |= IFF_NO_QUEUE;
+ #else