aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch
diff options
context:
space:
mode:
authorFelix Fietkau <nbd@openwrt.org>2015-03-12 15:27:44 +0000
committerFelix Fietkau <nbd@openwrt.org>2015-03-12 15:27:44 +0000
commit944612680d9382720671f1fb731a24eccbbf118c (patch)
tree66c5e327364854d239af23005e75553728c47128 /target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch
parent0e6ecf707d7e2f10bb7465508548b962cef56ac3 (diff)
downloadupstream-944612680d9382720671f1fb731a24eccbbf118c.tar.gz
upstream-944612680d9382720671f1fb731a24eccbbf118c.tar.bz2
upstream-944612680d9382720671f1fb731a24eccbbf118c.zip
kernel: backport fib_trie improvements/fixes from 4.0-rc
Signed-off-by: Felix Fietkau <nbd@openwrt.org> SVN-Revision: 44695
Diffstat (limited to 'target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch')
-rw-r--r--target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch346
1 files changed, 346 insertions, 0 deletions
diff --git a/target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch b/target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch
new file mode 100644
index 0000000000..53761d4207
--- /dev/null
+++ b/target/linux/generic/patches-3.18/080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch
@@ -0,0 +1,346 @@
+From: Alexander Duyck <alexander.h.duyck@redhat.com>
+Date: Wed, 31 Dec 2014 10:56:12 -0800
+Subject: [PATCH] fib_trie: Update meaning of pos to represent unchecked
+ bits
+
+This change moves the pos value to the other side of the "bits" field. By
+doing this it actually simplifies a significant amount of code in the trie.
+
+For example when halving a tree we know that the bit lost exists at
+oldnode->pos, and if we inflate the tree the new bit being add is at
+tn->pos. Previously to find those bits you would have to subtract pos and
+bits from the keylength or start with a value of (1 << 31) and then shift
+that.
+
+There are a number of spots throughout the code that benefit from this. In
+the case of the hot-path searches the main advantage is that we can drop 2
+or more operations from the search path as we no longer need to compute the
+value for the index to be shifted by and can instead just use the raw pos
+value.
+
+In addition the tkey_extract_bits is now defunct and can be replaced by
+get_index since the two operations were doing the same thing, but now
+get_index does it much more quickly as it is only an xor and shift versus a
+pair of shifts and a subtraction.
+
+Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -90,8 +90,7 @@ typedef unsigned int t_key;
+ #define IS_TNODE(n) ((n)->bits)
+ #define IS_LEAF(n) (!(n)->bits)
+
+-#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
+-#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
++#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)
+
+ struct tnode {
+ t_key key;
+@@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_ch
+ return rcu_dereference_rtnl(tn->child[i]);
+ }
+
+-static inline t_key mask_pfx(t_key k, unsigned int l)
+-{
+- return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
+-}
+-
+-static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
+-{
+- if (offset < KEYLENGTH)
+- return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
+- else
+- return 0;
+-}
+-
+-/*
+- To understand this stuff, an understanding of keys and all their bits is
+- necessary. Every node in the trie has a key associated with it, but not
+- all of the bits in that key are significant.
+-
+- Consider a node 'n' and its parent 'tp'.
+-
+- If n is a leaf, every bit in its key is significant. Its presence is
+- necessitated by path compression, since during a tree traversal (when
+- searching for a leaf - unless we are doing an insertion) we will completely
+- ignore all skipped bits we encounter. Thus we need to verify, at the end of
+- a potentially successful search, that we have indeed been walking the
+- correct key path.
+-
+- Note that we can never "miss" the correct key in the tree if present by
+- following the wrong path. Path compression ensures that segments of the key
+- that are the same for all keys with a given prefix are skipped, but the
+- skipped part *is* identical for each node in the subtrie below the skipped
+- bit! trie_insert() in this implementation takes care of that - note the
+- call to tkey_sub_equals() in trie_insert().
+-
+- if n is an internal node - a 'tnode' here, the various parts of its key
+- have many different meanings.
+-
+- Example:
+- _________________________________________________________________
+- | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
+- -----------------------------------------------------------------
+- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+-
+- _________________________________________________________________
+- | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
+- -----------------------------------------------------------------
+- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+-
+- tp->pos = 7
+- tp->bits = 3
+- n->pos = 15
+- n->bits = 4
+-
+- First, let's just ignore the bits that come before the parent tp, that is
+- the bits from 0 to (tp->pos-1). They are *known* but at this point we do
+- not use them for anything.
+-
+- The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
+- index into the parent's child array. That is, they will be used to find
+- 'n' among tp's children.
+-
+- The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
+- for the node n.
+-
+- All the bits we have seen so far are significant to the node n. The rest
+- of the bits are really not needed or indeed known in n->key.
+-
+- The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
+- n's child array, and will of course be different for each child.
+-
+-
+- The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
+- at this point.
+-
+-*/
++/* To understand this stuff, an understanding of keys and all their bits is
++ * necessary. Every node in the trie has a key associated with it, but not
++ * all of the bits in that key are significant.
++ *
++ * Consider a node 'n' and its parent 'tp'.
++ *
++ * If n is a leaf, every bit in its key is significant. Its presence is
++ * necessitated by path compression, since during a tree traversal (when
++ * searching for a leaf - unless we are doing an insertion) we will completely
++ * ignore all skipped bits we encounter. Thus we need to verify, at the end of
++ * a potentially successful search, that we have indeed been walking the
++ * correct key path.
++ *
++ * Note that we can never "miss" the correct key in the tree if present by
++ * following the wrong path. Path compression ensures that segments of the key
++ * that are the same for all keys with a given prefix are skipped, but the
++ * skipped part *is* identical for each node in the subtrie below the skipped
++ * bit! trie_insert() in this implementation takes care of that.
++ *
++ * if n is an internal node - a 'tnode' here, the various parts of its key
++ * have many different meanings.
++ *
++ * Example:
++ * _________________________________________________________________
++ * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
++ * -----------------------------------------------------------------
++ * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
++ *
++ * _________________________________________________________________
++ * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
++ * -----------------------------------------------------------------
++ * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
++ *
++ * tp->pos = 22
++ * tp->bits = 3
++ * n->pos = 13
++ * n->bits = 4
++ *
++ * First, let's just ignore the bits that come before the parent tp, that is
++ * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
++ * point we do not use them for anything.
++ *
++ * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
++ * index into the parent's child array. That is, they will be used to find
++ * 'n' among tp's children.
++ *
++ * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
++ * for the node n.
++ *
++ * All the bits we have seen so far are significant to the node n. The rest
++ * of the bits are really not needed or indeed known in n->key.
++ *
++ * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
++ * n's child array, and will of course be different for each child.
++ *
++ * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
++ * at this point.
++ */
+
+ static const int halve_threshold = 25;
+ static const int inflate_threshold = 50;
+@@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key)
+ * as the nodes are searched
+ */
+ l->key = key;
+- l->pos = KEYLENGTH;
++ l->pos = 0;
+ /* set bits to 0 indicating we are not a tnode */
+ l->bits = 0;
+
+@@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key
+ tn->parent = NULL;
+ tn->pos = pos;
+ tn->bits = bits;
+- tn->key = mask_pfx(key, pos);
++ tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
+ tn->full_children = 0;
+ tn->empty_children = 1<<bits;
+ }
+@@ -410,14 +392,12 @@ static struct tnode *tnode_new(t_key key
+ return tn;
+ }
+
+-/*
+- * Check whether a tnode 'n' is "full", i.e. it is an internal node
++/* Check whether a tnode 'n' is "full", i.e. it is an internal node
+ * and no bits are skipped. See discussion in dyntree paper p. 6
+ */
+-
+ static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
+ {
+- return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
++ return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
+ }
+
+ static inline void put_child(struct tnode *tn, int i,
+@@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie
+ {
+ int olen = tnode_child_length(oldtnode);
+ struct tnode *tn;
++ t_key m;
+ int i;
+
+ pr_debug("In inflate\n");
+
+- tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
++ tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
+
+ if (!tn)
+ return ERR_PTR(-ENOMEM);
+@@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie
+ * fails. In case of failure we return the oldnode and inflate
+ * of tnode is ignored.
+ */
++ for (i = 0, m = 1u << tn->pos; i < olen; i++) {
++ struct tnode *inode = tnode_get_child(oldtnode, i);
+
+- for (i = 0; i < olen; i++) {
+- struct tnode *inode;
+-
+- inode = tnode_get_child(oldtnode, i);
+- if (tnode_full(oldtnode, inode) && inode->bits > 1) {
++ if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
+ struct tnode *left, *right;
+- t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
+
+- left = tnode_new(inode->key&(~m), inode->pos + 1,
++ left = tnode_new(inode->key & ~m, inode->pos,
+ inode->bits - 1);
+ if (!left)
+ goto nomem;
+
+- right = tnode_new(inode->key|m, inode->pos + 1,
++ right = tnode_new(inode->key | m, inode->pos,
+ inode->bits - 1);
+
+ if (!right) {
+@@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie
+
+ /* A leaf or an internal node with skipped bits */
+ if (!tnode_full(oldtnode, inode)) {
+- put_child(tn,
+- tkey_extract_bits(inode->key, tn->pos, tn->bits),
+- inode);
++ put_child(tn, get_index(inode->key, tn), inode);
+ continue;
+ }
+
+@@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *
+
+ pr_debug("In halve\n");
+
+- tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
++ tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
+
+ if (!tn)
+ return ERR_PTR(-ENOMEM);
+@@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *
+ if (left && right) {
+ struct tnode *newn;
+
+- newn = tnode_new(left->key, tn->pos + tn->bits, 1);
++ newn = tnode_new(left->key, oldtnode->pos, 1);
+
+ if (!newn)
+ goto nomem;
+@@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *
+ key = tn->key;
+
+ while (tn != NULL && (tp = node_parent(tn)) != NULL) {
+- cindex = tkey_extract_bits(key, tp->pos, tp->bits);
++ cindex = get_index(key, tp);
+ wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
+ tn = resize(t, tn);
+
+@@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node
+ */
+ if (n) {
+ struct tnode *tn;
+- int newpos;
+-
+- newpos = KEYLENGTH - __fls(n->key ^ key) - 1;
+
+- tn = tnode_new(key, newpos, 1);
++ tn = tnode_new(key, __fls(key ^ n->key), 1);
+ if (!tn) {
+ free_leaf_info(li);
+ node_free(l);
+@@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode
+ static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
+ {
+ do {
+- t_key idx;
+-
+- if (c)
+- idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
+- else
+- idx = 0;
++ t_key idx = c ? idx = get_index(c->key, p) + 1 : 0;
+
+ while (idx < 1u << p->bits) {
+ c = tnode_get_child_rcu(p, idx++);
+@@ -1851,7 +1819,7 @@ rescan:
+ /* Current node exhausted, pop back up */
+ p = node_parent_rcu(tn);
+ if (p) {
+- cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
++ cindex = get_index(tn->key, p) + 1;
+ tn = p;
+ --iter->depth;
+ goto rescan;
+@@ -2187,10 +2155,10 @@ static int fib_trie_seq_show(struct seq_
+ if (IS_TNODE(n)) {
+ __be32 prf = htonl(n->key);
+
+- seq_indent(seq, iter->depth - 1);
+- seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
+- &prf, n->pos, n->bits, n->full_children,
+- n->empty_children);
++ seq_indent(seq, iter->depth-1);
++ seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
++ &prf, KEYLENGTH - n->pos - n->bits, n->bits,
++ n->full_children, n->empty_children);
+ } else {
+ struct leaf_info *li;
+ __be32 val = htonl(n->key);