347 lines
12 KiB
Diff
347 lines
12 KiB
Diff
From: Alexander Duyck <alexander.h.duyck@redhat.com>
|
|
Date: Wed, 31 Dec 2014 10:56:12 -0800
|
|
Subject: [PATCH] fib_trie: Update meaning of pos to represent unchecked
|
|
bits
|
|
|
|
This change moves the pos value to the other side of the "bits" field. By
|
|
doing this it actually simplifies a significant amount of code in the trie.
|
|
|
|
For example when halving a tree we know that the bit lost exists at
|
|
oldnode->pos, and if we inflate the tree the new bit being add is at
|
|
tn->pos. Previously to find those bits you would have to subtract pos and
|
|
bits from the keylength or start with a value of (1 << 31) and then shift
|
|
that.
|
|
|
|
There are a number of spots throughout the code that benefit from this. In
|
|
the case of the hot-path searches the main advantage is that we can drop 2
|
|
or more operations from the search path as we no longer need to compute the
|
|
value for the index to be shifted by and can instead just use the raw pos
|
|
value.
|
|
|
|
In addition the tkey_extract_bits is now defunct and can be replaced by
|
|
get_index since the two operations were doing the same thing, but now
|
|
get_index does it much more quickly as it is only an xor and shift versus a
|
|
pair of shifts and a subtraction.
|
|
|
|
Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
|
|
Signed-off-by: David S. Miller <davem@davemloft.net>
|
|
---
|
|
|
|
--- a/net/ipv4/fib_trie.c
|
|
+++ b/net/ipv4/fib_trie.c
|
|
@@ -90,8 +90,7 @@ typedef unsigned int t_key;
|
|
#define IS_TNODE(n) ((n)->bits)
|
|
#define IS_LEAF(n) (!(n)->bits)
|
|
|
|
-#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
|
|
-#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
|
|
+#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)
|
|
|
|
struct tnode {
|
|
t_key key;
|
|
@@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_ch
|
|
return rcu_dereference_rtnl(tn->child[i]);
|
|
}
|
|
|
|
-static inline t_key mask_pfx(t_key k, unsigned int l)
|
|
-{
|
|
- return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
|
|
-}
|
|
-
|
|
-static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
|
|
-{
|
|
- if (offset < KEYLENGTH)
|
|
- return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
|
|
- else
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- To understand this stuff, an understanding of keys and all their bits is
|
|
- necessary. Every node in the trie has a key associated with it, but not
|
|
- all of the bits in that key are significant.
|
|
-
|
|
- Consider a node 'n' and its parent 'tp'.
|
|
-
|
|
- If n is a leaf, every bit in its key is significant. Its presence is
|
|
- necessitated by path compression, since during a tree traversal (when
|
|
- searching for a leaf - unless we are doing an insertion) we will completely
|
|
- ignore all skipped bits we encounter. Thus we need to verify, at the end of
|
|
- a potentially successful search, that we have indeed been walking the
|
|
- correct key path.
|
|
-
|
|
- Note that we can never "miss" the correct key in the tree if present by
|
|
- following the wrong path. Path compression ensures that segments of the key
|
|
- that are the same for all keys with a given prefix are skipped, but the
|
|
- skipped part *is* identical for each node in the subtrie below the skipped
|
|
- bit! trie_insert() in this implementation takes care of that - note the
|
|
- call to tkey_sub_equals() in trie_insert().
|
|
-
|
|
- if n is an internal node - a 'tnode' here, the various parts of its key
|
|
- have many different meanings.
|
|
-
|
|
- Example:
|
|
- _________________________________________________________________
|
|
- | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
|
|
- -----------------------------------------------------------------
|
|
- 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
|
-
|
|
- _________________________________________________________________
|
|
- | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
|
|
- -----------------------------------------------------------------
|
|
- 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
|
|
-
|
|
- tp->pos = 7
|
|
- tp->bits = 3
|
|
- n->pos = 15
|
|
- n->bits = 4
|
|
-
|
|
- First, let's just ignore the bits that come before the parent tp, that is
|
|
- the bits from 0 to (tp->pos-1). They are *known* but at this point we do
|
|
- not use them for anything.
|
|
-
|
|
- The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
|
|
- index into the parent's child array. That is, they will be used to find
|
|
- 'n' among tp's children.
|
|
-
|
|
- The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
|
|
- for the node n.
|
|
-
|
|
- All the bits we have seen so far are significant to the node n. The rest
|
|
- of the bits are really not needed or indeed known in n->key.
|
|
-
|
|
- The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
|
|
- n's child array, and will of course be different for each child.
|
|
-
|
|
-
|
|
- The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
|
|
- at this point.
|
|
-
|
|
-*/
|
|
+/* To understand this stuff, an understanding of keys and all their bits is
|
|
+ * necessary. Every node in the trie has a key associated with it, but not
|
|
+ * all of the bits in that key are significant.
|
|
+ *
|
|
+ * Consider a node 'n' and its parent 'tp'.
|
|
+ *
|
|
+ * If n is a leaf, every bit in its key is significant. Its presence is
|
|
+ * necessitated by path compression, since during a tree traversal (when
|
|
+ * searching for a leaf - unless we are doing an insertion) we will completely
|
|
+ * ignore all skipped bits we encounter. Thus we need to verify, at the end of
|
|
+ * a potentially successful search, that we have indeed been walking the
|
|
+ * correct key path.
|
|
+ *
|
|
+ * Note that we can never "miss" the correct key in the tree if present by
|
|
+ * following the wrong path. Path compression ensures that segments of the key
|
|
+ * that are the same for all keys with a given prefix are skipped, but the
|
|
+ * skipped part *is* identical for each node in the subtrie below the skipped
|
|
+ * bit! trie_insert() in this implementation takes care of that.
|
|
+ *
|
|
+ * if n is an internal node - a 'tnode' here, the various parts of its key
|
|
+ * have many different meanings.
|
|
+ *
|
|
+ * Example:
|
|
+ * _________________________________________________________________
|
|
+ * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
|
|
+ * -----------------------------------------------------------------
|
|
+ * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
|
|
+ *
|
|
+ * _________________________________________________________________
|
|
+ * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
|
|
+ * -----------------------------------------------------------------
|
|
+ * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
|
|
+ *
|
|
+ * tp->pos = 22
|
|
+ * tp->bits = 3
|
|
+ * n->pos = 13
|
|
+ * n->bits = 4
|
|
+ *
|
|
+ * First, let's just ignore the bits that come before the parent tp, that is
|
|
+ * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
|
|
+ * point we do not use them for anything.
|
|
+ *
|
|
+ * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
|
|
+ * index into the parent's child array. That is, they will be used to find
|
|
+ * 'n' among tp's children.
|
|
+ *
|
|
+ * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
|
|
+ * for the node n.
|
|
+ *
|
|
+ * All the bits we have seen so far are significant to the node n. The rest
|
|
+ * of the bits are really not needed or indeed known in n->key.
|
|
+ *
|
|
+ * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
|
|
+ * n's child array, and will of course be different for each child.
|
|
+ *
|
|
+ * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
|
|
+ * at this point.
|
|
+ */
|
|
|
|
static const int halve_threshold = 25;
|
|
static const int inflate_threshold = 50;
|
|
@@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key)
|
|
* as the nodes are searched
|
|
*/
|
|
l->key = key;
|
|
- l->pos = KEYLENGTH;
|
|
+ l->pos = 0;
|
|
/* set bits to 0 indicating we are not a tnode */
|
|
l->bits = 0;
|
|
|
|
@@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key
|
|
tn->parent = NULL;
|
|
tn->pos = pos;
|
|
tn->bits = bits;
|
|
- tn->key = mask_pfx(key, pos);
|
|
+ tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
|
|
tn->full_children = 0;
|
|
tn->empty_children = 1<<bits;
|
|
}
|
|
@@ -410,14 +392,12 @@ static struct tnode *tnode_new(t_key key
|
|
return tn;
|
|
}
|
|
|
|
-/*
|
|
- * Check whether a tnode 'n' is "full", i.e. it is an internal node
|
|
+/* Check whether a tnode 'n' is "full", i.e. it is an internal node
|
|
* and no bits are skipped. See discussion in dyntree paper p. 6
|
|
*/
|
|
-
|
|
static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
|
|
{
|
|
- return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
|
|
+ return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
|
|
}
|
|
|
|
static inline void put_child(struct tnode *tn, int i,
|
|
@@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie
|
|
{
|
|
int olen = tnode_child_length(oldtnode);
|
|
struct tnode *tn;
|
|
+ t_key m;
|
|
int i;
|
|
|
|
pr_debug("In inflate\n");
|
|
|
|
- tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
|
|
+ tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
|
|
|
|
if (!tn)
|
|
return ERR_PTR(-ENOMEM);
|
|
@@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie
|
|
* fails. In case of failure we return the oldnode and inflate
|
|
* of tnode is ignored.
|
|
*/
|
|
+ for (i = 0, m = 1u << tn->pos; i < olen; i++) {
|
|
+ struct tnode *inode = tnode_get_child(oldtnode, i);
|
|
|
|
- for (i = 0; i < olen; i++) {
|
|
- struct tnode *inode;
|
|
-
|
|
- inode = tnode_get_child(oldtnode, i);
|
|
- if (tnode_full(oldtnode, inode) && inode->bits > 1) {
|
|
+ if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
|
|
struct tnode *left, *right;
|
|
- t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
|
|
|
|
- left = tnode_new(inode->key&(~m), inode->pos + 1,
|
|
+ left = tnode_new(inode->key & ~m, inode->pos,
|
|
inode->bits - 1);
|
|
if (!left)
|
|
goto nomem;
|
|
|
|
- right = tnode_new(inode->key|m, inode->pos + 1,
|
|
+ right = tnode_new(inode->key | m, inode->pos,
|
|
inode->bits - 1);
|
|
|
|
if (!right) {
|
|
@@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie
|
|
|
|
/* A leaf or an internal node with skipped bits */
|
|
if (!tnode_full(oldtnode, inode)) {
|
|
- put_child(tn,
|
|
- tkey_extract_bits(inode->key, tn->pos, tn->bits),
|
|
- inode);
|
|
+ put_child(tn, get_index(inode->key, tn), inode);
|
|
continue;
|
|
}
|
|
|
|
@@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *
|
|
|
|
pr_debug("In halve\n");
|
|
|
|
- tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
|
|
+ tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
|
|
|
|
if (!tn)
|
|
return ERR_PTR(-ENOMEM);
|
|
@@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *
|
|
if (left && right) {
|
|
struct tnode *newn;
|
|
|
|
- newn = tnode_new(left->key, tn->pos + tn->bits, 1);
|
|
+ newn = tnode_new(left->key, oldtnode->pos, 1);
|
|
|
|
if (!newn)
|
|
goto nomem;
|
|
@@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *
|
|
key = tn->key;
|
|
|
|
while (tn != NULL && (tp = node_parent(tn)) != NULL) {
|
|
- cindex = tkey_extract_bits(key, tp->pos, tp->bits);
|
|
+ cindex = get_index(key, tp);
|
|
wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
|
|
tn = resize(t, tn);
|
|
|
|
@@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node
|
|
*/
|
|
if (n) {
|
|
struct tnode *tn;
|
|
- int newpos;
|
|
-
|
|
- newpos = KEYLENGTH - __fls(n->key ^ key) - 1;
|
|
|
|
- tn = tnode_new(key, newpos, 1);
|
|
+ tn = tnode_new(key, __fls(key ^ n->key), 1);
|
|
if (!tn) {
|
|
free_leaf_info(li);
|
|
node_free(l);
|
|
@@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode
|
|
static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
|
|
{
|
|
do {
|
|
- t_key idx;
|
|
-
|
|
- if (c)
|
|
- idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
|
|
- else
|
|
- idx = 0;
|
|
+ t_key idx = c ? idx = get_index(c->key, p) + 1 : 0;
|
|
|
|
while (idx < 1u << p->bits) {
|
|
c = tnode_get_child_rcu(p, idx++);
|
|
@@ -1851,7 +1819,7 @@ rescan:
|
|
/* Current node exhausted, pop back up */
|
|
p = node_parent_rcu(tn);
|
|
if (p) {
|
|
- cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
|
|
+ cindex = get_index(tn->key, p) + 1;
|
|
tn = p;
|
|
--iter->depth;
|
|
goto rescan;
|
|
@@ -2186,10 +2154,10 @@ static int fib_trie_seq_show(struct seq_
|
|
if (IS_TNODE(n)) {
|
|
__be32 prf = htonl(n->key);
|
|
|
|
- seq_indent(seq, iter->depth - 1);
|
|
- seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
|
|
- &prf, n->pos, n->bits, n->full_children,
|
|
- n->empty_children);
|
|
+ seq_indent(seq, iter->depth-1);
|
|
+ seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
|
|
+ &prf, KEYLENGTH - n->pos - n->bits, n->bits,
|
|
+ n->full_children, n->empty_children);
|
|
} else {
|
|
struct leaf_info *li;
|
|
__be32 val = htonl(n->key);
|