aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@redhat.com>2014-12-31 13:55:54 -0500
committerDavid S. Miller <davem@davemloft.net>2014-12-31 18:25:54 -0500
commit9f9e636d4f89f788c5cf9c6a5357501c0d405fcb (patch)
tree2013f76e4133071bbc24a70b54307d6fd446c786 /net
parentadaf981685b7b17d773d17f4ee6a128a3a6b6dc8 (diff)
fib_trie: Optimize fib_table_lookup to avoid wasting time on loops/variables
This patch is meant to reduce the complexity of fib_table_lookup by reducing the number of variables to the bare minimum while still keeping the same if not improved functionality versus the original. Most of this change was started off by the desire to rid the function of chopped_off and current_prefix_length as they actually added very little to the function since they only applied when computing the cindex. I was able to replace them mostly with just a check for the prefix match. As long as the prefix between the key and the node being tested was the same we know we can search the tnode fully versus just testing cindex 0. The second portion of the change ended up being a massive reordering. Originally the calls to check_leaf were up near the start of the loop, and the backtracing and descending into lower levels of tnodes was later. This didn't make much sense as the structure of the tree means the leaves are always the last thing to be tested. As such I reordered things so that we instead have a loop that will delve into the tree and only exit when we have either found a leaf or we have exhausted the tree. The advantage of rearranging things like this is that we can fully inline check_leaf since there is now only one reference to it in the function. Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/fib_trie.c250
1 files changed, 93 insertions, 157 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 94c929f6c752..3fe4dd917ce1 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -90,6 +90,9 @@ typedef unsigned int t_key;
90#define IS_TNODE(n) ((n)->bits) 90#define IS_TNODE(n) ((n)->bits)
91#define IS_LEAF(n) (!(n)->bits) 91#define IS_LEAF(n) (!(n)->bits)
92 92
93#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
94#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
95
93struct tnode { 96struct tnode {
94 t_key key; 97 t_key key;
95 unsigned char bits; /* 2log(KEYLENGTH) bits needed */ 98 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
@@ -1281,7 +1284,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l,
1281 continue; 1284 continue;
1282 fib_alias_accessed(fa); 1285 fib_alias_accessed(fa);
1283 err = fib_props[fa->fa_type].error; 1286 err = fib_props[fa->fa_type].error;
1284 if (err) { 1287 if (unlikely(err < 0)) {
1285#ifdef CONFIG_IP_FIB_TRIE_STATS 1288#ifdef CONFIG_IP_FIB_TRIE_STATS
1286 this_cpu_inc(t->stats->semantic_match_passed); 1289 this_cpu_inc(t->stats->semantic_match_passed);
1287#endif 1290#endif
@@ -1303,7 +1306,7 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l,
1303 res->prefixlen = li->plen; 1306 res->prefixlen = li->plen;
1304 res->nh_sel = nhsel; 1307 res->nh_sel = nhsel;
1305 res->type = fa->fa_type; 1308 res->type = fa->fa_type;
1306 res->scope = fa->fa_info->fib_scope; 1309 res->scope = fi->fib_scope;
1307 res->fi = fi; 1310 res->fi = fi;
1308 res->table = tb; 1311 res->table = tb;
1309 res->fa_head = &li->falh; 1312 res->fa_head = &li->falh;
@@ -1321,23 +1324,24 @@ static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l,
1321 return 1; 1324 return 1;
1322} 1325}
1323 1326
1327static inline t_key prefix_mismatch(t_key key, struct tnode *n)
1328{
1329 t_key prefix = n->key;
1330
1331 return (key ^ prefix) & (prefix | -prefix);
1332}
1333
1324int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, 1334int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1325 struct fib_result *res, int fib_flags) 1335 struct fib_result *res, int fib_flags)
1326{ 1336{
1327 struct trie *t = (struct trie *) tb->tb_data; 1337 struct trie *t = (struct trie *)tb->tb_data;
1328#ifdef CONFIG_IP_FIB_TRIE_STATS 1338#ifdef CONFIG_IP_FIB_TRIE_STATS
1329 struct trie_use_stats __percpu *stats = t->stats; 1339 struct trie_use_stats __percpu *stats = t->stats;
1330#endif 1340#endif
1331 int ret; 1341 const t_key key = ntohl(flp->daddr);
1332 struct tnode *n; 1342 struct tnode *n, *pn;
1333 struct tnode *pn; 1343 t_key cindex;
1334 unsigned int pos, bits; 1344 int ret = 1;
1335 t_key key = ntohl(flp->daddr);
1336 unsigned int chopped_off;
1337 t_key cindex = 0;
1338 unsigned int current_prefix_length = KEYLENGTH;
1339 struct tnode *cn;
1340 t_key pref_mismatch;
1341 1345
1342 rcu_read_lock(); 1346 rcu_read_lock();
1343 1347
@@ -1349,170 +1353,102 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
1349 this_cpu_inc(stats->gets); 1353 this_cpu_inc(stats->gets);
1350#endif 1354#endif
1351 1355
1352 /* Just a leaf? */
1353 if (IS_LEAF(n)) {
1354 ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
1355 goto found;
1356 }
1357
1358 pn = n; 1356 pn = n;
1359 chopped_off = 0; 1357 cindex = 0;
1360 1358
1361 while (pn) { 1359 /* Step 1: Travel to the longest prefix match in the trie */
1362 pos = pn->pos; 1360 for (;;) {
1363 bits = pn->bits; 1361 unsigned long index = get_index(key, n);
1364 1362
1365 if (!chopped_off) 1363 /* This bit of code is a bit tricky but it combines multiple
1366 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1364 * checks into a single check. The prefix consists of the
1367 pos, bits); 1365 * prefix plus zeros for the "bits" in the prefix. The index
1368 1366 * is the difference between the key and this value. From
1369 n = tnode_get_child_rcu(pn, cindex); 1367 * this we can actually derive several pieces of data.
1370 1368 * if !(index >> bits)
1371 if (n == NULL) { 1369 * we know the value is child index
1372#ifdef CONFIG_IP_FIB_TRIE_STATS 1370 * else
1373 this_cpu_inc(stats->null_node_hit); 1371 * we have a mismatch in skip bits and failed
1374#endif 1372 */
1375 goto backtrace; 1373 if (index >> n->bits)
1376 } 1374 break;
1377 1375
1378 if (IS_LEAF(n)) { 1376 /* we have found a leaf. Prefixes have already been compared */
1379 ret = check_leaf(tb, t, n, key, flp, res, fib_flags); 1377 if (IS_LEAF(n))
1380 if (ret > 0)
1381 goto backtrace;
1382 goto found; 1378 goto found;
1383 }
1384 1379
1385 cn = n; 1380 /* only record pn and cindex if we are going to be chopping
1386 1381 * bits later. Otherwise we are just wasting cycles.
1387 /*
1388 * It's a tnode, and we can do some extra checks here if we
1389 * like, to avoid descending into a dead-end branch.
1390 * This tnode is in the parent's child array at index
1391 * key[p_pos..p_pos+p_bits] but potentially with some bits
1392 * chopped off, so in reality the index may be just a
1393 * subprefix, padded with zero at the end.
1394 * We can also take a look at any skipped bits in this
1395 * tnode - everything up to p_pos is supposed to be ok,
1396 * and the non-chopped bits of the index (se previous
1397 * paragraph) are also guaranteed ok, but the rest is
1398 * considered unknown.
1399 *
1400 * The skipped bits are key[pos+bits..cn->pos].
1401 */
1402
1403 /* If current_prefix_length < pos+bits, we are already doing
1404 * actual prefix matching, which means everything from
1405 * pos+(bits-chopped_off) onward must be zero along some
1406 * branch of this subtree - otherwise there is *no* valid
1407 * prefix present. Here we can only check the skipped
1408 * bits. Remember, since we have already indexed into the
1409 * parent's child array, we know that the bits we chopped of
1410 * *are* zero.
1411 */ 1382 */
1412 1383 if (index) {
1413 /* NOTA BENE: Checking only skipped bits 1384 pn = n;
1414 for the new node here */ 1385 cindex = index;
1415
1416 if (current_prefix_length < pos+bits) {
1417 if (tkey_extract_bits(cn->key, current_prefix_length,
1418 cn->pos - current_prefix_length)
1419 || !(cn->child[0]))
1420 goto backtrace;
1421 } 1386 }
1422 1387
1423 /* 1388 n = rcu_dereference(n->child[index]);
1424 * If chopped_off=0, the index is fully validated and we 1389 if (unlikely(!n))
1425 * only need to look at the skipped bits for this, the new, 1390 goto backtrace;
1426 * tnode. What we actually want to do is to find out if 1391 }
1427 * these skipped bits match our key perfectly, or if we will
1428 * have to count on finding a matching prefix further down,
1429 * because if we do, we would like to have some way of
1430 * verifying the existence of such a prefix at this point.
1431 */
1432 1392
1433 /* The only thing we can do at this point is to verify that 1393 /* Step 2: Sort out leaves and begin backtracing for longest prefix */
1434 * any such matching prefix can indeed be a prefix to our 1394 for (;;) {
1435 * key, and if the bits in the node we are inspecting that 1395 /* record the pointer where our next node pointer is stored */
1436 * do not match our key are not ZERO, this cannot be true. 1396 struct tnode __rcu **cptr = n->child;
1437 * Thus, find out where there is a mismatch (before cn->pos)
1438 * and verify that all the mismatching bits are zero in the
1439 * new tnode's key.
1440 */
1441 1397
1442 /* 1398 /* This test verifies that none of the bits that differ
1443 * Note: We aren't very concerned about the piece of 1399 * between the key and the prefix exist in the region of
1444 * the key that precede pn->pos+pn->bits, since these 1400 * the lsb and higher in the prefix.
1445 * have already been checked. The bits after cn->pos
1446 * aren't checked since these are by definition
1447 * "unknown" at this point. Thus, what we want to see
1448 * is if we are about to enter the "prefix matching"
1449 * state, and in that case verify that the skipped
1450 * bits that will prevail throughout this subtree are
1451 * zero, as they have to be if we are to find a
1452 * matching prefix.
1453 */ 1401 */
1402 if (unlikely(prefix_mismatch(key, n)))
1403 goto backtrace;
1454 1404
1455 pref_mismatch = mask_pfx(cn->key ^ key, cn->pos); 1405 /* exit out and process leaf */
1406 if (unlikely(IS_LEAF(n)))
1407 break;
1456 1408
1457 /* 1409 /* Don't bother recording parent info. Since we are in
1458 * In short: If skipped bits in this node do not match 1410 * prefix match mode we will have to come back to wherever
1459 * the search key, enter the "prefix matching" 1411 * we started this traversal anyway
1460 * state.directly.
1461 */ 1412 */
1462 if (pref_mismatch) {
1463 /* fls(x) = __fls(x) + 1 */
1464 int mp = KEYLENGTH - __fls(pref_mismatch) - 1;
1465
1466 if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
1467 goto backtrace;
1468
1469 if (current_prefix_length >= cn->pos)
1470 current_prefix_length = mp;
1471 }
1472
1473 pn = n; /* Descend */
1474 chopped_off = 0;
1475 continue;
1476 1413
1414 while ((n = rcu_dereference(*cptr)) == NULL) {
1477backtrace: 1415backtrace:
1478 chopped_off++;
1479
1480 /* As zero don't change the child key (cindex) */
1481 while ((chopped_off <= pn->bits)
1482 && !(cindex & (1<<(chopped_off-1))))
1483 chopped_off++;
1484
1485 /* Decrease current_... with bits chopped off */
1486 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1487 current_prefix_length = pn->pos + pn->bits
1488 - chopped_off;
1489
1490 /*
1491 * Either we do the actual chop off according or if we have
1492 * chopped off all bits in this tnode walk up to our parent.
1493 */
1494
1495 if (chopped_off <= pn->bits) {
1496 cindex &= ~(1 << (chopped_off-1));
1497 } else {
1498 struct tnode *parent = node_parent_rcu(pn);
1499 if (!parent)
1500 goto failed;
1501
1502 /* Get Child's index */
1503 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1504 pn = parent;
1505 chopped_off = 0;
1506
1507#ifdef CONFIG_IP_FIB_TRIE_STATS 1416#ifdef CONFIG_IP_FIB_TRIE_STATS
1508 this_cpu_inc(stats->backtrack); 1417 if (!n)
1418 this_cpu_inc(stats->null_node_hit);
1509#endif 1419#endif
1510 goto backtrace; 1420 /* If we are at cindex 0 there are no more bits for
1421 * us to strip at this level so we must ascend back
1422 * up one level to see if there are any more bits to
1423 * be stripped there.
1424 */
1425 while (!cindex) {
1426 t_key pkey = pn->key;
1427
1428 pn = node_parent_rcu(pn);
1429 if (unlikely(!pn))
1430 goto failed;
1431#ifdef CONFIG_IP_FIB_TRIE_STATS
1432 this_cpu_inc(stats->backtrack);
1433#endif
1434 /* Get Child's index */
1435 cindex = get_index(pkey, pn);
1436 }
1437
1438 /* strip the least significant bit from the cindex */
1439 cindex &= cindex - 1;
1440
1441 /* grab pointer for next child node */
1442 cptr = &pn->child[cindex];
1511 } 1443 }
1512 } 1444 }
1513failed: 1445
1514 ret = 1;
1515found: 1446found:
1447 /* Step 3: Process the leaf, if that fails fall back to backtracing */
1448 ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
1449 if (unlikely(ret > 0))
1450 goto backtrace;
1451failed:
1516 rcu_read_unlock(); 1452 rcu_read_unlock();
1517 return ret; 1453 return ret;
1518} 1454}