diff options
author | Peter Nørlund <pch@ordbogen.com> | 2015-09-30 04:12:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-10-05 05:59:21 -0400 |
commit | 0e884c78ee19e902f300ed147083c28a0c6302f0 (patch) | |
tree | 4a60b19eed14b18e7f5473cfd0df6cb6434a57c4 /net/ipv4/fib_semantics.c | |
parent | 2472186f58ee1e4b9ca194245fef03931f6de90a (diff) |
ipv4: L3 hash-based multipath
Replaces the per-packet multipath with a hash-based multipath using
source and destination address.
Signed-off-by: Peter Nørlund <pch@ordbogen.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/fib_semantics.c')
-rw-r--r-- | net/ipv4/fib_semantics.c | 140 |
1 file changed, 75 insertions, 65 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 064bd3caaa4f..0c49d2f3bbc0 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -57,8 +57,7 @@ static unsigned int fib_info_cnt; | |||
57 | static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; | 57 | static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; |
58 | 58 | ||
59 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 59 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
60 | 60 | u32 fib_multipath_secret __read_mostly; | |
61 | static DEFINE_SPINLOCK(fib_multipath_lock); | ||
62 | 61 | ||
63 | #define for_nexthops(fi) { \ | 62 | #define for_nexthops(fi) { \ |
64 | int nhsel; const struct fib_nh *nh; \ | 63 | int nhsel; const struct fib_nh *nh; \ |
@@ -532,7 +531,67 @@ errout: | |||
532 | return ret; | 531 | return ret; |
533 | } | 532 | } |
534 | 533 | ||
535 | #endif | 534 | static void fib_rebalance(struct fib_info *fi) |
535 | { | ||
536 | int total; | ||
537 | int w; | ||
538 | struct in_device *in_dev; | ||
539 | |||
540 | if (fi->fib_nhs < 2) | ||
541 | return; | ||
542 | |||
543 | total = 0; | ||
544 | for_nexthops(fi) { | ||
545 | if (nh->nh_flags & RTNH_F_DEAD) | ||
546 | continue; | ||
547 | |||
548 | in_dev = __in_dev_get_rcu(nh->nh_dev); | ||
549 | |||
550 | if (in_dev && | ||
551 | IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && | ||
552 | nh->nh_flags & RTNH_F_LINKDOWN) | ||
553 | continue; | ||
554 | |||
555 | total += nh->nh_weight; | ||
556 | } endfor_nexthops(fi); | ||
557 | |||
558 | w = 0; | ||
559 | change_nexthops(fi) { | ||
560 | int upper_bound; | ||
561 | |||
562 | in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); | ||
563 | |||
564 | if (nexthop_nh->nh_flags & RTNH_F_DEAD) { | ||
565 | upper_bound = -1; | ||
566 | } else if (in_dev && | ||
567 | IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && | ||
568 | nexthop_nh->nh_flags & RTNH_F_LINKDOWN) { | ||
569 | upper_bound = -1; | ||
570 | } else { | ||
571 | w += nexthop_nh->nh_weight; | ||
572 | upper_bound = DIV_ROUND_CLOSEST(2147483648LL * w, | ||
573 | total) - 1; | ||
574 | } | ||
575 | |||
576 | atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); | ||
577 | } endfor_nexthops(fi); | ||
578 | |||
579 | net_get_random_once(&fib_multipath_secret, | ||
580 | sizeof(fib_multipath_secret)); | ||
581 | } | ||
582 | |||
583 | static inline void fib_add_weight(struct fib_info *fi, | ||
584 | const struct fib_nh *nh) | ||
585 | { | ||
586 | fi->fib_weight += nh->nh_weight; | ||
587 | } | ||
588 | |||
589 | #else /* CONFIG_IP_ROUTE_MULTIPATH */ | ||
590 | |||
591 | #define fib_rebalance(fi) do { } while (0) | ||
592 | #define fib_add_weight(fi, nh) do { } while (0) | ||
593 | |||
594 | #endif /* CONFIG_IP_ROUTE_MULTIPATH */ | ||
536 | 595 | ||
537 | static int fib_encap_match(struct net *net, u16 encap_type, | 596 | static int fib_encap_match(struct net *net, u16 encap_type, |
538 | struct nlattr *encap, | 597 | struct nlattr *encap, |
@@ -1094,8 +1153,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg) | |||
1094 | 1153 | ||
1095 | change_nexthops(fi) { | 1154 | change_nexthops(fi) { |
1096 | fib_info_update_nh_saddr(net, nexthop_nh); | 1155 | fib_info_update_nh_saddr(net, nexthop_nh); |
1156 | fib_add_weight(fi, nexthop_nh); | ||
1097 | } endfor_nexthops(fi) | 1157 | } endfor_nexthops(fi) |
1098 | 1158 | ||
1159 | fib_rebalance(fi); | ||
1160 | |||
1099 | link_it: | 1161 | link_it: |
1100 | ofi = fib_find_info(fi); | 1162 | ofi = fib_find_info(fi); |
1101 | if (ofi) { | 1163 | if (ofi) { |
@@ -1317,12 +1379,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) | |||
1317 | nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; | 1379 | nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; |
1318 | break; | 1380 | break; |
1319 | } | 1381 | } |
1320 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
1321 | spin_lock_bh(&fib_multipath_lock); | ||
1322 | fi->fib_power -= nexthop_nh->nh_power; | ||
1323 | nexthop_nh->nh_power = 0; | ||
1324 | spin_unlock_bh(&fib_multipath_lock); | ||
1325 | #endif | ||
1326 | dead++; | 1382 | dead++; |
1327 | } | 1383 | } |
1328 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1384 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
@@ -1345,6 +1401,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) | |||
1345 | } | 1401 | } |
1346 | ret++; | 1402 | ret++; |
1347 | } | 1403 | } |
1404 | |||
1405 | fib_rebalance(fi); | ||
1348 | } | 1406 | } |
1349 | 1407 | ||
1350 | return ret; | 1408 | return ret; |
@@ -1467,20 +1525,15 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) | |||
1467 | !__in_dev_get_rtnl(dev)) | 1525 | !__in_dev_get_rtnl(dev)) |
1468 | continue; | 1526 | continue; |
1469 | alive++; | 1527 | alive++; |
1470 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | ||
1471 | spin_lock_bh(&fib_multipath_lock); | ||
1472 | nexthop_nh->nh_power = 0; | ||
1473 | nexthop_nh->nh_flags &= ~nh_flags; | ||
1474 | spin_unlock_bh(&fib_multipath_lock); | ||
1475 | #else | ||
1476 | nexthop_nh->nh_flags &= ~nh_flags; | 1528 | nexthop_nh->nh_flags &= ~nh_flags; |
1477 | #endif | ||
1478 | } endfor_nexthops(fi) | 1529 | } endfor_nexthops(fi) |
1479 | 1530 | ||
1480 | if (alive > 0) { | 1531 | if (alive > 0) { |
1481 | fi->fib_flags &= ~nh_flags; | 1532 | fi->fib_flags &= ~nh_flags; |
1482 | ret++; | 1533 | ret++; |
1483 | } | 1534 | } |
1535 | |||
1536 | fib_rebalance(fi); | ||
1484 | } | 1537 | } |
1485 | 1538 | ||
1486 | return ret; | 1539 | return ret; |
@@ -1488,62 +1541,19 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) | |||
1488 | 1541 | ||
1489 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | 1542 | #ifdef CONFIG_IP_ROUTE_MULTIPATH |
1490 | 1543 | ||
1491 | /* | 1544 | void fib_select_multipath(struct fib_result *res, int hash) |
1492 | * The algorithm is suboptimal, but it provides really | ||
1493 | * fair weighted route distribution. | ||
1494 | */ | ||
1495 | void fib_select_multipath(struct fib_result *res) | ||
1496 | { | 1545 | { |
1497 | struct fib_info *fi = res->fi; | 1546 | struct fib_info *fi = res->fi; |
1498 | struct in_device *in_dev; | ||
1499 | int w; | ||
1500 | |||
1501 | spin_lock_bh(&fib_multipath_lock); | ||
1502 | if (fi->fib_power <= 0) { | ||
1503 | int power = 0; | ||
1504 | change_nexthops(fi) { | ||
1505 | in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); | ||
1506 | if (nexthop_nh->nh_flags & RTNH_F_DEAD) | ||
1507 | continue; | ||
1508 | if (in_dev && | ||
1509 | IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && | ||
1510 | nexthop_nh->nh_flags & RTNH_F_LINKDOWN) | ||
1511 | continue; | ||
1512 | power += nexthop_nh->nh_weight; | ||
1513 | nexthop_nh->nh_power = nexthop_nh->nh_weight; | ||
1514 | } endfor_nexthops(fi); | ||
1515 | fi->fib_power = power; | ||
1516 | if (power <= 0) { | ||
1517 | spin_unlock_bh(&fib_multipath_lock); | ||
1518 | /* Race condition: route has just become dead. */ | ||
1519 | res->nh_sel = 0; | ||
1520 | return; | ||
1521 | } | ||
1522 | } | ||
1523 | |||
1524 | 1547 | ||
1525 | /* w should be random number [0..fi->fib_power-1], | 1548 | for_nexthops(fi) { |
1526 | * it is pretty bad approximation. | 1549 | if (hash > atomic_read(&nh->nh_upper_bound)) |
1527 | */ | 1550 | continue; |
1528 | |||
1529 | w = jiffies % fi->fib_power; | ||
1530 | 1551 | ||
1531 | change_nexthops(fi) { | 1552 | res->nh_sel = nhsel; |
1532 | if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && | 1553 | return; |
1533 | nexthop_nh->nh_power) { | ||
1534 | w -= nexthop_nh->nh_power; | ||
1535 | if (w <= 0) { | ||
1536 | nexthop_nh->nh_power--; | ||
1537 | fi->fib_power--; | ||
1538 | res->nh_sel = nhsel; | ||
1539 | spin_unlock_bh(&fib_multipath_lock); | ||
1540 | return; | ||
1541 | } | ||
1542 | } | ||
1543 | } endfor_nexthops(fi); | 1554 | } endfor_nexthops(fi); |
1544 | 1555 | ||
1545 | /* Race condition: route has just become dead. */ | 1556 | /* Race condition: route has just become dead. */ |
1546 | res->nh_sel = 0; | 1557 | res->nh_sel = 0; |
1547 | spin_unlock_bh(&fib_multipath_lock); | ||
1548 | } | 1558 | } |
1549 | #endif | 1559 | #endif |