diff options
author | David S. Miller <davem@davemloft.net> | 2011-03-05 00:24:47 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-03-05 00:24:47 -0500 |
commit | 44713b67db10c774f14280c129b0d5fd13c70cf2 (patch) | |
tree | 2a5fac5524dbac7b30be6409751983b597a71766 /net/ipv4/route.c | |
parent | 65e8354ec13a45414045084166cb340c0d7ffe8a (diff) |
ipv4: Optimize flow initialization in output route lookup.
We waste a lot of cycles, CPU store-buffer traffic, and memory
operations memset()'ing the on-stack flow used to perform
output route lookups in __ip_route_output_key().
Only the first half of the flow object members even matter for
output route lookups in this context, specifically:
  FIB rules matching cares about:
      dst, src, tos, iif, oif, mark
  FIB trie lookup cares about:
      dst
  FIB semantic match cares about:
      tos, scope, oif
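Therefore initialize only these specific members, using explicit
assignments, and elide the memset entirely (the designated initializer
being removed implicitly zero-fills every member it does not name).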
On Niagara2 this saves roughly 300 cycles in the output route
lookup path.
We can likely take this further: all callers of output route
lookups essentially throw away the on-stack flow they use, so
they don't care if we use it as a scratch-pad to compute the
final flow key.
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r-- | net/ipv4/route.c | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 432eee645648..6c8740362ef9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -2431,14 +2431,7 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2431 | const struct flowi *oldflp) | 2431 | const struct flowi *oldflp) |
2432 | { | 2432 | { |
2433 | u32 tos = RT_FL_TOS(oldflp); | 2433 | u32 tos = RT_FL_TOS(oldflp); |
2434 | struct flowi fl = { .fl4_dst = oldflp->fl4_dst, | 2434 | struct flowi fl; |
2435 | .fl4_src = oldflp->fl4_src, | ||
2436 | .fl4_tos = tos & IPTOS_RT_MASK, | ||
2437 | .fl4_scope = ((tos & RTO_ONLINK) ? | ||
2438 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), | ||
2439 | .mark = oldflp->mark, | ||
2440 | .iif = net->loopback_dev->ifindex, | ||
2441 | .oif = oldflp->oif }; | ||
2442 | struct fib_result res; | 2435 | struct fib_result res; |
2443 | unsigned int flags = 0; | 2436 | unsigned int flags = 0; |
2444 | struct net_device *dev_out = NULL; | 2437 | struct net_device *dev_out = NULL; |
@@ -2449,6 +2442,15 @@ static struct rtable *ip_route_output_slow(struct net *net, | |||
2449 | res.r = NULL; | 2442 | res.r = NULL; |
2450 | #endif | 2443 | #endif |
2451 | 2444 | ||
2445 | fl.oif = oldflp->oif; | ||
2446 | fl.iif = net->loopback_dev->ifindex; | ||
2447 | fl.mark = oldflp->mark; | ||
2448 | fl.fl4_dst = oldflp->fl4_dst; | ||
2449 | fl.fl4_src = oldflp->fl4_src; | ||
2450 | fl.fl4_tos = tos & IPTOS_RT_MASK; | ||
2451 | fl.fl4_scope = ((tos & RTO_ONLINK) ? | ||
2452 | RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); | ||
2453 | |||
2452 | rcu_read_lock(); | 2454 | rcu_read_lock(); |
2453 | if (oldflp->fl4_src) { | 2455 | if (oldflp->fl4_src) { |
2454 | rth = ERR_PTR(-EINVAL); | 2456 | rth = ERR_PTR(-EINVAL); |