diff options
author | David S. Miller <davem@davemloft.net> | 2014-10-24 00:14:52 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-10-24 00:14:52 -0400 |
commit | fad71e4a11443f782ce84ef2e5a1592667f7ee22 (patch) | |
tree | 81a13422d8797e22e2009d5e445f3dc30237db02 | |
parent | 105970f6087ae240b00deaff85773ed9bf381145 (diff) | |
parent | 367efcb932c1cfc134d5b1fd9db8665ae5e6a251 (diff) |
Merge branch 'ipv6_route'
Martin KaFai Lau says:
====================
ipv6: Reduce the number of fib6_lookup() calls from ip6_pol_route()
This patch set is trying to reduce the number of fib6_lookup()
calls from ip6_pol_route().
I have adapted davem's udpflood and kbench_mod tests
(https://git.kernel.org/pub/scm/linux/kernel/git/davem/net_test_tools.git) to
support IPv6, and here are the results:
Before:
[root]# for i in $(seq 1 3); do time ./udpflood -l 20000000 -c 250 2401:face:face:face::2; done
real 0m34.190s
user 0m3.047s
sys 0m31.108s
real 0m34.635s
user 0m3.125s
sys 0m31.475s
real 0m34.517s
user 0m3.034s
sys 0m31.449s
[root]# insmod ip6_route_kbench.ko oif=2 src=2401:face:face:face::1 dst=2401:face:face:face::2
[ 660.160976] ip6_route_kbench: ip6_route_output tdiff: 933
[ 660.207261] ip6_route_kbench: ip6_route_output tdiff: 988
[ 660.253492] ip6_route_kbench: ip6_route_output tdiff: 896
[ 660.298862] ip6_route_kbench: ip6_route_output tdiff: 898
After:
[root]# for i in $(seq 1 3); do time ./udpflood -l 20000000 -c 250 2401:face:face:face::2; done
real 0m32.695s
user 0m2.925s
sys 0m29.737s
real 0m32.636s
user 0m3.007s
sys 0m29.596s
real 0m32.797s
user 0m2.866s
sys 0m29.898s
[root]# insmod ip6_route_kbench.ko oif=2 src=2401:face:face:face::1 dst=2401:face:face:face::2
[ 881.220793] ip6_route_kbench: ip6_route_output tdiff: 684
[ 881.253477] ip6_route_kbench: ip6_route_output tdiff: 640
[ 881.286867] ip6_route_kbench: ip6_route_output tdiff: 630
[ 881.320749] ip6_route_kbench: ip6_route_output tdiff: 653
/****************************** udpflood.c ******************************/
/* An adaptation of Eric Dumazet's and David Miller's udpflood tool
 * that adds IPv6 support.
 */
typedef uint32_t u32;

/* Set to 1 by the -d option; print_saddr() prints nothing while it is 0. */
static int debug = 0;
/* Allow -fstrict-aliasing */
/*
 * One socket address viewable three ways: as generic storage (a46, used
 * to read ss_family), as an IPv4 address (a4) or as an IPv6 address (a6).
 */
typedef union sa_u {
struct sockaddr_storage a46;
struct sockaddr_in a4;
struct sockaddr_in6 a6;
} sa_u;
/*
 * Print the command-line synopsis.
 *
 * The option letters listed here mirror the getopt() string in main()
 * ("dl:s:p:c:").
 *
 * Returns -1 so that callers can simply "return usage();".
 */
static int usage(void)
{
	printf("usage: udpflood [ -d ] [ -l count ] [ -s message_size ] "
	       "[ -p port ] [ -c num_ip_addrs ] IP_ADDRESS\n");
	return -1;
}
/*
 * Return the last 32 bits of the address in host byte order.
 *
 * For PF_INET that is the whole IPv4 address; for any other family the
 * address is treated as IPv6 and its fourth 32-bit word is returned.
 */
static u32 get_last32h(const sa_u *sa)
{
	if (sa->a46.ss_family == PF_INET)
		return ntohl(sa->a4.sin_addr.s_addr);

	return ntohl(sa->a6.sin6_addr.s6_addr32[3]);
}
/*
 * Store last32h (host byte order) into the last 32 bits of the address.
 *
 * For PF_INET this overwrites the whole IPv4 address; for any other
 * family the address is treated as IPv6 and only its fourth 32-bit word
 * is overwritten.
 */
static void set_last32h(sa_u *sa, u32 last32h)
{
	if (sa->a46.ss_family == PF_INET)
		sa->a4.sin_addr.s_addr = htonl(last32h);
	else
		sa->a6.sin6_addr.s6_addr32[3] = htonl(last32h);
}
/*
 * Debug helper: print "<msg>: <address>" for the given socket address.
 *
 * Does nothing unless the global debug flag is set. If the address family
 * is neither PF_INET nor PF_INET6, or inet_ntop() fails, a placeholder is
 * printed instead of reading the never-written buffer.
 */
static void print_saddr(const sa_u *sa, const char *msg)
{
	char buf[64];
	const char *text = NULL;

	if (!debug)
		return;

	switch (sa->a46.ss_family) {
	case PF_INET:
		text = inet_ntop(PF_INET, &(sa->a4.sin_addr.s_addr), buf,
				 sizeof(buf));
		break;
	case PF_INET6:
		text = inet_ntop(PF_INET6, &(sa->a6.sin6_addr), buf,
				 sizeof(buf));
		break;
	default:
		break;
	}

	printf("%s: %s\n", msg, text ? text : "<unprintable address>");
}
static int send_packets(const sa_u *sa, size_t num_addrs, int count, int ms=
g_sz)
{
char *msg =3D malloc(msg_sz);
sa_u saddr;
u32 start_addr32h, end_addr32h, cur_addr32h;
int fd, i, err;
if (!msg)
return -ENOMEM;
memset(msg, 0, msg_sz);
memcpy(&saddr, sa, sizeof(saddr));
cur_addr32h =3D start_addr32h =3D get_last32h(&saddr);
end_addr32h =3D start_addr32h + num_addrs;
fd =3D socket(saddr.a46.ss_family, SOCK_DGRAM, 0);
if (fd < 0) {
perror("socket");
err =3D fd;
goto out_nofd;
}
/* connect to avoid the kernel spending time in figuring
* out the source address (i.e pin the src address)
*/
err =3D connect(fd, (struct sockaddr *) &saddr, sizeof(saddr));
if (err < 0) {
perror("connect");
goto out;
}
print_saddr(&saddr, "start_addr");
for (i =3D 0; i < count; i++) {
print_saddr(&saddr, "sendto");
err =3D sendto(fd, msg, msg_sz, 0, (struct sockaddr *)&saddr,
sizeof(saddr));
if (err < 0) {
perror("sendto");
goto out;
}
if (++cur_addr32h >=3D end_addr32h)
cur_addr32h =3D start_addr32h;
set_last32h(&saddr, cur_addr32h);
}
err =3D 0;
out:
close(fd);
out_nofd:
free(msg);
return err;
}
/*
 * Parse the command line and start the flood.
 *
 * Options: -l datagram count, -s message size, -p destination port,
 * -c number of consecutive destination addresses, -d debug output.
 * The single positional argument is the starting IPv4 or IPv6 address.
 *
 * Returns usage()'s result on any argument error, otherwise whatever
 * send_packets() returns.
 */
int main(int argc, char **argv, char **envp)
{
	int port, msg_sz, count, num_addrs, ret;
	sa_u start_addr;

	port = 6000;
	msg_sz = 32;
	count = 10000000;
	num_addrs = 1;

	while ((ret = getopt(argc, argv, "dl:s:p:c:")) >= 0) {
		switch (ret) {
		case 'l':
			if (sscanf(optarg, "%d", &count) != 1)
				return usage();
			break;
		case 's':
			if (sscanf(optarg, "%d", &msg_sz) != 1)
				return usage();
			break;
		case 'p':
			if (sscanf(optarg, "%d", &port) != 1)
				return usage();
			break;
		case 'c':
			if (sscanf(optarg, "%d", &num_addrs) != 1)
				return usage();
			break;
		case 'd':
			debug = 1;
			break;
		case '?':
			return usage();
		}
	}

	if (num_addrs < 1)
		return usage();

	if (!argv[optind])
		return usage();

	/* Zero the whole address first: only the family, port and address
	 * are filled in below, and the rest (e.g. sin6_flowinfo and
	 * sin6_scope_id) must not be left as stack garbage when it is
	 * handed to connect()/sendto().
	 */
	memset(&start_addr, 0, sizeof(start_addr));

	if (inet_pton(PF_INET, argv[optind], &start_addr.a4.sin_addr) == 1) {
		start_addr.a46.ss_family = PF_INET;
		start_addr.a4.sin_port = htons(port);
	} else if (inet_pton(PF_INET6, argv[optind],
			     &start_addr.a6.sin6_addr) == 1) {
		start_addr.a46.ss_family = PF_INET6;
		start_addr.a6.sin6_port = htons(port);
	} else {
		return usage();
	}

	return send_packets(&start_addr, num_addrs, count, msg_sz);
}
/****************** ip6_route_kbench_mod.c ******************/
/* We can't just use "get_cycles()" as on some platforms, such
 * as sparc64, that gives system cycles rather than cpu clock
 * cycles.
 *
 * get_tick() returns the raw per-architecture cycle counter used to time
 * a single route lookup. Exactly one of the variants below may be
 * compiled in.
 *
 * NOTE(review): the preprocessor guards were missing from this copy of
 * the file, leaving three conflicting definitions. The conditions below
 * are reconstructed from the bodies ("rd %tick" is sparc64, rdtscll() is
 * x86) - confirm against the original kbench_mod.c.
 */
#if defined(CONFIG_SPARC64)
static inline unsigned long long get_tick(void)
{
	unsigned long long t;

	__asm__ __volatile__("rd %%tick, %0" : "=r" (t));
	return t;
}
#elif defined(CONFIG_X86)
static inline unsigned long long get_tick(void)
{
	unsigned long long t;

	rdtscll(t);
	return t;
}
#else
static inline unsigned long long get_tick(void)
{
	return get_cycles();
}
#endif
/*
 * Description of the flow being looked up. The scalar fields start out at
 * their DEFAULT_* values and are exposed as module parameters; the two
 * addresses are filled in by flow_setup() from dst_string / src_string.
 */
static int flow_oif = DEFAULT_OIF;
static int flow_iif = DEFAULT_IIF;
static u32 flow_mark = DEFAULT_MARK;
static struct in6_addr flow_dst_ip_addr;
static struct in6_addr flow_src_ip_addr;
static int flow_tos = DEFAULT_TOS;
/* Textual addresses as given on the insmod command line ("dst=", "src="). */
static char dst_string[64];
static char src_string[64];
module_param_string(dst, dst_string, sizeof(dst_string), 0);
module_param_string(src, src_string, sizeof(src_string), 0);
/*
 * Convert the "dst" and "src" module parameter strings, when non-empty,
 * into flow_dst_ip_addr and flow_src_ip_addr.
 *
 * Returns 0 on success, or -1 after logging the offending string if
 * in6_pton() rejects it. An empty string leaves the address untouched.
 */
static int __init flow_setup(void)
{
	if (dst_string[0] &&
	    !in6_pton(dst_string, -1, &flow_dst_ip_addr.s6_addr[0], -1, NULL)) {
		pr_info("cannot parse \"%s\"\n", dst_string);
		return -1;
	}

	if (src_string[0] &&
	    !in6_pton(src_string, -1, &flow_src_ip_addr.s6_addr[0], -1, NULL)) {
		/* Log the string that actually failed, not dst_string. */
		pr_info("cannot parse \"%s\"\n", src_string);
		return -1;
	}

	return 0;
}
/* Module parameters "oif", "iif", "mark" and "tos" set the scalar flow fields. */
module_param_named(oif, flow_oif, int, 0);
module_param_named(iif, flow_iif, int, 0);
module_param_named(mark, flow_mark, uint, 0);
module_param_named(tos, flow_tos, int, 0);
/* Number of untimed lookups run before each timed one (module parameter "count"). */
static int warmup_count = DEFAULT_WARMUP_COUNT;
module_param_named(count, warmup_count, int, 0);
/*
 * Fill *fl6 with the flow configured through the module parameters.
 * The structure is cleared first, so every field not listed here is zero.
 */
static void flow_init(struct flowi6 *fl6)
{
	memset(fl6, 0, sizeof(*fl6));

	/* Endpoints. */
	fl6->saddr = flow_src_ip_addr;
	fl6->daddr = flow_dst_ip_addr;

	/* Interfaces. */
	fl6->flowi6_iif = flow_iif;
	fl6->flowi6_oif = flow_oif;

	/* Remaining lookup keys. */
	fl6->flowi6_tos = flow_tos;
	fl6->flowi6_mark = flow_mark;
	fl6->flowi6_proto = IPPROTO_ICMPV6;
}
/*
 * Build a dummy IPv6/ICMPv6 skb that appears to have arrived on the
 * interface with index flow_iif, for use with ip6_route_input().
 *
 * Returns the skb, or NULL (after logging) if allocation fails or the
 * interface does not exist.
 */
static struct sk_buff * fake_skb_get(void)
{
	struct sk_buff *pkt = alloc_skb(4096, GFP_KERNEL);
	struct ipv6hdr *ip6h;

	if (!pkt) {
		pr_info("Cannot alloc SKB for test\n");
		return NULL;
	}

	pkt->dev = __dev_get_by_index(&init_net, flow_iif);
	if (!pkt->dev) {
		pr_info("Input device (%d) does not exist\n", flow_iif);
		kfree_skb(pkt);
		return NULL;
	}

	/* Order matters: the header offsets are recorded before the
	 * reserve, exactly as in the statement sequence below.
	 */
	skb_reset_mac_header(pkt);
	skb_reset_network_header(pkt);
	skb_reserve(pkt, MAX_HEADER + sizeof(struct ipv6hdr));

	/* Minimal IPv6 header carrying the configured addresses. */
	ip6h = ipv6_hdr(pkt);
	ip6h->priority = 0;
	ip6h->version = 6;
	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
	ip6h->payload_len = htons(sizeof(struct icmp6hdr));
	ip6h->nexthdr = IPPROTO_ICMPV6;
	ip6h->saddr = flow_src_ip_addr;
	ip6h->daddr = flow_dst_ip_addr;

	pkt->protocol = htons(ETH_P_IPV6);
	pkt->mark = flow_mark;

	return pkt;
}
/*
 * Time one ip6_route_output() call for the configured flow.
 *
 * warmup_count untimed lookups are performed first; if one of them yields
 * an error pointer the benchmark is abandoned with a log message. The
 * timed lookup is then bracketed by get_tick() and the difference logged.
 */
static void do_full_output_lookup_bench(void)
{
	unsigned long long t1, t2, tdiff;
	struct rt6_info *rt;
	struct flowi6 fl6;
	int i;

	rt = NULL;
	for (i = 0; i < warmup_count; i++) {
		flow_init(&fl6);

		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
		if (IS_ERR(rt))
			break;
		ip6_rt_put(rt);
	}
	if (IS_ERR(rt)) {
		/* Name the function actually called, not the IPv4
		 * ip_route_output_key().
		 */
		pr_info("ip6_route_output: err=%ld\n", PTR_ERR(rt));
		return;
	}

	flow_init(&fl6);

	t1 = get_tick();
	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl6);
	t2 = get_tick();
	if (!IS_ERR(rt))
		ip6_rt_put(rt);

	tdiff = t2 - t1;
	pr_info("ip6_route_output tdiff: %llu\n", tdiff);
}
/*
 * Examine and release the route that ip6_route_input() attached to skb.
 * Returns non-zero when there is no route or it is the null entry.
 */
static int input_lookup_failed(struct sk_buff *skb)
{
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int failed = (!rt || rt == init_net.ipv6.ip6_null_entry);

	skb_dst_drop(skb);
	return failed;
}

/*
 * Time one ip6_route_input() call on a fake skb.
 *
 * warmup_count untimed lookups run first with bottom halves disabled;
 * the timed lookup is likewise bracketed by local_bh_disable()/enable().
 * Any failed lookup aborts the benchmark with a log message.
 */
static void do_full_input_lookup_bench(void)
{
	unsigned long long start = 0, end = 0;
	struct sk_buff *skb = fake_skb_get();
	int failed = 0;
	int n;

	if (!skb)
		return;

	/* Warm-up phase: stop at the first failed lookup. */
	local_bh_disable();
	for (n = 0; n < warmup_count && !failed; n++) {
		ip6_route_input(skb);
		failed = input_lookup_failed(skb);
	}
	local_bh_enable();

	if (!failed) {
		/* Measured lookup. */
		local_bh_disable();
		start = get_tick();
		ip6_route_input(skb);
		end = get_tick();
		local_bh_enable();

		failed = input_lookup_failed(skb);
	}

	if (failed)
		pr_info("Input route lookup fails\n");
	else
		pr_info("ip6_route_input tdiff: %llu\n", end - start);

	kfree_skb(skb);
}
/*
 * Run one benchmark pass: the input-path lookup when an input interface
 * (flow_iif) is configured, the output-path lookup otherwise.
 */
static void do_full_lookup_bench(void)
{
	if (flow_iif) {
		do_full_input_lookup_bench();
		return;
	}

	do_full_output_lookup_bench();
}
/* Run the lookup benchmark four times in a row. */
static void do_bench(void)
{
	int pass;

	for (pass = 0; pass < 4; pass++)
		do_full_lookup_bench();
}
/*
 * Module entry point: parse the addresses, log the configured flow, run
 * the benchmark and then fail the load.
 *
 * Always returns an error: -EINVAL if the addresses cannot be parsed (or,
 * on x86, if no TSC is available), otherwise -ENODEV once the benchmark
 * has run.
 */
static int __init kbench_init(void)
{
	if (flow_setup())
		return -EINVAL;

	pr_info("flow [IIF(%d),OIF(%d),MARK(0x%08x),D("IP6_FMT"),"
		"S("IP6_FMT"),TOS(0x%02x)]\n",
		flow_iif, flow_oif, flow_mark,
		IP6_PRT(flow_dst_ip_addr),
		IP6_PRT(flow_src_ip_addr),
		flow_tos);

	/* The TSC check only makes sense (and only compiles) on x86.
	 * NOTE(review): guard reconstructed from the error message below;
	 * confirm against the original kbench_mod.c.
	 */
#if defined(CONFIG_X86)
	if (!cpu_has_tsc) {
		pr_err("X86 TSC is required, but is unavailable.\n");
		return -EINVAL;
	}
#endif

	pr_info("sizeof(struct rt6_info)==%zu\n", sizeof(struct rt6_info));

	do_bench();

	return -ENODEV;
}
/*
 * Empty exit handler. kbench_init() only runs the benchmark and then
 * returns an error code (-EINVAL or -ENODEV), so it sets up nothing that
 * would need tearing down here.
 */
static void __exit kbench_exit(void)
{
}
/* Register the entry/exit points and declare the module licence. */
module_init(kbench_init);
module_exit(kbench_exit);
MODULE_LICENSE("GPL");
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv6/route.c | 94 |
1 files changed, 53 insertions, 41 deletions
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a318dd89b6d9..c91083156edb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -772,23 +772,22 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, | |||
772 | } | 772 | } |
773 | #endif | 773 | #endif |
774 | 774 | ||
775 | #define BACKTRACK(__net, saddr) \ | 775 | static struct fib6_node* fib6_backtrack(struct fib6_node *fn, |
776 | do { \ | 776 | struct in6_addr *saddr) |
777 | if (rt == __net->ipv6.ip6_null_entry) { \ | 777 | { |
778 | struct fib6_node *pn; \ | 778 | struct fib6_node *pn; |
779 | while (1) { \ | 779 | while (1) { |
780 | if (fn->fn_flags & RTN_TL_ROOT) \ | 780 | if (fn->fn_flags & RTN_TL_ROOT) |
781 | goto out; \ | 781 | return NULL; |
782 | pn = fn->parent; \ | 782 | pn = fn->parent; |
783 | if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ | 783 | if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) |
784 | fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ | 784 | fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); |
785 | else \ | 785 | else |
786 | fn = pn; \ | 786 | fn = pn; |
787 | if (fn->fn_flags & RTN_RTINFO) \ | 787 | if (fn->fn_flags & RTN_RTINFO) |
788 | goto restart; \ | 788 | return fn; |
789 | } \ | 789 | } |
790 | } \ | 790 | } |
791 | } while (0) | ||
792 | 791 | ||
793 | static struct rt6_info *ip6_pol_route_lookup(struct net *net, | 792 | static struct rt6_info *ip6_pol_route_lookup(struct net *net, |
794 | struct fib6_table *table, | 793 | struct fib6_table *table, |
@@ -804,8 +803,11 @@ restart: | |||
804 | rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); | 803 | rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); |
805 | if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) | 804 | if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) |
806 | rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); | 805 | rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); |
807 | BACKTRACK(net, &fl6->saddr); | 806 | if (rt == net->ipv6.ip6_null_entry) { |
808 | out: | 807 | fn = fib6_backtrack(fn, &fl6->saddr); |
808 | if (fn) | ||
809 | goto restart; | ||
810 | } | ||
809 | dst_use(&rt->dst, jiffies); | 811 | dst_use(&rt->dst, jiffies); |
810 | read_unlock_bh(&table->tb6_lock); | 812 | read_unlock_bh(&table->tb6_lock); |
811 | return rt; | 813 | return rt; |
@@ -915,33 +917,48 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, | |||
915 | static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, | 917 | static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, |
916 | struct flowi6 *fl6, int flags) | 918 | struct flowi6 *fl6, int flags) |
917 | { | 919 | { |
918 | struct fib6_node *fn; | 920 | struct fib6_node *fn, *saved_fn; |
919 | struct rt6_info *rt, *nrt; | 921 | struct rt6_info *rt, *nrt; |
920 | int strict = 0; | 922 | int strict = 0; |
921 | int attempts = 3; | 923 | int attempts = 3; |
922 | int err; | 924 | int err; |
923 | int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; | ||
924 | 925 | ||
925 | strict |= flags & RT6_LOOKUP_F_IFACE; | 926 | strict |= flags & RT6_LOOKUP_F_IFACE; |
927 | if (net->ipv6.devconf_all->forwarding == 0) | ||
928 | strict |= RT6_LOOKUP_F_REACHABLE; | ||
926 | 929 | ||
927 | relookup: | 930 | redo_fib6_lookup_lock: |
928 | read_lock_bh(&table->tb6_lock); | 931 | read_lock_bh(&table->tb6_lock); |
929 | 932 | ||
930 | restart_2: | ||
931 | fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); | 933 | fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); |
934 | saved_fn = fn; | ||
932 | 935 | ||
933 | restart: | 936 | redo_rt6_select: |
934 | rt = rt6_select(fn, oif, strict | reachable); | 937 | rt = rt6_select(fn, oif, strict); |
935 | if (rt->rt6i_nsiblings) | 938 | if (rt->rt6i_nsiblings) |
936 | rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); | 939 | rt = rt6_multipath_select(rt, fl6, oif, strict); |
937 | BACKTRACK(net, &fl6->saddr); | 940 | if (rt == net->ipv6.ip6_null_entry) { |
938 | if (rt == net->ipv6.ip6_null_entry || | 941 | fn = fib6_backtrack(fn, &fl6->saddr); |
939 | rt->rt6i_flags & RTF_CACHE) | 942 | if (fn) |
940 | goto out; | 943 | goto redo_rt6_select; |
944 | else if (strict & RT6_LOOKUP_F_REACHABLE) { | ||
945 | /* also consider unreachable route */ | ||
946 | strict &= ~RT6_LOOKUP_F_REACHABLE; | ||
947 | fn = saved_fn; | ||
948 | goto redo_rt6_select; | ||
949 | } else { | ||
950 | dst_hold(&rt->dst); | ||
951 | read_unlock_bh(&table->tb6_lock); | ||
952 | goto out2; | ||
953 | } | ||
954 | } | ||
941 | 955 | ||
942 | dst_hold(&rt->dst); | 956 | dst_hold(&rt->dst); |
943 | read_unlock_bh(&table->tb6_lock); | 957 | read_unlock_bh(&table->tb6_lock); |
944 | 958 | ||
959 | if (rt->rt6i_flags & RTF_CACHE) | ||
960 | goto out2; | ||
961 | |||
945 | if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) | 962 | if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) |
946 | nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); | 963 | nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); |
947 | else if (!(rt->dst.flags & DST_HOST)) | 964 | else if (!(rt->dst.flags & DST_HOST)) |
@@ -967,15 +984,8 @@ restart: | |||
967 | * released someone could insert this route. Relookup. | 984 | * released someone could insert this route. Relookup. |
968 | */ | 985 | */ |
969 | ip6_rt_put(rt); | 986 | ip6_rt_put(rt); |
970 | goto relookup; | 987 | goto redo_fib6_lookup_lock; |
971 | 988 | ||
972 | out: | ||
973 | if (reachable) { | ||
974 | reachable = 0; | ||
975 | goto restart_2; | ||
976 | } | ||
977 | dst_hold(&rt->dst); | ||
978 | read_unlock_bh(&table->tb6_lock); | ||
979 | out2: | 989 | out2: |
980 | rt->dst.lastuse = jiffies; | 990 | rt->dst.lastuse = jiffies; |
981 | rt->dst.__use++; | 991 | rt->dst.__use++; |
@@ -1235,10 +1245,12 @@ restart: | |||
1235 | rt = net->ipv6.ip6_null_entry; | 1245 | rt = net->ipv6.ip6_null_entry; |
1236 | else if (rt->dst.error) { | 1246 | else if (rt->dst.error) { |
1237 | rt = net->ipv6.ip6_null_entry; | 1247 | rt = net->ipv6.ip6_null_entry; |
1238 | goto out; | 1248 | } else if (rt == net->ipv6.ip6_null_entry) { |
1249 | fn = fib6_backtrack(fn, &fl6->saddr); | ||
1250 | if (fn) | ||
1251 | goto restart; | ||
1239 | } | 1252 | } |
1240 | BACKTRACK(net, &fl6->saddr); | 1253 | |
1241 | out: | ||
1242 | dst_hold(&rt->dst); | 1254 | dst_hold(&rt->dst); |
1243 | 1255 | ||
1244 | read_unlock_bh(&table->tb6_lock); | 1256 | read_unlock_bh(&table->tb6_lock); |