author		Paul Mundt <lethal@linux-sh.org>	2010-08-16 00:32:24 -0400
committer	Paul Mundt <lethal@linux-sh.org>	2010-08-16 00:32:24 -0400
commit		bbcf6e8b66ab2fb5ddab4d0fe40c2e6a5ebe5301 (patch)
tree		071fa9f86dc04a16570be367d04cff3b00c694ad /net/ipv4
parent		57682827b9a5edb52e33af0be9082b51bffcd5c7 (diff)
parent		da5cabf80e2433131bf0ed8993abc0f7ea618c73 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
arch/sh/include/asm/Kbuild
drivers/Makefile
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'net/ipv4')
 57 files changed, 1149 insertions(+), 955 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce564b035..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
+	inet->nodefrag = 0;
+
 	if (SOCK_RAW == sock->type) {
 		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
@@ -725,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 		      size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
 		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(inet_sendpage);
 
 int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
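
The hunk above gives struct proto a veto over implicit binding: inet_autobind() is now skipped whenever the protocol sets sk_prot->no_autobind. The point is TCP, where grabbing a local port at sendmsg() time on an unconnected socket is pure overhead. A minimal sketch of a protocol opting out (the field assignment is from the same patch series, the surrounding members are trimmed):

	/* sketch: a struct proto opting out of autobind; other members elided */
	struct proto tcp_prot = {
		.name		= "TCP",
		.sendmsg	= tcp_sendmsg,
		.no_autobind	= true,	/* inet_sendmsg()/inet_sendpage() skip inet_autobind() */
	};

With that flag in place, the stream ops below can point at the generic inet_sendmsg()/inet_sendpage() wrappers instead of calling the TCP functions directly.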
@@ -892,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
-	.sendmsg	   = tcp_sendmsg,
+	.sendmsg	   = inet_sendmsg,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -1100,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	if (err)
 		return err;
 
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	new_saddr = rt->rt_src;
 
@@ -1166,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 		err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
 	}
 	if (!err)
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	else {
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
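
These sk_setup_caps() hunks are the first of many rt->u.dst → rt->dst conversions in this merge. They come from a tree-wide cleanup that dropped a single-member union from struct rtable (and rt6_info/dn_route), embedding the dst_entry directly; only the spelling of the access changes. Roughly, assuming the 2.6.35-era layout:

	/* before: dst_entry wrapped in a one-member union */
	struct rtable {
		union {
			struct dst_entry	dst;
		} u;
		/* remaining members unchanged */
	};

	/* after: embedded directly, accessed as rt->dst */
	struct rtable {
		struct dst_entry	dst;
		/* remaining members unchanged */
	};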
@@ -1425,13 +1430,49 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
+#if BITS_PER_LONG==32
+
+u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+{
+	u64 res = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *bhptr, *userptr;
+		struct u64_stats_sync *syncp;
+		u64 v_bh, v_user;
+		unsigned int start;
+
+		/* first mib used by softirq context, we must use _bh() accessors */
+		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin_bh(syncp);
+			v_bh = *(((u64 *) bhptr) + offt);
+		} while (u64_stats_fetch_retry_bh(syncp, start));
+
+		/* second mib used in USER context */
+		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin(syncp);
+			v_user = *(((u64 *) userptr) + offt);
+		} while (u64_stats_fetch_retry(syncp, start));
+
+		res += v_bh + v_user;
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field64);
+#endif
+
+int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
 		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[1] = __alloc_percpu(mibsize, align);
 	if (!ptr[1])
 		goto err1;
 	return 0;
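
snmp_fold_field64() exists because on 32-bit kernels a 64-bit counter cannot be read atomically, so each per-cpu MIB now carries a u64_stats_sync seqcount at syncp_offset and readers loop until fetch_begin/fetch_retry observe a stable value (the _bh flavour for the MIB copy updated from softirq context). The writer side of this scheme is two near-free calls around the update; a sketch, with a hypothetical struct standing in for the real MIB types:

	#include <linux/u64_stats_sync.h>

	struct my_mib {				/* hypothetical example type */
		u64			mibs[4];
		struct u64_stats_sync	syncp;
	};

	static void my_mib_add(struct my_mib *mib, int field, u64 delta)
	{
		u64_stats_update_begin(&mib->syncp); /* seqcount++ on 32-bit, no-op on 64-bit */
		mib->mibs[field] += delta;
		u64_stats_update_end(&mib->syncp);   /* readers retry if they raced with us */
	}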
@@ -1488,25 +1529,32 @@ static const struct net_protocol icmp_protocol = {
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib)) < 0)
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib)) < 0)
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
 		goto err_net_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
+			  sizeof(struct icmpmsg_mib),
+			  __alignof__(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 
 	tcp_mib_init(net);
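
The new third argument closes the loop on the snmp_fold_field64() change above: once a MIB struct mixes u64 counters with a u64_stats_sync, allocating its per-cpu copies with only __alignof__(unsigned long) could leave the u64 fields misaligned on 32-bit machines, so each caller now passes the struct's own alignment down to __alloc_percpu().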
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f094b75810db..96c1955b3e2f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -116,6 +116,7 @@
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 #include <net/atmclip.h>
 struct neigh_table *clip_tbl_hook;
+EXPORT_SYMBOL(clip_tbl_hook);
 #endif
 
 #include <asm/system.h>
@@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = {
 	.hh_output =		dev_queue_xmit,
 	.queue_xmit =		dev_queue_xmit,
 };
+EXPORT_SYMBOL(arp_broken_ops);
 
 struct neigh_table arp_tbl = {
 	.family =	AF_INET,
@@ -198,6 +200,7 @@ struct neigh_table arp_tbl = {
 	.gc_thresh2 =	512,
 	.gc_thresh3 =	1024,
 };
+EXPORT_SYMBOL(arp_tbl);
 
 int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
 {
@@ -333,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct net_device *dev = neigh->dev;
 	__be32 target = *(__be32*)neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev;
 
-	if (!in_dev)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dev);
+	if (!in_dev) {
+		rcu_read_unlock();
 		return;
-
+	}
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
@@ -358,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	case 2:		/* Avoid secondary IPs, get a primary/preferred one */
 		break;
 	}
+	rcu_read_unlock();
 
-	if (in_dev)
-		in_dev_put(in_dev);
 	if (!saddr)
 		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
 
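
This arp_solicit() rewrite is the pattern repeated through the rest of the merge: in_dev_get()/in_dev_put() bump and drop a refcount with atomic operations, while __in_dev_get_rcu() merely dereferences a pointer that RCU guarantees stays valid until rcu_read_unlock(). The shape of the read side, as a self-contained sketch (example_arp_announce() is a hypothetical name):

	/* sketch: reference-free read-side access to a device's in_device */
	static int example_arp_announce(struct net_device *dev)
	{
		struct in_device *in_dev;
		int val = 0;

		rcu_read_lock();
		in_dev = __in_dev_get_rcu(dev);
		if (in_dev)
			val = IN_DEV_ARP_ANNOUNCE(in_dev);
		rcu_read_unlock();	/* in_dev must not be touched after this */
		return val;
	}

Functions whose caller already holds rcu_read_lock() — arp_process() below, for instance — can call __in_dev_get_rcu() directly and skip taking references altogether.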
@@ -427,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 
 	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
-	if (rt->u.dst.dev != dev) {
+	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
@@ -497,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 	kfree_skb(skb);
 	return 1;
 }
+EXPORT_SYMBOL(arp_find);
 
 /* END OF OBSOLETE FUNCTIONS */
 
@@ -532,7 +538,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 	struct in_device *out_dev;
 	int imi, omi = -1;
 
-	if (rt->u.dst.dev == dev)
+	if (rt->dst.dev == dev)
 		return 0;
 
 	if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,10 +551,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 
 	/* place to check for proxy_arp for routes */
 
-	if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) {
+	out_dev = __in_dev_get_rcu(rt->dst.dev);
+	if (out_dev)
 		omi = IN_DEV_MEDIUM_ID(out_dev);
-		in_dev_put(out_dev);
-	}
+
 	return (omi != imi && omi != -1);
 }
 
@@ -576,7 +582,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
 				__be32 sip, __be32 tip)
 {
 	/* Private VLAN is only concerned about the same ethernet segment */
-	if (rt->u.dst.dev != dev)
+	if (rt->dst.dev != dev)
 		return 0;
 
 	/* Don't reply on self probes (often done by windowz boxes)*/
@@ -698,6 +704,7 @@ out:
 	kfree_skb(skb);
 	return NULL;
 }
+EXPORT_SYMBOL(arp_create);
 
 /*
  *	Send an arp packet.
@@ -707,6 +714,7 @@ void arp_xmit(struct sk_buff *skb)
 	/* Send it off, maybe filter it using firewalling first. */
 	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
+EXPORT_SYMBOL(arp_xmit);
 
 /*
  *	Create and send an arp packet.
@@ -733,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 
 	arp_xmit(skb);
 }
+EXPORT_SYMBOL(arp_send);
 
 /*
  *	Process an arp request.
@@ -741,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 static int arp_process(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	struct rtable *rt;
@@ -890,7 +899,6 @@ static int arp_process(struct sk_buff *skb)
 			arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
 		} else {
 			pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
-			in_dev_put(in_dev);
 			return 0;
 		}
 		goto out;
@@ -936,8 +944,6 @@ static int arp_process(struct sk_buff *skb)
 	}
 
 out:
-	if (in_dev)
-		in_dev_put(in_dev);
 	consume_skb(skb);
 	return 0;
 }
@@ -1045,7 +1051,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
@@ -1152,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
@@ -1453,14 +1459,3 @@ static int __init arp_proc_init(void)
 }
 
 #endif /* CONFIG_PROC_FS */
-
-EXPORT_SYMBOL(arp_broken_ops);
-EXPORT_SYMBOL(arp_find);
-EXPORT_SYMBOL(arp_create);
-EXPORT_SYMBOL(arp_xmit);
-EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
-
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-EXPORT_SYMBOL(clip_tbl_hook);
-#endif
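
The block deleted here is the counterpart of all the one-line EXPORT_SYMBOL() additions above: instead of a collected list at the bottom of the file, each export now sits immediately after the definition it belongs to, the placement checkpatch.pl enforces. For example, straight from the hunks above:

	void arp_xmit(struct sk_buff *skb)
	{
		/* Send it off, maybe filter it using firewalling first. */
		NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
	}
	EXPORT_SYMBOL(arp_xmit);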
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..f0550941df7b 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,9 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 	return(0);
 }
-
 EXPORT_SYMBOL(ip4_datagram_connect);
-
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 382bc768ed56..da14c49284f4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		}
 		ip_mc_up(in_dev);
 		/* fall through */
+	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_CHANGEADDR:
 		/* Send gratuitous ARP to notify of link change */
 		if (IN_DEV_ARP_NOTIFY(in_dev)) {
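
NETDEV_NOTIFY_PEERS is raised by a device that wants the rest of the network to relearn where it lives — typically a virtual NIC right after VM migration. Folding it into the NETDEV_CHANGEADDR case means devinet answers with a gratuitous ARP whenever arp_notify is enabled on the interface. A driver would trigger it through the helper from the same patch series (netif_notify_peers(), later renamed netdev_notify_peers()):

	/* sketch: a hypothetical virtual NIC announcing itself after migration */
	static void mydrv_migration_done(struct net_device *dev)
	{
		netif_notify_peers(dev);	/* raises NETDEV_NOTIFY_PEERS */
	}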
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4f0ed458c883..a43968918350 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -175,6 +175,7 @@ out:
 	fib_res_put(&res);
 	return dev;
 }
+EXPORT_SYMBOL(ip_dev_find);
 
 /*
  * Find address type as if only "dev" was present in the system. If
@@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr)
 {
 	return __inet_dev_addr_type(net, NULL, addr);
 }
+EXPORT_SYMBOL(inet_addr_type);
 
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 				__be32 addr)
 {
 	return __inet_dev_addr_type(net, dev, addr);
 }
+EXPORT_SYMBOL(inet_dev_addr_type);
 
 /* Given (packet source, input interface) and optional (dst, oif, tos):
    - (main) check, that source is valid i.e. not broadcast or our local
@@ -284,7 +287,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 		if (no_addr)
 			goto last_resort;
 		if (rpf == 1)
-			goto e_inval;
+			goto e_rpf;
 		fl.oif = dev->ifindex;
 
 		ret = 0;
@@ -299,7 +302,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 
 last_resort:
 	if (rpf)
-		goto e_inval;
+		goto e_rpf;
 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
 	*itag = 0;
 	return 0;
@@ -308,6 +311,8 @@ e_inval_res:
 	fib_res_put(&res);
 e_inval:
 	return -EINVAL;
+e_rpf:
+	return -EXDEV;
 }
 
 static inline __be32 sk_extract_addr(struct sockaddr *addr)
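
Splitting e_rpf off from e_inval lets callers tell a reverse-path-filter rejection (-EXDEV) apart from a genuinely invalid source (-EINVAL); the companion patch in this series uses that to count rp_filter drops in their own SNMP counter rather than hiding them among martians. Roughly, in a caller (sketch only; the argument list and the LINUX_MIB_IPRPFILTER counter name are as I recall the 2.6.36-era code):

	err = fib_validate_source(saddr, daddr, tos, 0, dev, &spec_dst, &itag, 0);
	if (err < 0) {
		if (err == -EXDEV)	/* reverse path test failed */
			NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER);
		goto martian_source;
	}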
@@ -1075,7 +1080,3 @@ void __init ip_fib_init(void)
 
 	fib_hash_init();
 }
-
-EXPORT_SYMBOL(inet_addr_type);
-EXPORT_SYMBOL(inet_dev_addr_type);
-EXPORT_SYMBOL(ip_dev_find);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d65e9215bcd7..a0d847c7cba5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = {
 		.fatal = 1,
 	},
 };
+EXPORT_SYMBOL(icmp_err_convert);
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
@@ -267,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
 		dst->rate_tokens = token;
 	return rc;
 }
+EXPORT_SYMBOL(xrlim_allow);
 
 static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		int type, int code)
 {
-	struct dst_entry *dst = &rt->u.dst;
+	struct dst_entry *dst = &rt->dst;
 	int rc = 1;
 
 	if (type > NR_ICMP_TYPES)
@@ -327,7 +329,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	struct sock *sk;
 	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net((*rt)->u.dst.dev));
+	sk = icmp_sk(dev_net((*rt)->dst.dev));
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
@@ -359,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
 	struct ipcm_cookie ipc;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr;
@@ -427,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 
 	if (!rt)
 		goto out;
-	net = dev_net(rt->u.dst.dev);
+	net = dev_net(rt->dst.dev);
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -596,9 +598,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			/* Ugh! */
 			orefdst = skb_in->_skb_refdst; /* save old refdst */
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
-					     RT_TOS(tos), rt2->u.dst.dev);
+					     RT_TOS(tos), rt2->dst.dev);
 
-			dst_release(&rt2->u.dst);
+			dst_release(&rt2->dst);
 			rt2 = skb_rtable(skb_in);
 			skb_in->_skb_refdst = orefdst; /* restore old refdst */
 		}
@@ -610,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 				    XFRM_LOOKUP_ICMP);
 		switch (err) {
 		case 0:
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			rt = rt2;
 			break;
 		case -EPERM:
@@ -629,7 +631,7 @@ route_done:
 
 	/* RFC says return as much as we can without exceeding 576 bytes. */
 
-	room = dst_mtu(&rt->u.dst);
+	room = dst_mtu(&rt->dst);
 	if (room > 576)
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -647,6 +649,7 @@ out_unlock:
 	icmp_xmit_unlock(sk);
 out:;
 }
+EXPORT_SYMBOL(icmp_send);
 
 
 /*
@@ -925,6 +928,7 @@ static void icmp_address(struct sk_buff *skb)
 /*
  * RFC1812 (4.3.3.9).	A router SHOULD listen all replies, and complain
  *			loudly if an inconsistency is found.
+ * called with rcu_read_lock()
  */
 
 static void icmp_address_reply(struct sk_buff *skb)
@@ -935,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb)
 	struct in_ifaddr *ifa;
 
 	if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
-		goto out;
+		return;
 
-	in_dev = in_dev_get(dev);
+	in_dev = __in_dev_get_rcu(dev);
 	if (!in_dev)
-		goto out;
-	rcu_read_lock();
+		return;
+
 	if (in_dev->ifa_list &&
 	    IN_DEV_LOG_MARTIANS(in_dev) &&
 	    IN_DEV_FORWARD(in_dev)) {
@@ -958,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb)
 			       mp, dev->name, &rt->rt_src);
 		}
 	}
-	rcu_read_unlock();
-	in_dev_put(in_dev);
-out:;
 }
 
 static void icmp_discard(struct sk_buff *skb)
@@ -974,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		struct sec_path *sp = skb_sec_path(skb);
@@ -1216,7 +1217,3 @@ int __init icmp_init(void)
 {
 	return register_pernet_subsys(&icmp_sk_ops);
 }
-
-EXPORT_SYMBOL(icmp_err_convert);
-EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5fff865a4fa7..a1ad0e7180d2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 		return NULL;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr    = rt->rt_src;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(pip, &rt->u.dst, NULL);
+	ip_select_ident(pip, &rt->dst, NULL);
 	((u8*)&pip[1])[0] = IPOPT_RA;
 	((u8*)&pip[1])[1] = 4;
 	((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 	((u8*)&iph[1])[0] = IPOPT_RA;
 	((u8*)&iph[1])[1] = 4;
 	((u8*)&iph[1])[2] = 0;
@@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	read_unlock(&in_dev->mc_list_lock);
 }
 
+/* called in rcu_read_lock() section */
 int igmp_rcv(struct sk_buff *skb)
 {
 	/* This basically follows the spec line by line -- see RFC1112 */
 	struct igmphdr *ih;
-	struct in_device *in_dev = in_dev_get(skb->dev);
+	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
 	int len = skb->len;
 
 	if (in_dev == NULL)
 		goto drop;
 
 	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
-		goto drop_ref;
+		goto drop;
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
@@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb)
 	case CHECKSUM_NONE:
 		skb->csum = 0;
 		if (__skb_checksum_complete(skb))
-			goto drop_ref;
+			goto drop;
 	}
 
 	ih = igmp_hdr(skb);
@@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb)
 		break;
 	case IGMP_PIM:
 #ifdef CONFIG_IP_PIMSM_V1
-		in_dev_put(in_dev);
 		return pim_rcv_v1(skb);
 #endif
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
@@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb)
 		break;
 	}
 
-drop_ref:
-	in_dev_put(in_dev);
 drop:
 	kfree_skb(skb);
 	return 0;
@@ -1246,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 out:
 	return;
 }
+EXPORT_SYMBOL(ip_mc_inc_group);
 
 /*
  *	Resend IGMP JOIN report; used for bonding.
@@ -1268,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
 	igmp_ifc_event(in_dev);
 #endif
 }
+EXPORT_SYMBOL(ip_mc_rejoin_group);
 
 /*
  *	A socket has left a multicast group on device dev
@@ -1298,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 		}
 	}
 }
+EXPORT_SYMBOL(ip_mc_dec_group);
 
 /* Device changing type */
 
@@ -1427,7 +1428,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 	}
 
 	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 	}
 	if (dev) {
@@ -1646,8 +1647,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 			if (dpsf->sf_inaddr == psf->sf_inaddr)
 				break;
 		if (!dpsf) {
-			dpsf = (struct ip_sf_list *)
-				kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+			dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
 			if (!dpsf)
 				continue;
 			*dpsf = *psf;
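
Dropping the cast on kmalloc() is the standard kernel cleanup: kmalloc() returns void *, which C converts implicitly to any object pointer, so the cast is noise — and worse, it can silence the warning you would get if kmalloc() were accidentally undeclared. The sizeof(*dpsf) idiom also keeps the allocation size correct if the variable's type ever changes; the fixed shape, mirroring the hunk:

	struct ip_sf_list *dpsf;

	dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);	/* no cast needed in C */
	if (!dpsf)
		continue;	/* as in the loop above */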
@@ -1807,6 +1807,7 @@ done:
 	rtnl_unlock();
 	return err;
 }
+EXPORT_SYMBOL(ip_mc_join_group);
 
 static void ip_sf_socklist_reclaim(struct rcu_head *rp)
 {
@@ -2679,8 +2680,3 @@ int __init igmp_mc_proc_init(void)
 	return register_pernet_subsys(&igmp_net_ops);
 }
 #endif
-
-EXPORT_SYMBOL(ip_mc_dec_group);
-EXPORT_SYMBOL(ip_mc_inc_group);
-EXPORT_SYMBOL(ip_mc_join_group);
-EXPORT_SYMBOL(ip_mc_rejoin_group);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 70eb3507c406..7174370b1195 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -84,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
 	}
 	return node != NULL;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
 
 /* Obtain a reference to a local port for the given sock,
@@ -212,7 +211,6 @@ fail:
 	local_bh_enable();
 	return ret;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_get_port);
 
 /*
@@ -305,7 +303,6 @@ out_err:
 	*err = error;
 	goto out;
 }
-
 EXPORT_SYMBOL(inet_csk_accept);
 
 /*
@@ -327,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk,
 	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
-
 EXPORT_SYMBOL(inet_csk_init_xmit_timers);
 
 void inet_csk_clear_xmit_timers(struct sock *sk)
@@ -340,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
 	sk_stop_timer(sk, &icsk->icsk_delack_timer);
 	sk_stop_timer(sk, &sk->sk_timer);
 }
-
 EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
 
 void inet_csk_delete_keepalive_timer(struct sock *sk)
 {
 	sk_stop_timer(sk, &sk->sk_timer);
 }
-
 EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
 
 void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
 {
 	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
 }
-
 EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
 
 struct dst_entry *inet_csk_route_req(struct sock *sk,
@@ -383,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 		goto no_route;
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto route_err;
-	return &rt->u.dst;
+	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
@@ -391,7 +384,6 @@ no_route:
 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 	return NULL;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
 
 static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
@@ -433,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 
 	return req;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_search_req);
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
@@ -447,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
 	inet_csk_reqsk_queue_added(sk, timeout);
 }
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
 /* Only thing we need from tcp.h */
 extern int sysctl_tcp_synack_retries;
 
-EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
 /* Decide when to expire the request and when to resend SYN-ACK */
 static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
@@ -569,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	if (lopt->qlen)
 		inet_csk_reset_keepalive_timer(parent, interval);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
 
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
@@ -599,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 	}
 	return newsk;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_clone);
 
 /*
@@ -630,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk)
 	percpu_counter_dec(sk->sk_prot->orphan_count);
 	sock_put(sk);
 }
-
 EXPORT_SYMBOL(inet_csk_destroy_sock);
 
 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
@@ -665,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
 	return -EADDRINUSE;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 
 /*
@@ -720,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk)
 	}
 	WARN_ON(sk->sk_ack_backlog);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
 
 void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
@@ -732,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
 	sin->sin_addr.s_addr	= inet->inet_daddr;
 	sin->sin_port		= inet->inet_dport;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
 
 #ifdef CONFIG_COMPAT
@@ -747,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
 	return icsk->icsk_af_ops->getsockopt(sk, level, optname,
 					     optval, optlen);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
 
 int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
@@ -761,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
 	return icsk->icsk_af_ops->setsockopt(sk, level, optname,
 					     optval, optlen);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
 #endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a2ca6aed763b..5ff2a51b6d0c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -114,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 		fq->last_in |= INET_FRAG_COMPLETE;
 	}
 }
-
 EXPORT_SYMBOL(inet_frag_kill);
 
 static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d3e160a88219..fb7ad5a21ff3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk)
 	__inet_put_port(sk);
 	local_bh_enable();
 }
-
 EXPORT_SYMBOL(inet_put_port);
 
 void __inet_inherit_port(struct sock *sk, struct sock *child)
@@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
 	inet_csk(child)->icsk_bind_hash = tb;
 	spin_unlock(&head->lock);
 }
-
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
@@ -546,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
 			__inet_check_established, __inet_hash_nolisten);
 }
-
 EXPORT_SYMBOL_GPL(inet_hash_connect);
 
 void inet_hashinfo_init(struct inet_hashinfo *h)
@@ -560,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 				      i + LISTENING_NULLS_BASE);
 	}
 }
-
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6bcfe52a9c87..9ffa24b9a804 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
  *		lookups performed with disabled BHs.
  *
  *  Serialisation issues.
- *  1.  Nodes may appear in the tree only with the pool write lock held.
- *  2.  Nodes may disappear from the tree only with the pool write lock held
+ *  1.  Nodes may appear in the tree only with the pool lock held.
+ *  2.  Nodes may disappear from the tree only with the pool lock held
  *      AND reference count being 0.
  *  3.  Nodes appears and disappears from unused node list only under
  *      "inet_peer_unused_lock".
@@ -64,23 +64,31 @@
  *		usually under some other lock to prevent node disappearing
  *  dtime: unused node list lock
  *  v4daddr: unchangeable
- *  ip_id_count: idlock
+ *  ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
 #define node_height(x) x->avl_height
-static struct inet_peer peer_fake_node = {
-	.avl_left	= &peer_fake_node,
-	.avl_right	= &peer_fake_node,
+
+#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+static const struct inet_peer peer_fake_node = {
+	.avl_left	= peer_avl_empty,
+	.avl_right	= peer_avl_empty,
 	.avl_height	= 0
 };
-#define peer_avl_empty (&peer_fake_node)
-static struct inet_peer *peer_root = peer_avl_empty;
-static DEFINE_RWLOCK(peer_pool_lock);
+
+static struct {
+	struct inet_peer *root;
+	spinlock_t	lock;
+	int		total;
+} peers = {
+	.root		= peer_avl_empty,
+	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
+	.total		= 0,
+};
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
-static int peer_total;
 /* Exported for sysctl_net_ipv4. */
 int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
					 * aggressively at this stage */
@@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min
 int inet_peer_gc_mintime __read_mostly = 10 * HZ;
 int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
 
-static LIST_HEAD(unused_peers);
-static DEFINE_SPINLOCK(inet_peer_unused_lock);
+static struct {
+	struct list_head	list;
+	spinlock_t		lock;
+} unused_peers = {
+	.list			= LIST_HEAD_INIT(unused_peers.list),
+	.lock			= __SPIN_LOCK_UNLOCKED(unused_peers.lock),
+};
 
 static void peer_check_expire(unsigned long dummy);
 static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
@@ -116,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
@@ -131,38 +144,69 @@ void __init inet_initpeers(void) | |||
131 | /* Called with or without local BH being disabled. */ | 144 | /* Called with or without local BH being disabled. */ |
132 | static void unlink_from_unused(struct inet_peer *p) | 145 | static void unlink_from_unused(struct inet_peer *p) |
133 | { | 146 | { |
134 | spin_lock_bh(&inet_peer_unused_lock); | 147 | if (!list_empty(&p->unused)) { |
135 | list_del_init(&p->unused); | 148 | spin_lock_bh(&unused_peers.lock); |
136 | spin_unlock_bh(&inet_peer_unused_lock); | 149 | list_del_init(&p->unused); |
150 | spin_unlock_bh(&unused_peers.lock); | ||
151 | } | ||
137 | } | 152 | } |
138 | 153 | ||
139 | /* | 154 | /* |
140 | * Called with local BH disabled and the pool lock held. | 155 | * Called with local BH disabled and the pool lock held. |
141 | * _stack is known to be NULL or not at compile time, | ||
142 | * so compiler will optimize the if (_stack) tests. | ||
143 | */ | 156 | */ |
144 | #define lookup(_daddr, _stack) \ | 157 | #define lookup(_daddr, _stack) \ |
145 | ({ \ | 158 | ({ \ |
146 | struct inet_peer *u, **v; \ | 159 | struct inet_peer *u, **v; \ |
147 | if (_stack != NULL) { \ | 160 | \ |
148 | stackptr = _stack; \ | 161 | stackptr = _stack; \ |
149 | *stackptr++ = &peer_root; \ | 162 | *stackptr++ = &peers.root; \ |
150 | } \ | 163 | for (u = peers.root; u != peer_avl_empty; ) { \ |
151 | for (u = peer_root; u != peer_avl_empty; ) { \ | ||
152 | if (_daddr == u->v4daddr) \ | 164 | if (_daddr == u->v4daddr) \ |
153 | break; \ | 165 | break; \ |
154 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ | 166 | if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ |
155 | v = &u->avl_left; \ | 167 | v = &u->avl_left; \ |
156 | else \ | 168 | else \ |
157 | v = &u->avl_right; \ | 169 | v = &u->avl_right; \ |
158 | if (_stack != NULL) \ | 170 | *stackptr++ = v; \ |
159 | *stackptr++ = v; \ | ||
160 | u = *v; \ | 171 | u = *v; \ |
161 | } \ | 172 | } \ |
162 | u; \ | 173 | u; \ |
163 | }) | 174 | }) |
164 | 175 | ||
165 | /* Called with local BH disabled and the pool write lock held. */ | 176 | /* |
177 | * Called with rcu_read_lock_bh() | ||
178 | * Because we hold no lock against a writer, it's quite possible we fall | ||
179 | * into an endless loop. | ||
180 | * But every pointer we follow is guaranteed to be valid thanks to RCU. | ||
181 | * We exit from this function if the number of links exceeds PEER_MAXDEPTH | ||
182 | */ | ||
183 | static struct inet_peer *lookup_rcu_bh(__be32 daddr) | ||
184 | { | ||
185 | struct inet_peer *u = rcu_dereference_bh(peers.root); | ||
186 | int count = 0; | ||
187 | |||
188 | while (u != peer_avl_empty) { | ||
189 | if (daddr == u->v4daddr) { | ||
190 | /* Before taking a reference, check if this entry was | ||
191 | * deleted: unlink_from_pool() sets refcnt=-1 to | ||
192 | * distinguish an unused entry (refcnt=0) from | ||
193 | * a freed one. | ||
194 | */ | ||
195 | if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1))) | ||
196 | u = NULL; | ||
197 | return u; | ||
198 | } | ||
199 | if ((__force __u32)daddr < (__force __u32)u->v4daddr) | ||
200 | u = rcu_dereference_bh(u->avl_left); | ||
201 | else | ||
202 | u = rcu_dereference_bh(u->avl_right); | ||
203 | if (unlikely(++count == PEER_MAXDEPTH)) | ||
204 | break; | ||
205 | } | ||
206 | return NULL; | ||
207 | } | ||
208 | |||
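lookup_rcu_bh() above is the read side of the RCU conversion: it holds no lock against writers, so it bounds the walk at PEER_MAXDEPTH and takes its reference conditionally. The conditional-refcount step in isolation, as a sketch (peer_get_rcu is a hypothetical helper, not in the kernel):

    /* Sketch: acquire a reference on an object found under RCU.
     * The delete path sets refcnt to -1 before freeing, so
     * atomic_add_unless() returns 0 exactly when the object is
     * already dead and the increment must not happen. */
    static struct inet_peer *peer_get_rcu(struct inet_peer *p)
    {
            if (p && !atomic_add_unless(&p->refcnt, 1, -1))
                    p = NULL;   /* lost the race with unlink_from_pool() */
            return p;
    }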
209 | /* Called with local BH disabled and the pool lock held. */ | ||
166 | #define lookup_rightempty(start) \ | 210 | #define lookup_rightempty(start) \ |
167 | ({ \ | 211 | ({ \ |
168 | struct inet_peer *u, **v; \ | 212 | struct inet_peer *u, **v; \ |
@@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p) | |||
176 | u; \ | 220 | u; \ |
177 | }) | 221 | }) |
178 | 222 | ||
179 | /* Called with local BH disabled and the pool write lock held. | 223 | /* Called with local BH disabled and the pool lock held. |
180 | * Variable names are the proof of operation correctness. | 224 | * Variable names are the proof of operation correctness. |
181 | * Look into mm/map_avl.c for a more detailed description of the ideas. */ | 225 | * Look into mm/map_avl.c for a more detailed description of the ideas. |
226 | */ | ||
182 | static void peer_avl_rebalance(struct inet_peer **stack[], | 227 | static void peer_avl_rebalance(struct inet_peer **stack[], |
183 | struct inet_peer ***stackend) | 228 | struct inet_peer ***stackend) |
184 | { | 229 | { |
@@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[], | |||
254 | } | 299 | } |
255 | } | 300 | } |
256 | 301 | ||
257 | /* Called with local BH disabled and the pool write lock held. */ | 302 | /* Called with local BH disabled and the pool lock held. */ |
258 | #define link_to_pool(n) \ | 303 | #define link_to_pool(n) \ |
259 | do { \ | 304 | do { \ |
260 | n->avl_height = 1; \ | 305 | n->avl_height = 1; \ |
261 | n->avl_left = peer_avl_empty; \ | 306 | n->avl_left = peer_avl_empty; \ |
262 | n->avl_right = peer_avl_empty; \ | 307 | n->avl_right = peer_avl_empty; \ |
308 | smp_wmb(); /* lockless readers can catch us now */ \ | ||
263 | **--stackptr = n; \ | 309 | **--stackptr = n; \ |
264 | peer_avl_rebalance(stack, stackptr); \ | 310 | peer_avl_rebalance(stack, stackptr); \ |
265 | } while(0) | 311 | } while (0) |
312 | |||
313 | static void inetpeer_free_rcu(struct rcu_head *head) | ||
314 | { | ||
315 | kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); | ||
316 | } | ||
266 | 317 | ||
267 | /* May be called with local BH enabled. */ | 318 | /* May be called with local BH enabled. */ |
268 | static void unlink_from_pool(struct inet_peer *p) | 319 | static void unlink_from_pool(struct inet_peer *p) |
@@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p) | |||
271 | 322 | ||
272 | do_free = 0; | 323 | do_free = 0; |
273 | 324 | ||
274 | write_lock_bh(&peer_pool_lock); | 325 | spin_lock_bh(&peers.lock); |
275 | /* Check the reference counter. It was artificially incremented by 1 | 326 | /* Check the reference counter. It was artificially incremented by 1 |
276 | * in the cleanup() function to prevent it from suddenly disappearing. If the | 327 | * in the cleanup() function to prevent it from suddenly disappearing. If we can |
277 | * reference count is still 1 then the node is referenced only as `p' | 328 | * atomically (because of lockless readers) take this last reference, |
278 | * here and from the pool. So under the exclusive pool lock it's safe | 329 | * it's safe to remove the node and free it later. |
279 | * to remove the node and free it later. */ | 330 | * We use refcnt=-1 to alert lockless readers this entry is deleted. |
280 | if (atomic_read(&p->refcnt) == 1) { | 331 | */ |
332 | if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { | ||
281 | struct inet_peer **stack[PEER_MAXDEPTH]; | 333 | struct inet_peer **stack[PEER_MAXDEPTH]; |
282 | struct inet_peer ***stackptr, ***delp; | 334 | struct inet_peer ***stackptr, ***delp; |
283 | if (lookup(p->v4daddr, stack) != p) | 335 | if (lookup(p->v4daddr, stack) != p) |
@@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p) | |||
303 | delp[1] = &t->avl_left; /* was &p->avl_left */ | 355 | delp[1] = &t->avl_left; /* was &p->avl_left */ |
304 | } | 356 | } |
305 | peer_avl_rebalance(stack, stackptr); | 357 | peer_avl_rebalance(stack, stackptr); |
306 | peer_total--; | 358 | peers.total--; |
307 | do_free = 1; | 359 | do_free = 1; |
308 | } | 360 | } |
309 | write_unlock_bh(&peer_pool_lock); | 361 | spin_unlock_bh(&peers.lock); |
310 | 362 | ||
311 | if (do_free) | 363 | if (do_free) |
312 | kmem_cache_free(peer_cachep, p); | 364 | call_rcu_bh(&p->rcu, inetpeer_free_rcu); |
313 | else | 365 | else |
314 | /* The node is used again. Decrease the reference counter | 366 | /* The node is used again. Decrease the reference counter |
315 | * back. The loop "cleanup -> unlink_from_unused | 367 | * back. The loop "cleanup -> unlink_from_unused |
316 | * -> unlink_from_pool -> putpeer -> link_to_unused | 368 | * -> unlink_from_pool -> putpeer -> link_to_unused |
317 | * -> cleanup (for the same node)" | 369 | * -> cleanup (for the same node)" |
318 | * doesn't really exist because the entry will have a | 370 | * doesn't really exist because the entry will have a |
319 | * recent deletion time and will not be cleaned again soon. */ | 371 | * recent deletion time and will not be cleaned again soon. |
372 | */ | ||
320 | inet_putpeer(p); | 373 | inet_putpeer(p); |
321 | } | 374 | } |
322 | 375 | ||
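unlink_from_pool() above is the matching write side: atomic_cmpxchg() moves refcnt from 1 to -1 only if no lockless reader took a reference in the meantime, and the actual free is deferred with call_rcu_bh() so readers still walking the tree never touch freed memory. Reduced to a skeleton around a hypothetical object type:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/types.h>

    struct my_obj {
            atomic_t        refcnt;
            struct rcu_head rcu;
    };

    static void my_obj_free_rcu(struct rcu_head *head)
    {
            kfree(container_of(head, struct my_obj, rcu));
    }

    /* Caller holds the pool lock and unlinks the object from the tree
     * itself; returns true if we won the race for the last reference. */
    static bool try_delete(struct my_obj *obj)
    {
            /* Succeeds only while refcnt == 1; the -1 tells concurrent
             * RCU readers the object is dead and must not be revived. */
            if (atomic_cmpxchg(&obj->refcnt, 1, -1) != 1)
                    return false;
            call_rcu_bh(&obj->rcu, my_obj_free_rcu); /* free after grace period */
            return true;
    }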
@@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl) | |||
326 | struct inet_peer *p = NULL; | 379 | struct inet_peer *p = NULL; |
327 | 380 | ||
328 | /* Remove the first entry from the list of unused nodes. */ | 381 | /* Remove the first entry from the list of unused nodes. */ |
329 | spin_lock_bh(&inet_peer_unused_lock); | 382 | spin_lock_bh(&unused_peers.lock); |
330 | if (!list_empty(&unused_peers)) { | 383 | if (!list_empty(&unused_peers.list)) { |
331 | __u32 delta; | 384 | __u32 delta; |
332 | 385 | ||
333 | p = list_first_entry(&unused_peers, struct inet_peer, unused); | 386 | p = list_first_entry(&unused_peers.list, struct inet_peer, unused); |
334 | delta = (__u32)jiffies - p->dtime; | 387 | delta = (__u32)jiffies - p->dtime; |
335 | 388 | ||
336 | if (delta < ttl) { | 389 | if (delta < ttl) { |
337 | /* Do not prune fresh entries. */ | 390 | /* Do not prune fresh entries. */ |
338 | spin_unlock_bh(&inet_peer_unused_lock); | 391 | spin_unlock_bh(&unused_peers.lock); |
339 | return -1; | 392 | return -1; |
340 | } | 393 | } |
341 | 394 | ||
@@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl) | |||
345 | * before the unlink_from_pool() call. */ | 398 | * before the unlink_from_pool() call. */ |
346 | atomic_inc(&p->refcnt); | 399 | atomic_inc(&p->refcnt); |
347 | } | 400 | } |
348 | spin_unlock_bh(&inet_peer_unused_lock); | 401 | spin_unlock_bh(&unused_peers.lock); |
349 | 402 | ||
350 | if (p == NULL) | 403 | if (p == NULL) |
351 | /* It means that the total number of USED entries has | 404 | /* It means that the total number of USED entries has |
@@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl) | |||
360 | /* Called with or without local BH being disabled. */ | 413 | /* Called with or without local BH being disabled. */ |
361 | struct inet_peer *inet_getpeer(__be32 daddr, int create) | 414 | struct inet_peer *inet_getpeer(__be32 daddr, int create) |
362 | { | 415 | { |
363 | struct inet_peer *p, *n; | 416 | struct inet_peer *p; |
364 | struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; | 417 | struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; |
365 | 418 | ||
366 | /* Look up the address quickly. | 419 | /* Look up the address quickly, lockless. |
367 | read_lock_bh(&peer_pool_lock); | 420 | * Because of a concurrent writer, we might not find an existing entry. |
368 | p = lookup(daddr, NULL); | 421 | */ |
369 | if (p != peer_avl_empty) | 422 | rcu_read_lock_bh(); |
370 | atomic_inc(&p->refcnt); | 423 | p = lookup_rcu_bh(daddr); |
371 | read_unlock_bh(&peer_pool_lock); | 424 | rcu_read_unlock_bh(); |
425 | |||
426 | if (p) { | ||
427 | /* The existing node has been found. | ||
428 | * Remove the entry from unused list if it was there. | ||
429 | */ | ||
430 | unlink_from_unused(p); | ||
431 | return p; | ||
432 | } | ||
372 | 433 | ||
434 | /* Retry an exact lookup, this time taking the lock first. | ||
435 | * At least the nodes should now be hot in our cache. | ||
436 | */ | ||
437 | spin_lock_bh(&peers.lock); | ||
438 | p = lookup(daddr, stack); | ||
373 | if (p != peer_avl_empty) { | 439 | if (p != peer_avl_empty) { |
374 | /* The existing node has been found. */ | 440 | atomic_inc(&p->refcnt); |
441 | spin_unlock_bh(&peers.lock); | ||
375 | /* Remove the entry from unused list if it was there. */ | 442 | /* Remove the entry from unused list if it was there. */ |
376 | unlink_from_unused(p); | 443 | unlink_from_unused(p); |
377 | return p; | 444 | return p; |
378 | } | 445 | } |
446 | p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; | ||
447 | if (p) { | ||
448 | p->v4daddr = daddr; | ||
449 | atomic_set(&p->refcnt, 1); | ||
450 | atomic_set(&p->rid, 0); | ||
451 | atomic_set(&p->ip_id_count, secure_ip_id(daddr)); | ||
452 | p->tcp_ts_stamp = 0; | ||
453 | INIT_LIST_HEAD(&p->unused); | ||
454 | |||
455 | |||
456 | /* Link the node. */ | ||
457 | link_to_pool(p); | ||
458 | peers.total++; | ||
459 | } | ||
460 | spin_unlock_bh(&peers.lock); | ||
379 | 461 | ||
380 | if (!create) | 462 | if (peers.total >= inet_peer_threshold) |
381 | return NULL; | ||
382 | |||
383 | /* Allocate the space outside the locked region. */ | ||
384 | n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); | ||
385 | if (n == NULL) | ||
386 | return NULL; | ||
387 | n->v4daddr = daddr; | ||
388 | atomic_set(&n->refcnt, 1); | ||
389 | atomic_set(&n->rid, 0); | ||
390 | atomic_set(&n->ip_id_count, secure_ip_id(daddr)); | ||
391 | n->tcp_ts_stamp = 0; | ||
392 | |||
393 | write_lock_bh(&peer_pool_lock); | ||
394 | /* Check if an entry has suddenly appeared. */ | ||
395 | p = lookup(daddr, stack); | ||
396 | if (p != peer_avl_empty) | ||
397 | goto out_free; | ||
398 | |||
399 | /* Link the node. */ | ||
400 | link_to_pool(n); | ||
401 | INIT_LIST_HEAD(&n->unused); | ||
402 | peer_total++; | ||
403 | write_unlock_bh(&peer_pool_lock); | ||
404 | |||
405 | if (peer_total >= inet_peer_threshold) | ||
406 | /* Remove one less-recently-used entry. */ | 463 | /* Remove one less-recently-used entry. */ |
407 | cleanup_once(0); | 464 | cleanup_once(0); |
408 | 465 | ||
409 | return n; | ||
410 | |||
411 | out_free: | ||
412 | /* The appropriate node is already in the pool. */ | ||
413 | atomic_inc(&p->refcnt); | ||
414 | write_unlock_bh(&peer_pool_lock); | ||
415 | /* Remove the entry from unused list if it was there. */ | ||
416 | unlink_from_unused(p); | ||
417 | /* Free the preallocated node. */ | ||
418 | kmem_cache_free(peer_cachep, n); | ||
419 | return p; | 466 | return p; |
420 | } | 467 | } |
421 | 468 | ||
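The rewritten inet_getpeer() takes the classic optimistic shape: try a lockless RCU lookup first, and only on a miss take peers.lock, repeat the exact lookup (another CPU may have inserted the entry in the window), and insert if it is still absent. The control flow as a sketch, where pool, lookup_lockless(), lookup_locked(), alloc_and_init() and link_to_tree() are hypothetical stand-ins for the macros and helpers above:

    static struct inet_peer *get_or_create(__be32 key)
    {
            struct inet_peer *obj;

            obj = lookup_lockless(key);     /* RCU walk, takes a reference */
            if (obj)
                    return obj;             /* fast path: no lock acquired */

            spin_lock_bh(&pool.lock);
            obj = lookup_locked(key);       /* may have appeared meanwhile */
            if (obj) {
                    atomic_inc(&obj->refcnt);
            } else {
                    obj = alloc_and_init(key);      /* GFP_ATOMIC under a spinlock */
                    if (obj)
                            link_to_tree(obj);      /* publishes via smp_wmb() */
            }
            spin_unlock_bh(&pool.lock);
            return obj;
    }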
@@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy) | |||
425 | unsigned long now = jiffies; | 472 | unsigned long now = jiffies; |
426 | int ttl; | 473 | int ttl; |
427 | 474 | ||
428 | if (peer_total >= inet_peer_threshold) | 475 | if (peers.total >= inet_peer_threshold) |
429 | ttl = inet_peer_minttl; | 476 | ttl = inet_peer_minttl; |
430 | else | 477 | else |
431 | ttl = inet_peer_maxttl | 478 | ttl = inet_peer_maxttl |
432 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * | 479 | - (inet_peer_maxttl - inet_peer_minttl) / HZ * |
433 | peer_total / inet_peer_threshold * HZ; | 480 | peers.total / inet_peer_threshold * HZ; |
434 | while (!cleanup_once(ttl)) { | 481 | while (!cleanup_once(ttl)) { |
435 | if (jiffies != now) | 482 | if (jiffies != now) |
436 | break; | 483 | break; |
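The ttl expression interpolates linearly between inet_peer_maxttl for an empty pool and inet_peer_minttl once peers.total reaches inet_peer_threshold: the fuller the pool, the sooner unused entries expire. Dividing by HZ before multiplying by the pool size keeps the intermediate product within 32 bits. As a standalone sketch (peer_ttl is a hypothetical wrapper):

    /* Sketch: per-entry TTL as a function of pool occupancy. */
    static unsigned long peer_ttl(int total)
    {
            if (total >= inet_peer_threshold)
                    return inet_peer_minttl;
            return inet_peer_maxttl -
                   (inet_peer_maxttl - inet_peer_minttl) / HZ *
                   total / inet_peer_threshold * HZ;
    }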
@@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy) | |||
439 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime | 486 | /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime |
440 | * interval depending on the total number of entries (more entries, | 487 | * interval depending on the total number of entries (more entries, |
441 | * less interval). */ | 488 | * less interval). */ |
442 | if (peer_total >= inet_peer_threshold) | 489 | if (peers.total >= inet_peer_threshold) |
443 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; | 490 | peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; |
444 | else | 491 | else |
445 | peer_periodic_timer.expires = jiffies | 492 | peer_periodic_timer.expires = jiffies |
446 | + inet_peer_gc_maxtime | 493 | + inet_peer_gc_maxtime |
447 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * | 494 | - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * |
448 | peer_total / inet_peer_threshold * HZ; | 495 | peers.total / inet_peer_threshold * HZ; |
449 | add_timer(&peer_periodic_timer); | 496 | add_timer(&peer_periodic_timer); |
450 | } | 497 | } |
451 | 498 | ||
452 | void inet_putpeer(struct inet_peer *p) | 499 | void inet_putpeer(struct inet_peer *p) |
453 | { | 500 | { |
454 | spin_lock_bh(&inet_peer_unused_lock); | 501 | local_bh_disable(); |
455 | if (atomic_dec_and_test(&p->refcnt)) { | 502 | |
456 | list_add_tail(&p->unused, &unused_peers); | 503 | if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) { |
504 | list_add_tail(&p->unused, &unused_peers.list); | ||
457 | p->dtime = (__u32)jiffies; | 505 | p->dtime = (__u32)jiffies; |
506 | spin_unlock(&unused_peers.lock); | ||
458 | } | 507 | } |
459 | spin_unlock_bh(&inet_peer_unused_lock); | 508 | |
509 | local_bh_enable(); | ||
460 | } | 510 | } |
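inet_putpeer() now relies on atomic_dec_and_lock(), which decrements without the lock and acquires it only when the counter actually hits zero, so the common put never touches unused_peers.lock; the surrounding local_bh_disable()/local_bh_enable() keeps the whole sequence BH-safe. The idiom in isolation, reusing the hypothetical my_obj and pending_items sketches above and assuming my_obj gains a struct list_head lru and a __u32 dtime:

    /* Sketch: lock-free put unless this is the final reference. */
    static void my_obj_put(struct my_obj *obj)
    {
            local_bh_disable();
            /* Takes pending_items.lock only when refcnt drops to 0; on
             * success the lock is held and we park the object for reuse. */
            if (atomic_dec_and_lock(&obj->refcnt, &pending_items.lock)) {
                    list_add_tail(&obj->lru, &pending_items.list);
                    obj->dtime = (__u32)jiffies;
                    spin_unlock(&pending_items.lock);
            }
            local_bh_enable();
    }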
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 56cdf68a074c..99461f09320f 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c | |||
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb) | |||
87 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 87 | if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
88 | goto sr_failed; | 88 | goto sr_failed; |
89 | 89 | ||
90 | if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) && | 90 | if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && |
91 | (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { | 91 | (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) { |
92 | IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS); | 92 | IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); |
93 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, | 93 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, |
94 | htonl(dst_mtu(&rt->u.dst))); | 94 | htonl(dst_mtu(&rt->dst))); |
95 | goto drop; | 95 | goto drop; |
96 | } | 96 | } |
97 | 97 | ||
98 | /* We are about to mangle packet. Copy it! */ | 98 | /* We are about to mangle packet. Copy it! */ |
99 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len)) | 99 | if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len)) |
100 | goto drop; | 100 | goto drop; |
101 | iph = ip_hdr(skb); | 101 | iph = ip_hdr(skb); |
102 | 102 | ||
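Every rt->u.dst in this and the following files becomes rt->dst: in this merge window struct rtable's dst_entry stopped being wrapped in a single-member union and became a plain first member, so the rename is mechanical and &rt->dst still aliases the rtable itself. Schematically (field lists trimmed; the real struct is of course named rtable in both versions):

    /* Before: the dst_entry sat inside a one-member union. */
    struct rtable_before {
            union {
                    struct dst_entry dst;
            } u;
            /* ... routing fields ... */
    };

    /* After: a plain first member; accesses lose the ->u. hop and
     * (struct dst_entry *)rt == &rt->dst continues to hold. */
    struct rtable_after {
            struct dst_entry dst;
            /* ... routing fields ... */
    };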
@@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb) | |||
113 | skb->priority = rt_tos2priority(iph->tos); | 113 | skb->priority = rt_tos2priority(iph->tos); |
114 | 114 | ||
115 | return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, | 115 | return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, |
116 | rt->u.dst.dev, ip_forward_finish); | 116 | rt->dst.dev, ip_forward_finish); |
117 | 117 | ||
118 | sr_failed: | 118 | sr_failed: |
119 | /* | 119 | /* |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 75347ea70ea0..b7c41654dde5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a) | |||
124 | } | 124 | } |
125 | 125 | ||
126 | /* Memory Tracking Functions. */ | 126 | /* Memory Tracking Functions. */ |
127 | static __inline__ void frag_kfree_skb(struct netns_frags *nf, | 127 | static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) |
128 | struct sk_buff *skb, int *work) | ||
129 | { | 128 | { |
130 | if (work) | ||
131 | *work -= skb->truesize; | ||
132 | atomic_sub(skb->truesize, &nf->mem); | 129 | atomic_sub(skb->truesize, &nf->mem); |
133 | kfree_skb(skb); | 130 | kfree_skb(skb); |
134 | } | 131 | } |
@@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
309 | fp = qp->q.fragments; | 306 | fp = qp->q.fragments; |
310 | do { | 307 | do { |
311 | struct sk_buff *xp = fp->next; | 308 | struct sk_buff *xp = fp->next; |
312 | frag_kfree_skb(qp->q.net, fp, NULL); | 309 | frag_kfree_skb(qp->q.net, fp); |
313 | fp = xp; | 310 | fp = xp; |
314 | } while (fp); | 311 | } while (fp); |
315 | 312 | ||
@@ -317,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp) | |||
317 | qp->q.len = 0; | 314 | qp->q.len = 0; |
318 | qp->q.meat = 0; | 315 | qp->q.meat = 0; |
319 | qp->q.fragments = NULL; | 316 | qp->q.fragments = NULL; |
317 | qp->q.fragments_tail = NULL; | ||
320 | qp->iif = 0; | 318 | qp->iif = 0; |
321 | 319 | ||
322 | return 0; | 320 | return 0; |
@@ -389,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
389 | * in the chain of fragments so far. We must know where to put | 387 | * in the chain of fragments so far. We must know where to put |
390 | * this fragment, right? | 388 | * this fragment, right? |
391 | */ | 389 | */ |
390 | prev = qp->q.fragments_tail; | ||
391 | if (!prev || FRAG_CB(prev)->offset < offset) { | ||
392 | next = NULL; | ||
393 | goto found; | ||
394 | } | ||
392 | prev = NULL; | 395 | prev = NULL; |
393 | for (next = qp->q.fragments; next != NULL; next = next->next) { | 396 | for (next = qp->q.fragments; next != NULL; next = next->next) { |
394 | if (FRAG_CB(next)->offset >= offset) | 397 | if (FRAG_CB(next)->offset >= offset) |
@@ -396,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
396 | prev = next; | 399 | prev = next; |
397 | } | 400 | } |
398 | 401 | ||
402 | found: | ||
399 | /* We found where to put this one. Check for overlap with | 403 | /* We found where to put this one. Check for overlap with |
400 | * preceding fragment, and, if needed, align things so that | 404 | * preceding fragment, and, if needed, align things so that |
401 | * any overlaps are eliminated. | 405 | * any overlaps are eliminated. |
@@ -446,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
446 | qp->q.fragments = next; | 450 | qp->q.fragments = next; |
447 | 451 | ||
448 | qp->q.meat -= free_it->len; | 452 | qp->q.meat -= free_it->len; |
449 | frag_kfree_skb(qp->q.net, free_it, NULL); | 453 | frag_kfree_skb(qp->q.net, free_it); |
450 | } | 454 | } |
451 | } | 455 | } |
452 | 456 | ||
@@ -454,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) | |||
454 | 458 | ||
455 | /* Insert this fragment in the chain of fragments. */ | 459 | /* Insert this fragment in the chain of fragments. */ |
456 | skb->next = next; | 460 | skb->next = next; |
461 | if (!next) | ||
462 | qp->q.fragments_tail = skb; | ||
457 | if (prev) | 463 | if (prev) |
458 | prev->next = skb; | 464 | prev->next = skb; |
459 | else | 465 | else |
@@ -507,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
507 | goto out_nomem; | 513 | goto out_nomem; |
508 | 514 | ||
509 | fp->next = head->next; | 515 | fp->next = head->next; |
516 | if (!fp->next) | ||
517 | qp->q.fragments_tail = fp; | ||
510 | prev->next = fp; | 518 | prev->next = fp; |
511 | 519 | ||
512 | skb_morph(head, qp->q.fragments); | 520 | skb_morph(head, qp->q.fragments); |
@@ -556,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
556 | 564 | ||
557 | skb_shinfo(head)->frag_list = head->next; | 565 | skb_shinfo(head)->frag_list = head->next; |
558 | skb_push(head, head->data - skb_network_header(head)); | 566 | skb_push(head, head->data - skb_network_header(head)); |
559 | atomic_sub(head->truesize, &qp->q.net->mem); | ||
560 | 567 | ||
561 | for (fp=head->next; fp; fp = fp->next) { | 568 | for (fp=head->next; fp; fp = fp->next) { |
562 | head->data_len += fp->len; | 569 | head->data_len += fp->len; |
@@ -566,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
566 | else if (head->ip_summed == CHECKSUM_COMPLETE) | 573 | else if (head->ip_summed == CHECKSUM_COMPLETE) |
567 | head->csum = csum_add(head->csum, fp->csum); | 574 | head->csum = csum_add(head->csum, fp->csum); |
568 | head->truesize += fp->truesize; | 575 | head->truesize += fp->truesize; |
569 | atomic_sub(fp->truesize, &qp->q.net->mem); | ||
570 | } | 576 | } |
577 | atomic_sub(head->truesize, &qp->q.net->mem); | ||
571 | 578 | ||
572 | head->next = NULL; | 579 | head->next = NULL; |
573 | head->dev = dev; | 580 | head->dev = dev; |
@@ -578,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
578 | iph->tot_len = htons(len); | 585 | iph->tot_len = htons(len); |
579 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); | 586 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
580 | qp->q.fragments = NULL; | 587 | qp->q.fragments = NULL; |
588 | qp->q.fragments_tail = NULL; | ||
581 | return 0; | 589 | return 0; |
582 | 590 | ||
583 | out_nomem: | 591 | out_nomem: |
@@ -624,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) | |||
624 | kfree_skb(skb); | 632 | kfree_skb(skb); |
625 | return -ENOMEM; | 633 | return -ENOMEM; |
626 | } | 634 | } |
635 | EXPORT_SYMBOL(ip_defrag); | ||
627 | 636 | ||
628 | #ifdef CONFIG_SYSCTL | 637 | #ifdef CONFIG_SYSCTL |
629 | static int zero; | 638 | static int zero; |
@@ -777,5 +786,3 @@ void __init ipfrag_init(void) | |||
777 | ip4_frags.secret_interval = 10 * 60 * HZ; | 786 | ip4_frags.secret_interval = 10 * 60 * HZ; |
778 | inet_frags_init(&ip4_frags); | 787 | inet_frags_init(&ip4_frags); |
779 | } | 788 | } |
780 | |||
781 | EXPORT_SYMBOL(ip_defrag); | ||
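The ip_fragment.c changes add q.fragments_tail so the common case, fragments arriving in increasing offset order, inserts in O(1) instead of walking the whole list; only out-of-order arrivals fall through to the linear scan, and reinit, insert and reassembly all keep the tail pointer consistent. The same technique on a self-contained toy list (all names hypothetical):

    #include <stddef.h>

    struct frag { int offset; struct frag *next; };
    struct queue { struct frag *head, *tail; };

    /* Sketch: sorted insert with a remembered tail.  Fragments that
     * arrive in order hit the fast path and never walk the list. */
    static void insert_sorted(struct queue *q, struct frag *f)
    {
            struct frag *prev, *next;

            prev = q->tail;
            if (!prev || prev->offset < f->offset) {
                    next = NULL;            /* fast path: append at the tail */
            } else {
                    prev = NULL;            /* slow path: walk from the head */
                    for (next = q->head; next; next = next->next) {
                            if (next->offset >= f->offset)
                                    break;
                            prev = next;
                    }
            }
            f->next = next;
            if (!next)
                    q->tail = f;
            if (prev)
                    prev->next = f;
            else
                    q->head = f;
    }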
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 32618e11076d..945b20a5ad50 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -731,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
731 | tos = 0; | 731 | tos = 0; |
732 | if (skb->protocol == htons(ETH_P_IP)) | 732 | if (skb->protocol == htons(ETH_P_IP)) |
733 | tos = old_iph->tos; | 733 | tos = old_iph->tos; |
734 | else if (skb->protocol == htons(ETH_P_IPV6)) | ||
735 | tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph); | ||
734 | } | 736 | } |
735 | 737 | ||
736 | { | 738 | { |
@@ -745,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
745 | goto tx_error; | 747 | goto tx_error; |
746 | } | 748 | } |
747 | } | 749 | } |
748 | tdev = rt->u.dst.dev; | 750 | tdev = rt->dst.dev; |
749 | 751 | ||
750 | if (tdev == dev) { | 752 | if (tdev == dev) { |
751 | ip_rt_put(rt); | 753 | ip_rt_put(rt); |
@@ -755,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
755 | 757 | ||
756 | df = tiph->frag_off; | 758 | df = tiph->frag_off; |
757 | if (df) | 759 | if (df) |
758 | mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; | 760 | mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; |
759 | else | 761 | else |
760 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; | 762 | mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; |
761 | 763 | ||
@@ -803,7 +805,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
803 | tunnel->err_count = 0; | 805 | tunnel->err_count = 0; |
804 | } | 806 | } |
805 | 807 | ||
806 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len; | 808 | max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; |
807 | 809 | ||
808 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| | 810 | if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| |
809 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { | 811 | (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { |
@@ -830,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
830 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 832 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
831 | IPSKB_REROUTED); | 833 | IPSKB_REROUTED); |
832 | skb_dst_drop(skb); | 834 | skb_dst_drop(skb); |
833 | skb_dst_set(skb, &rt->u.dst); | 835 | skb_dst_set(skb, &rt->dst); |
834 | 836 | ||
835 | /* | 837 | /* |
836 | * Push down and install the IPIP header. | 838 | * Push down and install the IPIP header. |
@@ -853,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev | |||
853 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; | 855 | iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; |
854 | #endif | 856 | #endif |
855 | else | 857 | else |
856 | iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); | 858 | iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); |
857 | } | 859 | } |
858 | 860 | ||
859 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; | 861 | ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; |
@@ -915,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
915 | .proto = IPPROTO_GRE }; | 917 | .proto = IPPROTO_GRE }; |
916 | struct rtable *rt; | 918 | struct rtable *rt; |
917 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 919 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
918 | tdev = rt->u.dst.dev; | 920 | tdev = rt->dst.dev; |
919 | ip_rt_put(rt); | 921 | ip_rt_put(rt); |
920 | } | 922 | } |
921 | 923 | ||
@@ -1174,7 +1176,7 @@ static int ipgre_open(struct net_device *dev) | |||
1174 | struct rtable *rt; | 1176 | struct rtable *rt; |
1175 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) | 1177 | if (ip_route_output_key(dev_net(dev), &rt, &fl)) |
1176 | return -EADDRNOTAVAIL; | 1178 | return -EADDRNOTAVAIL; |
1177 | dev = rt->u.dst.dev; | 1179 | dev = rt->dst.dev; |
1178 | ip_rt_put(rt); | 1180 | ip_rt_put(rt); |
1179 | if (__in_dev_get_rtnl(dev) == NULL) | 1181 | if (__in_dev_get_rtnl(dev) == NULL) |
1180 | return -EADDRNOTAVAIL; | 1182 | return -EADDRNOTAVAIL; |
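The first ip_gre.c hunk fixes TOS inheritance for IPv6 payloads: when the tunnel is configured to inherit, an ETH_P_IPV6 frame now contributes its traffic class via ipv6_get_dsfield() instead of defaulting to 0. The traffic class straddles a byte boundary in the IPv6 header, which is what the helper from net/dsfield.h hides; open-coded, and as a hedged sketch only, it is roughly:

    #include <linux/ipv6.h>

    /* Sketch: the first 16 bits of an IPv6 header are
     * version(4) | traffic class(8) | flow label high(4), so
     * shifting the host-order halfword right by 4 and truncating
     * to a byte yields the traffic class / DS field. */
    static inline __u8 my_ipv6_dsfield(const struct ipv6hdr *ipv6h)
    {
            return ntohs(*(const __be16 *)ipv6h) >> 4;
    }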
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index d930dc5e4d85..d859bcc26cb7 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c | |||
@@ -146,7 +146,7 @@ | |||
146 | #include <linux/netlink.h> | 146 | #include <linux/netlink.h> |
147 | 147 | ||
148 | /* | 148 | /* |
149 | * Process Router Attention IP option | 149 | * Process Router Attention IP option (RFC 2113) |
150 | */ | 150 | */ |
151 | int ip_call_ra_chain(struct sk_buff *skb) | 151 | int ip_call_ra_chain(struct sk_buff *skb) |
152 | { | 152 | { |
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
155 | struct sock *last = NULL; | 155 | struct sock *last = NULL; |
156 | struct net_device *dev = skb->dev; | 156 | struct net_device *dev = skb->dev; |
157 | 157 | ||
158 | read_lock(&ip_ra_lock); | 158 | for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { |
159 | for (ra = ip_ra_chain; ra; ra = ra->next) { | ||
160 | struct sock *sk = ra->sk; | 159 | struct sock *sk = ra->sk; |
161 | 160 | ||
162 | /* If socket is bound to an interface, only report | 161 | /* If socket is bound to an interface, only report |
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
167 | sk->sk_bound_dev_if == dev->ifindex) && | 166 | sk->sk_bound_dev_if == dev->ifindex) && |
168 | net_eq(sock_net(sk), dev_net(dev))) { | 167 | net_eq(sock_net(sk), dev_net(dev))) { |
169 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { | 168 | if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { |
170 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { | 169 | if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) |
171 | read_unlock(&ip_ra_lock); | ||
172 | return 1; | 170 | return 1; |
173 | } | ||
174 | } | 171 | } |
175 | if (last) { | 172 | if (last) { |
176 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); | 173 | struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); |
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb) | |||
183 | 180 | ||
184 | if (last) { | 181 | if (last) { |
185 | raw_rcv(last, skb); | 182 | raw_rcv(last, skb); |
186 | read_unlock(&ip_ra_lock); | ||
187 | return 1; | 183 | return 1; |
188 | } | 184 | } |
189 | read_unlock(&ip_ra_lock); | ||
190 | return 0; | 185 | return 0; |
191 | } | 186 | } |
192 | 187 | ||
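Dropping ip_ra_lock from this receive path is safe because writers (see the ip_sockglue.c hunks later in this diff) publish with rcu_assign_pointer() under their own spinlock, clear ra->sk before unlinking so a dying entry simply stops matching, and defer frees past a grace period. The hunk above relies on its caller already running with BH disabled, which is an RCU-bh read-side critical section; a self-bracketed sketch of the same traversal (walk_ra_chain is hypothetical):

    static void walk_ra_chain(struct sk_buff *skb)
    {
            struct ip_ra_chain *ra;

            rcu_read_lock_bh();
            for (ra = rcu_dereference_bh(ip_ra_chain); ra;
                 ra = rcu_dereference_bh(ra->next)) {
                    struct sock *sk = ra->sk;

                    if (sk) {
                            struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

                            if (skb2)
                                    raw_rcv(sk, skb2);      /* deliver a clone */
                    }
            }
            rcu_read_unlock_bh();
    }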
@@ -298,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb) | |||
298 | } | 293 | } |
299 | 294 | ||
300 | if (unlikely(opt->srr)) { | 295 | if (unlikely(opt->srr)) { |
301 | struct in_device *in_dev = in_dev_get(dev); | 296 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
297 | |||
302 | if (in_dev) { | 298 | if (in_dev) { |
303 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { | 299 | if (!IN_DEV_SOURCE_ROUTE(in_dev)) { |
304 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 300 | if (IN_DEV_LOG_MARTIANS(in_dev) && |
305 | net_ratelimit()) | 301 | net_ratelimit()) |
306 | printk(KERN_INFO "source route option %pI4 -> %pI4\n", | 302 | printk(KERN_INFO "source route option %pI4 -> %pI4\n", |
307 | &iph->saddr, &iph->daddr); | 303 | &iph->saddr, &iph->daddr); |
308 | in_dev_put(in_dev); | ||
309 | goto drop; | 304 | goto drop; |
310 | } | 305 | } |
311 | |||
312 | in_dev_put(in_dev); | ||
313 | } | 306 | } |
314 | 307 | ||
315 | if (ip_options_rcv_srr(skb)) | 308 | if (ip_options_rcv_srr(skb)) |
@@ -340,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
340 | else if (err == -ENETUNREACH) | 333 | else if (err == -ENETUNREACH) |
341 | IP_INC_STATS_BH(dev_net(skb->dev), | 334 | IP_INC_STATS_BH(dev_net(skb->dev), |
342 | IPSTATS_MIB_INNOROUTES); | 335 | IPSTATS_MIB_INNOROUTES); |
336 | else if (err == -EXDEV) | ||
337 | NET_INC_STATS_BH(dev_net(skb->dev), | ||
338 | LINUX_MIB_IPRPFILTER); | ||
343 | goto drop; | 339 | goto drop; |
344 | } | 340 | } |
345 | } | 341 | } |
346 | 342 | ||
347 | #ifdef CONFIG_NET_CLS_ROUTE | 343 | #ifdef CONFIG_NET_CLS_ROUTE |
348 | if (unlikely(skb_dst(skb)->tclassid)) { | 344 | if (unlikely(skb_dst(skb)->tclassid)) { |
349 | struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); | 345 | struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); |
350 | u32 idx = skb_dst(skb)->tclassid; | 346 | u32 idx = skb_dst(skb)->tclassid; |
351 | st[idx&0xFF].o_packets++; | 347 | st[idx&0xFF].o_packets++; |
352 | st[idx&0xFF].o_bytes += skb->len; | 348 | st[idx&0xFF].o_bytes += skb->len; |
@@ -360,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb) | |||
360 | 356 | ||
361 | rt = skb_rtable(skb); | 357 | rt = skb_rtable(skb); |
362 | if (rt->rt_type == RTN_MULTICAST) { | 358 | if (rt->rt_type == RTN_MULTICAST) { |
363 | IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST, | 359 | IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, |
364 | skb->len); | 360 | skb->len); |
365 | } else if (rt->rt_type == RTN_BROADCAST) | 361 | } else if (rt->rt_type == RTN_BROADCAST) |
366 | IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST, | 362 | IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, |
367 | skb->len); | 363 | skb->len); |
368 | 364 | ||
369 | return dst_input(skb); | 365 | return dst_input(skb); |
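The CONFIG_NET_CLS_ROUTE hunk swaps per_cpu_ptr(ptr, smp_processor_id()) for this_cpu_ptr(ptr). Both name the current CPU's slot of the same per-CPU allocation; the latter lets the architecture reach it directly (segment-relative on x86) without first materializing the CPU number. A sketch of the accounting step, with account() as a hypothetical wrapper; the caller must have preemption disabled, which holds here since the path runs in softirq context:

    #include <linux/percpu.h>
    #include <linux/types.h>

    static void account(struct ip_rt_acct __percpu *acct, u32 idx, u32 len)
    {
            struct ip_rt_acct *st = this_cpu_ptr(acct);
            /* older, equivalent form:
             *      st = per_cpu_ptr(acct, smp_processor_id());
             */
            st[idx & 0xFF].o_packets++;
            st[idx & 0xFF].o_bytes += len;
    }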
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 041d41df1224..04b69896df5f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph) | |||
89 | iph->check = 0; | 89 | iph->check = 0; |
90 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | 90 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); |
91 | } | 91 | } |
92 | EXPORT_SYMBOL(ip_send_check); | ||
92 | 93 | ||
93 | int __ip_local_out(struct sk_buff *skb) | 94 | int __ip_local_out(struct sk_buff *skb) |
94 | { | 95 | { |
@@ -151,15 +152,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
151 | iph->version = 4; | 152 | iph->version = 4; |
152 | iph->ihl = 5; | 153 | iph->ihl = 5; |
153 | iph->tos = inet->tos; | 154 | iph->tos = inet->tos; |
154 | if (ip_dont_fragment(sk, &rt->u.dst)) | 155 | if (ip_dont_fragment(sk, &rt->dst)) |
155 | iph->frag_off = htons(IP_DF); | 156 | iph->frag_off = htons(IP_DF); |
156 | else | 157 | else |
157 | iph->frag_off = 0; | 158 | iph->frag_off = 0; |
158 | iph->ttl = ip_select_ttl(inet, &rt->u.dst); | 159 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
159 | iph->daddr = rt->rt_dst; | 160 | iph->daddr = rt->rt_dst; |
160 | iph->saddr = rt->rt_src; | 161 | iph->saddr = rt->rt_src; |
161 | iph->protocol = sk->sk_protocol; | 162 | iph->protocol = sk->sk_protocol; |
162 | ip_select_ident(iph, &rt->u.dst, sk); | 163 | ip_select_ident(iph, &rt->dst, sk); |
163 | 164 | ||
164 | if (opt && opt->optlen) { | 165 | if (opt && opt->optlen) { |
165 | iph->ihl += opt->optlen>>2; | 166 | iph->ihl += opt->optlen>>2; |
@@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, | |||
172 | /* Send it out. */ | 173 | /* Send it out. */ |
173 | return ip_local_out(skb); | 174 | return ip_local_out(skb); |
174 | } | 175 | } |
175 | |||
176 | EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); | 176 | EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); |
177 | 177 | ||
178 | static inline int ip_finish_output2(struct sk_buff *skb) | 178 | static inline int ip_finish_output2(struct sk_buff *skb) |
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb) | |||
240 | { | 240 | { |
241 | struct sock *sk = skb->sk; | 241 | struct sock *sk = skb->sk; |
242 | struct rtable *rt = skb_rtable(skb); | 242 | struct rtable *rt = skb_rtable(skb); |
243 | struct net_device *dev = rt->u.dst.dev; | 243 | struct net_device *dev = rt->dst.dev; |
244 | 244 | ||
245 | /* | 245 | /* |
246 | * If the indicated interface is up and running, send the packet. | 246 | * If the indicated interface is up and running, send the packet. |
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb) | |||
359 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) | 359 | if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) |
360 | goto no_route; | 360 | goto no_route; |
361 | } | 361 | } |
362 | sk_setup_caps(sk, &rt->u.dst); | 362 | sk_setup_caps(sk, &rt->dst); |
363 | } | 363 | } |
364 | skb_dst_set_noref(skb, &rt->u.dst); | 364 | skb_dst_set_noref(skb, &rt->dst); |
365 | 365 | ||
366 | packet_routed: | 366 | packet_routed: |
367 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) | 367 | if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) |
@@ -372,11 +372,11 @@ packet_routed: | |||
372 | skb_reset_network_header(skb); | 372 | skb_reset_network_header(skb); |
373 | iph = ip_hdr(skb); | 373 | iph = ip_hdr(skb); |
374 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); | 374 | *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); |
375 | if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df) | 375 | if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df) |
376 | iph->frag_off = htons(IP_DF); | 376 | iph->frag_off = htons(IP_DF); |
377 | else | 377 | else |
378 | iph->frag_off = 0; | 378 | iph->frag_off = 0; |
379 | iph->ttl = ip_select_ttl(inet, &rt->u.dst); | 379 | iph->ttl = ip_select_ttl(inet, &rt->dst); |
380 | iph->protocol = sk->sk_protocol; | 380 | iph->protocol = sk->sk_protocol; |
381 | iph->saddr = rt->rt_src; | 381 | iph->saddr = rt->rt_src; |
382 | iph->daddr = rt->rt_dst; | 382 | iph->daddr = rt->rt_dst; |
@@ -387,7 +387,7 @@ packet_routed: | |||
387 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); | 387 | ip_options_build(skb, opt, inet->inet_daddr, rt, 0); |
388 | } | 388 | } |
389 | 389 | ||
390 | ip_select_ident_more(iph, &rt->u.dst, sk, | 390 | ip_select_ident_more(iph, &rt->dst, sk, |
391 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); | 391 | (skb_shinfo(skb)->gso_segs ?: 1) - 1); |
392 | 392 | ||
393 | skb->priority = sk->sk_priority; | 393 | skb->priority = sk->sk_priority; |
@@ -403,6 +403,7 @@ no_route: | |||
403 | kfree_skb(skb); | 403 | kfree_skb(skb); |
404 | return -EHOSTUNREACH; | 404 | return -EHOSTUNREACH; |
405 | } | 405 | } |
406 | EXPORT_SYMBOL(ip_queue_xmit); | ||
406 | 407 | ||
407 | 408 | ||
408 | static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | 409 | static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) |
@@ -411,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
411 | to->priority = from->priority; | 412 | to->priority = from->priority; |
412 | to->protocol = from->protocol; | 413 | to->protocol = from->protocol; |
413 | skb_dst_drop(to); | 414 | skb_dst_drop(to); |
414 | skb_dst_set(to, dst_clone(skb_dst(from))); | 415 | skb_dst_copy(to, from); |
415 | to->dev = from->dev; | 416 | to->dev = from->dev; |
416 | to->mark = from->mark; | 417 | to->mark = from->mark; |
417 | 418 | ||
@@ -442,17 +443,16 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) | |||
442 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | 443 | int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) |
443 | { | 444 | { |
444 | struct iphdr *iph; | 445 | struct iphdr *iph; |
445 | int raw = 0; | ||
446 | int ptr; | 446 | int ptr; |
447 | struct net_device *dev; | 447 | struct net_device *dev; |
448 | struct sk_buff *skb2; | 448 | struct sk_buff *skb2; |
449 | unsigned int mtu, hlen, left, len, ll_rs, pad; | 449 | unsigned int mtu, hlen, left, len, ll_rs; |
450 | int offset; | 450 | int offset; |
451 | __be16 not_last_frag; | 451 | __be16 not_last_frag; |
452 | struct rtable *rt = skb_rtable(skb); | 452 | struct rtable *rt = skb_rtable(skb); |
453 | int err = 0; | 453 | int err = 0; |
454 | 454 | ||
455 | dev = rt->u.dst.dev; | 455 | dev = rt->dst.dev; |
456 | 456 | ||
457 | /* | 457 | /* |
458 | * Point into the IP datagram header. | 458 | * Point into the IP datagram header. |
@@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
473 | */ | 473 | */ |
474 | 474 | ||
475 | hlen = iph->ihl * 4; | 475 | hlen = iph->ihl * 4; |
476 | mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ | 476 | mtu = dst_mtu(&rt->dst) - hlen; /* Size of data space */ |
477 | #ifdef CONFIG_BRIDGE_NETFILTER | 477 | #ifdef CONFIG_BRIDGE_NETFILTER |
478 | if (skb->nf_bridge) | 478 | if (skb->nf_bridge) |
479 | mtu -= nf_bridge_mtu_reduction(skb); | 479 | mtu -= nf_bridge_mtu_reduction(skb); |
@@ -580,14 +580,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
580 | 580 | ||
581 | slow_path: | 581 | slow_path: |
582 | left = skb->len - hlen; /* Space per frame */ | 582 | left = skb->len - hlen; /* Space per frame */ |
583 | ptr = raw + hlen; /* Where to start from */ | 583 | ptr = hlen; /* Where to start from */ |
584 | 584 | ||
585 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, | 585 | /* for bridged IP traffic encapsulated inside f.e. a vlan header, |
586 | * we need to make room for the encapsulating header | 586 | * we need to make room for the encapsulating header |
587 | */ | 587 | */ |
588 | pad = nf_bridge_pad(skb); | 588 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb)); |
589 | ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); | ||
590 | mtu -= pad; | ||
591 | 589 | ||
592 | /* | 590 | /* |
593 | * Fragment the datagram. | 591 | * Fragment the datagram. |
@@ -697,7 +695,6 @@ fail: | |||
697 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 695 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
698 | return err; | 696 | return err; |
699 | } | 697 | } |
700 | |||
701 | EXPORT_SYMBOL(ip_fragment); | 698 | EXPORT_SYMBOL(ip_fragment); |
702 | 699 | ||
703 | int | 700 | int |
@@ -716,6 +713,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk | |||
716 | } | 713 | } |
717 | return 0; | 714 | return 0; |
718 | } | 715 | } |
716 | EXPORT_SYMBOL(ip_generic_getfrag); | ||
719 | 717 | ||
720 | static inline __wsum | 718 | static inline __wsum |
721 | csum_page(struct page *page, int offset, int copy) | 719 | csum_page(struct page *page, int offset, int copy) |
@@ -833,13 +831,13 @@ int ip_append_data(struct sock *sk, | |||
833 | */ | 831 | */ |
834 | *rtp = NULL; | 832 | *rtp = NULL; |
835 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? | 833 | inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? |
836 | rt->u.dst.dev->mtu : | 834 | rt->dst.dev->mtu : |
837 | dst_mtu(rt->u.dst.path); | 835 | dst_mtu(rt->dst.path); |
838 | inet->cork.dst = &rt->u.dst; | 836 | inet->cork.dst = &rt->dst; |
839 | inet->cork.length = 0; | 837 | inet->cork.length = 0; |
840 | sk->sk_sndmsg_page = NULL; | 838 | sk->sk_sndmsg_page = NULL; |
841 | sk->sk_sndmsg_off = 0; | 839 | sk->sk_sndmsg_off = 0; |
842 | if ((exthdrlen = rt->u.dst.header_len) != 0) { | 840 | if ((exthdrlen = rt->dst.header_len) != 0) { |
843 | length += exthdrlen; | 841 | length += exthdrlen; |
844 | transhdrlen += exthdrlen; | 842 | transhdrlen += exthdrlen; |
845 | } | 843 | } |
@@ -852,7 +850,7 @@ int ip_append_data(struct sock *sk, | |||
852 | exthdrlen = 0; | 850 | exthdrlen = 0; |
853 | mtu = inet->cork.fragsize; | 851 | mtu = inet->cork.fragsize; |
854 | } | 852 | } |
855 | hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); | 853 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
856 | 854 | ||
857 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 855 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
858 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; | 856 | maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; |
@@ -869,7 +867,7 @@ int ip_append_data(struct sock *sk, | |||
869 | */ | 867 | */ |
870 | if (transhdrlen && | 868 | if (transhdrlen && |
871 | length + fragheaderlen <= mtu && | 869 | length + fragheaderlen <= mtu && |
872 | rt->u.dst.dev->features & NETIF_F_V4_CSUM && | 870 | rt->dst.dev->features & NETIF_F_V4_CSUM && |
873 | !exthdrlen) | 871 | !exthdrlen) |
874 | csummode = CHECKSUM_PARTIAL; | 872 | csummode = CHECKSUM_PARTIAL; |
875 | 873 | ||
@@ -878,7 +876,7 @@ int ip_append_data(struct sock *sk, | |||
878 | inet->cork.length += length; | 876 | inet->cork.length += length; |
879 | if (((length > mtu) || (skb && skb_is_gso(skb))) && | 877 | if (((length > mtu) || (skb && skb_is_gso(skb))) && |
880 | (sk->sk_protocol == IPPROTO_UDP) && | 878 | (sk->sk_protocol == IPPROTO_UDP) && |
881 | (rt->u.dst.dev->features & NETIF_F_UFO)) { | 879 | (rt->dst.dev->features & NETIF_F_UFO)) { |
882 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, | 880 | err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, |
883 | fragheaderlen, transhdrlen, mtu, | 881 | fragheaderlen, transhdrlen, mtu, |
884 | flags); | 882 | flags); |
@@ -926,7 +924,7 @@ alloc_new_skb: | |||
926 | fraglen = datalen + fragheaderlen; | 924 | fraglen = datalen + fragheaderlen; |
927 | 925 | ||
928 | if ((flags & MSG_MORE) && | 926 | if ((flags & MSG_MORE) && |
929 | !(rt->u.dst.dev->features&NETIF_F_SG)) | 927 | !(rt->dst.dev->features&NETIF_F_SG)) |
930 | alloclen = mtu; | 928 | alloclen = mtu; |
931 | else | 929 | else |
932 | alloclen = datalen + fragheaderlen; | 930 | alloclen = datalen + fragheaderlen; |
@@ -937,7 +935,7 @@ alloc_new_skb: | |||
937 | * the last. | 935 | * the last. |
938 | */ | 936 | */ |
939 | if (datalen == length + fraggap) | 937 | if (datalen == length + fraggap) |
940 | alloclen += rt->u.dst.trailer_len; | 938 | alloclen += rt->dst.trailer_len; |
941 | 939 | ||
942 | if (transhdrlen) { | 940 | if (transhdrlen) { |
943 | skb = sock_alloc_send_skb(sk, | 941 | skb = sock_alloc_send_skb(sk, |
@@ -1010,7 +1008,7 @@ alloc_new_skb: | |||
1010 | if (copy > length) | 1008 | if (copy > length) |
1011 | copy = length; | 1009 | copy = length; |
1012 | 1010 | ||
1013 | if (!(rt->u.dst.dev->features&NETIF_F_SG)) { | 1011 | if (!(rt->dst.dev->features&NETIF_F_SG)) { |
1014 | unsigned int off; | 1012 | unsigned int off; |
1015 | 1013 | ||
1016 | off = skb->len; | 1014 | off = skb->len; |
@@ -1105,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1105 | if (inet->cork.flags & IPCORK_OPT) | 1103 | if (inet->cork.flags & IPCORK_OPT) |
1106 | opt = inet->cork.opt; | 1104 | opt = inet->cork.opt; |
1107 | 1105 | ||
1108 | if (!(rt->u.dst.dev->features&NETIF_F_SG)) | 1106 | if (!(rt->dst.dev->features&NETIF_F_SG)) |
1109 | return -EOPNOTSUPP; | 1107 | return -EOPNOTSUPP; |
1110 | 1108 | ||
1111 | hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); | 1109 | hh_len = LL_RESERVED_SPACE(rt->dst.dev); |
1112 | mtu = inet->cork.fragsize; | 1110 | mtu = inet->cork.fragsize; |
1113 | 1111 | ||
1114 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); | 1112 | fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); |
@@ -1125,7 +1123,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, | |||
1125 | inet->cork.length += size; | 1123 | inet->cork.length += size; |
1126 | if ((size + skb->len > mtu) && | 1124 | if ((size + skb->len > mtu) && |
1127 | (sk->sk_protocol == IPPROTO_UDP) && | 1125 | (sk->sk_protocol == IPPROTO_UDP) && |
1128 | (rt->u.dst.dev->features & NETIF_F_UFO)) { | 1126 | (rt->dst.dev->features & NETIF_F_UFO)) { |
1129 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; | 1127 | skb_shinfo(skb)->gso_size = mtu - fragheaderlen; |
1130 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; | 1128 | skb_shinfo(skb)->gso_type = SKB_GSO_UDP; |
1131 | } | 1129 | } |
@@ -1277,8 +1275,8 @@ int ip_push_pending_frames(struct sock *sk) | |||
1277 | * If local_df is set too, we still allow to fragment this frame | 1275 | * If local_df is set too, we still allow to fragment this frame |
1278 | * locally. */ | 1276 | * locally. */ |
1279 | if (inet->pmtudisc >= IP_PMTUDISC_DO || | 1277 | if (inet->pmtudisc >= IP_PMTUDISC_DO || |
1280 | (skb->len <= dst_mtu(&rt->u.dst) && | 1278 | (skb->len <= dst_mtu(&rt->dst) && |
1281 | ip_dont_fragment(sk, &rt->u.dst))) | 1279 | ip_dont_fragment(sk, &rt->dst))) |
1282 | df = htons(IP_DF); | 1280 | df = htons(IP_DF); |
1283 | 1281 | ||
1284 | if (inet->cork.flags & IPCORK_OPT) | 1282 | if (inet->cork.flags & IPCORK_OPT) |
@@ -1287,7 +1285,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1287 | if (rt->rt_type == RTN_MULTICAST) | 1285 | if (rt->rt_type == RTN_MULTICAST) |
1288 | ttl = inet->mc_ttl; | 1286 | ttl = inet->mc_ttl; |
1289 | else | 1287 | else |
1290 | ttl = ip_select_ttl(inet, &rt->u.dst); | 1288 | ttl = ip_select_ttl(inet, &rt->dst); |
1291 | 1289 | ||
1292 | iph = (struct iphdr *)skb->data; | 1290 | iph = (struct iphdr *)skb->data; |
1293 | iph->version = 4; | 1291 | iph->version = 4; |
@@ -1298,7 +1296,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1298 | } | 1296 | } |
1299 | iph->tos = inet->tos; | 1297 | iph->tos = inet->tos; |
1300 | iph->frag_off = df; | 1298 | iph->frag_off = df; |
1301 | ip_select_ident(iph, &rt->u.dst, sk); | 1299 | ip_select_ident(iph, &rt->dst, sk); |
1302 | iph->ttl = ttl; | 1300 | iph->ttl = ttl; |
1303 | iph->protocol = sk->sk_protocol; | 1301 | iph->protocol = sk->sk_protocol; |
1304 | iph->saddr = rt->rt_src; | 1302 | iph->saddr = rt->rt_src; |
@@ -1311,7 +1309,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1311 | * on dst refcount | 1309 | * on dst refcount |
1312 | */ | 1310 | */ |
1313 | inet->cork.dst = NULL; | 1311 | inet->cork.dst = NULL; |
1314 | skb_dst_set(skb, &rt->u.dst); | 1312 | skb_dst_set(skb, &rt->dst); |
1315 | 1313 | ||
1316 | if (iph->protocol == IPPROTO_ICMP) | 1314 | if (iph->protocol == IPPROTO_ICMP) |
1317 | icmp_out_count(net, ((struct icmphdr *) | 1315 | icmp_out_count(net, ((struct icmphdr *) |
@@ -1448,7 +1446,3 @@ void __init ip_init(void) | |||
1448 | igmp_mc_proc_init(); | 1446 | igmp_mc_proc_init(); |
1449 | #endif | 1447 | #endif |
1450 | } | 1448 | } |
1451 | |||
1452 | EXPORT_SYMBOL(ip_generic_getfrag); | ||
1453 | EXPORT_SYMBOL(ip_queue_xmit); | ||
1454 | EXPORT_SYMBOL(ip_send_check); | ||
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ce231780a2b1..6c40a8c46e79 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c | |||
@@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) | |||
239 | sent to multicast group to reach destination designated router. | 239 | sent to multicast group to reach destination designated router. |
240 | */ | 240 | */ |
241 | struct ip_ra_chain *ip_ra_chain; | 241 | struct ip_ra_chain *ip_ra_chain; |
242 | DEFINE_RWLOCK(ip_ra_lock); | 242 | static DEFINE_SPINLOCK(ip_ra_lock); |
243 | |||
244 | |||
245 | static void ip_ra_destroy_rcu(struct rcu_head *head) | ||
246 | { | ||
247 | struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); | ||
248 | |||
249 | sock_put(ra->saved_sk); | ||
250 | kfree(ra); | ||
251 | } | ||
243 | 252 | ||
244 | int ip_ra_control(struct sock *sk, unsigned char on, | 253 | int ip_ra_control(struct sock *sk, unsigned char on, |
245 | void (*destructor)(struct sock *)) | 254 | void (*destructor)(struct sock *)) |
@@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on, | |||
251 | 260 | ||
252 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; | 261 | new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; |
253 | 262 | ||
254 | write_lock_bh(&ip_ra_lock); | 263 | spin_lock_bh(&ip_ra_lock); |
255 | for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { | 264 | for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { |
256 | if (ra->sk == sk) { | 265 | if (ra->sk == sk) { |
257 | if (on) { | 266 | if (on) { |
258 | write_unlock_bh(&ip_ra_lock); | 267 | spin_unlock_bh(&ip_ra_lock); |
259 | kfree(new_ra); | 268 | kfree(new_ra); |
260 | return -EADDRINUSE; | 269 | return -EADDRINUSE; |
261 | } | 270 | } |
262 | *rap = ra->next; | 271 | /* don't let ip_call_ra_chain() use sk again */ |
263 | write_unlock_bh(&ip_ra_lock); | 272 | ra->sk = NULL; |
273 | rcu_assign_pointer(*rap, ra->next); | ||
274 | spin_unlock_bh(&ip_ra_lock); | ||
264 | 275 | ||
265 | if (ra->destructor) | 276 | if (ra->destructor) |
266 | ra->destructor(sk); | 277 | ra->destructor(sk); |
267 | sock_put(sk); | 278 | /* |
268 | kfree(ra); | 279 | * Delay sock_put(sk) and kfree(ra) after one rcu grace |
280 | * period. This guarantee ip_call_ra_chain() dont need | ||
281 | * to mess with socket refcounts. | ||
282 | */ | ||
283 | ra->saved_sk = sk; | ||
284 | call_rcu(&ra->rcu, ip_ra_destroy_rcu); | ||
269 | return 0; | 285 | return 0; |
270 | } | 286 | } |
271 | } | 287 | } |
272 | if (new_ra == NULL) { | 288 | if (new_ra == NULL) { |
273 | write_unlock_bh(&ip_ra_lock); | 289 | spin_unlock_bh(&ip_ra_lock); |
274 | return -ENOBUFS; | 290 | return -ENOBUFS; |
275 | } | 291 | } |
276 | new_ra->sk = sk; | 292 | new_ra->sk = sk; |
277 | new_ra->destructor = destructor; | 293 | new_ra->destructor = destructor; |
278 | 294 | ||
279 | new_ra->next = ra; | 295 | new_ra->next = ra; |
280 | *rap = new_ra; | 296 | rcu_assign_pointer(*rap, new_ra); |
281 | sock_hold(sk); | 297 | sock_hold(sk); |
282 | write_unlock_bh(&ip_ra_lock); | 298 | spin_unlock_bh(&ip_ra_lock); |
283 | 299 | ||
284 | return 0; | 300 | return 0; |
285 | } | 301 | } |
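The unlink ordering in ip_ra_control() is what lets the receive path run lockless: clear ra->sk first so in-flight readers stop delivering to the socket, unlink with rcu_assign_pointer() while holding the writer-side spinlock, then hand both the kfree() and the deferred sock_put() to an RCU callback via the new saved_sk field. The sequence, excerpted from the hunk above with step comments added:

    ra->sk = NULL;                          /* 1: readers stop matching     */
    rcu_assign_pointer(*rap, ra->next);     /* 2: unlink from the chain     */
    spin_unlock_bh(&ip_ra_lock);
    if (ra->destructor)
            ra->destructor(sk);             /* 3: caller-supplied cleanup   */
    ra->saved_sk = sk;                      /* 4: sock_put() and kfree()    */
    call_rcu(&ra->rcu, ip_ra_destroy_rcu);  /*    after the grace period    */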
@@ -449,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
449 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | | 465 | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | |
450 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | | 466 | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | |
451 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | | 467 | (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | |
452 | (1<<IP_MINTTL))) || | 468 | (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) || |
453 | optname == IP_MULTICAST_TTL || | 469 | optname == IP_MULTICAST_TTL || |
454 | optname == IP_MULTICAST_ALL || | 470 | optname == IP_MULTICAST_ALL || |
455 | optname == IP_MULTICAST_LOOP || | 471 | optname == IP_MULTICAST_LOOP || |
@@ -572,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, | |||
572 | } | 588 | } |
573 | inet->hdrincl = val ? 1 : 0; | 589 | inet->hdrincl = val ? 1 : 0; |
574 | break; | 590 | break; |
591 | case IP_NODEFRAG: | ||
592 | if (sk->sk_type != SOCK_RAW) { | ||
593 | err = -ENOPROTOOPT; | ||
594 | break; | ||
595 | } | ||
596 | inet->nodefrag = val ? 1 : 0; | ||
597 | break; | ||
575 | case IP_MTU_DISCOVER: | 598 | case IP_MTU_DISCOVER: |
576 | if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) | 599 | if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) |
577 | goto e_inval; | 600 | goto e_inval; |
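The new IP_NODEFRAG option is accepted only on SOCK_RAW sockets (anything else gets ENOPROTOOPT) and, per the af_inet.c hunk at the top of this diff, defaults to off on every new socket. From userspace it would be enabled roughly as below; the numeric fallback define is an assumption for older headers and should be checked against linux/in.h:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    #ifndef IP_NODEFRAG
    #define IP_NODEFRAG 22  /* assumed value; verify against linux/in.h */
    #endif

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_RAW, IPPROTO_UDP); /* needs CAP_NET_RAW */
            int one = 1;

            /* Ask the stack not to reassemble fragments for this socket;
             * the kernel rejects the option on non-raw sockets. */
            if (fd < 0 || setsockopt(fd, IPPROTO_IP, IP_NODEFRAG,
                                     &one, sizeof(one)) < 0)
                    perror("IP_NODEFRAG");
            return 0;
    }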
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index b9d84e800cf4..3a6e1ec5e9ae 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c | |||
@@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options) | |||
665 | memcpy(e, ic_req_params, sizeof(ic_req_params)); | 665 | memcpy(e, ic_req_params, sizeof(ic_req_params)); |
666 | e += sizeof(ic_req_params); | 666 | e += sizeof(ic_req_params); |
667 | 667 | ||
668 | if (ic_host_name_set) { | ||
669 | *e++ = 12; /* host-name */ | ||
670 | len = strlen(utsname()->nodename); | ||
671 | *e++ = len; | ||
672 | memcpy(e, utsname()->nodename, len); | ||
673 | e += len; | ||
674 | } | ||
668 | if (*vendor_class_identifier) { | 675 | if (*vendor_class_identifier) { |
669 | printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", | 676 | printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", |
670 | vendor_class_identifier); | 677 | vendor_class_identifier); |
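The ipconfig.c hunk appends DHCP option 12 (host-name, RFC 2132) to the request. DHCP options are flat TLVs, one tag byte, one length byte, then the payload, which is why three stores and a memcpy suffice; the nodename length stays safely below the 255-byte option limit. The same encoding as a generic, hypothetical helper:

    #include <linux/string.h>
    #include <linux/types.h>

    /* Sketch: append one DHCP TLV option, returning the new end
     * pointer; the caller guarantees buffer room and len <= 255. */
    static u8 *dhcp_put_option(u8 *e, u8 tag, const void *data, u8 len)
    {
            *e++ = tag;
            *e++ = len;
            memcpy(e, data, len);
            return e + len;
    }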
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 7fd636711037..ec036731a70b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
435 | goto tx_error_icmp; | 435 | goto tx_error_icmp; |
436 | } | 436 | } |
437 | } | 437 | } |
438 | tdev = rt->u.dst.dev; | 438 | tdev = rt->dst.dev; |
439 | 439 | ||
440 | if (tdev == dev) { | 440 | if (tdev == dev) { |
441 | ip_rt_put(rt); | 441 | ip_rt_put(rt); |
@@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
446 | df |= old_iph->frag_off & htons(IP_DF); | 446 | df |= old_iph->frag_off & htons(IP_DF); |
447 | 447 | ||
448 | if (df) { | 448 | if (df) { |
449 | mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); | 449 | mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); |
450 | 450 | ||
451 | if (mtu < 68) { | 451 | if (mtu < 68) { |
452 | stats->collisions++; | 452 | stats->collisions++; |
@@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
503 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | | 503 | IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | |
504 | IPSKB_REROUTED); | 504 | IPSKB_REROUTED); |
505 | skb_dst_drop(skb); | 505 | skb_dst_drop(skb); |
506 | skb_dst_set(skb, &rt->u.dst); | 506 | skb_dst_set(skb, &rt->dst); |
507 | 507 | ||
508 | /* | 508 | /* |
509 | * Push down and install the IPIP header. | 509 | * Push down and install the IPIP header. |
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) | |||
552 | .proto = IPPROTO_IPIP }; | 552 | .proto = IPPROTO_IPIP }; |
553 | struct rtable *rt; | 553 | struct rtable *rt; |
554 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { | 554 | if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { |
555 | tdev = rt->u.dst.dev; | 555 | tdev = rt->dst.dev; |
556 | ip_rt_put(rt); | 556 | ip_rt_put(rt); |
557 | } | 557 | } |
558 | dev->flags |= IFF_POINTOPOINT; | 558 | dev->flags |= IFF_POINTOPOINT; |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7f6273506eea..179fcab866fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -1555,9 +1555,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1555 | goto out_free; | 1555 | goto out_free; |
1556 | } | 1556 | } |
1557 | 1557 | ||
1558 | dev = rt->u.dst.dev; | 1558 | dev = rt->dst.dev; |
1559 | 1559 | ||
1560 | if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { | 1560 | if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { |
1561 | /* Do not fragment multicasts. Alas, IPv4 does not | 1561 | /* Do not fragment multicasts. Alas, IPv4 does not |
1562 | allow to send ICMP, so that packets will disappear | 1562 | allow to send ICMP, so that packets will disappear |
1563 | to blackhole. | 1563 | to blackhole. |
@@ -1568,7 +1568,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1568 | goto out_free; | 1568 | goto out_free; |
1569 | } | 1569 | } |
1570 | 1570 | ||
1571 | encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; | 1571 | encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len; |
1572 | 1572 | ||
1573 | if (skb_cow(skb, encap)) { | 1573 | if (skb_cow(skb, encap)) { |
1574 | ip_rt_put(rt); | 1574 | ip_rt_put(rt); |
@@ -1579,7 +1579,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, | |||
1579 | vif->bytes_out += skb->len; | 1579 | vif->bytes_out += skb->len; |
1580 | 1580 | ||
1581 | skb_dst_drop(skb); | 1581 | skb_dst_drop(skb); |
1582 | skb_dst_set(skb, &rt->u.dst); | 1582 | skb_dst_set(skb, &rt->dst); |
1583 | ip_decrease_ttl(ip_hdr(skb)); | 1583 | ip_decrease_ttl(ip_hdr(skb)); |
1584 | 1584 | ||
1585 | /* FIXME: forward and output firewalls used to be called here. | 1585 | /* FIXME: forward and output firewalls used to be called here. |
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 07de855e2175..d88a46c54fd1 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c | |||
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
43 | 43 | ||
44 | /* Drop old route. */ | 44 | /* Drop old route. */ |
45 | skb_dst_drop(skb); | 45 | skb_dst_drop(skb); |
46 | skb_dst_set(skb, &rt->u.dst); | 46 | skb_dst_set(skb, &rt->dst); |
47 | } else { | 47 | } else { |
48 | /* non-local src, find valid iif to satisfy | 48 | /* non-local src, find valid iif to satisfy |
49 | * rp-filter when calling ip_route_input. */ | 49 | * rp-filter when calling ip_route_input. */ |
@@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) | |||
53 | 53 | ||
54 | orefdst = skb->_skb_refdst; | 54 | orefdst = skb->_skb_refdst; |
55 | if (ip_route_input(skb, iph->daddr, iph->saddr, | 55 | if (ip_route_input(skb, iph->daddr, iph->saddr, |
56 | RT_TOS(iph->tos), rt->u.dst.dev) != 0) { | 56 | RT_TOS(iph->tos), rt->dst.dev) != 0) { |
57 | dst_release(&rt->u.dst); | 57 | dst_release(&rt->dst); |
58 | return -1; | 58 | return -1; |
59 | } | 59 | } |
60 | dst_release(&rt->u.dst); | 60 | dst_release(&rt->dst); |
61 | refdst_drop(orefdst); | 61 | refdst_drop(orefdst); |
62 | } | 62 | } |
63 | 63 | ||
@@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, | |||
212 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, | 212 | skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, |
213 | skb->len - dataoff, 0); | 213 | skb->len - dataoff, 0); |
214 | skb->ip_summed = CHECKSUM_NONE; | 214 | skb->ip_summed = CHECKSUM_NONE; |
215 | csum = __skb_checksum_complete_head(skb, dataoff + len); | 215 | return __skb_checksum_complete_head(skb, dataoff + len); |
216 | if (!csum) | ||
217 | skb->ip_summed = CHECKSUM_UNNECESSARY; | ||
218 | } | 216 | } |
219 | return csum; | 217 | return csum; |
220 | } | 218 | } |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 1ac01b128621..6bccba31d132 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -283,16 +283,13 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
283 | arp = arp_hdr(skb); | 283 | arp = arp_hdr(skb); |
284 | do { | 284 | do { |
285 | const struct arpt_entry_target *t; | 285 | const struct arpt_entry_target *t; |
286 | int hdr_len; | ||
287 | 286 | ||
288 | if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { | 287 | if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { |
289 | e = arpt_next_entry(e); | 288 | e = arpt_next_entry(e); |
290 | continue; | 289 | continue; |
291 | } | 290 | } |
292 | 291 | ||
293 | hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + | 292 | ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); |
294 | (2 * skb->dev->addr_len); | ||
295 | ADD_COUNTER(e->counters, hdr_len, 1); | ||
296 | 293 | ||
297 | t = arpt_get_target_c(e); | 294 | t = arpt_get_target_c(e); |
298 | 295 | ||
@@ -713,7 +710,7 @@ static void get_counters(const struct xt_table_info *t, | |||
713 | struct arpt_entry *iter; | 710 | struct arpt_entry *iter; |
714 | unsigned int cpu; | 711 | unsigned int cpu; |
715 | unsigned int i; | 712 | unsigned int i; |
716 | unsigned int curcpu; | 713 | unsigned int curcpu = get_cpu(); |
717 | 714 | ||
718 | /* Instead of clearing (by a previous call to memset()) | 715 | /* Instead of clearing (by a previous call to memset()) |
719 | * the counters and using adds, we set the counters | 716 | * the counters and using adds, we set the counters |
@@ -723,14 +720,16 @@ static void get_counters(const struct xt_table_info *t, | |||
723 | * if new softirq were to run and call ipt_do_table | 720 | * if new softirq were to run and call ipt_do_table |
724 | */ | 721 | */ |
725 | local_bh_disable(); | 722 | local_bh_disable(); |
726 | curcpu = smp_processor_id(); | ||
727 | |||
728 | i = 0; | 723 | i = 0; |
729 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { | 724 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { |
730 | SET_COUNTER(counters[i], iter->counters.bcnt, | 725 | SET_COUNTER(counters[i], iter->counters.bcnt, |
731 | iter->counters.pcnt); | 726 | iter->counters.pcnt); |
732 | ++i; | 727 | ++i; |
733 | } | 728 | } |
729 | local_bh_enable(); | ||
730 | /* Processing counters from other cpus, we can let bottom half enabled, | ||
731 | * (preemption is disabled) | ||
732 | */ | ||
734 | 733 | ||
735 | for_each_possible_cpu(cpu) { | 734 | for_each_possible_cpu(cpu) { |
736 | if (cpu == curcpu) | 735 | if (cpu == curcpu) |
@@ -744,7 +743,7 @@ static void get_counters(const struct xt_table_info *t, | |||
744 | } | 743 | } |
745 | xt_info_wrunlock(cpu); | 744 | xt_info_wrunlock(cpu); |
746 | } | 745 | } |
747 | local_bh_enable(); | 746 | put_cpu(); |
748 | } | 747 | } |
749 | 748 | ||
750 | static struct xt_counters *alloc_counters(const struct xt_table *table) | 749 | static struct xt_counters *alloc_counters(const struct xt_table *table) |
@@ -758,7 +757,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) | |||
758 | * about). | 757 | * about). |
759 | */ | 758 | */ |
760 | countersize = sizeof(struct xt_counters) * private->number; | 759 | countersize = sizeof(struct xt_counters) * private->number; |
761 | counters = vmalloc_node(countersize, numa_node_id()); | 760 | counters = vmalloc(countersize); |
762 | 761 | ||
763 | if (counters == NULL) | 762 | if (counters == NULL) |
764 | return ERR_PTR(-ENOMEM); | 763 | return ERR_PTR(-ENOMEM); |
@@ -1005,8 +1004,7 @@ static int __do_replace(struct net *net, const char *name, | |||
1005 | struct arpt_entry *iter; | 1004 | struct arpt_entry *iter; |
1006 | 1005 | ||
1007 | ret = 0; | 1006 | ret = 0; |
1008 | counters = vmalloc_node(num_counters * sizeof(struct xt_counters), | 1007 | counters = vmalloc(num_counters * sizeof(struct xt_counters)); |
1009 | numa_node_id()); | ||
1010 | if (!counters) { | 1008 | if (!counters) { |
1011 | ret = -ENOMEM; | 1009 | ret = -ENOMEM; |
1012 | goto out; | 1010 | goto out; |
@@ -1159,7 +1157,7 @@ static int do_add_counters(struct net *net, const void __user *user, | |||
1159 | if (len != size + num_counters * sizeof(struct xt_counters)) | 1157 | if (len != size + num_counters * sizeof(struct xt_counters)) |
1160 | return -EINVAL; | 1158 | return -EINVAL; |
1161 | 1159 | ||
1162 | paddc = vmalloc_node(len - size, numa_node_id()); | 1160 | paddc = vmalloc(len - size); |
1163 | if (!paddc) | 1161 | if (!paddc) |
1164 | return -ENOMEM; | 1162 | return -ENOMEM; |
1165 | 1163 | ||
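The removed open-coded length matches what the existing arp_hdr_len() helper computes for an IPv4 ARP packet, so the counter update loses no precision. The arithmetic, spelled out (a sketch; exact helper internals may differ across link types):

/* arp_hdr_len(dev) ~= fixed ARP header + two (hw addr, proto addr) pairs */
static unsigned int arp_hdr_len_sketch(const struct net_device *dev)
{
        return sizeof(struct arphdr) +
               2 * dev->addr_len +              /* sender + target HW addr */
               2 * sizeof(struct in_addr);      /* sender + target IP addr */
}
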
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index a4e5fc5df4bf..d2c1311cb28d 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); | |||
42 | 42 | ||
43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; | 43 | static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; |
44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; | 44 | static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; |
45 | static DEFINE_RWLOCK(queue_lock); | 45 | static DEFINE_SPINLOCK(queue_lock); |
46 | static int peer_pid __read_mostly; | 46 | static int peer_pid __read_mostly; |
47 | static unsigned int copy_range __read_mostly; | 47 | static unsigned int copy_range __read_mostly; |
48 | static unsigned int queue_total; | 48 | static unsigned int queue_total; |
@@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range) | |||
72 | break; | 72 | break; |
73 | 73 | ||
74 | case IPQ_COPY_PACKET: | 74 | case IPQ_COPY_PACKET: |
75 | copy_mode = mode; | 75 | if (range > 0xFFFF) |
76 | range = 0xFFFF; | ||
76 | copy_range = range; | 77 | copy_range = range; |
77 | if (copy_range > 0xFFFF) | 78 | copy_mode = mode; |
78 | copy_range = 0xFFFF; | ||
79 | break; | 79 | break; |
80 | 80 | ||
81 | default: | 81 | default: |
@@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id) | |||
101 | { | 101 | { |
102 | struct nf_queue_entry *entry = NULL, *i; | 102 | struct nf_queue_entry *entry = NULL, *i; |
103 | 103 | ||
104 | write_lock_bh(&queue_lock); | 104 | spin_lock_bh(&queue_lock); |
105 | 105 | ||
106 | list_for_each_entry(i, &queue_list, list) { | 106 | list_for_each_entry(i, &queue_list, list) { |
107 | if ((unsigned long)i == id) { | 107 | if ((unsigned long)i == id) { |
@@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id) | |||
115 | queue_total--; | 115 | queue_total--; |
116 | } | 116 | } |
117 | 117 | ||
118 | write_unlock_bh(&queue_lock); | 118 | spin_unlock_bh(&queue_lock); |
119 | return entry; | 119 | return entry; |
120 | } | 120 | } |
121 | 121 | ||
@@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | |||
136 | static void | 136 | static void |
137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) | 137 | ipq_flush(ipq_cmpfn cmpfn, unsigned long data) |
138 | { | 138 | { |
139 | write_lock_bh(&queue_lock); | 139 | spin_lock_bh(&queue_lock); |
140 | __ipq_flush(cmpfn, data); | 140 | __ipq_flush(cmpfn, data); |
141 | write_unlock_bh(&queue_lock); | 141 | spin_unlock_bh(&queue_lock); |
142 | } | 142 | } |
143 | 143 | ||
144 | static struct sk_buff * | 144 | static struct sk_buff * |
@@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | |||
152 | struct nlmsghdr *nlh; | 152 | struct nlmsghdr *nlh; |
153 | struct timeval tv; | 153 | struct timeval tv; |
154 | 154 | ||
155 | read_lock_bh(&queue_lock); | 155 | switch (ACCESS_ONCE(copy_mode)) { |
156 | |||
157 | switch (copy_mode) { | ||
158 | case IPQ_COPY_META: | 156 | case IPQ_COPY_META: |
159 | case IPQ_COPY_NONE: | 157 | case IPQ_COPY_NONE: |
160 | size = NLMSG_SPACE(sizeof(*pmsg)); | 158 | size = NLMSG_SPACE(sizeof(*pmsg)); |
@@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) | |||
162 | 160 | ||
163 | case IPQ_COPY_PACKET: | 161 | case IPQ_COPY_PACKET: |
164 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && | 162 | if (entry->skb->ip_summed == CHECKSUM_PARTIAL && |
165 | (*errp = skb_checksum_help(entry->skb))) { | 163 | (*errp = skb_checksum_help(entry->skb))) |
166 | read_unlock_bh(&queue_lock); | ||
167 | return NULL; | 164 | return NULL; |
168 | } | 165 | |
169 | if (copy_range == 0 || copy_range > entry->skb->len) | 166 | data_len = ACCESS_ONCE(copy_range); |
167 | if (data_len == 0 || data_len > entry->skb->len) | ||
170 | data_len = entry->skb->len; | 168 | data_len = entry->skb->len; |
171 | else | ||
172 | data_len = copy_range; | ||
173 | 169 | ||
174 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); | 170 | size = NLMSG_SPACE(sizeof(*pmsg) + data_len); |
175 | break; | 171 | break; |
176 | 172 | ||
177 | default: | 173 | default: |
178 | *errp = -EINVAL; | 174 | *errp = -EINVAL; |
179 | read_unlock_bh(&queue_lock); | ||
180 | return NULL; | 175 | return NULL; |
181 | } | 176 | } |
182 | 177 | ||
183 | read_unlock_bh(&queue_lock); | ||
184 | |||
185 | skb = alloc_skb(size, GFP_ATOMIC); | 178 | skb = alloc_skb(size, GFP_ATOMIC); |
186 | if (!skb) | 179 | if (!skb) |
187 | goto nlmsg_failure; | 180 | goto nlmsg_failure; |
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | |||
242 | if (nskb == NULL) | 235 | if (nskb == NULL) |
243 | return status; | 236 | return status; |
244 | 237 | ||
245 | write_lock_bh(&queue_lock); | 238 | spin_lock_bh(&queue_lock); |
246 | 239 | ||
247 | if (!peer_pid) | 240 | if (!peer_pid) |
248 | goto err_out_free_nskb; | 241 | goto err_out_free_nskb; |
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) | |||
266 | 259 | ||
267 | __ipq_enqueue_entry(entry); | 260 | __ipq_enqueue_entry(entry); |
268 | 261 | ||
269 | write_unlock_bh(&queue_lock); | 262 | spin_unlock_bh(&queue_lock); |
270 | return status; | 263 | return status; |
271 | 264 | ||
272 | err_out_free_nskb: | 265 | err_out_free_nskb: |
273 | kfree_skb(nskb); | 266 | kfree_skb(nskb); |
274 | 267 | ||
275 | err_out_unlock: | 268 | err_out_unlock: |
276 | write_unlock_bh(&queue_lock); | 269 | spin_unlock_bh(&queue_lock); |
277 | return status; | 270 | return status; |
278 | } | 271 | } |
279 | 272 | ||
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range) | |||
342 | { | 335 | { |
343 | int status; | 336 | int status; |
344 | 337 | ||
345 | write_lock_bh(&queue_lock); | 338 | spin_lock_bh(&queue_lock); |
346 | status = __ipq_set_mode(mode, range); | 339 | status = __ipq_set_mode(mode, range); |
347 | write_unlock_bh(&queue_lock); | 340 | spin_unlock_bh(&queue_lock); |
348 | return status; | 341 | return status; |
349 | } | 342 | } |
350 | 343 | ||
@@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
440 | if (security_netlink_recv(skb, CAP_NET_ADMIN)) | 433 | if (security_netlink_recv(skb, CAP_NET_ADMIN)) |
441 | RCV_SKB_FAIL(-EPERM); | 434 | RCV_SKB_FAIL(-EPERM); |
442 | 435 | ||
443 | write_lock_bh(&queue_lock); | 436 | spin_lock_bh(&queue_lock); |
444 | 437 | ||
445 | if (peer_pid) { | 438 | if (peer_pid) { |
446 | if (peer_pid != pid) { | 439 | if (peer_pid != pid) { |
447 | write_unlock_bh(&queue_lock); | 440 | spin_unlock_bh(&queue_lock); |
448 | RCV_SKB_FAIL(-EBUSY); | 441 | RCV_SKB_FAIL(-EBUSY); |
449 | } | 442 | } |
450 | } else { | 443 | } else { |
@@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb) | |||
452 | peer_pid = pid; | 445 | peer_pid = pid; |
453 | } | 446 | } |
454 | 447 | ||
455 | write_unlock_bh(&queue_lock); | 448 | spin_unlock_bh(&queue_lock); |
456 | 449 | ||
457 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, | 450 | status = ipq_receive_peer(NLMSG_DATA(nlh), type, |
458 | nlmsglen - NLMSG_LENGTH(0)); | 451 | nlmsglen - NLMSG_LENGTH(0)); |
@@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this, | |||
497 | struct netlink_notify *n = ptr; | 490 | struct netlink_notify *n = ptr; |
498 | 491 | ||
499 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { | 492 | if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { |
500 | write_lock_bh(&queue_lock); | 493 | spin_lock_bh(&queue_lock); |
501 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) | 494 | if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) |
502 | __ipq_reset(); | 495 | __ipq_reset(); |
503 | write_unlock_bh(&queue_lock); | 496 | spin_unlock_bh(&queue_lock); |
504 | } | 497 | } |
505 | return NOTIFY_DONE; | 498 | return NOTIFY_DONE; |
506 | } | 499 | } |
@@ -527,7 +520,7 @@ static ctl_table ipq_table[] = { | |||
527 | #ifdef CONFIG_PROC_FS | 520 | #ifdef CONFIG_PROC_FS |
528 | static int ip_queue_show(struct seq_file *m, void *v) | 521 | static int ip_queue_show(struct seq_file *m, void *v) |
529 | { | 522 | { |
530 | read_lock_bh(&queue_lock); | 523 | spin_lock_bh(&queue_lock); |
531 | 524 | ||
532 | seq_printf(m, | 525 | seq_printf(m, |
533 | "Peer PID : %d\n" | 526 | "Peer PID : %d\n" |
@@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v) | |||
545 | queue_dropped, | 538 | queue_dropped, |
546 | queue_user_dropped); | 539 | queue_user_dropped); |
547 | 540 | ||
548 | read_unlock_bh(&queue_lock); | 541 | spin_unlock_bh(&queue_lock); |
549 | return 0; | 542 | return 0; |
550 | } | 543 | } |
551 | 544 | ||
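Because queue_lock is only needed on the write side now, the message builder reads copy_mode and copy_range exactly once each instead of holding a read lock across the switch. The idiom in isolation (ACCESS_ONCE is this era's spelling of READ_ONCE; skb_len stands in for entry->skb->len):

/* Snapshot a value a writer may change concurrently; the volatile cast
 * stops the compiler re-reading it (and seeing two different values)
 * within this function. */
unsigned int range = ACCESS_ONCE(copy_range);

if (range == 0 || range > skb_len)
        range = skb_len;        /* all decisions use the one snapshot */
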
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4b6c5ca610fc..c439721b165a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -364,7 +364,7 @@ ipt_do_table(struct sk_buff *skb, | |||
364 | goto no_match; | 364 | goto no_match; |
365 | } | 365 | } |
366 | 366 | ||
367 | ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); | 367 | ADD_COUNTER(e->counters, skb->len, 1); |
368 | 368 | ||
369 | t = ipt_get_target(e); | 369 | t = ipt_get_target(e); |
370 | IP_NF_ASSERT(t->u.kernel.target); | 370 | IP_NF_ASSERT(t->u.kernel.target); |
@@ -884,7 +884,7 @@ get_counters(const struct xt_table_info *t, | |||
884 | struct ipt_entry *iter; | 884 | struct ipt_entry *iter; |
885 | unsigned int cpu; | 885 | unsigned int cpu; |
886 | unsigned int i; | 886 | unsigned int i; |
887 | unsigned int curcpu; | 887 | unsigned int curcpu = get_cpu(); |
888 | 888 | ||
889 | /* Instead of clearing (by a previous call to memset()) | 889 | /* Instead of clearing (by a previous call to memset()) |
890 | * the counters and using adds, we set the counters | 890 | * the counters and using adds, we set the counters |
@@ -894,14 +894,16 @@ get_counters(const struct xt_table_info *t, | |||
894 | * if new softirq were to run and call ipt_do_table | 894 | * if new softirq were to run and call ipt_do_table |
895 | */ | 895 | */ |
896 | local_bh_disable(); | 896 | local_bh_disable(); |
897 | curcpu = smp_processor_id(); | ||
898 | |||
899 | i = 0; | 897 | i = 0; |
900 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { | 898 | xt_entry_foreach(iter, t->entries[curcpu], t->size) { |
901 | SET_COUNTER(counters[i], iter->counters.bcnt, | 899 | SET_COUNTER(counters[i], iter->counters.bcnt, |
902 | iter->counters.pcnt); | 900 | iter->counters.pcnt); |
903 | ++i; | 901 | ++i; |
904 | } | 902 | } |
903 | local_bh_enable(); | ||
904 | /* Processing counters from other cpus, we can let bottom half enabled, | ||
905 | * (preemption is disabled) | ||
906 | */ | ||
905 | 907 | ||
906 | for_each_possible_cpu(cpu) { | 908 | for_each_possible_cpu(cpu) { |
907 | if (cpu == curcpu) | 909 | if (cpu == curcpu) |
@@ -915,7 +917,7 @@ get_counters(const struct xt_table_info *t, | |||
915 | } | 917 | } |
916 | xt_info_wrunlock(cpu); | 918 | xt_info_wrunlock(cpu); |
917 | } | 919 | } |
918 | local_bh_enable(); | 920 | put_cpu(); |
919 | } | 921 | } |
920 | 922 | ||
921 | static struct xt_counters *alloc_counters(const struct xt_table *table) | 923 | static struct xt_counters *alloc_counters(const struct xt_table *table) |
@@ -928,7 +930,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) | |||
928 | (other than comefrom, which userspace doesn't care | 930 | (other than comefrom, which userspace doesn't care |
929 | about). */ | 931 | about). */ |
930 | countersize = sizeof(struct xt_counters) * private->number; | 932 | countersize = sizeof(struct xt_counters) * private->number; |
931 | counters = vmalloc_node(countersize, numa_node_id()); | 933 | counters = vmalloc(countersize); |
932 | 934 | ||
933 | if (counters == NULL) | 935 | if (counters == NULL) |
934 | return ERR_PTR(-ENOMEM); | 936 | return ERR_PTR(-ENOMEM); |
@@ -1352,7 +1354,7 @@ do_add_counters(struct net *net, const void __user *user, | |||
1352 | if (len != size + num_counters * sizeof(struct xt_counters)) | 1354 | if (len != size + num_counters * sizeof(struct xt_counters)) |
1353 | return -EINVAL; | 1355 | return -EINVAL; |
1354 | 1356 | ||
1355 | paddc = vmalloc_node(len - size, numa_node_id()); | 1357 | paddc = vmalloc(len - size); |
1356 | if (!paddc) | 1358 | if (!paddc) |
1357 | return -ENOMEM; | 1359 | return -ENOMEM; |
1358 | 1360 | ||
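Both get_counters() conversions trade a local_bh_disable()/local_bh_enable() bracket plus smp_processor_id() for get_cpu()/put_cpu() around the whole walk: preemption stays off for the duration, while softirqs stay enabled as the other CPUs' counters are read under their per-cpu locks. The pattern in miniature:

int cpu = get_cpu();    /* disables preemption, returns this CPU's id */

/* ... read this CPU's counters directly, then every other CPU's under
 * its xt_info write lock; bottom halves remain enabled throughout ... */

put_cpu();              /* re-enables preemption */
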
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index f91c94b9a790..3a43cf36db87 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c | |||
@@ -53,12 +53,13 @@ struct clusterip_config { | |||
53 | #endif | 53 | #endif |
54 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | 54 | enum clusterip_hashmode hash_mode; /* which hashing mode */ |
55 | u_int32_t hash_initval; /* hash initialization */ | 55 | u_int32_t hash_initval; /* hash initialization */ |
56 | struct rcu_head rcu; | ||
56 | }; | 57 | }; |
57 | 58 | ||
58 | static LIST_HEAD(clusterip_configs); | 59 | static LIST_HEAD(clusterip_configs); |
59 | 60 | ||
60 | /* clusterip_lock protects the clusterip_configs list */ | 61 | /* clusterip_lock protects the clusterip_configs list */ |
61 | static DEFINE_RWLOCK(clusterip_lock); | 62 | static DEFINE_SPINLOCK(clusterip_lock); |
62 | 63 | ||
63 | #ifdef CONFIG_PROC_FS | 64 | #ifdef CONFIG_PROC_FS |
64 | static const struct file_operations clusterip_proc_fops; | 65 | static const struct file_operations clusterip_proc_fops; |
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c) | |||
71 | atomic_inc(&c->refcount); | 72 | atomic_inc(&c->refcount); |
72 | } | 73 | } |
73 | 74 | ||
75 | |||
76 | static void clusterip_config_rcu_free(struct rcu_head *head) | ||
77 | { | ||
78 | kfree(container_of(head, struct clusterip_config, rcu)); | ||
79 | } | ||
80 | |||
74 | static inline void | 81 | static inline void |
75 | clusterip_config_put(struct clusterip_config *c) | 82 | clusterip_config_put(struct clusterip_config *c) |
76 | { | 83 | { |
77 | if (atomic_dec_and_test(&c->refcount)) | 84 | if (atomic_dec_and_test(&c->refcount)) |
78 | kfree(c); | 85 | call_rcu_bh(&c->rcu, clusterip_config_rcu_free); |
79 | } | 86 | } |
80 | 87 | ||
81 | /* decrease the count of entries using/referencing this config. If last | 88 | /* decrease the count of entries using/referencing this config. If last |
@@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c) | |||
84 | static inline void | 91 | static inline void |
85 | clusterip_config_entry_put(struct clusterip_config *c) | 92 | clusterip_config_entry_put(struct clusterip_config *c) |
86 | { | 93 | { |
87 | write_lock_bh(&clusterip_lock); | 94 | local_bh_disable(); |
88 | if (atomic_dec_and_test(&c->entries)) { | 95 | if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { |
89 | list_del(&c->list); | 96 | list_del_rcu(&c->list); |
90 | write_unlock_bh(&clusterip_lock); | 97 | spin_unlock(&clusterip_lock); |
98 | local_bh_enable(); | ||
91 | 99 | ||
92 | dev_mc_del(c->dev, c->clustermac); | 100 | dev_mc_del(c->dev, c->clustermac); |
93 | dev_put(c->dev); | 101 | dev_put(c->dev); |
@@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c) | |||
100 | #endif | 108 | #endif |
101 | return; | 109 | return; |
102 | } | 110 | } |
103 | write_unlock_bh(&clusterip_lock); | 111 | local_bh_enable(); |
104 | } | 112 | } |
105 | 113 | ||
106 | static struct clusterip_config * | 114 | static struct clusterip_config * |
@@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip) | |||
108 | { | 116 | { |
109 | struct clusterip_config *c; | 117 | struct clusterip_config *c; |
110 | 118 | ||
111 | list_for_each_entry(c, &clusterip_configs, list) { | 119 | list_for_each_entry_rcu(c, &clusterip_configs, list) { |
112 | if (c->clusterip == clusterip) | 120 | if (c->clusterip == clusterip) |
113 | return c; | 121 | return c; |
114 | } | 122 | } |
@@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry) | |||
121 | { | 129 | { |
122 | struct clusterip_config *c; | 130 | struct clusterip_config *c; |
123 | 131 | ||
124 | read_lock_bh(&clusterip_lock); | 132 | rcu_read_lock_bh(); |
125 | c = __clusterip_config_find(clusterip); | 133 | c = __clusterip_config_find(clusterip); |
126 | if (!c) { | 134 | if (c) { |
127 | read_unlock_bh(&clusterip_lock); | 135 | if (unlikely(!atomic_inc_not_zero(&c->refcount))) |
128 | return NULL; | 136 | c = NULL; |
137 | else if (entry) | ||
138 | atomic_inc(&c->entries); | ||
129 | } | 139 | } |
130 | atomic_inc(&c->refcount); | 140 | rcu_read_unlock_bh(); |
131 | if (entry) | ||
132 | atomic_inc(&c->entries); | ||
133 | read_unlock_bh(&clusterip_lock); | ||
134 | 141 | ||
135 | return c; | 142 | return c; |
136 | } | 143 | } |
@@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, | |||
181 | } | 188 | } |
182 | #endif | 189 | #endif |
183 | 190 | ||
184 | write_lock_bh(&clusterip_lock); | 191 | spin_lock_bh(&clusterip_lock); |
185 | list_add(&c->list, &clusterip_configs); | 192 | list_add_rcu(&c->list, &clusterip_configs); |
186 | write_unlock_bh(&clusterip_lock); | 193 | spin_unlock_bh(&clusterip_lock); |
187 | 194 | ||
188 | return c; | 195 | return c; |
189 | } | 196 | } |
@@ -462,7 +469,7 @@ struct arp_payload { | |||
462 | __be32 src_ip; | 469 | __be32 src_ip; |
463 | u_int8_t dst_hw[ETH_ALEN]; | 470 | u_int8_t dst_hw[ETH_ALEN]; |
464 | __be32 dst_ip; | 471 | __be32 dst_ip; |
465 | } __attribute__ ((packed)); | 472 | } __packed; |
466 | 473 | ||
467 | #ifdef DEBUG | 474 | #ifdef DEBUG |
468 | static void arp_print(struct arp_payload *payload) | 475 | static void arp_print(struct arp_payload *payload) |
@@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void) | |||
733 | #endif | 740 | #endif |
734 | nf_unregister_hook(&cip_arp_ops); | 741 | nf_unregister_hook(&cip_arp_ops); |
735 | xt_unregister_target(&clusterip_tg_reg); | 742 | xt_unregister_target(&clusterip_tg_reg); |
743 | |||
744 | /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ | ||
745 | rcu_barrier_bh(); | ||
736 | } | 746 | } |
737 | 747 | ||
738 | module_init(clusterip_tg_init); | 748 | module_init(clusterip_tg_init); |
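The CLUSTERIP rework is a textbook RCU-protected lookup: readers walk the list locklessly, take a reference only if the object is not already dying, frees are deferred through call_rcu_bh(), and module exit drains outstanding callbacks with rcu_barrier_bh(). The lookup half, condensed (a sketch assembled from the hunks above):

struct clusterip_config *find_get(__be32 clusterip)
{
        struct clusterip_config *c;

        rcu_read_lock_bh();
        c = __clusterip_config_find(clusterip);         /* _rcu list walk */
        if (c && !atomic_inc_not_zero(&c->refcount))
                c = NULL;       /* refcount already hit zero: being freed */
        rcu_read_unlock_bh();
        return c;               /* caller owns a reference, or NULL */
}
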
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 5234f4f3499a..915fc17d7ce2 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
15 | #include <linux/skbuff.h> | 15 | #include <linux/skbuff.h> |
16 | #include <linux/if_arp.h> | ||
16 | #include <linux/ip.h> | 17 | #include <linux/ip.h> |
17 | #include <net/icmp.h> | 18 | #include <net/icmp.h> |
18 | #include <net/udp.h> | 19 | #include <net/udp.h> |
@@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info, | |||
363 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ | 364 | /* maxlen = 230+ 91 + 230 + 252 = 803 */ |
364 | } | 365 | } |
365 | 366 | ||
367 | static void dump_mac_header(const struct nf_loginfo *info, | ||
368 | const struct sk_buff *skb) | ||
369 | { | ||
370 | struct net_device *dev = skb->dev; | ||
371 | unsigned int logflags = 0; | ||
372 | |||
373 | if (info->type == NF_LOG_TYPE_LOG) | ||
374 | logflags = info->u.log.logflags; | ||
375 | |||
376 | if (!(logflags & IPT_LOG_MACDECODE)) | ||
377 | goto fallback; | ||
378 | |||
379 | switch (dev->type) { | ||
380 | case ARPHRD_ETHER: | ||
381 | printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", | ||
382 | eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, | ||
383 | ntohs(eth_hdr(skb)->h_proto)); | ||
384 | return; | ||
385 | default: | ||
386 | break; | ||
387 | } | ||
388 | |||
389 | fallback: | ||
390 | printk("MAC="); | ||
391 | if (dev->hard_header_len && | ||
392 | skb->mac_header != skb->network_header) { | ||
393 | const unsigned char *p = skb_mac_header(skb); | ||
394 | unsigned int i; | ||
395 | |||
396 | printk("%02x", *p++); | ||
397 | for (i = 1; i < dev->hard_header_len; i++, p++) | ||
398 | printk(":%02x", *p); | ||
399 | } | ||
400 | printk(" "); | ||
401 | } | ||
402 | |||
366 | static struct nf_loginfo default_loginfo = { | 403 | static struct nf_loginfo default_loginfo = { |
367 | .type = NF_LOG_TYPE_LOG, | 404 | .type = NF_LOG_TYPE_LOG, |
368 | .u = { | 405 | .u = { |
@@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf, | |||
404 | } | 441 | } |
405 | #endif | 442 | #endif |
406 | 443 | ||
407 | if (in && !out) { | 444 | /* MAC logging for input path only. */ |
408 | /* MAC logging for input chain only. */ | 445 | if (in && !out) |
409 | printk("MAC="); | 446 | dump_mac_header(loginfo, skb); |
410 | if (skb->dev && skb->dev->hard_header_len && | ||
411 | skb->mac_header != skb->network_header) { | ||
412 | int i; | ||
413 | const unsigned char *p = skb_mac_header(skb); | ||
414 | for (i = 0; i < skb->dev->hard_header_len; i++,p++) | ||
415 | printk("%02x%c", *p, | ||
416 | i==skb->dev->hard_header_len - 1 | ||
417 | ? ' ':':'); | ||
418 | } else | ||
419 | printk(" "); | ||
420 | } | ||
421 | 447 | ||
422 | dump_packet(loginfo, skb, 0); | 448 | dump_packet(loginfo, skb, 0); |
423 | printk("\n"); | 449 | printk("\n"); |
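dump_mac_header() leans on the kernel's %pM printk extension, which renders a 6-byte buffer as a colon-separated MAC address and supersedes the manual byte loop, now kept only as the fallback for other link types. For instance (values illustrative):

unsigned char mac[6] = { 0x00, 0x1a, 0x2b, 0x3c, 0x4d, 0x5e };

printk("MACSRC=%pM\n", mac);    /* prints "MACSRC=00:1a:2b:3c:4d:5e" */
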
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index f43867d1697f..6cdb298f1035 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c | |||
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) | |||
48 | 48 | ||
49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || |
50 | par->hooknum == NF_INET_POST_ROUTING || | 50 | par->hooknum == NF_INET_POST_ROUTING || |
51 | par->hooknum == NF_INET_LOCAL_OUT); | 51 | par->hooknum == NF_INET_LOCAL_OUT || |
52 | par->hooknum == NF_INET_LOCAL_IN); | ||
52 | ct = nf_ct_get(skb, &ctinfo); | 53 | ct = nf_ct_get(skb, &ctinfo); |
53 | 54 | ||
54 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); | 55 | netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); |
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = { | |||
77 | .table = "nat", | 78 | .table = "nat", |
78 | .hooks = (1 << NF_INET_PRE_ROUTING) | | 79 | .hooks = (1 << NF_INET_PRE_ROUTING) | |
79 | (1 << NF_INET_POST_ROUTING) | | 80 | (1 << NF_INET_POST_ROUTING) | |
80 | (1 << NF_INET_LOCAL_OUT), | 81 | (1 << NF_INET_LOCAL_OUT) | |
82 | (1 << NF_INET_LOCAL_IN), | ||
81 | .checkentry = netmap_tg_check, | 83 | .checkentry = netmap_tg_check, |
82 | .me = THIS_MODULE | 84 | .me = THIS_MODULE |
83 | }; | 85 | }; |
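NETMAP's mapping mask, visible in the context above, falls out of the range endpoints: the bits that differ between min_ip and max_ip are exactly the ones to rewrite. A worked case (addresses illustrative):

/* range 192.168.0.0 .. 192.168.0.255:
 *   min ^ max    = 0x000000ff
 *   ~(min ^ max) = 0xffffff00   -> preserve the /24 host bits */
__be32 min_ip  = htonl(0xc0a80000);
__be32 max_ip  = htonl(0xc0a800ff);
__be32 netmask = ~(min_ip ^ max_ip);
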
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f5f4a888e4ec..b254dafaf429 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c | |||
@@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
95 | } | 95 | } |
96 | 96 | ||
97 | tcph->rst = 1; | 97 | tcph->rst = 1; |
98 | tcph->check = tcp_v4_check(sizeof(struct tcphdr), | 98 | tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, |
99 | niph->saddr, niph->daddr, | 99 | niph->daddr, 0); |
100 | csum_partial(tcph, | 100 | nskb->ip_summed = CHECKSUM_PARTIAL; |
101 | sizeof(struct tcphdr), 0)); | 101 | nskb->csum_start = (unsigned char *)tcph - nskb->head; |
102 | nskb->csum_offset = offsetof(struct tcphdr, check); | ||
102 | 103 | ||
103 | addr_type = RTN_UNSPEC; | 104 | addr_type = RTN_UNSPEC; |
104 | if (hook != NF_INET_FORWARD | 105 | if (hook != NF_INET_FORWARD |
@@ -109,13 +110,12 @@ static void send_reset(struct sk_buff *oldskb, int hook) | |||
109 | addr_type = RTN_LOCAL; | 110 | addr_type = RTN_LOCAL; |
110 | 111 | ||
111 | /* ip_route_me_harder expects skb->dst to be set */ | 112 | /* ip_route_me_harder expects skb->dst to be set */ |
112 | skb_dst_set(nskb, dst_clone(skb_dst(oldskb))); | 113 | skb_dst_set_noref(nskb, skb_dst(oldskb)); |
113 | 114 | ||
114 | if (ip_route_me_harder(nskb, addr_type)) | 115 | if (ip_route_me_harder(nskb, addr_type)) |
115 | goto free_nskb; | 116 | goto free_nskb; |
116 | 117 | ||
117 | niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); | 118 | niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); |
118 | nskb->ip_summed = CHECKSUM_NONE; | ||
119 | 119 | ||
120 | /* "Never happens" */ | 120 | /* "Never happens" */ |
121 | if (nskb->len > dst_mtu(skb_dst(nskb))) | 121 | if (nskb->len > dst_mtu(skb_dst(nskb))) |
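REJECT now routes the RST's TCP checksum through the normal offload path: the complemented pseudo-header sum seeds tcph->check, and csum_start/csum_offset tell the NIC (or the software fallback) where to finish the job. The three assignments, annotated (a sketch using skb for the new packet):

/* Seed with the folded pseudo-header sum; the device adds the rest. */
tcph->check = ~tcp_v4_check(sizeof(struct tcphdr),
                            niph->saddr, niph->daddr, 0);

skb->ip_summed   = CHECKSUM_PARTIAL;
skb->csum_start  = (unsigned char *)tcph - skb->head;    /* sum from here  */
skb->csum_offset = offsetof(struct tcphdr, check);       /* store it here  */
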
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index cb763ae9ed90..eab8de32f200 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c | |||
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, | |||
66 | const struct net_device *out, | 66 | const struct net_device *out, |
67 | int (*okfn)(struct sk_buff *)) | 67 | int (*okfn)(struct sk_buff *)) |
68 | { | 68 | { |
69 | struct inet_sock *inet = inet_sk(skb->sk); | ||
70 | |||
71 | if (inet && inet->nodefrag) | ||
72 | return NF_ACCEPT; | ||
73 | |||
69 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | 74 | #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) |
70 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) | 75 | #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) |
71 | /* Previously seen (loopback)? Ignore. Do this before | 76 | /* Previously seen (loopback)? Ignore. Do this before |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4f8bddb760c9..8c8632d9b93c 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
261 | rcu_read_lock(); | 261 | rcu_read_lock(); |
262 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); | 262 | proto = __nf_nat_proto_find(orig_tuple->dst.protonum); |
263 | 263 | ||
264 | /* Change protocol info to have some randomization */ | ||
265 | if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { | ||
266 | proto->unique_tuple(tuple, range, maniptype, ct); | ||
267 | goto out; | ||
268 | } | ||
269 | |||
270 | /* Only bother mapping if it's not already in range and unique */ | 264 | /* Only bother mapping if it's not already in range and unique */ |
271 | if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || | 265 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && |
266 | (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || | ||
272 | proto->in_range(tuple, maniptype, &range->min, &range->max)) && | 267 | proto->in_range(tuple, maniptype, &range->min, &range->max)) && |
273 | !nf_nat_used_tuple(tuple, ct)) | 268 | !nf_nat_used_tuple(tuple, ct)) |
274 | goto out; | 269 | goto out; |
@@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
440 | if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) | 435 | if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) |
441 | return 0; | 436 | return 0; |
442 | 437 | ||
443 | inside = (void *)skb->data + ip_hdrlen(skb); | 438 | inside = (void *)skb->data + hdrlen; |
444 | 439 | ||
445 | /* We're actually going to mangle it beyond trivial checksum | 440 | /* We're actually going to mangle it beyond trivial checksum |
446 | adjustment, so make sure the current checksum is correct. */ | 441 | adjustment, so make sure the current checksum is correct. */ |
@@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
470 | /* rcu_read_lock()ed by nf_hook_slow */ | 465 | /* rcu_read_lock()ed by nf_hook_slow */ |
471 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); | 466 | l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); |
472 | 467 | ||
473 | if (!nf_ct_get_tuple(skb, | 468 | if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), |
474 | ip_hdrlen(skb) + sizeof(struct icmphdr), | 469 | (hdrlen + |
475 | (ip_hdrlen(skb) + | ||
476 | sizeof(struct icmphdr) + inside->ip.ihl * 4), | 470 | sizeof(struct icmphdr) + inside->ip.ihl * 4), |
477 | (u_int16_t)AF_INET, | 471 | (u_int16_t)AF_INET, inside->ip.protocol, |
478 | inside->ip.protocol, | ||
479 | &inner, l3proto, l4proto)) | 472 | &inner, l3proto, l4proto)) |
480 | return 0; | 473 | return 0; |
481 | 474 | ||
@@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, | |||
484 | pass all hooks (locally-generated ICMP). Consider incoming | 477 | pass all hooks (locally-generated ICMP). Consider incoming |
485 | packet: PREROUTING (DST manip), routing produces ICMP, goes | 478 | packet: PREROUTING (DST manip), routing produces ICMP, goes |
486 | through POSTROUTING (which must correct the DST manip). */ | 479 | through POSTROUTING (which must correct the DST manip). */ |
487 | if (!manip_pkt(inside->ip.protocol, skb, | 480 | if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), |
488 | ip_hdrlen(skb) + sizeof(inside->icmp), | 481 | &ct->tuplehash[!dir].tuple, !manip)) |
489 | &ct->tuplehash[!dir].tuple, | ||
490 | !manip)) | ||
491 | return 0; | 482 | return 0; |
492 | 483 | ||
493 | if (skb->ip_summed != CHECKSUM_PARTIAL) { | 484 | if (skb->ip_summed != CHECKSUM_PARTIAL) { |
494 | /* Reloading "inside" here since manip_pkt inner. */ | 485 | /* Reloading "inside" here since manip_pkt inner. */ |
495 | inside = (void *)skb->data + ip_hdrlen(skb); | 486 | inside = (void *)skb->data + hdrlen; |
496 | inside->icmp.checksum = 0; | 487 | inside->icmp.checksum = 0; |
497 | inside->icmp.checksum = | 488 | inside->icmp.checksum = |
498 | csum_fold(skb_checksum(skb, hdrlen, | 489 | csum_fold(skb_checksum(skb, hdrlen, |
@@ -742,7 +733,7 @@ static int __init nf_nat_init(void) | |||
742 | spin_unlock_bh(&nf_nat_lock); | 733 | spin_unlock_bh(&nf_nat_lock); |
743 | 734 | ||
744 | /* Initialize fake conntrack so that NAT will skip it */ | 735 | /* Initialize fake conntrack so that NAT will skip it */ |
745 | nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; | 736 | nf_ct_untracked_status_or(IPS_NAT_DONE_MASK); |
746 | 737 | ||
747 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); | 738 | l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); |
748 | 739 | ||
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index 6c4f11f51446..3e61faf23a9a 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c | |||
@@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, | |||
34 | } | 34 | } |
35 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); | 35 | EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); |
36 | 36 | ||
37 | bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | 37 | void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, |
38 | const struct nf_nat_range *range, | 38 | const struct nf_nat_range *range, |
39 | enum nf_nat_manip_type maniptype, | 39 | enum nf_nat_manip_type maniptype, |
40 | const struct nf_conn *ct, | 40 | const struct nf_conn *ct, |
@@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
53 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { | 53 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { |
54 | /* If it's dst rewrite, can't change port */ | 54 | /* If it's dst rewrite, can't change port */ |
55 | if (maniptype == IP_NAT_MANIP_DST) | 55 | if (maniptype == IP_NAT_MANIP_DST) |
56 | return false; | 56 | return; |
57 | 57 | ||
58 | if (ntohs(*portptr) < 1024) { | 58 | if (ntohs(*portptr) < 1024) { |
59 | /* Loose convention: >> 512 is credential passing */ | 59 | /* Loose convention: >> 512 is credential passing */ |
@@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
81 | else | 81 | else |
82 | off = *rover; | 82 | off = *rover; |
83 | 83 | ||
84 | for (i = 0; i < range_size; i++, off++) { | 84 | for (i = 0; ; ++off) { |
85 | *portptr = htons(min + off % range_size); | 85 | *portptr = htons(min + off % range_size); |
86 | if (nf_nat_used_tuple(tuple, ct)) | 86 | if (++i != range_size && nf_nat_used_tuple(tuple, ct)) |
87 | continue; | 87 | continue; |
88 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) | 88 | if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) |
89 | *rover = off; | 89 | *rover = off; |
90 | return true; | 90 | return; |
91 | } | 91 | } |
92 | return false; | 92 | return; |
93 | } | 93 | } |
94 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); | 94 | EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); |
95 | 95 | ||
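With the return type now void, the helper can no longer report failure, so the search loop was restructured to accept the final candidate unconditionally; the `++i != range_size` test simply stops consulting nf_nat_used_tuple() on the last iteration. Annotated (same logic as the hunk above):

for (i = 0; ; ++off) {
        *portptr = htons(min + off % range_size);
        if (++i != range_size && nf_nat_used_tuple(tuple, ct))
                continue;       /* port busy and candidates remain */
        /* free port found, or the last candidate taken as a best effort */
        if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
                *rover = off;
        return;
}
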
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 22485ce306d4..570faf2667b2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c | |||
@@ -22,14 +22,14 @@ | |||
22 | 22 | ||
23 | static u_int16_t dccp_port_rover; | 23 | static u_int16_t dccp_port_rover; |
24 | 24 | ||
25 | static bool | 25 | static void |
26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, | 26 | dccp_unique_tuple(struct nf_conntrack_tuple *tuple, |
27 | const struct nf_nat_range *range, | 27 | const struct nf_nat_range *range, |
28 | enum nf_nat_manip_type maniptype, | 28 | enum nf_nat_manip_type maniptype, |
29 | const struct nf_conn *ct) | 29 | const struct nf_conn *ct) |
30 | { | 30 | { |
31 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 31 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
32 | &dccp_port_rover); | 32 | &dccp_port_rover); |
33 | } | 33 | } |
34 | 34 | ||
35 | static bool | 35 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index d7e89201351e..bc8d83a31c73 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c | |||
@@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); | |||
37 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); | 37 | MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); |
38 | 38 | ||
39 | /* generate unique tuple ... */ | 39 | /* generate unique tuple ... */ |
40 | static bool | 40 | static void |
41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, | 41 | gre_unique_tuple(struct nf_conntrack_tuple *tuple, |
42 | const struct nf_nat_range *range, | 42 | const struct nf_nat_range *range, |
43 | enum nf_nat_manip_type maniptype, | 43 | enum nf_nat_manip_type maniptype, |
@@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
50 | /* If there is no master conntrack we are not PPTP, | 50 | /* If there is no master conntrack we are not PPTP, |
51 | do not change tuples */ | 51 | do not change tuples */ |
52 | if (!ct->master) | 52 | if (!ct->master) |
53 | return false; | 53 | return; |
54 | 54 | ||
55 | if (maniptype == IP_NAT_MANIP_SRC) | 55 | if (maniptype == IP_NAT_MANIP_SRC) |
56 | keyptr = &tuple->src.u.gre.key; | 56 | keyptr = &tuple->src.u.gre.key; |
@@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
68 | 68 | ||
69 | pr_debug("min = %u, range_size = %u\n", min, range_size); | 69 | pr_debug("min = %u, range_size = %u\n", min, range_size); |
70 | 70 | ||
71 | for (i = 0; i < range_size; i++, key++) { | 71 | for (i = 0; ; ++key) { |
72 | *keyptr = htons(min + key % range_size); | 72 | *keyptr = htons(min + key % range_size); |
73 | if (!nf_nat_used_tuple(tuple, ct)) | 73 | if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) |
74 | return true; | 74 | return; |
75 | } | 75 | } |
76 | 76 | ||
77 | pr_debug("%p: no NAT mapping\n", ct); | 77 | pr_debug("%p: no NAT mapping\n", ct); |
78 | return false; | 78 | return; |
79 | } | 79 | } |
80 | 80 | ||
81 | /* manipulate a GRE packet according to maniptype */ | 81 | /* manipulate a GRE packet according to maniptype */ |
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 19a8b0b07d8e..5744c3ec847c 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c | |||
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, | |||
27 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); | 27 | ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); |
28 | } | 28 | } |
29 | 29 | ||
30 | static bool | 30 | static void |
31 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | 31 | icmp_unique_tuple(struct nf_conntrack_tuple *tuple, |
32 | const struct nf_nat_range *range, | 32 | const struct nf_nat_range *range, |
33 | enum nf_nat_manip_type maniptype, | 33 | enum nf_nat_manip_type maniptype, |
@@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, | |||
42 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) | 42 | if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) |
43 | range_size = 0xFFFF; | 43 | range_size = 0xFFFF; |
44 | 44 | ||
45 | for (i = 0; i < range_size; i++, id++) { | 45 | for (i = 0; ; ++id) { |
46 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + | 46 | tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + |
47 | (id % range_size)); | 47 | (id % range_size)); |
48 | if (!nf_nat_used_tuple(tuple, ct)) | 48 | if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) |
49 | return true; | 49 | return; |
50 | } | 50 | } |
51 | return false; | 51 | return; |
52 | } | 52 | } |
53 | 53 | ||
54 | static bool | 54 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index 3fc598eeeb1a..756331d42661 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c | |||
@@ -16,14 +16,14 @@ | |||
16 | 16 | ||
17 | static u_int16_t nf_sctp_port_rover; | 17 | static u_int16_t nf_sctp_port_rover; |
18 | 18 | ||
19 | static bool | 19 | static void |
20 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, | 20 | sctp_unique_tuple(struct nf_conntrack_tuple *tuple, |
21 | const struct nf_nat_range *range, | 21 | const struct nf_nat_range *range, |
22 | enum nf_nat_manip_type maniptype, | 22 | enum nf_nat_manip_type maniptype, |
23 | const struct nf_conn *ct) | 23 | const struct nf_conn *ct) |
24 | { | 24 | { |
25 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 25 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
26 | &nf_sctp_port_rover); | 26 | &nf_sctp_port_rover); |
27 | } | 27 | } |
28 | 28 | ||
29 | static bool | 29 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 399e2cfa263b..aa460a595d5d 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c | |||
@@ -20,14 +20,13 @@ | |||
20 | 20 | ||
21 | static u_int16_t tcp_port_rover; | 21 | static u_int16_t tcp_port_rover; |
22 | 22 | ||
23 | static bool | 23 | static void |
24 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, | 24 | tcp_unique_tuple(struct nf_conntrack_tuple *tuple, |
25 | const struct nf_nat_range *range, | 25 | const struct nf_nat_range *range, |
26 | enum nf_nat_manip_type maniptype, | 26 | enum nf_nat_manip_type maniptype, |
27 | const struct nf_conn *ct) | 27 | const struct nf_conn *ct) |
28 | { | 28 | { |
29 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 29 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); |
30 | &tcp_port_rover); | ||
31 | } | 30 | } |
32 | 31 | ||
33 | static bool | 32 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 9e61c79492e4..dfe65c7e2925 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c | |||
@@ -19,14 +19,13 @@ | |||
19 | 19 | ||
20 | static u_int16_t udp_port_rover; | 20 | static u_int16_t udp_port_rover; |
21 | 21 | ||
22 | static bool | 22 | static void |
23 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, | 23 | udp_unique_tuple(struct nf_conntrack_tuple *tuple, |
24 | const struct nf_nat_range *range, | 24 | const struct nf_nat_range *range, |
25 | enum nf_nat_manip_type maniptype, | 25 | enum nf_nat_manip_type maniptype, |
26 | const struct nf_conn *ct) | 26 | const struct nf_conn *ct) |
27 | { | 27 | { |
28 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 28 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); |
29 | &udp_port_rover); | ||
30 | } | 29 | } |
31 | 30 | ||
32 | static bool | 31 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index 440a229bbd87..3cc8c8af39ef 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c | |||
@@ -18,14 +18,14 @@ | |||
18 | 18 | ||
19 | static u_int16_t udplite_port_rover; | 19 | static u_int16_t udplite_port_rover; |
20 | 20 | ||
21 | static bool | 21 | static void |
22 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, | 22 | udplite_unique_tuple(struct nf_conntrack_tuple *tuple, |
23 | const struct nf_nat_range *range, | 23 | const struct nf_nat_range *range, |
24 | enum nf_nat_manip_type maniptype, | 24 | enum nf_nat_manip_type maniptype, |
25 | const struct nf_conn *ct) | 25 | const struct nf_conn *ct) |
26 | { | 26 | { |
27 | return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, | 27 | nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, |
28 | &udplite_port_rover); | 28 | &udplite_port_rover); |
29 | } | 29 | } |
30 | 30 | ||
31 | static bool | 31 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index 14381c62acea..a50f2bc1c732 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c | |||
@@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, | |||
26 | return true; | 26 | return true; |
27 | } | 27 | } |
28 | 28 | ||
29 | static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, | 29 | static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, |
30 | const struct nf_nat_range *range, | 30 | const struct nf_nat_range *range, |
31 | enum nf_nat_manip_type maniptype, | 31 | enum nf_nat_manip_type maniptype, |
32 | const struct nf_conn *ct) | 32 | const struct nf_conn *ct) |
33 | { | 33 | { |
34 | /* Sorry: we can't help you; if it's not unique, we can't frob | 34 | /* Sorry: we can't help you; if it's not unique, we can't frob |
35 | anything. */ | 35 | anything. */ |
36 | return false; | 36 | return; |
37 | } | 37 | } |
38 | 38 | ||
39 | static bool | 39 | static bool |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 98ed78281aee..ebbd319f62f5 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -28,7 +28,8 @@ | |||
28 | 28 | ||
29 | #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ | 29 | #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ |
30 | (1 << NF_INET_POST_ROUTING) | \ | 30 | (1 << NF_INET_POST_ROUTING) | \ |
31 | (1 << NF_INET_LOCAL_OUT)) | 31 | (1 << NF_INET_LOCAL_OUT) | \ |
32 | (1 << NF_INET_LOCAL_IN)) | ||
32 | 33 | ||
33 | static const struct xt_table nat_table = { | 34 | static const struct xt_table nat_table = { |
34 | .name = "nat", | 35 | .name = "nat", |
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) | |||
45 | enum ip_conntrack_info ctinfo; | 46 | enum ip_conntrack_info ctinfo; |
46 | const struct nf_nat_multi_range_compat *mr = par->targinfo; | 47 | const struct nf_nat_multi_range_compat *mr = par->targinfo; |
47 | 48 | ||
48 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); | 49 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || |
50 | par->hooknum == NF_INET_LOCAL_IN); | ||
49 | 51 | ||
50 | ct = nf_ct_get(skb, &ctinfo); | 52 | ct = nf_ct_get(skb, &ctinfo); |
51 | 53 | ||
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) | |||
99 | return 0; | 101 | return 0; |
100 | } | 102 | } |
101 | 103 | ||
102 | unsigned int | 104 | static unsigned int |
103 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) | 105 | alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) |
104 | { | 106 | { |
105 | /* Force range to this IP; let proto decide mapping for | 107 | /* Force range to this IP; let proto decide mapping for |
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { | |||
141 | .target = ipt_snat_target, | 143 | .target = ipt_snat_target, |
142 | .targetsize = sizeof(struct nf_nat_multi_range_compat), | 144 | .targetsize = sizeof(struct nf_nat_multi_range_compat), |
143 | .table = "nat", | 145 | .table = "nat", |
144 | .hooks = 1 << NF_INET_POST_ROUTING, | 146 | .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), |
145 | .checkentry = ipt_snat_checkentry, | 147 | .checkentry = ipt_snat_checkentry, |
146 | .family = AF_INET, | 148 | .family = AF_INET, |
147 | }; | 149 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index beb25819c9c9..95481fee8bdb 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum, | |||
98 | return NF_ACCEPT; | 98 | return NF_ACCEPT; |
99 | 99 | ||
100 | /* Don't try to NAT if this packet is not conntracked */ | 100 | /* Don't try to NAT if this packet is not conntracked */ |
101 | if (ct == &nf_conntrack_untracked) | 101 | if (nf_ct_is_untracked(ct)) |
102 | return NF_ACCEPT; | 102 | return NF_ACCEPT; |
103 | 103 | ||
104 | nat = nfct_nat(ct); | 104 | nat = nfct_nat(ct); |
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum, | |||
131 | if (!nf_nat_initialized(ct, maniptype)) { | 131 | if (!nf_nat_initialized(ct, maniptype)) { |
132 | unsigned int ret; | 132 | unsigned int ret; |
133 | 133 | ||
134 | if (hooknum == NF_INET_LOCAL_IN) | 134 | ret = nf_nat_rule_find(skb, hooknum, in, out, ct); |
135 | /* LOCAL_IN hook doesn't have a chain! */ | ||
136 | ret = alloc_null_binding(ct, hooknum); | ||
137 | else | ||
138 | ret = nf_nat_rule_find(skb, hooknum, in, out, | ||
139 | ct); | ||
140 | |||
141 | if (ret != NF_ACCEPT) | 135 | if (ret != NF_ACCEPT) |
142 | return ret; | 136 | return ret; |
143 | } else | 137 | } else |
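nf_ct_is_untracked() replaces an identity comparison against the single global nf_conntrack_untracked object, which this series turns into per-cpu data; being untracked becomes a status bit rather than an address. The helper is presumably along these lines (a sketch; IPS_UNTRACKED_BIT is the flag this series introduces, as also used by nf_ct_untracked_status_or() above):

static inline bool nf_ct_is_untracked(const struct nf_conn *ct)
{
        return test_bit(IPS_UNTRACKED_BIT, &ct->status);
}
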
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3dc9914c1dce..4ae1f203f7cb 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = { | |||
252 | SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), | 252 | SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP), |
253 | SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), | 253 | SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP), |
254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), | 254 | SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP), |
255 | SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER), | ||
255 | SNMP_MIB_SENTINEL | 256 | SNMP_MIB_SENTINEL |
256 | }; | 257 | }; |
257 | 258 | ||
@@ -342,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v) | |||
342 | IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, | 343 | IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, |
343 | sysctl_ip_default_ttl); | 344 | sysctl_ip_default_ttl); |
344 | 345 | ||
346 | BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); | ||
345 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | 347 | for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) |
346 | seq_printf(seq, " %lu", | 348 | seq_printf(seq, " %llu", |
347 | snmp_fold_field((void __percpu **)net->mib.ip_statistics, | 349 | snmp_fold_field64((void __percpu **)net->mib.ip_statistics, |
348 | snmp4_ipstats_list[i].entry)); | 350 | snmp4_ipstats_list[i].entry, |
351 | offsetof(struct ipstats_mib, syncp))); | ||
349 | 352 | ||
350 | icmp_put(seq); /* RFC 2011 compatibility */ | 353 | icmp_put(seq); /* RFC 2011 compatibility */ |
351 | icmpmsg_put(seq); | 354 | icmpmsg_put(seq); |
@@ -431,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |||
431 | 434 | ||
432 | seq_puts(seq, "\nIpExt:"); | 435 | seq_puts(seq, "\nIpExt:"); |
433 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) | 436 | for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) |
434 | seq_printf(seq, " %lu", | 437 | seq_printf(seq, " %llu", |
435 | snmp_fold_field((void __percpu **)net->mib.ip_statistics, | 438 | snmp_fold_field64((void __percpu **)net->mib.ip_statistics, |
436 | snmp4_ipextstats_list[i].entry)); | 439 | snmp4_ipextstats_list[i].entry, |
440 | offsetof(struct ipstats_mib, syncp))); | ||
437 | 441 | ||
438 | seq_putc(seq, '\n'); | 442 | seq_putc(seq, '\n'); |
439 | return 0; | 443 | return 0; |
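
Both loops switch from %lu with snmp_fold_field() to %llu with snmp_fold_field64(), which sums full 64-bit per-CPU counters and uses the syncp member (located via the offsetof) so 32-bit readers can detect and retry torn reads. A single-threaded sketch of that retry shape, assuming an even/odd sequence counter; the real code uses u64_stats_sync with proper memory barriers:

#include <stdint.h>
#include <stdio.h>

/* Toy per-CPU stat block: a sequence counter guards the 64-bit
 * counters so a 32-bit reader never sums a torn value. */
struct ipstats_mib {
        uint64_t mibs[4];
        unsigned int syncp;     /* even = stable, odd = writer active */
};

static uint64_t fold_field64(const struct ipstats_mib *percpu, int ncpus, int idx)
{
        uint64_t sum = 0;

        for (int cpu = 0; cpu < ncpus; cpu++) {
                const struct ipstats_mib *m = &percpu[cpu];
                unsigned int seq;
                uint64_t v;

                do {            /* retry while a writer is mid-update */
                        seq = m->syncp;
                        v = m->mibs[idx];
                } while ((seq & 1) || seq != m->syncp);
                sum += v;
        }
        return sum;
}

int main(void)
{
        struct ipstats_mib percpu[2] = {
                { .mibs = { 5, 0, 0, 0 }, .syncp = 2 },
                { .mibs = { 7, 0, 0, 0 }, .syncp = 4 },
        };

        printf("folded = %llu\n",
               (unsigned long long)fold_field64(percpu, 2, 0));
        return 0;
}
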
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 542f22fc98b3..f2d297351405 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c | |||
@@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) | |||
52 | 52 | ||
53 | return ret; | 53 | return ret; |
54 | } | 54 | } |
55 | EXPORT_SYMBOL(inet_add_protocol); | ||
55 | 56 | ||
56 | /* | 57 | /* |
57 | * Remove a protocol from the hash tables. | 58 | * Remove a protocol from the hash tables. |
@@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) | |||
76 | 77 | ||
77 | return ret; | 78 | return ret; |
78 | } | 79 | } |
79 | |||
80 | EXPORT_SYMBOL(inet_add_protocol); | ||
81 | EXPORT_SYMBOL(inet_del_protocol); | 80 | EXPORT_SYMBOL(inet_del_protocol); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c7a1639388a..009a7b2aa1ef 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb) | |||
314 | } | 314 | } |
315 | 315 | ||
316 | static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | 316 | static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, |
317 | struct rtable *rt, | 317 | struct rtable **rtp, |
318 | unsigned int flags) | 318 | unsigned int flags) |
319 | { | 319 | { |
320 | struct inet_sock *inet = inet_sk(sk); | 320 | struct inet_sock *inet = inet_sk(sk); |
@@ -323,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
323 | struct sk_buff *skb; | 323 | struct sk_buff *skb; |
324 | unsigned int iphlen; | 324 | unsigned int iphlen; |
325 | int err; | 325 | int err; |
326 | struct rtable *rt = *rtp; | ||
326 | 327 | ||
327 | if (length > rt->u.dst.dev->mtu) { | 328 | if (length > rt->dst.dev->mtu) { |
328 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, | 329 | ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, |
329 | rt->u.dst.dev->mtu); | 330 | rt->dst.dev->mtu); |
330 | return -EMSGSIZE; | 331 | return -EMSGSIZE; |
331 | } | 332 | } |
332 | if (flags&MSG_PROBE) | 333 | if (flags&MSG_PROBE) |
333 | goto out; | 334 | goto out; |
334 | 335 | ||
335 | skb = sock_alloc_send_skb(sk, | 336 | skb = sock_alloc_send_skb(sk, |
336 | length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, | 337 | length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, |
337 | flags & MSG_DONTWAIT, &err); | 338 | flags & MSG_DONTWAIT, &err); |
338 | if (skb == NULL) | 339 | if (skb == NULL) |
339 | goto error; | 340 | goto error; |
340 | skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); | 341 | skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); |
341 | 342 | ||
342 | skb->priority = sk->sk_priority; | 343 | skb->priority = sk->sk_priority; |
343 | skb->mark = sk->sk_mark; | 344 | skb->mark = sk->sk_mark; |
344 | skb_dst_set(skb, dst_clone(&rt->u.dst)); | 345 | skb_dst_set(skb, &rt->dst); |
346 | *rtp = NULL; | ||
345 | 347 | ||
346 | skb_reset_network_header(skb); | 348 | skb_reset_network_header(skb); |
347 | iph = ip_hdr(skb); | 349 | iph = ip_hdr(skb); |
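
raw_send_hdrinc() now receives struct rtable **rtp, and instead of cloning the route for the skb it hands the caller's reference straight to skb_dst_set() and NULLs *rtp, leaving exactly one party responsible for releasing it. A toy version of that ownership transfer (names here are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct route { int refcnt; };

/* Consume the caller's reference: attach it to the packet and NULL the
 * caller's pointer so nobody drops it twice. Mirrors the **rtp change
 * above, where skb_dst_set() now steals the ref instead of cloning it. */
static void packet_attach_route(struct route **rtp, struct route **pkt_dst)
{
        *pkt_dst = *rtp;        /* ownership moves to the packet */
        *rtp = NULL;            /* caller must not release it anymore */
}

int main(void)
{
        struct route *rt = malloc(sizeof(*rt));
        struct route *pkt_dst = NULL;

        rt->refcnt = 1;
        packet_attach_route(&rt, &pkt_dst);

        if (rt == NULL)
                printf("caller's handle cleared; packet owns the route\n");
        free(pkt_dst);          /* single owner, single release */
        return 0;
}
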
@@ -373,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
373 | iph->check = 0; | 375 | iph->check = 0; |
374 | iph->tot_len = htons(length); | 376 | iph->tot_len = htons(length); |
375 | if (!iph->id) | 377 | if (!iph->id) |
376 | ip_select_ident(iph, &rt->u.dst, NULL); | 378 | ip_select_ident(iph, &rt->dst, NULL); |
377 | 379 | ||
378 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); | 380 | iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); |
379 | } | 381 | } |
@@ -382,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
382 | skb_transport_header(skb))->type); | 384 | skb_transport_header(skb))->type); |
383 | 385 | ||
384 | err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, | 386 | err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, |
385 | rt->u.dst.dev, dst_output); | 387 | rt->dst.dev, dst_output); |
386 | if (err > 0) | 388 | if (err > 0) |
387 | err = net_xmit_errno(err); | 389 | err = net_xmit_errno(err); |
388 | if (err) | 390 | if (err) |
@@ -576,7 +578,7 @@ back_from_confirm: | |||
576 | 578 | ||
577 | if (inet->hdrincl) | 579 | if (inet->hdrincl) |
578 | err = raw_send_hdrinc(sk, msg->msg_iov, len, | 580 | err = raw_send_hdrinc(sk, msg->msg_iov, len, |
579 | rt, msg->msg_flags); | 581 | &rt, msg->msg_flags); |
580 | 582 | ||
581 | else { | 583 | else { |
582 | if (!ipc.addr) | 584 | if (!ipc.addr) |
@@ -604,7 +606,7 @@ out: | |||
604 | return len; | 606 | return len; |
605 | 607 | ||
606 | do_confirm: | 608 | do_confirm: |
607 | dst_confirm(&rt->u.dst); | 609 | dst_confirm(&rt->dst); |
608 | if (!(msg->msg_flags & MSG_PROBE) || len) | 610 | if (!(msg->msg_flags & MSG_PROBE) || len) |
609 | goto back_from_confirm; | 611 | goto back_from_confirm; |
610 | err = 0; | 612 | err = 0; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 560acc677ce4..3f56b6e6c6aa 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -253,8 +253,7 @@ static unsigned rt_hash_mask __read_mostly; | |||
253 | static unsigned int rt_hash_log __read_mostly; | 253 | static unsigned int rt_hash_log __read_mostly; |
254 | 254 | ||
255 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 255 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
256 | #define RT_CACHE_STAT_INC(field) \ | 256 | #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) |
257 | (__raw_get_cpu_var(rt_cache_stat).field++) | ||
258 | 257 | ||
259 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, | 258 | static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, |
260 | int genid) | 259 | int genid) |
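
RT_CACHE_STAT_INC is rewritten on top of __this_cpu_inc(), which bumps the current CPU's copy in one step instead of first computing the per-CPU address with __raw_get_cpu_var() and incrementing through it. A userspace analogue, using a per-thread variable to stand in for a per-CPU one:

#include <stdio.h>

/* Userspace stand-in for a per-CPU counter: one instance per thread.
 * __this_cpu_inc() similarly increments the current CPU's copy in one
 * (often single-instruction) operation. */
static _Thread_local struct { unsigned long in_slow_tot; } rt_cache_stat;

#define RT_CACHE_STAT_INC(field) (rt_cache_stat.field++)

int main(void)
{
        RT_CACHE_STAT_INC(in_slow_tot);
        RT_CACHE_STAT_INC(in_slow_tot);
        printf("in_slow_tot = %lu\n", rt_cache_stat.in_slow_tot);
        return 0;
}
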
@@ -287,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
287 | rcu_read_lock_bh(); | 286 | rcu_read_lock_bh(); |
288 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); | 287 | r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); |
289 | while (r) { | 288 | while (r) { |
290 | if (dev_net(r->u.dst.dev) == seq_file_net(seq) && | 289 | if (dev_net(r->dst.dev) == seq_file_net(seq) && |
291 | r->rt_genid == st->genid) | 290 | r->rt_genid == st->genid) |
292 | return r; | 291 | return r; |
293 | r = rcu_dereference_bh(r->u.dst.rt_next); | 292 | r = rcu_dereference_bh(r->dst.rt_next); |
294 | } | 293 | } |
295 | rcu_read_unlock_bh(); | 294 | rcu_read_unlock_bh(); |
296 | } | 295 | } |
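
The rt->u.dst to rt->dst renames that dominate the rest of this file follow from struct rtable embedding its struct dst_entry directly as the first member instead of wrapping it in a one-member union; &rt->dst must still alias the rtable itself for the usual container casts to keep working. A sketch of that layout invariant (the struct bodies here are simplified, not the kernel's):

#include <assert.h>
#include <stddef.h>

struct dst_entry { int refcnt; };

/* Before: the dst lived inside a one-member union, reached as rt->u.dst.
 * After (sketched here): it is simply the first member, reached as
 * rt->dst, and a struct rtable * still converts to a struct dst_entry *
 * by taking &rt->dst. */
struct rtable {
        struct dst_entry dst;   /* must stay first for the cast to work */
        unsigned int rt_flags;
};

int main(void)
{
        struct rtable rt;

        assert(offsetof(struct rtable, dst) == 0);
        assert((void *)&rt.dst == (void *)&rt);
        return 0;
}
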
@@ -302,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
302 | { | 301 | { |
303 | struct rt_cache_iter_state *st = seq->private; | 302 | struct rt_cache_iter_state *st = seq->private; |
304 | 303 | ||
305 | r = r->u.dst.rt_next; | 304 | r = r->dst.rt_next; |
306 | while (!r) { | 305 | while (!r) { |
307 | rcu_read_unlock_bh(); | 306 | rcu_read_unlock_bh(); |
308 | do { | 307 | do { |
@@ -320,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq, | |||
320 | { | 319 | { |
321 | struct rt_cache_iter_state *st = seq->private; | 320 | struct rt_cache_iter_state *st = seq->private; |
322 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { | 321 | while ((r = __rt_cache_get_next(seq, r)) != NULL) { |
323 | if (dev_net(r->u.dst.dev) != seq_file_net(seq)) | 322 | if (dev_net(r->dst.dev) != seq_file_net(seq)) |
324 | continue; | 323 | continue; |
325 | if (r->rt_genid == st->genid) | 324 | if (r->rt_genid == st->genid) |
326 | break; | 325 | break; |
@@ -378,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) | |||
378 | 377 | ||
379 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" | 378 | seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t" |
380 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", | 379 | "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", |
381 | r->u.dst.dev ? r->u.dst.dev->name : "*", | 380 | r->dst.dev ? r->dst.dev->name : "*", |
382 | (__force u32)r->rt_dst, | 381 | (__force u32)r->rt_dst, |
383 | (__force u32)r->rt_gateway, | 382 | (__force u32)r->rt_gateway, |
384 | r->rt_flags, atomic_read(&r->u.dst.__refcnt), | 383 | r->rt_flags, atomic_read(&r->dst.__refcnt), |
385 | r->u.dst.__use, 0, (__force u32)r->rt_src, | 384 | r->dst.__use, 0, (__force u32)r->rt_src, |
386 | (dst_metric(&r->u.dst, RTAX_ADVMSS) ? | 385 | (dst_metric(&r->dst, RTAX_ADVMSS) ? |
387 | (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), | 386 | (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), |
388 | dst_metric(&r->u.dst, RTAX_WINDOW), | 387 | dst_metric(&r->dst, RTAX_WINDOW), |
389 | (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + | 388 | (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + |
390 | dst_metric(&r->u.dst, RTAX_RTTVAR)), | 389 | dst_metric(&r->dst, RTAX_RTTVAR)), |
391 | r->fl.fl4_tos, | 390 | r->fl.fl4_tos, |
392 | r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, | 391 | r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1, |
393 | r->u.dst.hh ? (r->u.dst.hh->hh_output == | 392 | r->dst.hh ? (r->dst.hh->hh_output == |
394 | dev_queue_xmit) : 0, | 393 | dev_queue_xmit) : 0, |
395 | r->rt_spec_dst, &len); | 394 | r->rt_spec_dst, &len); |
396 | 395 | ||
@@ -609,13 +608,13 @@ static inline int ip_rt_proc_init(void) | |||
609 | 608 | ||
610 | static inline void rt_free(struct rtable *rt) | 609 | static inline void rt_free(struct rtable *rt) |
611 | { | 610 | { |
612 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 611 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); |
613 | } | 612 | } |
614 | 613 | ||
615 | static inline void rt_drop(struct rtable *rt) | 614 | static inline void rt_drop(struct rtable *rt) |
616 | { | 615 | { |
617 | ip_rt_put(rt); | 616 | ip_rt_put(rt); |
618 | call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); | 617 | call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); |
619 | } | 618 | } |
620 | 619 | ||
621 | static inline int rt_fast_clean(struct rtable *rth) | 620 | static inline int rt_fast_clean(struct rtable *rth) |
@@ -623,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth) | |||
623 | /* Kill broadcast/multicast entries very aggressively, if they | 622 | /* Kill broadcast/multicast entries very aggressively, if they |
624 | collide in hash table with more useful entries */ | 623 | collide in hash table with more useful entries */ |
625 | return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && | 624 | return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && |
626 | rth->fl.iif && rth->u.dst.rt_next; | 625 | rth->fl.iif && rth->dst.rt_next; |
627 | } | 626 | } |
628 | 627 | ||
629 | static inline int rt_valuable(struct rtable *rth) | 628 | static inline int rt_valuable(struct rtable *rth) |
630 | { | 629 | { |
631 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || | 630 | return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || |
632 | rth->u.dst.expires; | 631 | rth->dst.expires; |
633 | } | 632 | } |
634 | 633 | ||
635 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) | 634 | static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) |
@@ -637,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t | |||
637 | unsigned long age; | 636 | unsigned long age; |
638 | int ret = 0; | 637 | int ret = 0; |
639 | 638 | ||
640 | if (atomic_read(&rth->u.dst.__refcnt)) | 639 | if (atomic_read(&rth->dst.__refcnt)) |
641 | goto out; | 640 | goto out; |
642 | 641 | ||
643 | ret = 1; | 642 | ret = 1; |
644 | if (rth->u.dst.expires && | 643 | if (rth->dst.expires && |
645 | time_after_eq(jiffies, rth->u.dst.expires)) | 644 | time_after_eq(jiffies, rth->dst.expires)) |
646 | goto out; | 645 | goto out; |
647 | 646 | ||
648 | age = jiffies - rth->u.dst.lastuse; | 647 | age = jiffies - rth->dst.lastuse; |
649 | ret = 0; | 648 | ret = 0; |
650 | if ((age <= tmo1 && !rt_fast_clean(rth)) || | 649 | if ((age <= tmo1 && !rt_fast_clean(rth)) || |
651 | (age <= tmo2 && rt_valuable(rth))) | 650 | (age <= tmo2 && rt_valuable(rth))) |
@@ -661,7 +660,7 @@ out: return ret; | |||
661 | */ | 660 | */ |
662 | static inline u32 rt_score(struct rtable *rt) | 661 | static inline u32 rt_score(struct rtable *rt) |
663 | { | 662 | { |
664 | u32 score = jiffies - rt->u.dst.lastuse; | 663 | u32 score = jiffies - rt->dst.lastuse; |
665 | 664 | ||
666 | score = ~score & ~(3<<30); | 665 | score = ~score & ~(3<<30); |
667 | 666 | ||
@@ -701,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) | |||
701 | 700 | ||
702 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) | 701 | static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) |
703 | { | 702 | { |
704 | return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); | 703 | return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev)); |
705 | } | 704 | } |
706 | 705 | ||
707 | static inline int rt_is_expired(struct rtable *rth) | 706 | static inline int rt_is_expired(struct rtable *rth) |
708 | { | 707 | { |
709 | return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); | 708 | return rth->rt_genid != rt_genid(dev_net(rth->dst.dev)); |
710 | } | 709 | } |
711 | 710 | ||
712 | /* | 711 | /* |
@@ -735,7 +734,7 @@ static void rt_do_flush(int process_context) | |||
735 | rth = rt_hash_table[i].chain; | 734 | rth = rt_hash_table[i].chain; |
736 | 735 | ||
737 | /* defer releasing the head of the list after spin_unlock */ | 736 | /* defer releasing the head of the list after spin_unlock */ |
738 | for (tail = rth; tail; tail = tail->u.dst.rt_next) | 737 | for (tail = rth; tail; tail = tail->dst.rt_next) |
739 | if (!rt_is_expired(tail)) | 738 | if (!rt_is_expired(tail)) |
740 | break; | 739 | break; |
741 | if (rth != tail) | 740 | if (rth != tail) |
@@ -744,9 +743,9 @@ static void rt_do_flush(int process_context) | |||
744 | /* call rt_free on entries after the tail requiring flush */ | 743 | /* call rt_free on entries after the tail requiring flush */ |
745 | prev = &rt_hash_table[i].chain; | 744 | prev = &rt_hash_table[i].chain; |
746 | for (p = *prev; p; p = next) { | 745 | for (p = *prev; p; p = next) { |
747 | next = p->u.dst.rt_next; | 746 | next = p->dst.rt_next; |
748 | if (!rt_is_expired(p)) { | 747 | if (!rt_is_expired(p)) { |
749 | prev = &p->u.dst.rt_next; | 748 | prev = &p->dst.rt_next; |
750 | } else { | 749 | } else { |
751 | *prev = next; | 750 | *prev = next; |
752 | rt_free(p); | 751 | rt_free(p); |
@@ -761,7 +760,7 @@ static void rt_do_flush(int process_context) | |||
761 | spin_unlock_bh(rt_hash_lock_addr(i)); | 760 | spin_unlock_bh(rt_hash_lock_addr(i)); |
762 | 761 | ||
763 | for (; rth != tail; rth = next) { | 762 | for (; rth != tail; rth = next) { |
764 | next = rth->u.dst.rt_next; | 763 | next = rth->dst.rt_next; |
765 | rt_free(rth); | 764 | rt_free(rth); |
766 | } | 765 | } |
767 | } | 766 | } |
@@ -792,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) | |||
792 | while (aux != rth) { | 791 | while (aux != rth) { |
793 | if (compare_hash_inputs(&aux->fl, &rth->fl)) | 792 | if (compare_hash_inputs(&aux->fl, &rth->fl)) |
794 | return 0; | 793 | return 0; |
795 | aux = aux->u.dst.rt_next; | 794 | aux = aux->dst.rt_next; |
796 | } | 795 | } |
797 | return ONE; | 796 | return ONE; |
798 | } | 797 | } |
@@ -832,18 +831,18 @@ static void rt_check_expire(void) | |||
832 | length = 0; | 831 | length = 0; |
833 | spin_lock_bh(rt_hash_lock_addr(i)); | 832 | spin_lock_bh(rt_hash_lock_addr(i)); |
834 | while ((rth = *rthp) != NULL) { | 833 | while ((rth = *rthp) != NULL) { |
835 | prefetch(rth->u.dst.rt_next); | 834 | prefetch(rth->dst.rt_next); |
836 | if (rt_is_expired(rth)) { | 835 | if (rt_is_expired(rth)) { |
837 | *rthp = rth->u.dst.rt_next; | 836 | *rthp = rth->dst.rt_next; |
838 | rt_free(rth); | 837 | rt_free(rth); |
839 | continue; | 838 | continue; |
840 | } | 839 | } |
841 | if (rth->u.dst.expires) { | 840 | if (rth->dst.expires) { |
842 | /* Entry is expired even if it is in use */ | 841 | /* Entry is expired even if it is in use */ |
843 | if (time_before_eq(jiffies, rth->u.dst.expires)) { | 842 | if (time_before_eq(jiffies, rth->dst.expires)) { |
844 | nofree: | 843 | nofree: |
845 | tmo >>= 1; | 844 | tmo >>= 1; |
846 | rthp = &rth->u.dst.rt_next; | 845 | rthp = &rth->dst.rt_next; |
847 | /* | 846 | /* |
848 | * We only count entries on | 847 | * We only count entries on |
849 | * a chain with equal hash inputs once | 848 | * a chain with equal hash inputs once |
@@ -859,7 +858,7 @@ nofree: | |||
859 | goto nofree; | 858 | goto nofree; |
860 | 859 | ||
861 | /* Cleanup aged off entries. */ | 860 | /* Cleanup aged off entries. */ |
862 | *rthp = rth->u.dst.rt_next; | 861 | *rthp = rth->dst.rt_next; |
863 | rt_free(rth); | 862 | rt_free(rth); |
864 | } | 863 | } |
865 | spin_unlock_bh(rt_hash_lock_addr(i)); | 864 | spin_unlock_bh(rt_hash_lock_addr(i)); |
@@ -1000,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops) | |||
1000 | if (!rt_is_expired(rth) && | 999 | if (!rt_is_expired(rth) && |
1001 | !rt_may_expire(rth, tmo, expire)) { | 1000 | !rt_may_expire(rth, tmo, expire)) { |
1002 | tmo >>= 1; | 1001 | tmo >>= 1; |
1003 | rthp = &rth->u.dst.rt_next; | 1002 | rthp = &rth->dst.rt_next; |
1004 | continue; | 1003 | continue; |
1005 | } | 1004 | } |
1006 | *rthp = rth->u.dst.rt_next; | 1005 | *rthp = rth->dst.rt_next; |
1007 | rt_free(rth); | 1006 | rt_free(rth); |
1008 | goal--; | 1007 | goal--; |
1009 | } | 1008 | } |
@@ -1069,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head) | |||
1069 | 1068 | ||
1070 | while (rth) { | 1069 | while (rth) { |
1071 | length += has_noalias(head, rth); | 1070 | length += has_noalias(head, rth); |
1072 | rth = rth->u.dst.rt_next; | 1071 | rth = rth->dst.rt_next; |
1073 | } | 1072 | } |
1074 | return length >> FRACT_BITS; | 1073 | return length >> FRACT_BITS; |
1075 | } | 1074 | } |
@@ -1091,7 +1090,7 @@ restart: | |||
1091 | candp = NULL; | 1090 | candp = NULL; |
1092 | now = jiffies; | 1091 | now = jiffies; |
1093 | 1092 | ||
1094 | if (!rt_caching(dev_net(rt->u.dst.dev))) { | 1093 | if (!rt_caching(dev_net(rt->dst.dev))) { |
1095 | /* | 1094 | /* |
1096 | * If we're not caching, just tell the caller we | 1095 | * If we're not caching, just tell the caller we |
1097 | * were successful and don't touch the route. The | 1096 | * were successful and don't touch the route. The |
@@ -1109,7 +1108,7 @@ restart: | |||
1109 | */ | 1108 | */ |
1110 | 1109 | ||
1111 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 1110 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
1112 | int err = arp_bind_neighbour(&rt->u.dst); | 1111 | int err = arp_bind_neighbour(&rt->dst); |
1113 | if (err) { | 1112 | if (err) { |
1114 | if (net_ratelimit()) | 1113 | if (net_ratelimit()) |
1115 | printk(KERN_WARNING | 1114 | printk(KERN_WARNING |
@@ -1128,19 +1127,19 @@ restart: | |||
1128 | spin_lock_bh(rt_hash_lock_addr(hash)); | 1127 | spin_lock_bh(rt_hash_lock_addr(hash)); |
1129 | while ((rth = *rthp) != NULL) { | 1128 | while ((rth = *rthp) != NULL) { |
1130 | if (rt_is_expired(rth)) { | 1129 | if (rt_is_expired(rth)) { |
1131 | *rthp = rth->u.dst.rt_next; | 1130 | *rthp = rth->dst.rt_next; |
1132 | rt_free(rth); | 1131 | rt_free(rth); |
1133 | continue; | 1132 | continue; |
1134 | } | 1133 | } |
1135 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { | 1134 | if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { |
1136 | /* Put it first */ | 1135 | /* Put it first */ |
1137 | *rthp = rth->u.dst.rt_next; | 1136 | *rthp = rth->dst.rt_next; |
1138 | /* | 1137 | /* |
1139 | * Since lookup is lockfree, the deletion | 1138 | * Since lookup is lockfree, the deletion |
1140 | * must be visible to another weakly ordered CPU before | 1139 | * must be visible to another weakly ordered CPU before |
1141 | * the insertion at the start of the hash chain. | 1140 | * the insertion at the start of the hash chain. |
1142 | */ | 1141 | */ |
1143 | rcu_assign_pointer(rth->u.dst.rt_next, | 1142 | rcu_assign_pointer(rth->dst.rt_next, |
1144 | rt_hash_table[hash].chain); | 1143 | rt_hash_table[hash].chain); |
1145 | /* | 1144 | /* |
1146 | * Since lookup is lockfree, the update writes | 1145 | * Since lookup is lockfree, the update writes |
@@ -1148,18 +1147,18 @@ restart: | |||
1148 | */ | 1147 | */ |
1149 | rcu_assign_pointer(rt_hash_table[hash].chain, rth); | 1148 | rcu_assign_pointer(rt_hash_table[hash].chain, rth); |
1150 | 1149 | ||
1151 | dst_use(&rth->u.dst, now); | 1150 | dst_use(&rth->dst, now); |
1152 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1151 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1153 | 1152 | ||
1154 | rt_drop(rt); | 1153 | rt_drop(rt); |
1155 | if (rp) | 1154 | if (rp) |
1156 | *rp = rth; | 1155 | *rp = rth; |
1157 | else | 1156 | else |
1158 | skb_dst_set(skb, &rth->u.dst); | 1157 | skb_dst_set(skb, &rth->dst); |
1159 | return 0; | 1158 | return 0; |
1160 | } | 1159 | } |
1161 | 1160 | ||
1162 | if (!atomic_read(&rth->u.dst.__refcnt)) { | 1161 | if (!atomic_read(&rth->dst.__refcnt)) { |
1163 | u32 score = rt_score(rth); | 1162 | u32 score = rt_score(rth); |
1164 | 1163 | ||
1165 | if (score <= min_score) { | 1164 | if (score <= min_score) { |
@@ -1171,7 +1170,7 @@ restart: | |||
1171 | 1170 | ||
1172 | chain_length++; | 1171 | chain_length++; |
1173 | 1172 | ||
1174 | rthp = &rth->u.dst.rt_next; | 1173 | rthp = &rth->dst.rt_next; |
1175 | } | 1174 | } |
1176 | 1175 | ||
1177 | if (cand) { | 1176 | if (cand) { |
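
The two rcu_assign_pointer() calls above, together with their comments, encode an ordering contract: because lookups walk the chain locklessly, the unlink of rth must be published before its re-insertion at the head. A userspace sketch using C11 release stores in place of rcu_assign_pointer():

#include <stdatomic.h>
#include <stdio.h>

struct rt_node {
        int key;
        struct rt_node *_Atomic next;
};

static struct rt_node *_Atomic chain_head;

/* Move an existing node to the front of a chain that concurrent readers
 * traverse without locks. Each release store publishes one pointer
 * update before readers can follow it, in the same order as the diff:
 * unlink first, then link into the head. */
static void move_to_front(struct rt_node *_Atomic *prevp, struct rt_node *node)
{
        atomic_store_explicit(prevp,
                              atomic_load(&node->next), memory_order_release);
        atomic_store_explicit(&node->next,
                              atomic_load(&chain_head), memory_order_release);
        atomic_store_explicit(&chain_head, node, memory_order_release);
}

int main(void)
{
        struct rt_node b = { .key = 2, .next = NULL };
        struct rt_node a = { .key = 1, .next = &b };

        atomic_store(&chain_head, &a);
        move_to_front(&a.next, &b);     /* b's prev pointer is a.next */

        for (struct rt_node *n = atomic_load(&chain_head); n;
             n = atomic_load(&n->next))
                printf("%d ", n->key);  /* prints: 2 1 */
        printf("\n");
        return 0;
}
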
@@ -1182,17 +1181,17 @@ restart: | |||
1182 | * only 2 entries per bucket. We will see. | 1181 | * only 2 entries per bucket. We will see. |
1183 | */ | 1182 | */ |
1184 | if (chain_length > ip_rt_gc_elasticity) { | 1183 | if (chain_length > ip_rt_gc_elasticity) { |
1185 | *candp = cand->u.dst.rt_next; | 1184 | *candp = cand->dst.rt_next; |
1186 | rt_free(cand); | 1185 | rt_free(cand); |
1187 | } | 1186 | } |
1188 | } else { | 1187 | } else { |
1189 | if (chain_length > rt_chain_length_max && | 1188 | if (chain_length > rt_chain_length_max && |
1190 | slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { | 1189 | slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { |
1191 | struct net *net = dev_net(rt->u.dst.dev); | 1190 | struct net *net = dev_net(rt->dst.dev); |
1192 | int num = ++net->ipv4.current_rt_cache_rebuild_count; | 1191 | int num = ++net->ipv4.current_rt_cache_rebuild_count; |
1193 | if (!rt_caching(net)) { | 1192 | if (!rt_caching(net)) { |
1194 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", | 1193 | printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", |
1195 | rt->u.dst.dev->name, num); | 1194 | rt->dst.dev->name, num); |
1196 | } | 1195 | } |
1197 | rt_emergency_hash_rebuild(net); | 1196 | rt_emergency_hash_rebuild(net); |
1198 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1197 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
@@ -1207,7 +1206,7 @@ restart: | |||
1207 | route or unicast forwarding path. | 1206 | route or unicast forwarding path. |
1208 | */ | 1207 | */ |
1209 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { | 1208 | if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { |
1210 | int err = arp_bind_neighbour(&rt->u.dst); | 1209 | int err = arp_bind_neighbour(&rt->dst); |
1211 | if (err) { | 1210 | if (err) { |
1212 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1211 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1213 | 1212 | ||
@@ -1238,14 +1237,14 @@ restart: | |||
1238 | } | 1237 | } |
1239 | } | 1238 | } |
1240 | 1239 | ||
1241 | rt->u.dst.rt_next = rt_hash_table[hash].chain; | 1240 | rt->dst.rt_next = rt_hash_table[hash].chain; |
1242 | 1241 | ||
1243 | #if RT_CACHE_DEBUG >= 2 | 1242 | #if RT_CACHE_DEBUG >= 2 |
1244 | if (rt->u.dst.rt_next) { | 1243 | if (rt->dst.rt_next) { |
1245 | struct rtable *trt; | 1244 | struct rtable *trt; |
1246 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", | 1245 | printk(KERN_DEBUG "rt_cache @%02x: %pI4", |
1247 | hash, &rt->rt_dst); | 1246 | hash, &rt->rt_dst); |
1248 | for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) | 1247 | for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next) |
1249 | printk(" . %pI4", &trt->rt_dst); | 1248 | printk(" . %pI4", &trt->rt_dst); |
1250 | printk("\n"); | 1249 | printk("\n"); |
1251 | } | 1250 | } |
@@ -1263,7 +1262,7 @@ skip_hashing: | |||
1263 | if (rp) | 1262 | if (rp) |
1264 | *rp = rt; | 1263 | *rp = rt; |
1265 | else | 1264 | else |
1266 | skb_dst_set(skb, &rt->u.dst); | 1265 | skb_dst_set(skb, &rt->dst); |
1267 | return 0; | 1266 | return 0; |
1268 | } | 1267 | } |
1269 | 1268 | ||
@@ -1325,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more) | |||
1325 | 1324 | ||
1326 | ip_select_fb_ident(iph); | 1325 | ip_select_fb_ident(iph); |
1327 | } | 1326 | } |
1327 | EXPORT_SYMBOL(__ip_select_ident); | ||
1328 | 1328 | ||
1329 | static void rt_del(unsigned hash, struct rtable *rt) | 1329 | static void rt_del(unsigned hash, struct rtable *rt) |
1330 | { | 1330 | { |
@@ -1335,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt) | |||
1335 | ip_rt_put(rt); | 1335 | ip_rt_put(rt); |
1336 | while ((aux = *rthp) != NULL) { | 1336 | while ((aux = *rthp) != NULL) { |
1337 | if (aux == rt || rt_is_expired(aux)) { | 1337 | if (aux == rt || rt_is_expired(aux)) { |
1338 | *rthp = aux->u.dst.rt_next; | 1338 | *rthp = aux->dst.rt_next; |
1339 | rt_free(aux); | 1339 | rt_free(aux); |
1340 | continue; | 1340 | continue; |
1341 | } | 1341 | } |
1342 | rthp = &aux->u.dst.rt_next; | 1342 | rthp = &aux->dst.rt_next; |
1343 | } | 1343 | } |
1344 | spin_unlock_bh(rt_hash_lock_addr(hash)); | 1344 | spin_unlock_bh(rt_hash_lock_addr(hash)); |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | /* called in rcu_read_lock() section */ | ||
1347 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | 1348 | void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, |
1348 | __be32 saddr, struct net_device *dev) | 1349 | __be32 saddr, struct net_device *dev) |
1349 | { | 1350 | { |
1350 | int i, k; | 1351 | int i, k; |
1351 | struct in_device *in_dev = in_dev_get(dev); | 1352 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1352 | struct rtable *rth, **rthp; | 1353 | struct rtable *rth, **rthp; |
1353 | __be32 skeys[2] = { saddr, 0 }; | 1354 | __be32 skeys[2] = { saddr, 0 }; |
1354 | int ikeys[2] = { dev->ifindex, 0 }; | 1355 | int ikeys[2] = { dev->ifindex, 0 }; |
@@ -1384,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1384 | 1385 | ||
1385 | rthp=&rt_hash_table[hash].chain; | 1386 | rthp=&rt_hash_table[hash].chain; |
1386 | 1387 | ||
1387 | rcu_read_lock(); | ||
1388 | while ((rth = rcu_dereference(*rthp)) != NULL) { | 1388 | while ((rth = rcu_dereference(*rthp)) != NULL) { |
1389 | struct rtable *rt; | 1389 | struct rtable *rt; |
1390 | 1390 | ||
@@ -1393,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1393 | rth->fl.oif != ikeys[k] || | 1393 | rth->fl.oif != ikeys[k] || |
1394 | rth->fl.iif != 0 || | 1394 | rth->fl.iif != 0 || |
1395 | rt_is_expired(rth) || | 1395 | rt_is_expired(rth) || |
1396 | !net_eq(dev_net(rth->u.dst.dev), net)) { | 1396 | !net_eq(dev_net(rth->dst.dev), net)) { |
1397 | rthp = &rth->u.dst.rt_next; | 1397 | rthp = &rth->dst.rt_next; |
1398 | continue; | 1398 | continue; |
1399 | } | 1399 | } |
1400 | 1400 | ||
1401 | if (rth->rt_dst != daddr || | 1401 | if (rth->rt_dst != daddr || |
1402 | rth->rt_src != saddr || | 1402 | rth->rt_src != saddr || |
1403 | rth->u.dst.error || | 1403 | rth->dst.error || |
1404 | rth->rt_gateway != old_gw || | 1404 | rth->rt_gateway != old_gw || |
1405 | rth->u.dst.dev != dev) | 1405 | rth->dst.dev != dev) |
1406 | break; | 1406 | break; |
1407 | 1407 | ||
1408 | dst_hold(&rth->u.dst); | 1408 | dst_hold(&rth->dst); |
1409 | rcu_read_unlock(); | ||
1410 | 1409 | ||
1411 | rt = dst_alloc(&ipv4_dst_ops); | 1410 | rt = dst_alloc(&ipv4_dst_ops); |
1412 | if (rt == NULL) { | 1411 | if (rt == NULL) { |
1413 | ip_rt_put(rth); | 1412 | ip_rt_put(rth); |
1414 | in_dev_put(in_dev); | ||
1415 | return; | 1413 | return; |
1416 | } | 1414 | } |
1417 | 1415 | ||
1418 | /* Copy all the information. */ | 1416 | /* Copy all the information. */ |
1419 | *rt = *rth; | 1417 | *rt = *rth; |
1420 | rt->u.dst.__use = 1; | 1418 | rt->dst.__use = 1; |
1421 | atomic_set(&rt->u.dst.__refcnt, 1); | 1419 | atomic_set(&rt->dst.__refcnt, 1); |
1422 | rt->u.dst.child = NULL; | 1420 | rt->dst.child = NULL; |
1423 | if (rt->u.dst.dev) | 1421 | if (rt->dst.dev) |
1424 | dev_hold(rt->u.dst.dev); | 1422 | dev_hold(rt->dst.dev); |
1425 | if (rt->idev) | 1423 | if (rt->idev) |
1426 | in_dev_hold(rt->idev); | 1424 | in_dev_hold(rt->idev); |
1427 | rt->u.dst.obsolete = -1; | 1425 | rt->dst.obsolete = -1; |
1428 | rt->u.dst.lastuse = jiffies; | 1426 | rt->dst.lastuse = jiffies; |
1429 | rt->u.dst.path = &rt->u.dst; | 1427 | rt->dst.path = &rt->dst; |
1430 | rt->u.dst.neighbour = NULL; | 1428 | rt->dst.neighbour = NULL; |
1431 | rt->u.dst.hh = NULL; | 1429 | rt->dst.hh = NULL; |
1432 | #ifdef CONFIG_XFRM | 1430 | #ifdef CONFIG_XFRM |
1433 | rt->u.dst.xfrm = NULL; | 1431 | rt->dst.xfrm = NULL; |
1434 | #endif | 1432 | #endif |
1435 | rt->rt_genid = rt_genid(net); | 1433 | rt->rt_genid = rt_genid(net); |
1436 | rt->rt_flags |= RTCF_REDIRECTED; | 1434 | rt->rt_flags |= RTCF_REDIRECTED; |
@@ -1439,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1439 | rt->rt_gateway = new_gw; | 1437 | rt->rt_gateway = new_gw; |
1440 | 1438 | ||
1441 | /* Redirect received -> path was valid */ | 1439 | /* Redirect received -> path was valid */ |
1442 | dst_confirm(&rth->u.dst); | 1440 | dst_confirm(&rth->dst); |
1443 | 1441 | ||
1444 | if (rt->peer) | 1442 | if (rt->peer) |
1445 | atomic_inc(&rt->peer->refcnt); | 1443 | atomic_inc(&rt->peer->refcnt); |
1446 | 1444 | ||
1447 | if (arp_bind_neighbour(&rt->u.dst) || | 1445 | if (arp_bind_neighbour(&rt->dst) || |
1448 | !(rt->u.dst.neighbour->nud_state & | 1446 | !(rt->dst.neighbour->nud_state & |
1449 | NUD_VALID)) { | 1447 | NUD_VALID)) { |
1450 | if (rt->u.dst.neighbour) | 1448 | if (rt->dst.neighbour) |
1451 | neigh_event_send(rt->u.dst.neighbour, NULL); | 1449 | neigh_event_send(rt->dst.neighbour, NULL); |
1452 | ip_rt_put(rth); | 1450 | ip_rt_put(rth); |
1453 | rt_drop(rt); | 1451 | rt_drop(rt); |
1454 | goto do_next; | 1452 | goto do_next; |
1455 | } | 1453 | } |
1456 | 1454 | ||
1457 | netevent.old = &rth->u.dst; | 1455 | netevent.old = &rth->dst; |
1458 | netevent.new = &rt->u.dst; | 1456 | netevent.new = &rt->dst; |
1459 | call_netevent_notifiers(NETEVENT_REDIRECT, | 1457 | call_netevent_notifiers(NETEVENT_REDIRECT, |
1460 | &netevent); | 1458 | &netevent); |
1461 | 1459 | ||
@@ -1464,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, | |||
1464 | ip_rt_put(rt); | 1462 | ip_rt_put(rt); |
1465 | goto do_next; | 1463 | goto do_next; |
1466 | } | 1464 | } |
1467 | rcu_read_unlock(); | ||
1468 | do_next: | 1465 | do_next: |
1469 | ; | 1466 | ; |
1470 | } | 1467 | } |
1471 | } | 1468 | } |
1472 | in_dev_put(in_dev); | ||
1473 | return; | 1469 | return; |
1474 | 1470 | ||
1475 | reject_redirect: | 1471 | reject_redirect: |
@@ -1480,7 +1476,7 @@ reject_redirect: | |||
1480 | &old_gw, dev->name, &new_gw, | 1476 | &old_gw, dev->name, &new_gw, |
1481 | &saddr, &daddr); | 1477 | &saddr, &daddr); |
1482 | #endif | 1478 | #endif |
1483 | in_dev_put(in_dev); | 1479 | ; |
1484 | } | 1480 | } |
1485 | 1481 | ||
1486 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | 1482 | static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) |
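
ip_rt_redirect() is now annotated as running inside the caller's rcu_read_lock() section, which is why the in_dev_get()/in_dev_put() pairs collapse to a bare __in_dev_get_rcu() and the function's own rcu_read_lock()/unlock() calls disappear: the read-side critical section, not a refcount, keeps in_dev alive. A toy illustration of that convention, with a rwlock standing in for the RCU read side:

#include <pthread.h>
#include <stdio.h>

/* Toy stand-in for the RCU read side. The point of the hunks above is
 * the locking convention, not the mechanism: once the entry point takes
 * the read lock, every helper it calls may look up the in_device
 * without taking a reference of its own. */
static pthread_rwlock_t read_side = PTHREAD_RWLOCK_INITIALIZER;
static int in_device = 42;

/* Helper: documents (and relies on) the caller holding the read side. */
static int get_in_dev_locked(void)
{
        return in_device;       /* no refcount taken or dropped */
}

static void handle_redirect(void)
{
        /* no lock taken here: the caller already holds it */
        printf("in_device = %d\n", get_in_dev_locked());
}

int main(void)
{
        pthread_rwlock_rdlock(&read_side);
        handle_redirect();
        pthread_rwlock_unlock(&read_side);
        return 0;
}
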
@@ -1493,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1493 | ip_rt_put(rt); | 1489 | ip_rt_put(rt); |
1494 | ret = NULL; | 1490 | ret = NULL; |
1495 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || | 1491 | } else if ((rt->rt_flags & RTCF_REDIRECTED) || |
1496 | (rt->u.dst.expires && | 1492 | (rt->dst.expires && |
1497 | time_after_eq(jiffies, rt->u.dst.expires))) { | 1493 | time_after_eq(jiffies, rt->dst.expires))) { |
1498 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, | 1494 | unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, |
1499 | rt->fl.oif, | 1495 | rt->fl.oif, |
1500 | rt_genid(dev_net(dst->dev))); | 1496 | rt_genid(dev_net(dst->dev))); |
@@ -1532,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1532 | int log_martians; | 1528 | int log_martians; |
1533 | 1529 | ||
1534 | rcu_read_lock(); | 1530 | rcu_read_lock(); |
1535 | in_dev = __in_dev_get_rcu(rt->u.dst.dev); | 1531 | in_dev = __in_dev_get_rcu(rt->dst.dev); |
1536 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { | 1532 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { |
1537 | rcu_read_unlock(); | 1533 | rcu_read_unlock(); |
1538 | return; | 1534 | return; |
@@ -1543,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1543 | /* No redirected packets during ip_rt_redirect_silence; | 1539 | /* No redirected packets during ip_rt_redirect_silence; |
1544 | * reset the algorithm. | 1540 | * reset the algorithm. |
1545 | */ | 1541 | */ |
1546 | if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) | 1542 | if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) |
1547 | rt->u.dst.rate_tokens = 0; | 1543 | rt->dst.rate_tokens = 0; |
1548 | 1544 | ||
1549 | /* Too many ignored redirects; do not send anything | 1545 | /* Too many ignored redirects; do not send anything |
1550 | * set u.dst.rate_last to the last seen redirected packet. | 1546 | * set dst.rate_last to the last seen redirected packet. |
1551 | */ | 1547 | */ |
1552 | if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { | 1548 | if (rt->dst.rate_tokens >= ip_rt_redirect_number) { |
1553 | rt->u.dst.rate_last = jiffies; | 1549 | rt->dst.rate_last = jiffies; |
1554 | return; | 1550 | return; |
1555 | } | 1551 | } |
1556 | 1552 | ||
1557 | /* Check for load limit; set rate_last to the latest sent | 1553 | /* Check for load limit; set rate_last to the latest sent |
1558 | * redirect. | 1554 | * redirect. |
1559 | */ | 1555 | */ |
1560 | if (rt->u.dst.rate_tokens == 0 || | 1556 | if (rt->dst.rate_tokens == 0 || |
1561 | time_after(jiffies, | 1557 | time_after(jiffies, |
1562 | (rt->u.dst.rate_last + | 1558 | (rt->dst.rate_last + |
1563 | (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { | 1559 | (ip_rt_redirect_load << rt->dst.rate_tokens)))) { |
1564 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); | 1560 | icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); |
1565 | rt->u.dst.rate_last = jiffies; | 1561 | rt->dst.rate_last = jiffies; |
1566 | ++rt->u.dst.rate_tokens; | 1562 | ++rt->dst.rate_tokens; |
1567 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1563 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1568 | if (log_martians && | 1564 | if (log_martians && |
1569 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1565 | rt->dst.rate_tokens == ip_rt_redirect_number && |
1570 | net_ratelimit()) | 1566 | net_ratelimit()) |
1571 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1567 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
1572 | &rt->rt_src, rt->rt_iif, | 1568 | &rt->rt_src, rt->rt_iif, |
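
The rate limiting retained here is exponential: each redirect the host ignores bumps rate_tokens, and the next redirect is sent no sooner than rate_last + (ip_rt_redirect_load << rate_tokens), so the interval doubles until ip_rt_redirect_number is reached and the sender goes silent. A small sketch of the resulting schedule (constants invented, not the sysctl defaults):

#include <stdio.h>

int main(void)
{
        const unsigned long redirect_load = 20;  /* base delay, jiffies */
        const int redirect_number = 9;           /* give up after this many */

        for (int tokens = 0; tokens < redirect_number; tokens++)
                printf("redirect %d: next one no sooner than +%lu jiffies\n",
                       tokens + 1, redirect_load << tokens);
        return 0;
}
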
@@ -1581,7 +1577,7 @@ static int ip_error(struct sk_buff *skb) | |||
1581 | unsigned long now; | 1577 | unsigned long now; |
1582 | int code; | 1578 | int code; |
1583 | 1579 | ||
1584 | switch (rt->u.dst.error) { | 1580 | switch (rt->dst.error) { |
1585 | case EINVAL: | 1581 | case EINVAL: |
1586 | default: | 1582 | default: |
1587 | goto out; | 1583 | goto out; |
@@ -1590,7 +1586,7 @@ static int ip_error(struct sk_buff *skb) | |||
1590 | break; | 1586 | break; |
1591 | case ENETUNREACH: | 1587 | case ENETUNREACH: |
1592 | code = ICMP_NET_UNREACH; | 1588 | code = ICMP_NET_UNREACH; |
1593 | IP_INC_STATS_BH(dev_net(rt->u.dst.dev), | 1589 | IP_INC_STATS_BH(dev_net(rt->dst.dev), |
1594 | IPSTATS_MIB_INNOROUTES); | 1590 | IPSTATS_MIB_INNOROUTES); |
1595 | break; | 1591 | break; |
1596 | case EACCES: | 1592 | case EACCES: |
@@ -1599,12 +1595,12 @@ static int ip_error(struct sk_buff *skb) | |||
1599 | } | 1595 | } |
1600 | 1596 | ||
1601 | now = jiffies; | 1597 | now = jiffies; |
1602 | rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; | 1598 | rt->dst.rate_tokens += now - rt->dst.rate_last; |
1603 | if (rt->u.dst.rate_tokens > ip_rt_error_burst) | 1599 | if (rt->dst.rate_tokens > ip_rt_error_burst) |
1604 | rt->u.dst.rate_tokens = ip_rt_error_burst; | 1600 | rt->dst.rate_tokens = ip_rt_error_burst; |
1605 | rt->u.dst.rate_last = now; | 1601 | rt->dst.rate_last = now; |
1606 | if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { | 1602 | if (rt->dst.rate_tokens >= ip_rt_error_cost) { |
1607 | rt->u.dst.rate_tokens -= ip_rt_error_cost; | 1603 | rt->dst.rate_tokens -= ip_rt_error_cost; |
1608 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); | 1604 | icmp_send(skb, ICMP_DEST_UNREACH, code, 0); |
1609 | } | 1605 | } |
1610 | 1606 | ||
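
ip_error() rate-limits ICMP errors with a classic token bucket: tokens accrue with elapsed jiffies, are capped at ip_rt_error_burst, and each icmp_send() spends ip_rt_error_cost. The same shape in miniature, with illustrative constants:

#include <stdbool.h>
#include <stdio.h>

struct bucket { unsigned long tokens, last; };

/* Tokens accrue with time, are capped at a burst, and each error sent
 * costs a fixed amount; sending is allowed only while tokens remain. */
static bool may_send(struct bucket *b, unsigned long now,
                     unsigned long burst, unsigned long cost)
{
        b->tokens += now - b->last;
        if (b->tokens > burst)
                b->tokens = burst;
        b->last = now;
        if (b->tokens >= cost) {
                b->tokens -= cost;
                return true;    /* ok to icmp_send() */
        }
        return false;
}

int main(void)
{
        struct bucket b = { .tokens = 0, .last = 0 };

        for (unsigned long now = 0; now <= 500; now += 100)
                printf("t=%lu send=%d\n", now, may_send(&b, now, 500, 250));
        return 0;
}
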
@@ -1649,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1649 | 1645 | ||
1650 | rcu_read_lock(); | 1646 | rcu_read_lock(); |
1651 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 1647 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
1652 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 1648 | rth = rcu_dereference(rth->dst.rt_next)) { |
1653 | unsigned short mtu = new_mtu; | 1649 | unsigned short mtu = new_mtu; |
1654 | 1650 | ||
1655 | if (rth->fl.fl4_dst != daddr || | 1651 | if (rth->fl.fl4_dst != daddr || |
@@ -1658,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1658 | rth->rt_src != iph->saddr || | 1654 | rth->rt_src != iph->saddr || |
1659 | rth->fl.oif != ikeys[k] || | 1655 | rth->fl.oif != ikeys[k] || |
1660 | rth->fl.iif != 0 || | 1656 | rth->fl.iif != 0 || |
1661 | dst_metric_locked(&rth->u.dst, RTAX_MTU) || | 1657 | dst_metric_locked(&rth->dst, RTAX_MTU) || |
1662 | !net_eq(dev_net(rth->u.dst.dev), net) || | 1658 | !net_eq(dev_net(rth->dst.dev), net) || |
1663 | rt_is_expired(rth)) | 1659 | rt_is_expired(rth)) |
1664 | continue; | 1660 | continue; |
1665 | 1661 | ||
@@ -1667,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, | |||
1667 | 1663 | ||
1668 | /* BSD 4.2 compatibility hack :-( */ | 1664 | /* BSD 4.2 compatibility hack :-( */ |
1669 | if (mtu == 0 && | 1665 | if (mtu == 0 && |
1670 | old_mtu >= dst_mtu(&rth->u.dst) && | 1666 | old_mtu >= dst_mtu(&rth->dst) && |
1671 | old_mtu >= 68 + (iph->ihl << 2)) | 1667 | old_mtu >= 68 + (iph->ihl << 2)) |
1672 | old_mtu -= iph->ihl << 2; | 1668 | old_mtu -= iph->ihl << 2; |
1673 | 1669 | ||
1674 | mtu = guess_mtu(old_mtu); | 1670 | mtu = guess_mtu(old_mtu); |
1675 | } | 1671 | } |
1676 | if (mtu <= dst_mtu(&rth->u.dst)) { | 1672 | if (mtu <= dst_mtu(&rth->dst)) { |
1677 | if (mtu < dst_mtu(&rth->u.dst)) { | 1673 | if (mtu < dst_mtu(&rth->dst)) { |
1678 | dst_confirm(&rth->u.dst); | 1674 | dst_confirm(&rth->dst); |
1679 | if (mtu < ip_rt_min_pmtu) { | 1675 | if (mtu < ip_rt_min_pmtu) { |
1680 | mtu = ip_rt_min_pmtu; | 1676 | mtu = ip_rt_min_pmtu; |
1681 | rth->u.dst.metrics[RTAX_LOCK-1] |= | 1677 | rth->dst.metrics[RTAX_LOCK-1] |= |
1682 | (1 << RTAX_MTU); | 1678 | (1 << RTAX_MTU); |
1683 | } | 1679 | } |
1684 | rth->u.dst.metrics[RTAX_MTU-1] = mtu; | 1680 | rth->dst.metrics[RTAX_MTU-1] = mtu; |
1685 | dst_set_expires(&rth->u.dst, | 1681 | dst_set_expires(&rth->dst, |
1686 | ip_rt_mtu_expires); | 1682 | ip_rt_mtu_expires); |
1687 | } | 1683 | } |
1688 | est_mtu = mtu; | 1684 | est_mtu = mtu; |
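
The PMTU update logic is unchanged in substance: a smaller advertised MTU is accepted, but never below ip_rt_min_pmtu, and if the floor kicks in the MTU metric is locked so the path can't be driven lower afterwards. Roughly, with an assumed floor value:

#include <stdio.h>

int main(void)
{
        unsigned int min_pmtu = 552;    /* illustrative floor */
        unsigned int cur_mtu = 1500;
        unsigned int advertised[] = { 1400, 900, 300 };

        for (int i = 0; i < 3; i++) {
                unsigned int mtu = advertised[i];
                int locked = 0;

                if (mtu < cur_mtu) {
                        if (mtu < min_pmtu) {
                                mtu = min_pmtu;
                                locked = 1;   /* RTAX_LOCK |= (1 << RTAX_MTU) */
                        }
                        cur_mtu = mtu;
                }
                printf("advertised %u -> path mtu %u%s\n", advertised[i],
                       cur_mtu, locked ? " (locked at floor)" : "");
        }
        return 0;
}
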
@@ -1755,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb) | |||
1755 | 1751 | ||
1756 | rt = skb_rtable(skb); | 1752 | rt = skb_rtable(skb); |
1757 | if (rt) | 1753 | if (rt) |
1758 | dst_set_expires(&rt->u.dst, 0); | 1754 | dst_set_expires(&rt->dst, 0); |
1759 | } | 1755 | } |
1760 | 1756 | ||
1761 | static int ip_rt_bug(struct sk_buff *skb) | 1757 | static int ip_rt_bug(struct sk_buff *skb) |
@@ -1783,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1783 | 1779 | ||
1784 | if (rt->fl.iif == 0) | 1780 | if (rt->fl.iif == 0) |
1785 | src = rt->rt_src; | 1781 | src = rt->rt_src; |
1786 | else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { | 1782 | else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { |
1787 | src = FIB_RES_PREFSRC(res); | 1783 | src = FIB_RES_PREFSRC(res); |
1788 | fib_res_put(&res); | 1784 | fib_res_put(&res); |
1789 | } else | 1785 | } else |
1790 | src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, | 1786 | src = inet_select_addr(rt->dst.dev, rt->rt_gateway, |
1791 | RT_SCOPE_UNIVERSE); | 1787 | RT_SCOPE_UNIVERSE); |
1792 | memcpy(addr, &src, 4); | 1788 | memcpy(addr, &src, 4); |
1793 | } | 1789 | } |
@@ -1795,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) | |||
1795 | #ifdef CONFIG_NET_CLS_ROUTE | 1791 | #ifdef CONFIG_NET_CLS_ROUTE |
1796 | static void set_class_tag(struct rtable *rt, u32 tag) | 1792 | static void set_class_tag(struct rtable *rt, u32 tag) |
1797 | { | 1793 | { |
1798 | if (!(rt->u.dst.tclassid & 0xFFFF)) | 1794 | if (!(rt->dst.tclassid & 0xFFFF)) |
1799 | rt->u.dst.tclassid |= tag & 0xFFFF; | 1795 | rt->dst.tclassid |= tag & 0xFFFF; |
1800 | if (!(rt->u.dst.tclassid & 0xFFFF0000)) | 1796 | if (!(rt->dst.tclassid & 0xFFFF0000)) |
1801 | rt->u.dst.tclassid |= tag & 0xFFFF0000; | 1797 | rt->dst.tclassid |= tag & 0xFFFF0000; |
1802 | } | 1798 | } |
1803 | #endif | 1799 | #endif |
1804 | 1800 | ||
@@ -1810,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1810 | if (FIB_RES_GW(*res) && | 1806 | if (FIB_RES_GW(*res) && |
1811 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | 1807 | FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) |
1812 | rt->rt_gateway = FIB_RES_GW(*res); | 1808 | rt->rt_gateway = FIB_RES_GW(*res); |
1813 | memcpy(rt->u.dst.metrics, fi->fib_metrics, | 1809 | memcpy(rt->dst.metrics, fi->fib_metrics, |
1814 | sizeof(rt->u.dst.metrics)); | 1810 | sizeof(rt->dst.metrics)); |
1815 | if (fi->fib_mtu == 0) { | 1811 | if (fi->fib_mtu == 0) { |
1816 | rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; | 1812 | rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; |
1817 | if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && | 1813 | if (dst_metric_locked(&rt->dst, RTAX_MTU) && |
1818 | rt->rt_gateway != rt->rt_dst && | 1814 | rt->rt_gateway != rt->rt_dst && |
1819 | rt->u.dst.dev->mtu > 576) | 1815 | rt->dst.dev->mtu > 576) |
1820 | rt->u.dst.metrics[RTAX_MTU-1] = 576; | 1816 | rt->dst.metrics[RTAX_MTU-1] = 576; |
1821 | } | 1817 | } |
1822 | #ifdef CONFIG_NET_CLS_ROUTE | 1818 | #ifdef CONFIG_NET_CLS_ROUTE |
1823 | rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; | 1819 | rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; |
1824 | #endif | 1820 | #endif |
1825 | } else | 1821 | } else |
1826 | rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; | 1822 | rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; |
1827 | 1823 | ||
1828 | if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) | 1824 | if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) |
1829 | rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; | 1825 | rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; |
1830 | if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) | 1826 | if (dst_mtu(&rt->dst) > IP_MAX_MTU) |
1831 | rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; | 1827 | rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; |
1832 | if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) | 1828 | if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) |
1833 | rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, | 1829 | rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, |
1834 | ip_rt_min_advmss); | 1830 | ip_rt_min_advmss); |
1835 | if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) | 1831 | if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) |
1836 | rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; | 1832 | rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; |
1837 | 1833 | ||
1838 | #ifdef CONFIG_NET_CLS_ROUTE | 1834 | #ifdef CONFIG_NET_CLS_ROUTE |
1839 | #ifdef CONFIG_IP_MULTIPLE_TABLES | 1835 | #ifdef CONFIG_IP_MULTIPLE_TABLES |
@@ -1844,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) | |||
1844 | rt->rt_type = res->type; | 1840 | rt->rt_type = res->type; |
1845 | } | 1841 | } |
1846 | 1842 | ||
1843 | /* called in rcu_read_lock() section */ | ||
1847 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 1844 | static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
1848 | u8 tos, struct net_device *dev, int our) | 1845 | u8 tos, struct net_device *dev, int our) |
1849 | { | 1846 | { |
1850 | unsigned hash; | 1847 | unsigned int hash; |
1851 | struct rtable *rth; | 1848 | struct rtable *rth; |
1852 | __be32 spec_dst; | 1849 | __be32 spec_dst; |
1853 | struct in_device *in_dev = in_dev_get(dev); | 1850 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
1854 | u32 itag = 0; | 1851 | u32 itag = 0; |
1852 | int err; | ||
1855 | 1853 | ||
1856 | /* Primary sanity checks. */ | 1854 | /* Primary sanity checks. */ |
1857 | 1855 | ||
@@ -1866,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1866 | if (!ipv4_is_local_multicast(daddr)) | 1864 | if (!ipv4_is_local_multicast(daddr)) |
1867 | goto e_inval; | 1865 | goto e_inval; |
1868 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | 1866 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); |
1869 | } else if (fib_validate_source(saddr, 0, tos, 0, | 1867 | } else { |
1870 | dev, &spec_dst, &itag, 0) < 0) | 1868 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
1871 | goto e_inval; | 1869 | &itag, 0); |
1872 | 1870 | if (err < 0) | |
1871 | goto e_err; | ||
1872 | } | ||
1873 | rth = dst_alloc(&ipv4_dst_ops); | 1873 | rth = dst_alloc(&ipv4_dst_ops); |
1874 | if (!rth) | 1874 | if (!rth) |
1875 | goto e_nobufs; | 1875 | goto e_nobufs; |
1876 | 1876 | ||
1877 | rth->u.dst.output = ip_rt_bug; | 1877 | rth->dst.output = ip_rt_bug; |
1878 | rth->u.dst.obsolete = -1; | 1878 | rth->dst.obsolete = -1; |
1879 | 1879 | ||
1880 | atomic_set(&rth->u.dst.__refcnt, 1); | 1880 | atomic_set(&rth->dst.__refcnt, 1); |
1881 | rth->u.dst.flags= DST_HOST; | 1881 | rth->dst.flags= DST_HOST; |
1882 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 1882 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
1883 | rth->u.dst.flags |= DST_NOPOLICY; | 1883 | rth->dst.flags |= DST_NOPOLICY; |
1884 | rth->fl.fl4_dst = daddr; | 1884 | rth->fl.fl4_dst = daddr; |
1885 | rth->rt_dst = daddr; | 1885 | rth->rt_dst = daddr; |
1886 | rth->fl.fl4_tos = tos; | 1886 | rth->fl.fl4_tos = tos; |
@@ -1888,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1888 | rth->fl.fl4_src = saddr; | 1888 | rth->fl.fl4_src = saddr; |
1889 | rth->rt_src = saddr; | 1889 | rth->rt_src = saddr; |
1890 | #ifdef CONFIG_NET_CLS_ROUTE | 1890 | #ifdef CONFIG_NET_CLS_ROUTE |
1891 | rth->u.dst.tclassid = itag; | 1891 | rth->dst.tclassid = itag; |
1892 | #endif | 1892 | #endif |
1893 | rth->rt_iif = | 1893 | rth->rt_iif = |
1894 | rth->fl.iif = dev->ifindex; | 1894 | rth->fl.iif = dev->ifindex; |
1895 | rth->u.dst.dev = init_net.loopback_dev; | 1895 | rth->dst.dev = init_net.loopback_dev; |
1896 | dev_hold(rth->u.dst.dev); | 1896 | dev_hold(rth->dst.dev); |
1897 | rth->idev = in_dev_get(rth->u.dst.dev); | 1897 | rth->idev = in_dev_get(rth->dst.dev); |
1898 | rth->fl.oif = 0; | 1898 | rth->fl.oif = 0; |
1899 | rth->rt_gateway = daddr; | 1899 | rth->rt_gateway = daddr; |
1900 | rth->rt_spec_dst= spec_dst; | 1900 | rth->rt_spec_dst= spec_dst; |
@@ -1902,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
1902 | rth->rt_flags = RTCF_MULTICAST; | 1902 | rth->rt_flags = RTCF_MULTICAST; |
1903 | rth->rt_type = RTN_MULTICAST; | 1903 | rth->rt_type = RTN_MULTICAST; |
1904 | if (our) { | 1904 | if (our) { |
1905 | rth->u.dst.input= ip_local_deliver; | 1905 | rth->dst.input= ip_local_deliver; |
1906 | rth->rt_flags |= RTCF_LOCAL; | 1906 | rth->rt_flags |= RTCF_LOCAL; |
1907 | } | 1907 | } |
1908 | 1908 | ||
1909 | #ifdef CONFIG_IP_MROUTE | 1909 | #ifdef CONFIG_IP_MROUTE |
1910 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) | 1910 | if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) |
1911 | rth->u.dst.input = ip_mr_input; | 1911 | rth->dst.input = ip_mr_input; |
1912 | #endif | 1912 | #endif |
1913 | RT_CACHE_STAT_INC(in_slow_mc); | 1913 | RT_CACHE_STAT_INC(in_slow_mc); |
1914 | 1914 | ||
1915 | in_dev_put(in_dev); | ||
1916 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); | 1915 | hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); |
1917 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); | 1916 | return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); |
1918 | 1917 | ||
1919 | e_nobufs: | 1918 | e_nobufs: |
1920 | in_dev_put(in_dev); | ||
1921 | return -ENOBUFS; | 1919 | return -ENOBUFS; |
1922 | |||
1923 | e_inval: | 1920 | e_inval: |
1924 | in_dev_put(in_dev); | ||
1925 | return -EINVAL; | 1921 | return -EINVAL; |
1922 | e_err: | ||
1923 | return err; | ||
1926 | } | 1924 | } |
1927 | 1925 | ||
1928 | 1926 | ||
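
The reworked error paths in ip_route_input_mc() (and the matching martian_source/martian_source_keep_err split further down) stop flattening every failure into -EINVAL: fib_validate_source()'s return value is kept in err and surfaced through its own e_err label. The shape of that pattern, with validate() as a hypothetical stand-in for fib_validate_source():

#include <errno.h>
#include <stdio.h>

static int validate(int src)
{
        return src < 0 ? -EXDEV : 0;    /* pretend: cross-device error */
}

static int route_input_mc(int src)
{
        int err;

        if (src == 0)
                goto e_inval;           /* genuinely invalid input */
        err = validate(src);
        if (err < 0)
                goto e_err;             /* keep the callee's errno */
        return 0;

e_inval:
        return -EINVAL;
e_err:
        return err;                     /* not flattened to -EINVAL */
}

int main(void)
{
        printf("%d %d %d\n", route_input_mc(1), route_input_mc(0),
               route_input_mc(-1));
        return 0;
}
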
@@ -1956,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev, | |||
1956 | #endif | 1954 | #endif |
1957 | } | 1955 | } |
1958 | 1956 | ||
1957 | /* called in rcu_read_lock() section */ | ||
1959 | static int __mkroute_input(struct sk_buff *skb, | 1958 | static int __mkroute_input(struct sk_buff *skb, |
1960 | struct fib_result *res, | 1959 | struct fib_result *res, |
1961 | struct in_device *in_dev, | 1960 | struct in_device *in_dev, |
1962 | __be32 daddr, __be32 saddr, u32 tos, | 1961 | __be32 daddr, __be32 saddr, u32 tos, |
1963 | struct rtable **result) | 1962 | struct rtable **result) |
1964 | { | 1963 | { |
1965 | |||
1966 | struct rtable *rth; | 1964 | struct rtable *rth; |
1967 | int err; | 1965 | int err; |
1968 | struct in_device *out_dev; | 1966 | struct in_device *out_dev; |
1969 | unsigned flags = 0; | 1967 | unsigned int flags = 0; |
1970 | __be32 spec_dst; | 1968 | __be32 spec_dst; |
1971 | u32 itag; | 1969 | u32 itag; |
1972 | 1970 | ||
1973 | /* get a working reference to the output device */ | 1971 | /* get a working reference to the output device */ |
1974 | out_dev = in_dev_get(FIB_RES_DEV(*res)); | 1972 | out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); |
1975 | if (out_dev == NULL) { | 1973 | if (out_dev == NULL) { |
1976 | if (net_ratelimit()) | 1974 | if (net_ratelimit()) |
1977 | printk(KERN_CRIT "Bug in ip_route_input" \ | 1975 | printk(KERN_CRIT "Bug in ip_route_input" \ |
@@ -1986,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
1986 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, | 1984 | ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, |
1987 | saddr); | 1985 | saddr); |
1988 | 1986 | ||
1989 | err = -EINVAL; | ||
1990 | goto cleanup; | 1987 | goto cleanup; |
1991 | } | 1988 | } |
1992 | 1989 | ||
@@ -2020,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2020 | goto cleanup; | 2017 | goto cleanup; |
2021 | } | 2018 | } |
2022 | 2019 | ||
2023 | atomic_set(&rth->u.dst.__refcnt, 1); | 2020 | atomic_set(&rth->dst.__refcnt, 1); |
2024 | rth->u.dst.flags= DST_HOST; | 2021 | rth->dst.flags= DST_HOST; |
2025 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2022 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
2026 | rth->u.dst.flags |= DST_NOPOLICY; | 2023 | rth->dst.flags |= DST_NOPOLICY; |
2027 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) | 2024 | if (IN_DEV_CONF_GET(out_dev, NOXFRM)) |
2028 | rth->u.dst.flags |= DST_NOXFRM; | 2025 | rth->dst.flags |= DST_NOXFRM; |
2029 | rth->fl.fl4_dst = daddr; | 2026 | rth->fl.fl4_dst = daddr; |
2030 | rth->rt_dst = daddr; | 2027 | rth->rt_dst = daddr; |
2031 | rth->fl.fl4_tos = tos; | 2028 | rth->fl.fl4_tos = tos; |
@@ -2035,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2035 | rth->rt_gateway = daddr; | 2032 | rth->rt_gateway = daddr; |
2036 | rth->rt_iif = | 2033 | rth->rt_iif = |
2037 | rth->fl.iif = in_dev->dev->ifindex; | 2034 | rth->fl.iif = in_dev->dev->ifindex; |
2038 | rth->u.dst.dev = (out_dev)->dev; | 2035 | rth->dst.dev = (out_dev)->dev; |
2039 | dev_hold(rth->u.dst.dev); | 2036 | dev_hold(rth->dst.dev); |
2040 | rth->idev = in_dev_get(rth->u.dst.dev); | 2037 | rth->idev = in_dev_get(rth->dst.dev); |
2041 | rth->fl.oif = 0; | 2038 | rth->fl.oif = 0; |
2042 | rth->rt_spec_dst= spec_dst; | 2039 | rth->rt_spec_dst= spec_dst; |
2043 | 2040 | ||
2044 | rth->u.dst.obsolete = -1; | 2041 | rth->dst.obsolete = -1; |
2045 | rth->u.dst.input = ip_forward; | 2042 | rth->dst.input = ip_forward; |
2046 | rth->u.dst.output = ip_output; | 2043 | rth->dst.output = ip_output; |
2047 | rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); | 2044 | rth->rt_genid = rt_genid(dev_net(rth->dst.dev)); |
2048 | 2045 | ||
2049 | rt_set_nexthop(rth, res, itag); | 2046 | rt_set_nexthop(rth, res, itag); |
2050 | 2047 | ||
@@ -2053,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb, | |||
2053 | *result = rth; | 2050 | *result = rth; |
2054 | err = 0; | 2051 | err = 0; |
2055 | cleanup: | 2052 | cleanup: |
2056 | /* release the working reference to the output device */ | ||
2057 | in_dev_put(out_dev); | ||
2058 | return err; | 2053 | return err; |
2059 | } | 2054 | } |
2060 | 2055 | ||
@@ -2080,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb, | |||
2080 | 2075 | ||
2081 | /* put it into the cache */ | 2076 | /* put it into the cache */ |
2082 | hash = rt_hash(daddr, saddr, fl->iif, | 2077 | hash = rt_hash(daddr, saddr, fl->iif, |
2083 | rt_genid(dev_net(rth->u.dst.dev))); | 2078 | rt_genid(dev_net(rth->dst.dev))); |
2084 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); | 2079 | return rt_intern_hash(hash, rth, NULL, skb, fl->iif); |
2085 | } | 2080 | } |
2086 | 2081 | ||
@@ -2098,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2098 | u8 tos, struct net_device *dev) | 2093 | u8 tos, struct net_device *dev) |
2099 | { | 2094 | { |
2100 | struct fib_result res; | 2095 | struct fib_result res; |
2101 | struct in_device *in_dev = in_dev_get(dev); | 2096 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2102 | struct flowi fl = { .nl_u = { .ip4_u = | 2097 | struct flowi fl = { .nl_u = { .ip4_u = |
2103 | { .daddr = daddr, | 2098 | { .daddr = daddr, |
2104 | .saddr = saddr, | 2099 | .saddr = saddr, |
@@ -2158,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2158 | goto brd_input; | 2153 | goto brd_input; |
2159 | 2154 | ||
2160 | if (res.type == RTN_LOCAL) { | 2155 | if (res.type == RTN_LOCAL) { |
2161 | int result; | 2156 | err = fib_validate_source(saddr, daddr, tos, |
2162 | result = fib_validate_source(saddr, daddr, tos, | ||
2163 | net->loopback_dev->ifindex, | 2157 | net->loopback_dev->ifindex, |
2164 | dev, &spec_dst, &itag, skb->mark); | 2158 | dev, &spec_dst, &itag, skb->mark); |
2165 | if (result < 0) | 2159 | if (err < 0) |
2166 | goto martian_source; | 2160 | goto martian_source_keep_err; |
2167 | if (result) | 2161 | if (err) |
2168 | flags |= RTCF_DIRECTSRC; | 2162 | flags |= RTCF_DIRECTSRC; |
2169 | spec_dst = daddr; | 2163 | spec_dst = daddr; |
2170 | goto local_input; | 2164 | goto local_input; |
@@ -2177,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2177 | 2171 | ||
2178 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | 2172 | err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); |
2179 | done: | 2173 | done: |
2180 | in_dev_put(in_dev); | ||
2181 | if (free_res) | 2174 | if (free_res) |
2182 | fib_res_put(&res); | 2175 | fib_res_put(&res); |
2183 | out: return err; | 2176 | out: return err; |
@@ -2192,7 +2185,7 @@ brd_input: | |||
2192 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, | 2185 | err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, |
2193 | &itag, skb->mark); | 2186 | &itag, skb->mark); |
2194 | if (err < 0) | 2187 | if (err < 0) |
2195 | goto martian_source; | 2188 | goto martian_source_keep_err; |
2196 | if (err) | 2189 | if (err) |
2197 | flags |= RTCF_DIRECTSRC; | 2190 | flags |= RTCF_DIRECTSRC; |
2198 | } | 2191 | } |
@@ -2205,14 +2198,14 @@ local_input: | |||
2205 | if (!rth) | 2198 | if (!rth) |
2206 | goto e_nobufs; | 2199 | goto e_nobufs; |
2207 | 2200 | ||
2208 | rth->u.dst.output= ip_rt_bug; | 2201 | rth->dst.output= ip_rt_bug; |
2209 | rth->u.dst.obsolete = -1; | 2202 | rth->dst.obsolete = -1; |
2210 | rth->rt_genid = rt_genid(net); | 2203 | rth->rt_genid = rt_genid(net); |
2211 | 2204 | ||
2212 | atomic_set(&rth->u.dst.__refcnt, 1); | 2205 | atomic_set(&rth->dst.__refcnt, 1); |
2213 | rth->u.dst.flags= DST_HOST; | 2206 | rth->dst.flags= DST_HOST; |
2214 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2207 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
2215 | rth->u.dst.flags |= DST_NOPOLICY; | 2208 | rth->dst.flags |= DST_NOPOLICY; |
2216 | rth->fl.fl4_dst = daddr; | 2209 | rth->fl.fl4_dst = daddr; |
2217 | rth->rt_dst = daddr; | 2210 | rth->rt_dst = daddr; |
2218 | rth->fl.fl4_tos = tos; | 2211 | rth->fl.fl4_tos = tos; |
@@ -2220,20 +2213,20 @@ local_input: | |||
2220 | rth->fl.fl4_src = saddr; | 2213 | rth->fl.fl4_src = saddr; |
2221 | rth->rt_src = saddr; | 2214 | rth->rt_src = saddr; |
2222 | #ifdef CONFIG_NET_CLS_ROUTE | 2215 | #ifdef CONFIG_NET_CLS_ROUTE |
2223 | rth->u.dst.tclassid = itag; | 2216 | rth->dst.tclassid = itag; |
2224 | #endif | 2217 | #endif |
2225 | rth->rt_iif = | 2218 | rth->rt_iif = |
2226 | rth->fl.iif = dev->ifindex; | 2219 | rth->fl.iif = dev->ifindex; |
2227 | rth->u.dst.dev = net->loopback_dev; | 2220 | rth->dst.dev = net->loopback_dev; |
2228 | dev_hold(rth->u.dst.dev); | 2221 | dev_hold(rth->dst.dev); |
2229 | rth->idev = in_dev_get(rth->u.dst.dev); | 2222 | rth->idev = in_dev_get(rth->dst.dev); |
2230 | rth->rt_gateway = daddr; | 2223 | rth->rt_gateway = daddr; |
2231 | rth->rt_spec_dst= spec_dst; | 2224 | rth->rt_spec_dst= spec_dst; |
2232 | rth->u.dst.input= ip_local_deliver; | 2225 | rth->dst.input= ip_local_deliver; |
2233 | rth->rt_flags = flags|RTCF_LOCAL; | 2226 | rth->rt_flags = flags|RTCF_LOCAL; |
2234 | if (res.type == RTN_UNREACHABLE) { | 2227 | if (res.type == RTN_UNREACHABLE) { |
2235 | rth->u.dst.input= ip_error; | 2228 | rth->dst.input= ip_error; |
2236 | rth->u.dst.error= -err; | 2229 | rth->dst.error= -err; |
2237 | rth->rt_flags &= ~RTCF_LOCAL; | 2230 | rth->rt_flags &= ~RTCF_LOCAL; |
2238 | } | 2231 | } |
2239 | rth->rt_type = res.type; | 2232 | rth->rt_type = res.type; |
@@ -2273,8 +2266,10 @@ e_nobufs: | |||
2273 | goto done; | 2266 | goto done; |
2274 | 2267 | ||
2275 | martian_source: | 2268 | martian_source: |
2269 | err = -EINVAL; | ||
2270 | martian_source_keep_err: | ||
2276 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); | 2271 | ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); |
2277 | goto e_inval; | 2272 | goto done; |
2278 | } | 2273 | } |
2279 | 2274 | ||
2280 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | 2275 | int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, |
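The martian_source rework above stops collapsing every rejected source address into -EINVAL: the plain label now sets the generic error, while callers that already hold a specific errno from fib_validate_source() jump past it to martian_source_keep_err. A minimal sketch of the two-label pattern (only the labels are the kernel's; the rest is illustrative):

	#include <errno.h>

	static int classify_source(int is_martian, int validate_ret)
	{
		int err;

		if (is_martian)
			goto martian_source;
		if (validate_ret < 0) {
			err = validate_ret;	/* keep the validator's errno */
			goto martian_source_keep_err;
		}
		return 0;

	martian_source:
		err = -EINVAL;			/* generic martian error */
	martian_source_keep_err:
		/* ip_handle_martian_source() would log the packet here */
		return err;
	}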
@@ -2284,32 +2279,34 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2284 | unsigned hash; | 2279 | unsigned hash; |
2285 | int iif = dev->ifindex; | 2280 | int iif = dev->ifindex; |
2286 | struct net *net; | 2281 | struct net *net; |
2282 | int res; | ||
2287 | 2283 | ||
2288 | net = dev_net(dev); | 2284 | net = dev_net(dev); |
2289 | 2285 | ||
2286 | rcu_read_lock(); | ||
2287 | |||
2290 | if (!rt_caching(net)) | 2288 | if (!rt_caching(net)) |
2291 | goto skip_cache; | 2289 | goto skip_cache; |
2292 | 2290 | ||
2293 | tos &= IPTOS_RT_MASK; | 2291 | tos &= IPTOS_RT_MASK; |
2294 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); | 2292 | hash = rt_hash(daddr, saddr, iif, rt_genid(net)); |
2295 | 2293 | ||
2296 | rcu_read_lock(); | ||
2297 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; | 2294 | for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; |
2298 | rth = rcu_dereference(rth->u.dst.rt_next)) { | 2295 | rth = rcu_dereference(rth->dst.rt_next)) { |
2299 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | | 2296 | if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) | |
2300 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | | 2297 | ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) | |
2301 | (rth->fl.iif ^ iif) | | 2298 | (rth->fl.iif ^ iif) | |
2302 | rth->fl.oif | | 2299 | rth->fl.oif | |
2303 | (rth->fl.fl4_tos ^ tos)) == 0 && | 2300 | (rth->fl.fl4_tos ^ tos)) == 0 && |
2304 | rth->fl.mark == skb->mark && | 2301 | rth->fl.mark == skb->mark && |
2305 | net_eq(dev_net(rth->u.dst.dev), net) && | 2302 | net_eq(dev_net(rth->dst.dev), net) && |
2306 | !rt_is_expired(rth)) { | 2303 | !rt_is_expired(rth)) { |
2307 | if (noref) { | 2304 | if (noref) { |
2308 | dst_use_noref(&rth->u.dst, jiffies); | 2305 | dst_use_noref(&rth->dst, jiffies); |
2309 | skb_dst_set_noref(skb, &rth->u.dst); | 2306 | skb_dst_set_noref(skb, &rth->dst); |
2310 | } else { | 2307 | } else { |
2311 | dst_use(&rth->u.dst, jiffies); | 2308 | dst_use(&rth->dst, jiffies); |
2312 | skb_dst_set(skb, &rth->u.dst); | 2309 | skb_dst_set(skb, &rth->dst); |
2313 | } | 2310 | } |
2314 | RT_CACHE_STAT_INC(in_hit); | 2311 | RT_CACHE_STAT_INC(in_hit); |
2315 | rcu_read_unlock(); | 2312 | rcu_read_unlock(); |
@@ -2317,7 +2314,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |||
2317 | } | 2314 | } |
2318 | RT_CACHE_STAT_INC(in_hlist_search); | 2315 | RT_CACHE_STAT_INC(in_hlist_search); |
2319 | } | 2316 | } |
2320 | rcu_read_unlock(); | ||
2321 | 2317 | ||
2322 | skip_cache: | 2318 | skip_cache: |
2323 | /* Multicast recognition logic is moved from route cache to here. | 2319 | /* Multicast recognition logic is moved from route cache to here. |
@@ -2332,12 +2328,11 @@ skip_cache: | |||
2332 | route cache entry is created eventually. | 2328 | route cache entry is created eventually. |
2333 | */ | 2329 | */ |
2334 | if (ipv4_is_multicast(daddr)) { | 2330 | if (ipv4_is_multicast(daddr)) { |
2335 | struct in_device *in_dev; | 2331 | struct in_device *in_dev = __in_dev_get_rcu(dev); |
2336 | 2332 | ||
2337 | rcu_read_lock(); | 2333 | if (in_dev) { |
2338 | if ((in_dev = __in_dev_get_rcu(dev)) != NULL) { | ||
2339 | int our = ip_check_mc(in_dev, daddr, saddr, | 2334 | int our = ip_check_mc(in_dev, daddr, saddr, |
2340 | ip_hdr(skb)->protocol); | 2335 | ip_hdr(skb)->protocol); |
2341 | if (our | 2336 | if (our |
2342 | #ifdef CONFIG_IP_MROUTE | 2337 | #ifdef CONFIG_IP_MROUTE |
2343 | || | 2338 | || |
@@ -2345,15 +2340,18 @@ skip_cache: | |||
2345 | IN_DEV_MFORWARD(in_dev)) | 2340 | IN_DEV_MFORWARD(in_dev)) |
2346 | #endif | 2341 | #endif |
2347 | ) { | 2342 | ) { |
2343 | int res = ip_route_input_mc(skb, daddr, saddr, | ||
2344 | tos, dev, our); | ||
2348 | rcu_read_unlock(); | 2345 | rcu_read_unlock(); |
2349 | return ip_route_input_mc(skb, daddr, saddr, | 2346 | return res; |
2350 | tos, dev, our); | ||
2351 | } | 2347 | } |
2352 | } | 2348 | } |
2353 | rcu_read_unlock(); | 2349 | rcu_read_unlock(); |
2354 | return -EINVAL; | 2350 | return -EINVAL; |
2355 | } | 2351 | } |
2356 | return ip_route_input_slow(skb, daddr, saddr, tos, dev); | 2352 | res = ip_route_input_slow(skb, daddr, saddr, tos, dev); |
2353 | rcu_read_unlock(); | ||
2354 | return res; | ||
2357 | } | 2355 | } |
2358 | EXPORT_SYMBOL(ip_route_input_common); | 2356 | EXPORT_SYMBOL(ip_route_input_common); |
2359 | 2357 | ||
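ip_route_input_common() now takes rcu_read_lock() once on entry and holds it across both the cache walk and the slow path, which is what allowed ip_route_input_slow() above to switch from in_dev_get() to __in_dev_get_rcu() and shed its in_dev_put() cleanup. A before/after sketch of the pattern (do_route() is a stand-in, NULL checks trimmed):

	/* Before: a reference was taken and dropped on every lookup. */
	static int input_refcounted(struct net_device *dev)
	{
		struct in_device *in_dev = in_dev_get(dev);
		int err = do_route(in_dev);

		in_dev_put(in_dev);
		return err;
	}

	/* After: one RCU read-side section protects the in_device for the
	 * whole lookup, so no reference counting is needed at all.
	 */
	static int input_rcu(struct net_device *dev)
	{
		int err;

		rcu_read_lock();
		err = do_route(__in_dev_get_rcu(dev));
		rcu_read_unlock();
		return err;
	}

This also explains the relocated rcu_read_unlock() calls: every return path out of the function, including the multicast and slow-path branches, must now drop the single outer read lock.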
@@ -2415,12 +2413,12 @@ static int __mkroute_output(struct rtable **result, | |||
2415 | goto cleanup; | 2413 | goto cleanup; |
2416 | } | 2414 | } |
2417 | 2415 | ||
2418 | atomic_set(&rth->u.dst.__refcnt, 1); | 2416 | atomic_set(&rth->dst.__refcnt, 1); |
2419 | rth->u.dst.flags= DST_HOST; | 2417 | rth->dst.flags= DST_HOST; |
2420 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) | 2418 | if (IN_DEV_CONF_GET(in_dev, NOXFRM)) |
2421 | rth->u.dst.flags |= DST_NOXFRM; | 2419 | rth->dst.flags |= DST_NOXFRM; |
2422 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) | 2420 | if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) |
2423 | rth->u.dst.flags |= DST_NOPOLICY; | 2421 | rth->dst.flags |= DST_NOPOLICY; |
2424 | 2422 | ||
2425 | rth->fl.fl4_dst = oldflp->fl4_dst; | 2423 | rth->fl.fl4_dst = oldflp->fl4_dst; |
2426 | rth->fl.fl4_tos = tos; | 2424 | rth->fl.fl4_tos = tos; |
@@ -2432,35 +2430,35 @@ static int __mkroute_output(struct rtable **result, | |||
2432 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; | 2430 | rth->rt_iif = oldflp->oif ? : dev_out->ifindex; |
2433 | /* get references to the devices that are to be held by the routing | 2431 | /* get references to the devices that are to be held by the routing |
2434 | cache entry */ | 2432 | cache entry */ |
2435 | rth->u.dst.dev = dev_out; | 2433 | rth->dst.dev = dev_out; |
2436 | dev_hold(dev_out); | 2434 | dev_hold(dev_out); |
2437 | rth->idev = in_dev_get(dev_out); | 2435 | rth->idev = in_dev_get(dev_out); |
2438 | rth->rt_gateway = fl->fl4_dst; | 2436 | rth->rt_gateway = fl->fl4_dst; |
2439 | rth->rt_spec_dst= fl->fl4_src; | 2437 | rth->rt_spec_dst= fl->fl4_src; |
2440 | 2438 | ||
2441 | rth->u.dst.output=ip_output; | 2439 | rth->dst.output=ip_output; |
2442 | rth->u.dst.obsolete = -1; | 2440 | rth->dst.obsolete = -1; |
2443 | rth->rt_genid = rt_genid(dev_net(dev_out)); | 2441 | rth->rt_genid = rt_genid(dev_net(dev_out)); |
2444 | 2442 | ||
2445 | RT_CACHE_STAT_INC(out_slow_tot); | 2443 | RT_CACHE_STAT_INC(out_slow_tot); |
2446 | 2444 | ||
2447 | if (flags & RTCF_LOCAL) { | 2445 | if (flags & RTCF_LOCAL) { |
2448 | rth->u.dst.input = ip_local_deliver; | 2446 | rth->dst.input = ip_local_deliver; |
2449 | rth->rt_spec_dst = fl->fl4_dst; | 2447 | rth->rt_spec_dst = fl->fl4_dst; |
2450 | } | 2448 | } |
2451 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { | 2449 | if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { |
2452 | rth->rt_spec_dst = fl->fl4_src; | 2450 | rth->rt_spec_dst = fl->fl4_src; |
2453 | if (flags & RTCF_LOCAL && | 2451 | if (flags & RTCF_LOCAL && |
2454 | !(dev_out->flags & IFF_LOOPBACK)) { | 2452 | !(dev_out->flags & IFF_LOOPBACK)) { |
2455 | rth->u.dst.output = ip_mc_output; | 2453 | rth->dst.output = ip_mc_output; |
2456 | RT_CACHE_STAT_INC(out_slow_mc); | 2454 | RT_CACHE_STAT_INC(out_slow_mc); |
2457 | } | 2455 | } |
2458 | #ifdef CONFIG_IP_MROUTE | 2456 | #ifdef CONFIG_IP_MROUTE |
2459 | if (res->type == RTN_MULTICAST) { | 2457 | if (res->type == RTN_MULTICAST) { |
2460 | if (IN_DEV_MFORWARD(in_dev) && | 2458 | if (IN_DEV_MFORWARD(in_dev) && |
2461 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { | 2459 | !ipv4_is_local_multicast(oldflp->fl4_dst)) { |
2462 | rth->u.dst.input = ip_mr_input; | 2460 | rth->dst.input = ip_mr_input; |
2463 | rth->u.dst.output = ip_mc_output; | 2461 | rth->dst.output = ip_mc_output; |
2464 | } | 2462 | } |
2465 | } | 2463 | } |
2466 | #endif | 2464 | #endif |
@@ -2715,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2715 | 2713 | ||
2716 | rcu_read_lock_bh(); | 2714 | rcu_read_lock_bh(); |
2717 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; | 2715 | for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; |
2718 | rth = rcu_dereference_bh(rth->u.dst.rt_next)) { | 2716 | rth = rcu_dereference_bh(rth->dst.rt_next)) { |
2719 | if (rth->fl.fl4_dst == flp->fl4_dst && | 2717 | if (rth->fl.fl4_dst == flp->fl4_dst && |
2720 | rth->fl.fl4_src == flp->fl4_src && | 2718 | rth->fl.fl4_src == flp->fl4_src && |
2721 | rth->fl.iif == 0 && | 2719 | rth->fl.iif == 0 && |
@@ -2723,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2723 | rth->fl.mark == flp->mark && | 2721 | rth->fl.mark == flp->mark && |
2724 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | 2722 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & |
2725 | (IPTOS_RT_MASK | RTO_ONLINK)) && | 2723 | (IPTOS_RT_MASK | RTO_ONLINK)) && |
2726 | net_eq(dev_net(rth->u.dst.dev), net) && | 2724 | net_eq(dev_net(rth->dst.dev), net) && |
2727 | !rt_is_expired(rth)) { | 2725 | !rt_is_expired(rth)) { |
2728 | dst_use(&rth->u.dst, jiffies); | 2726 | dst_use(&rth->dst, jiffies); |
2729 | RT_CACHE_STAT_INC(out_hit); | 2727 | RT_CACHE_STAT_INC(out_hit); |
2730 | rcu_read_unlock_bh(); | 2728 | rcu_read_unlock_bh(); |
2731 | *rp = rth; | 2729 | *rp = rth; |
@@ -2738,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, | |||
2738 | slow_output: | 2736 | slow_output: |
2739 | return ip_route_output_slow(net, rp, flp); | 2737 | return ip_route_output_slow(net, rp, flp); |
2740 | } | 2738 | } |
2741 | |||
2742 | EXPORT_SYMBOL_GPL(__ip_route_output_key); | 2739 | EXPORT_SYMBOL_GPL(__ip_route_output_key); |
2743 | 2740 | ||
2744 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | 2741 | static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) |
@@ -2762,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2762 | dst_alloc(&ipv4_dst_blackhole_ops); | 2759 | dst_alloc(&ipv4_dst_blackhole_ops); |
2763 | 2760 | ||
2764 | if (rt) { | 2761 | if (rt) { |
2765 | struct dst_entry *new = &rt->u.dst; | 2762 | struct dst_entry *new = &rt->dst; |
2766 | 2763 | ||
2767 | atomic_set(&new->__refcnt, 1); | 2764 | atomic_set(&new->__refcnt, 1); |
2768 | new->__use = 1; | 2765 | new->__use = 1; |
2769 | new->input = dst_discard; | 2766 | new->input = dst_discard; |
2770 | new->output = dst_discard; | 2767 | new->output = dst_discard; |
2771 | memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); | 2768 | memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); |
2772 | 2769 | ||
2773 | new->dev = ort->u.dst.dev; | 2770 | new->dev = ort->dst.dev; |
2774 | if (new->dev) | 2771 | if (new->dev) |
2775 | dev_hold(new->dev); | 2772 | dev_hold(new->dev); |
2776 | 2773 | ||
@@ -2794,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi | |||
2794 | dst_free(new); | 2791 | dst_free(new); |
2795 | } | 2792 | } |
2796 | 2793 | ||
2797 | dst_release(&(*rp)->u.dst); | 2794 | dst_release(&(*rp)->dst); |
2798 | *rp = rt; | 2795 | *rp = rt; |
2799 | return (rt ? 0 : -ENOMEM); | 2796 | return (rt ? 0 : -ENOMEM); |
2800 | } | 2797 | } |
@@ -2822,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, | |||
2822 | 2819 | ||
2823 | return 0; | 2820 | return 0; |
2824 | } | 2821 | } |
2825 | |||
2826 | EXPORT_SYMBOL_GPL(ip_route_output_flow); | 2822 | EXPORT_SYMBOL_GPL(ip_route_output_flow); |
2827 | 2823 | ||
2828 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | 2824 | int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) |
2829 | { | 2825 | { |
2830 | return ip_route_output_flow(net, rp, flp, NULL, 0); | 2826 | return ip_route_output_flow(net, rp, flp, NULL, 0); |
2831 | } | 2827 | } |
2828 | EXPORT_SYMBOL(ip_route_output_key); | ||
2832 | 2829 | ||
2833 | static int rt_fill_info(struct net *net, | 2830 | static int rt_fill_info(struct net *net, |
2834 | struct sk_buff *skb, u32 pid, u32 seq, int event, | 2831 | struct sk_buff *skb, u32 pid, u32 seq, int event, |
@@ -2864,11 +2861,11 @@ static int rt_fill_info(struct net *net, | |||
2864 | r->rtm_src_len = 32; | 2861 | r->rtm_src_len = 32; |
2865 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); | 2862 | NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); |
2866 | } | 2863 | } |
2867 | if (rt->u.dst.dev) | 2864 | if (rt->dst.dev) |
2868 | NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); | 2865 | NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); |
2869 | #ifdef CONFIG_NET_CLS_ROUTE | 2866 | #ifdef CONFIG_NET_CLS_ROUTE |
2870 | if (rt->u.dst.tclassid) | 2867 | if (rt->dst.tclassid) |
2871 | NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); | 2868 | NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); |
2872 | #endif | 2869 | #endif |
2873 | if (rt->fl.iif) | 2870 | if (rt->fl.iif) |
2874 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); | 2871 | NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); |
@@ -2878,12 +2875,16 @@ static int rt_fill_info(struct net *net, | |||
2878 | if (rt->rt_dst != rt->rt_gateway) | 2875 | if (rt->rt_dst != rt->rt_gateway) |
2879 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); | 2876 | NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); |
2880 | 2877 | ||
2881 | if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) | 2878 | if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) |
2882 | goto nla_put_failure; | 2879 | goto nla_put_failure; |
2883 | 2880 | ||
2884 | error = rt->u.dst.error; | 2881 | if (rt->fl.mark) |
2885 | expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; | 2882 | NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark); |
2883 | |||
2884 | error = rt->dst.error; | ||
2885 | expires = rt->dst.expires ? rt->dst.expires - jiffies : 0; | ||
2886 | if (rt->peer) { | 2886 | if (rt->peer) { |
2887 | inet_peer_refcheck(rt->peer); | ||
2887 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; | 2888 | id = atomic_read(&rt->peer->ip_id_count) & 0xffff; |
2888 | if (rt->peer->tcp_ts_stamp) { | 2889 | if (rt->peer->tcp_ts_stamp) { |
2889 | ts = rt->peer->tcp_ts; | 2890 | ts = rt->peer->tcp_ts; |
@@ -2914,7 +2915,7 @@ static int rt_fill_info(struct net *net, | |||
2914 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); | 2915 | NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); |
2915 | } | 2916 | } |
2916 | 2917 | ||
2917 | if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, | 2918 | if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage, |
2918 | expires, error) < 0) | 2919 | expires, error) < 0) |
2919 | goto nla_put_failure; | 2920 | goto nla_put_failure; |
2920 | 2921 | ||
@@ -2935,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2935 | __be32 src = 0; | 2936 | __be32 src = 0; |
2936 | u32 iif; | 2937 | u32 iif; |
2937 | int err; | 2938 | int err; |
2939 | int mark; | ||
2938 | struct sk_buff *skb; | 2940 | struct sk_buff *skb; |
2939 | 2941 | ||
2940 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); | 2942 | err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); |
@@ -2962,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2962 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; | 2964 | src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; |
2963 | dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; | 2965 | dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; |
2964 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; | 2966 | iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; |
2967 | mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0; | ||
2965 | 2968 | ||
2966 | if (iif) { | 2969 | if (iif) { |
2967 | struct net_device *dev; | 2970 | struct net_device *dev; |
@@ -2974,13 +2977,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2974 | 2977 | ||
2975 | skb->protocol = htons(ETH_P_IP); | 2978 | skb->protocol = htons(ETH_P_IP); |
2976 | skb->dev = dev; | 2979 | skb->dev = dev; |
2980 | skb->mark = mark; | ||
2977 | local_bh_disable(); | 2981 | local_bh_disable(); |
2978 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); | 2982 | err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); |
2979 | local_bh_enable(); | 2983 | local_bh_enable(); |
2980 | 2984 | ||
2981 | rt = skb_rtable(skb); | 2985 | rt = skb_rtable(skb); |
2982 | if (err == 0 && rt->u.dst.error) | 2986 | if (err == 0 && rt->dst.error) |
2983 | err = -rt->u.dst.error; | 2987 | err = -rt->dst.error; |
2984 | } else { | 2988 | } else { |
2985 | struct flowi fl = { | 2989 | struct flowi fl = { |
2986 | .nl_u = { | 2990 | .nl_u = { |
@@ -2991,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2991 | }, | 2995 | }, |
2992 | }, | 2996 | }, |
2993 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, | 2997 | .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, |
2998 | .mark = mark, | ||
2994 | }; | 2999 | }; |
2995 | err = ip_route_output_key(net, &rt, &fl); | 3000 | err = ip_route_output_key(net, &rt, &fl); |
2996 | } | 3001 | } |
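With RTA_MARK parsed above, both halves of inet_rtm_getroute() feed the fwmark into the lookup: the input branch stamps it onto skb->mark before ip_route_input(), and the output branch carries it in the flow key, restated here (kernel context, abridged to the fields visible in this hunk):

	struct flowi fl = {
		.nl_u = { .ip4_u = { .daddr = dst, .saddr = src } },
		.oif  = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
		.mark = mark,		/* 0 when RTA_MARK is absent */
	};
	err = ip_route_output_key(net, &rt, &fl);

In userspace terms, and assuming an iproute2 recent enough to emit RTA_MARK, this is what lets a query like `ip route get 198.51.100.1 mark 7` resolve against fwmark-based policy routing.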
@@ -2998,7 +3003,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2998 | if (err) | 3003 | if (err) |
2999 | goto errout_free; | 3004 | goto errout_free; |
3000 | 3005 | ||
3001 | skb_dst_set(skb, &rt->u.dst); | 3006 | skb_dst_set(skb, &rt->dst); |
3002 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 3007 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
3003 | rt->rt_flags |= RTCF_NOTIFY; | 3008 | rt->rt_flags |= RTCF_NOTIFY; |
3004 | 3009 | ||
@@ -3034,12 +3039,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
3034 | continue; | 3039 | continue; |
3035 | rcu_read_lock_bh(); | 3040 | rcu_read_lock_bh(); |
3036 | for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; | 3041 | for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; |
3037 | rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { | 3042 | rt = rcu_dereference_bh(rt->dst.rt_next), idx++) { |
3038 | if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) | 3043 | if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx) |
3039 | continue; | 3044 | continue; |
3040 | if (rt_is_expired(rt)) | 3045 | if (rt_is_expired(rt)) |
3041 | continue; | 3046 | continue; |
3042 | skb_dst_set_noref(skb, &rt->u.dst); | 3047 | skb_dst_set_noref(skb, &rt->dst); |
3043 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, | 3048 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, |
3044 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | 3049 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, |
3045 | 1, NLM_F_MULTI) <= 0) { | 3050 | 1, NLM_F_MULTI) <= 0) { |
@@ -3365,6 +3370,3 @@ void __init ip_static_sysctl_init(void) | |||
3365 | register_sysctl_paths(ipv4_path, ipv4_skeleton); | 3370 | register_sysctl_paths(ipv4_path, ipv4_skeleton); |
3366 | } | 3371 | } |
3367 | #endif | 3372 | #endif |
3368 | |||
3369 | EXPORT_SYMBOL(__ip_select_ident); | ||
3370 | EXPORT_SYMBOL(ip_route_output_key); | ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9f6b22206c52..650cace2180d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c | |||
@@ -18,8 +18,8 @@ | |||
18 | #include <net/tcp.h> | 18 | #include <net/tcp.h> |
19 | #include <net/route.h> | 19 | #include <net/route.h> |
20 | 20 | ||
21 | /* Timestamps: lowest 9 bits store TCP options */ | 21 | /* Timestamps: lowest bits store TCP options */ |
22 | #define TSBITS 9 | 22 | #define TSBITS 6 |
23 | #define TSMASK (((__u32)1 << TSBITS) - 1) | 23 | #define TSMASK (((__u32)1 << TSBITS) - 1) |
24 | 24 | ||
25 | extern int sysctl_tcp_syncookies; | 25 | extern int sysctl_tcp_syncookies; |
@@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, | |||
58 | 58 | ||
59 | /* | 59 | /* |
60 | * when syncookies are in effect and tcp timestamps are enabled we encode | 60 | * when syncookies are in effect and tcp timestamps are enabled we encode |
61 | * tcp options in the lowest 9 bits of the timestamp value that will be | 61 | * tcp options in the lower bits of the timestamp value that will be |
62 | * sent in the syn-ack. | 62 | * sent in the syn-ack. |
63 | * Since subsequent timestamps use the normal tcp_time_stamp value, we | 63 | * Since subsequent timestamps use the normal tcp_time_stamp value, we |
64 | * must make sure that the resulting initial timestamp is <= tcp_time_stamp. | 64 | * must make sure that the resulting initial timestamp is <= tcp_time_stamp. |
@@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req) | |||
70 | u32 options = 0; | 70 | u32 options = 0; |
71 | 71 | ||
72 | ireq = inet_rsk(req); | 72 | ireq = inet_rsk(req); |
73 | if (ireq->wscale_ok) { | 73 | |
74 | options = ireq->snd_wscale; | 74 | options = ireq->wscale_ok ? ireq->snd_wscale : 0xf; |
75 | options |= ireq->rcv_wscale << 4; | 75 | options |= ireq->sack_ok << 4; |
76 | } | 76 | options |= ireq->ecn_ok << 5; |
77 | options |= ireq->sack_ok << 8; | ||
78 | 77 | ||
79 | ts = ts_now & ~TSMASK; | 78 | ts = ts_now & ~TSMASK; |
80 | ts |= options; | 79 | ts |= options; |
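With TSBITS cut from 9 to 6, the SYN-ACK timestamp spends only its low six bits on options: bits 0-3 hold snd_wscale (0xf meaning window scaling was not negotiated, since legal shift values stop at 14), bit 4 SACK and bit 5 ECN. rcv_wscale no longer needs to ride along because it is recomputed locally when the cookie is validated (see the tcp_select_initial_window() call further down). A standalone sketch of the encoding; the kernel function additionally clamps the result so it never exceeds the current tcp_time_stamp:

	#include <stdint.h>

	#define TSBITS	6
	#define TSMASK	(((uint32_t)1 << TSBITS) - 1)

	static uint32_t cookie_ts_encode(uint32_t ts_now, int wscale_ok,
					 uint32_t snd_wscale, int sack_ok,
					 int ecn_ok)
	{
		uint32_t options = wscale_ok ? snd_wscale : 0xf;

		options |= (uint32_t)sack_ok << 4;
		options |= (uint32_t)ecn_ok << 5;

		return (ts_now & ~TSMASK) | options;	/* clock bits | options */
	}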
@@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr, | |||
138 | } | 137 | } |
139 | 138 | ||
140 | /* | 139 | /* |
141 | * This table has to be sorted and terminated with (__u16)-1. | 140 | * MSS Values are taken from the 2009 paper |
142 | * XXX generate a better table. | 141 | * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson: |
143 | * Unresolved Issues: HIPPI with a 64k MSS is not well supported. | 142 | * - values 1440 to 1460 accounted for 80% of observed mss values |
143 | * - values outside the 536-1460 range are rare (<0.2%). | ||
144 | * | ||
145 | * Table must be sorted. | ||
144 | */ | 146 | */ |
145 | static __u16 const msstab[] = { | 147 | static __u16 const msstab[] = { |
146 | 64 - 1, | 148 | 64, |
147 | 256 - 1, | 149 | 512, |
148 | 512 - 1, | 150 | 536, |
149 | 536 - 1, | 151 | 1024, |
150 | 1024 - 1, | 152 | 1440, |
151 | 1440 - 1, | 153 | 1460, |
152 | 1460 - 1, | 154 | 4312, |
153 | 4312 - 1, | 155 | 8960, |
154 | (__u16)-1 | ||
155 | }; | 156 | }; |
156 | /* The number doesn't include the -1 terminator */ | ||
157 | #define NUM_MSS (ARRAY_SIZE(msstab) - 1) | ||
158 | 157 | ||
159 | /* | 158 | /* |
160 | * Generate a syncookie. mssp points to the mss, which is returned | 159 | * Generate a syncookie. mssp points to the mss, which is returned |
@@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) | |||
169 | 168 | ||
170 | tcp_synq_overflow(sk); | 169 | tcp_synq_overflow(sk); |
171 | 170 | ||
172 | /* XXX sort msstab[] by probability? Binary search? */ | 171 | for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--) |
173 | for (mssind = 0; mss > msstab[mssind + 1]; mssind++) | 172 | if (mss >= msstab[mssind]) |
174 | ; | 173 | break; |
175 | *mssp = msstab[mssind] + 1; | 174 | *mssp = msstab[mssind]; |
176 | 175 | ||
177 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); | 176 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); |
178 | 177 | ||
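The rewritten table stores the advertised MSS values directly, dropping both the old `- 1` bias and the (__u16)-1 terminator, and the lookup scans downward for the largest entry that does not exceed the peer's MSS, with index 0 serving as the floor. A self-contained sketch:

	#include <stdint.h>

	static const uint16_t msstab[] = {
		64, 512, 536, 1024, 1440, 1460, 4312, 8960,
	};
	#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

	static unsigned int mss_to_index(uint16_t mss)
	{
		unsigned int mssind;

		for (mssind = ARRAY_SIZE(msstab) - 1; mssind; mssind--)
			if (mss >= msstab[mssind])
				break;
		return mssind;
	}

For example, an incoming MSS of 1450 maps to index 4 and is regenerated as 1440 when the cookie is checked, matching the paper's observation that 1440-1460 dominates in practice.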
@@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) | |||
202 | jiffies / (HZ * 60), | 201 | jiffies / (HZ * 60), |
203 | COUNTER_TRIES); | 202 | COUNTER_TRIES); |
204 | 203 | ||
205 | return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; | 204 | return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; |
206 | } | 205 | } |
207 | 206 | ||
208 | static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | 207 | static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, |
@@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, | |||
227 | * additional tcp options in the timestamp. | 226 | * additional tcp options in the timestamp. |
228 | * This extracts these options from the timestamp echo. | 227 | * This extracts these options from the timestamp echo. |
229 | * | 228 | * |
230 | * The lowest 4 bits are for snd_wscale | 229 | * The lowest 4 bits store snd_wscale. |
231 | * The next 4 lsb are for rcv_wscale | 230 | * next 2 bits indicate SACK and ECN support. |
232 | * The next lsb is for sack_ok | 231 | * |
232 | * return false if we decode an option that should not be present. | ||
233 | */ | 233 | */ |
234 | void cookie_check_timestamp(struct tcp_options_received *tcp_opt) | 234 | bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok) |
235 | { | 235 | { |
236 | /* echoed timestamp, 9 lowest bits contain options */ | 236 | /* echoed timestamp, lowest bits contain options */ |
237 | u32 options = tcp_opt->rcv_tsecr & TSMASK; | 237 | u32 options = tcp_opt->rcv_tsecr & TSMASK; |
238 | 238 | ||
239 | tcp_opt->snd_wscale = options & 0xf; | 239 | if (!tcp_opt->saw_tstamp) { |
240 | options >>= 4; | 240 | tcp_clear_options(tcp_opt); |
241 | tcp_opt->rcv_wscale = options & 0xf; | 241 | return true; |
242 | } | ||
243 | |||
244 | if (!sysctl_tcp_timestamps) | ||
245 | return false; | ||
242 | 246 | ||
243 | tcp_opt->sack_ok = (options >> 4) & 0x1; | 247 | tcp_opt->sack_ok = (options >> 4) & 0x1; |
248 | *ecn_ok = (options >> 5) & 1; | ||
249 | if (*ecn_ok && !sysctl_tcp_ecn) | ||
250 | return false; | ||
251 | |||
252 | if (tcp_opt->sack_ok && !sysctl_tcp_sack) | ||
253 | return false; | ||
244 | 254 | ||
245 | if (tcp_opt->sack_ok) | 255 | if ((options & 0xf) == 0xf) |
246 | tcp_sack_reset(tcp_opt); | 256 | return true; /* no window scaling */ |
247 | 257 | ||
248 | if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) | 258 | tcp_opt->wscale_ok = 1; |
249 | tcp_opt->wscale_ok = 1; | 259 | tcp_opt->snd_wscale = options & 0xf; |
260 | return sysctl_tcp_window_scaling != 0; | ||
250 | } | 261 | } |
251 | EXPORT_SYMBOL(cookie_check_timestamp); | 262 | EXPORT_SYMBOL(cookie_check_timestamp); |
252 | 263 | ||
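cookie_check_timestamp() is now both the inverse of the encoder and a policy gate: it returns false, causing the ACK to be dropped, whenever the echoed options advertise a feature the corresponding sysctl has disabled, so a forged cookie cannot re-enable SACK, ECN, or window scaling behind the administrator's back. A decode-only sketch with the sysctl checks stubbed out:

	#include <stdbool.h>
	#include <stdint.h>

	#define TSMASK	(((uint32_t)1 << 6) - 1)

	static bool cookie_ts_decode(uint32_t rcv_tsecr, uint32_t *snd_wscale,
				     int *wscale_ok, int *sack_ok, int *ecn_ok)
	{
		uint32_t options = rcv_tsecr & TSMASK;

		*sack_ok = (options >> 4) & 1;
		*ecn_ok  = (options >> 5) & 1;

		if ((options & 0xf) == 0xf) {	/* 0xf == no window scaling */
			*wscale_ok  = 0;
			*snd_wscale = 0;
			return true;
		}
		*wscale_ok  = 1;
		*snd_wscale = options & 0xf;
		return true;	/* kernel also requires sysctl_tcp_window_scaling */
	}

Note the changed contract at the call site in cookie_v4_check(): the function is now called unconditionally and handles the no-timestamp case itself, instead of the caller guarding on saw_tstamp.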
@@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
265 | int mss; | 276 | int mss; |
266 | struct rtable *rt; | 277 | struct rtable *rt; |
267 | __u8 rcv_wscale; | 278 | __u8 rcv_wscale; |
279 | bool ecn_ok; | ||
268 | 280 | ||
269 | if (!sysctl_tcp_syncookies || !th->ack) | 281 | if (!sysctl_tcp_syncookies || !th->ack || th->rst) |
270 | goto out; | 282 | goto out; |
271 | 283 | ||
272 | if (tcp_synq_no_recent_overflow(sk) || | 284 | if (tcp_synq_no_recent_overflow(sk) || |
@@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
281 | memset(&tcp_opt, 0, sizeof(tcp_opt)); | 293 | memset(&tcp_opt, 0, sizeof(tcp_opt)); |
282 | tcp_parse_options(skb, &tcp_opt, &hash_location, 0); | 294 | tcp_parse_options(skb, &tcp_opt, &hash_location, 0); |
283 | 295 | ||
284 | if (tcp_opt.saw_tstamp) | 296 | if (!cookie_check_timestamp(&tcp_opt, &ecn_ok)) |
285 | cookie_check_timestamp(&tcp_opt); | 297 | goto out; |
286 | 298 | ||
287 | ret = NULL; | 299 | ret = NULL; |
288 | req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ | 300 | req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ |
@@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
298 | ireq->rmt_port = th->source; | 310 | ireq->rmt_port = th->source; |
299 | ireq->loc_addr = ip_hdr(skb)->daddr; | 311 | ireq->loc_addr = ip_hdr(skb)->daddr; |
300 | ireq->rmt_addr = ip_hdr(skb)->saddr; | 312 | ireq->rmt_addr = ip_hdr(skb)->saddr; |
301 | ireq->ecn_ok = 0; | 313 | ireq->ecn_ok = ecn_ok; |
302 | ireq->snd_wscale = tcp_opt.snd_wscale; | 314 | ireq->snd_wscale = tcp_opt.snd_wscale; |
303 | ireq->rcv_wscale = tcp_opt.rcv_wscale; | ||
304 | ireq->sack_ok = tcp_opt.sack_ok; | 315 | ireq->sack_ok = tcp_opt.sack_ok; |
305 | ireq->wscale_ok = tcp_opt.wscale_ok; | 316 | ireq->wscale_ok = tcp_opt.wscale_ok; |
306 | ireq->tstamp_ok = tcp_opt.saw_tstamp; | 317 | ireq->tstamp_ok = tcp_opt.saw_tstamp; |
@@ -354,15 +365,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, | |||
354 | } | 365 | } |
355 | 366 | ||
356 | /* Try to redo what tcp_v4_send_synack did. */ | 367 | /* Try to redo what tcp_v4_send_synack did. */ |
357 | req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); | 368 | req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); |
358 | 369 | ||
359 | tcp_select_initial_window(tcp_full_space(sk), req->mss, | 370 | tcp_select_initial_window(tcp_full_space(sk), req->mss, |
360 | &req->rcv_wnd, &req->window_clamp, | 371 | &req->rcv_wnd, &req->window_clamp, |
361 | ireq->wscale_ok, &rcv_wscale, | 372 | ireq->wscale_ok, &rcv_wscale, |
362 | dst_metric(&rt->u.dst, RTAX_INITRWND)); | 373 | dst_metric(&rt->dst, RTAX_INITRWND)); |
363 | 374 | ||
364 | ireq->rcv_wscale = rcv_wscale; | 375 | ireq->rcv_wscale = rcv_wscale; |
365 | 376 | ||
366 | ret = get_cookie_sock(sk, skb, req, &rt->u.dst); | 377 | ret = get_cookie_sock(sk, skb, req, &rt->dst); |
367 | out: return ret; | 378 | out: return ret; |
368 | } | 379 | } |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 65afeaec15b7..176e11aaea77 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -315,7 +315,6 @@ struct tcp_splice_state { | |||
315 | * is strict, actions are advisory and have some latency. | 315 | * is strict, actions are advisory and have some latency. |
316 | */ | 316 | */ |
317 | int tcp_memory_pressure __read_mostly; | 317 | int tcp_memory_pressure __read_mostly; |
318 | |||
319 | EXPORT_SYMBOL(tcp_memory_pressure); | 318 | EXPORT_SYMBOL(tcp_memory_pressure); |
320 | 319 | ||
321 | void tcp_enter_memory_pressure(struct sock *sk) | 320 | void tcp_enter_memory_pressure(struct sock *sk) |
@@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk) | |||
325 | tcp_memory_pressure = 1; | 324 | tcp_memory_pressure = 1; |
326 | } | 325 | } |
327 | } | 326 | } |
328 | |||
329 | EXPORT_SYMBOL(tcp_enter_memory_pressure); | 327 | EXPORT_SYMBOL(tcp_enter_memory_pressure); |
330 | 328 | ||
331 | /* Convert seconds to retransmits based on initial and max timeout */ | 329 | /* Convert seconds to retransmits based on initial and max timeout */ |
@@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
460 | } | 458 | } |
461 | return mask; | 459 | return mask; |
462 | } | 460 | } |
461 | EXPORT_SYMBOL(tcp_poll); | ||
463 | 462 | ||
464 | int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | 463 | int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) |
465 | { | 464 | { |
@@ -508,10 +507,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
508 | 507 | ||
509 | return put_user(answ, (int __user *)arg); | 508 | return put_user(answ, (int __user *)arg); |
510 | } | 509 | } |
510 | EXPORT_SYMBOL(tcp_ioctl); | ||
511 | 511 | ||
512 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) | 512 | static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) |
513 | { | 513 | { |
514 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 514 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
515 | tp->pushed_seq = tp->write_seq; | 515 | tp->pushed_seq = tp->write_seq; |
516 | } | 516 | } |
517 | 517 | ||
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) | |||
527 | 527 | ||
528 | skb->csum = 0; | 528 | skb->csum = 0; |
529 | tcb->seq = tcb->end_seq = tp->write_seq; | 529 | tcb->seq = tcb->end_seq = tp->write_seq; |
530 | tcb->flags = TCPCB_FLAG_ACK; | 530 | tcb->flags = TCPHDR_ACK; |
531 | tcb->sacked = 0; | 531 | tcb->sacked = 0; |
532 | skb_header_release(skb); | 532 | skb_header_release(skb); |
533 | tcp_add_write_queue_tail(sk, skb); | 533 | tcp_add_write_queue_tail(sk, skb); |
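The TCPCB_FLAG_PSH and TCPCB_FLAG_ACK renames here and in the following hunks come from unifying the control-block flag namespace with the on-the-wire TCP header flag bits. The assumed definitions, from the include/net/tcp.h side of this series rather than this file:

	#define TCPHDR_FIN	0x01
	#define TCPHDR_SYN	0x02
	#define TCPHDR_RST	0x04
	#define TCPHDR_PSH	0x08
	#define TCPHDR_ACK	0x10
	#define TCPHDR_URG	0x20
	#define TCPHDR_ECE	0x40
	#define TCPHDR_CWR	0x80

Since the values match the header flag byte exactly, flags can be copied between the control block and the TCP header without translation.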
@@ -676,6 +676,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, | |||
676 | 676 | ||
677 | return ret; | 677 | return ret; |
678 | } | 678 | } |
679 | EXPORT_SYMBOL(tcp_splice_read); | ||
679 | 680 | ||
680 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | 681 | struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) |
681 | { | 682 | { |
@@ -816,7 +817,7 @@ new_segment: | |||
816 | skb_shinfo(skb)->gso_segs = 0; | 817 | skb_shinfo(skb)->gso_segs = 0; |
817 | 818 | ||
818 | if (!copied) | 819 | if (!copied) |
819 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; | 820 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
820 | 821 | ||
821 | copied += copy; | 822 | copied += copy; |
822 | poffset += copy; | 823 | poffset += copy; |
@@ -857,15 +858,15 @@ out_err: | |||
857 | return sk_stream_error(sk, flags, err); | 858 | return sk_stream_error(sk, flags, err); |
858 | } | 859 | } |
859 | 860 | ||
860 | ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | 861 | int tcp_sendpage(struct sock *sk, struct page *page, int offset, |
861 | size_t size, int flags) | 862 | size_t size, int flags) |
862 | { | 863 | { |
863 | ssize_t res; | 864 | ssize_t res; |
864 | struct sock *sk = sock->sk; | ||
865 | 865 | ||
866 | if (!(sk->sk_route_caps & NETIF_F_SG) || | 866 | if (!(sk->sk_route_caps & NETIF_F_SG) || |
867 | !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) | 867 | !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) |
868 | return sock_no_sendpage(sock, page, offset, size, flags); | 868 | return sock_no_sendpage(sk->sk_socket, page, offset, size, |
869 | flags); | ||
869 | 870 | ||
870 | lock_sock(sk); | 871 | lock_sock(sk); |
871 | TCP_CHECK_TIMER(sk); | 872 | TCP_CHECK_TIMER(sk); |
@@ -874,6 +875,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, | |||
874 | release_sock(sk); | 875 | release_sock(sk); |
875 | return res; | 876 | return res; |
876 | } | 877 | } |
878 | EXPORT_SYMBOL(tcp_sendpage); | ||
877 | 879 | ||
878 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) | 880 | #define TCP_PAGE(sk) (sk->sk_sndmsg_page) |
879 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) | 881 | #define TCP_OFF(sk) (sk->sk_sndmsg_off) |
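Dropping the struct socket parameter lets tcp_sendpage() (and tcp_sendmsg() in the next hunk) match the per-protocol hooks directly, so the inet layer can dispatch through sk->sk_prot without an adapter. The relevant struct proto slots, abridged and as assumed by this series:

	struct proto_abridged {		/* illustrative subset of struct proto */
		int	(*sendmsg)(struct kiocb *iocb, struct sock *sk,
				   struct msghdr *msg, size_t len);
		int	(*sendpage)(struct sock *sk, struct page *page,
				    int offset, size_t size, int flags);
	};

The sock_no_sendpage() fallback still wants the struct socket, hence the sk->sk_socket indirection in the rewritten guard.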
@@ -898,10 +900,9 @@ static inline int select_size(struct sock *sk, int sg) | |||
898 | return tmp; | 900 | return tmp; |
899 | } | 901 | } |
900 | 902 | ||
901 | int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | 903 | int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, |
902 | size_t size) | 904 | size_t size) |
903 | { | 905 | { |
904 | struct sock *sk = sock->sk; | ||
905 | struct iovec *iov; | 906 | struct iovec *iov; |
906 | struct tcp_sock *tp = tcp_sk(sk); | 907 | struct tcp_sock *tp = tcp_sk(sk); |
907 | struct sk_buff *skb; | 908 | struct sk_buff *skb; |
@@ -1062,7 +1063,7 @@ new_segment: | |||
1062 | } | 1063 | } |
1063 | 1064 | ||
1064 | if (!copied) | 1065 | if (!copied) |
1065 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; | 1066 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; |
1066 | 1067 | ||
1067 | tp->write_seq += copy; | 1068 | tp->write_seq += copy; |
1068 | TCP_SKB_CB(skb)->end_seq += copy; | 1069 | TCP_SKB_CB(skb)->end_seq += copy; |
@@ -1122,6 +1123,7 @@ out_err: | |||
1122 | release_sock(sk); | 1123 | release_sock(sk); |
1123 | return err; | 1124 | return err; |
1124 | } | 1125 | } |
1126 | EXPORT_SYMBOL(tcp_sendmsg); | ||
1125 | 1127 | ||
1126 | /* | 1128 | /* |
1127 | * Handle reading urgent data. BSD has very simple semantics for | 1129 | * Handle reading urgent data. BSD has very simple semantics for |
@@ -1381,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, | |||
1381 | tcp_cleanup_rbuf(sk, copied); | 1383 | tcp_cleanup_rbuf(sk, copied); |
1382 | return copied; | 1384 | return copied; |
1383 | } | 1385 | } |
1386 | EXPORT_SYMBOL(tcp_read_sock); | ||
1384 | 1387 | ||
1385 | /* | 1388 | /* |
1386 | * This routine copies from a sock struct into the user buffer. | 1389 | * This routine copies from a sock struct into the user buffer. |
@@ -1775,6 +1778,7 @@ recv_urg: | |||
1775 | err = tcp_recv_urg(sk, msg, len, flags); | 1778 | err = tcp_recv_urg(sk, msg, len, flags); |
1776 | goto out; | 1779 | goto out; |
1777 | } | 1780 | } |
1781 | EXPORT_SYMBOL(tcp_recvmsg); | ||
1778 | 1782 | ||
1779 | void tcp_set_state(struct sock *sk, int state) | 1783 | void tcp_set_state(struct sock *sk, int state) |
1780 | { | 1784 | { |
@@ -1867,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how) | |||
1867 | tcp_send_fin(sk); | 1871 | tcp_send_fin(sk); |
1868 | } | 1872 | } |
1869 | } | 1873 | } |
1874 | EXPORT_SYMBOL(tcp_shutdown); | ||
1870 | 1875 | ||
1871 | void tcp_close(struct sock *sk, long timeout) | 1876 | void tcp_close(struct sock *sk, long timeout) |
1872 | { | 1877 | { |
@@ -1899,6 +1904,10 @@ void tcp_close(struct sock *sk, long timeout) | |||
1899 | 1904 | ||
1900 | sk_mem_reclaim(sk); | 1905 | sk_mem_reclaim(sk); |
1901 | 1906 | ||
1907 | /* If the socket has already been reset (e.g. in tcp_reset()), kill it. */ ||
1908 | if (sk->sk_state == TCP_CLOSE) | ||
1909 | goto adjudge_to_death; | ||
1910 | |||
1902 | /* As outlined in RFC 2525, section 2.17, we send a RST here because | 1911 | /* As outlined in RFC 2525, section 2.17, we send a RST here because |
1903 | * data was lost. To witness the awful effects of the old behavior of | 1912 | * data was lost. To witness the awful effects of the old behavior of |
1904 | * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk | 1913 | * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk |
@@ -2026,6 +2035,7 @@ out: | |||
2026 | local_bh_enable(); | 2035 | local_bh_enable(); |
2027 | sock_put(sk); | 2036 | sock_put(sk); |
2028 | } | 2037 | } |
2038 | EXPORT_SYMBOL(tcp_close); | ||
2029 | 2039 | ||
2030 | /* These states need RST on ABORT according to RFC793 */ | 2040 | /* These states need RST on ABORT according to RFC793 */ |
2031 | 2041 | ||
@@ -2099,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags) | |||
2099 | sk->sk_error_report(sk); | 2109 | sk->sk_error_report(sk); |
2100 | return err; | 2110 | return err; |
2101 | } | 2111 | } |
2112 | EXPORT_SYMBOL(tcp_disconnect); | ||
2102 | 2113 | ||
2103 | /* | 2114 | /* |
2104 | * Socket option code for TCP. | 2115 | * Socket option code for TCP. |
@@ -2176,6 +2187,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2176 | GFP_KERNEL); | 2187 | GFP_KERNEL); |
2177 | if (cvp == NULL) | 2188 | if (cvp == NULL) |
2178 | return -ENOMEM; | 2189 | return -ENOMEM; |
2190 | |||
2191 | kref_init(&cvp->kref); | ||
2179 | } | 2192 | } |
2180 | lock_sock(sk); | 2193 | lock_sock(sk); |
2181 | tp->rx_opt.cookie_in_always = | 2194 | tp->rx_opt.cookie_in_always = |
@@ -2190,12 +2203,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2190 | */ | 2203 | */ |
2191 | kref_put(&tp->cookie_values->kref, | 2204 | kref_put(&tp->cookie_values->kref, |
2192 | tcp_cookie_values_release); | 2205 | tcp_cookie_values_release); |
2193 | kref_init(&cvp->kref); | ||
2194 | tp->cookie_values = cvp; | ||
2195 | } else { | 2206 | } else { |
2196 | cvp = tp->cookie_values; | 2207 | cvp = tp->cookie_values; |
2197 | } | 2208 | } |
2198 | } | 2209 | } |
2210 | |||
2199 | if (cvp != NULL) { | 2211 | if (cvp != NULL) { |
2200 | cvp->cookie_desired = ctd.tcpct_cookie_desired; | 2212 | cvp->cookie_desired = ctd.tcpct_cookie_desired; |
2201 | 2213 | ||
@@ -2209,6 +2221,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2209 | cvp->s_data_desired = ctd.tcpct_s_data_desired; | 2221 | cvp->s_data_desired = ctd.tcpct_s_data_desired; |
2210 | cvp->s_data_constant = 0; /* false */ | 2222 | cvp->s_data_constant = 0; /* false */ |
2211 | } | 2223 | } |
2224 | |||
2225 | tp->cookie_values = cvp; | ||
2212 | } | 2226 | } |
2213 | release_sock(sk); | 2227 | release_sock(sk); |
2214 | return err; | 2228 | return err; |
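The TCP_COOKIE_TRANSACTIONS reshuffle moves kref_init() to allocation time, where it runs on every path instead of only when an existing cookie_values is being replaced, and defers the tp->cookie_values assignment until the structure is fully populated. A lifecycle sketch; apart from the kref calls, the names are illustrative:

	struct cvp_sketch {
		struct kref kref;
		/* ... payload ... */
	};

	static void cvp_release(struct kref *kref)
	{
		/* container_of() back to the object and free it */
	}

	static void cvp_install(struct cvp_sketch **slot, struct cvp_sketch *cvp)
	{
		kref_init(&cvp->kref);		/* refcount set at birth */
		/* ... populate cvp ... */
		if (*slot)
			kref_put(&(*slot)->kref, cvp_release);
		*slot = cvp;			/* publish only when complete */
	}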
@@ -2397,6 +2411,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2397 | optval, optlen); | 2411 | optval, optlen); |
2398 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); | 2412 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); |
2399 | } | 2413 | } |
2414 | EXPORT_SYMBOL(tcp_setsockopt); | ||
2400 | 2415 | ||
2401 | #ifdef CONFIG_COMPAT | 2416 | #ifdef CONFIG_COMPAT |
2402 | int compat_tcp_setsockopt(struct sock *sk, int level, int optname, | 2417 | int compat_tcp_setsockopt(struct sock *sk, int level, int optname, |
@@ -2407,7 +2422,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname, | |||
2407 | optval, optlen); | 2422 | optval, optlen); |
2408 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); | 2423 | return do_tcp_setsockopt(sk, level, optname, optval, optlen); |
2409 | } | 2424 | } |
2410 | |||
2411 | EXPORT_SYMBOL(compat_tcp_setsockopt); | 2425 | EXPORT_SYMBOL(compat_tcp_setsockopt); |
2412 | #endif | 2426 | #endif |
2413 | 2427 | ||
@@ -2473,7 +2487,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) | |||
2473 | 2487 | ||
2474 | info->tcpi_total_retrans = tp->total_retrans; | 2488 | info->tcpi_total_retrans = tp->total_retrans; |
2475 | } | 2489 | } |
2476 | |||
2477 | EXPORT_SYMBOL_GPL(tcp_get_info); | 2490 | EXPORT_SYMBOL_GPL(tcp_get_info); |
2478 | 2491 | ||
2479 | static int do_tcp_getsockopt(struct sock *sk, int level, | 2492 | static int do_tcp_getsockopt(struct sock *sk, int level, |
@@ -2591,6 +2604,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2591 | return -EFAULT; | 2604 | return -EFAULT; |
2592 | return 0; | 2605 | return 0; |
2593 | } | 2606 | } |
2607 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
2608 | val = tp->thin_lto; | ||
2609 | break; | ||
2610 | case TCP_THIN_DUPACK: | ||
2611 | val = tp->thin_dupack; | ||
2612 | break; | ||
2594 | default: | 2613 | default: |
2595 | return -ENOPROTOOPT; | 2614 | return -ENOPROTOOPT; |
2596 | } | 2615 | } |
@@ -2612,6 +2631,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, | |||
2612 | optval, optlen); | 2631 | optval, optlen); |
2613 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); | 2632 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); |
2614 | } | 2633 | } |
2634 | EXPORT_SYMBOL(tcp_getsockopt); | ||
2615 | 2635 | ||
2616 | #ifdef CONFIG_COMPAT | 2636 | #ifdef CONFIG_COMPAT |
2617 | int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | 2637 | int compat_tcp_getsockopt(struct sock *sk, int level, int optname, |
@@ -2622,7 +2642,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, | |||
2622 | optval, optlen); | 2642 | optval, optlen); |
2623 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); | 2643 | return do_tcp_getsockopt(sk, level, optname, optval, optlen); |
2624 | } | 2644 | } |
2625 | |||
2626 | EXPORT_SYMBOL(compat_tcp_getsockopt); | 2645 | EXPORT_SYMBOL(compat_tcp_getsockopt); |
2627 | #endif | 2646 | #endif |
2628 | 2647 | ||
@@ -2859,7 +2878,6 @@ void tcp_free_md5sig_pool(void) | |||
2859 | if (pool) | 2878 | if (pool) |
2860 | __tcp_free_md5sig_pool(pool); | 2879 | __tcp_free_md5sig_pool(pool); |
2861 | } | 2880 | } |
2862 | |||
2863 | EXPORT_SYMBOL(tcp_free_md5sig_pool); | 2881 | EXPORT_SYMBOL(tcp_free_md5sig_pool); |
2864 | 2882 | ||
2865 | static struct tcp_md5sig_pool * __percpu * | 2883 | static struct tcp_md5sig_pool * __percpu * |
@@ -2935,7 +2953,6 @@ retry: | |||
2935 | } | 2953 | } |
2936 | return pool; | 2954 | return pool; |
2937 | } | 2955 | } |
2938 | |||
2939 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); | 2956 | EXPORT_SYMBOL(tcp_alloc_md5sig_pool); |
2940 | 2957 | ||
2941 | 2958 | ||
@@ -2959,7 +2976,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) | |||
2959 | spin_unlock(&tcp_md5sig_pool_lock); | 2976 | spin_unlock(&tcp_md5sig_pool_lock); |
2960 | 2977 | ||
2961 | if (p) | 2978 | if (p) |
2962 | return *per_cpu_ptr(p, smp_processor_id()); | 2979 | return *this_cpu_ptr(p); |
2963 | 2980 | ||
2964 | local_bh_enable(); | 2981 | local_bh_enable(); |
2965 | return NULL; | 2982 | return NULL; |
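A small per-cpu cleanup: with bottom halves already disabled at this point (note the local_bh_enable() on the failure path), the task cannot migrate, so the two forms below return the same slot and the new one simply folds the cpu lookup into a single per-cpu access:

	static struct tcp_md5sig_pool *pool_slot(struct tcp_md5sig_pool * __percpu *p)
	{
		/* old: return *per_cpu_ptr(p, smp_processor_id()); */
		return *this_cpu_ptr(p);
	}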
@@ -2987,7 +3004,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, | |||
2987 | th->check = old_checksum; | 3004 | th->check = old_checksum; |
2988 | return err; | 3005 | return err; |
2989 | } | 3006 | } |
2990 | |||
2991 | EXPORT_SYMBOL(tcp_md5_hash_header); | 3007 | EXPORT_SYMBOL(tcp_md5_hash_header); |
2992 | 3008 | ||
2993 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | 3009 | int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, |
@@ -3000,6 +3016,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3000 | const unsigned head_data_len = skb_headlen(skb) > header_len ? | 3016 | const unsigned head_data_len = skb_headlen(skb) > header_len ? |
3001 | skb_headlen(skb) - header_len : 0; | 3017 | skb_headlen(skb) - header_len : 0; |
3002 | const struct skb_shared_info *shi = skb_shinfo(skb); | 3018 | const struct skb_shared_info *shi = skb_shinfo(skb); |
3019 | struct sk_buff *frag_iter; | ||
3003 | 3020 | ||
3004 | sg_init_table(&sg, 1); | 3021 | sg_init_table(&sg, 1); |
3005 | 3022 | ||
@@ -3014,9 +3031,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, | |||
3014 | return 1; | 3031 | return 1; |
3015 | } | 3032 | } |
3016 | 3033 | ||
3034 | skb_walk_frags(skb, frag_iter) | ||
3035 | if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) | ||
3036 | return 1; | ||
3037 | |||
3017 | return 0; | 3038 | return 0; |
3018 | } | 3039 | } |
3019 | |||
3020 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); | 3040 | EXPORT_SYMBOL(tcp_md5_hash_skb_data); |
3021 | 3041 | ||
3022 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) | 3042 | int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) |
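The new skb_walk_frags() pass closes a coverage gap: data hanging off skb->frag_list (as produced, for instance, by GRO/GSO chaining) was previously skipped by the MD5 signature, which hashed only the linear head and the page frags. The recursive call passes header_len == 0 so each chained skb is hashed in full. For reference, the macro it relies on is essentially (from linux/skbuff.h):

	#define skb_walk_frags(skb, iter)	\
		for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next)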
@@ -3026,7 +3046,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) | |||
3026 | sg_init_one(&sg, key->key, key->keylen); | 3046 | sg_init_one(&sg, key->key, key->keylen); |
3027 | return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); | 3047 | return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); |
3028 | } | 3048 | } |
3029 | |||
3030 | EXPORT_SYMBOL(tcp_md5_hash_key); | 3049 | EXPORT_SYMBOL(tcp_md5_hash_key); |
3031 | 3050 | ||
3032 | #endif | 3051 | #endif |
@@ -3298,16 +3317,3 @@ void __init tcp_init(void) | |||
3298 | tcp_secret_retiring = &tcp_secret_two; | 3317 | tcp_secret_retiring = &tcp_secret_two; |
3299 | tcp_secret_secondary = &tcp_secret_two; | 3318 | tcp_secret_secondary = &tcp_secret_two; |
3300 | } | 3319 | } |
3301 | |||
3302 | EXPORT_SYMBOL(tcp_close); | ||
3303 | EXPORT_SYMBOL(tcp_disconnect); | ||
3304 | EXPORT_SYMBOL(tcp_getsockopt); | ||
3305 | EXPORT_SYMBOL(tcp_ioctl); | ||
3306 | EXPORT_SYMBOL(tcp_poll); | ||
3307 | EXPORT_SYMBOL(tcp_read_sock); | ||
3308 | EXPORT_SYMBOL(tcp_recvmsg); | ||
3309 | EXPORT_SYMBOL(tcp_sendmsg); | ||
3310 | EXPORT_SYMBOL(tcp_splice_read); | ||
3311 | EXPORT_SYMBOL(tcp_sendpage); | ||
3312 | EXPORT_SYMBOL(tcp_setsockopt); | ||
3313 | EXPORT_SYMBOL(tcp_shutdown); | ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 548d575e6cc6..e663b78a2ef6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1; | |||
78 | int sysctl_tcp_sack __read_mostly = 1; | 78 | int sysctl_tcp_sack __read_mostly = 1; |
79 | int sysctl_tcp_fack __read_mostly = 1; | 79 | int sysctl_tcp_fack __read_mostly = 1; |
80 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; | 80 | int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; |
81 | EXPORT_SYMBOL(sysctl_tcp_reordering); | ||
81 | int sysctl_tcp_ecn __read_mostly = 2; | 82 | int sysctl_tcp_ecn __read_mostly = 2; |
83 | EXPORT_SYMBOL(sysctl_tcp_ecn); | ||
82 | int sysctl_tcp_dsack __read_mostly = 1; | 84 | int sysctl_tcp_dsack __read_mostly = 1; |
83 | int sysctl_tcp_app_win __read_mostly = 31; | 85 | int sysctl_tcp_app_win __read_mostly = 31; |
84 | int sysctl_tcp_adv_win_scale __read_mostly = 2; | 86 | int sysctl_tcp_adv_win_scale __read_mostly = 2; |
87 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
85 | 88 | ||
86 | int sysctl_tcp_stdurg __read_mostly; | 89 | int sysctl_tcp_stdurg __read_mostly; |
87 | int sysctl_tcp_rfc1337 __read_mostly; | 90 | int sysctl_tcp_rfc1337 __read_mostly; |
@@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk) | |||
419 | 422 | ||
420 | inet_csk(sk)->icsk_ack.rcv_mss = hint; | 423 | inet_csk(sk)->icsk_ack.rcv_mss = hint; |
421 | } | 424 | } |
425 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
422 | 426 | ||
423 | /* Receiver "autotuning" code. | 427 | /* Receiver "autotuning" code. |
424 | * | 428 | * |
@@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
2938 | } | 2942 | } |
2939 | tcp_xmit_retransmit_queue(sk); | 2943 | tcp_xmit_retransmit_queue(sk); |
2940 | } | 2944 | } |
2945 | EXPORT_SYMBOL(tcp_simple_retransmit); | ||
2941 | 2946 | ||
2942 | /* Process an event, which can update packets-in-flight not trivially. | 2947 | /* Process an event, which can update packets-in-flight not trivially. |
2943 | * Main goal of this function is to calculate new estimate for left_out, | 2948 | * Main goal of this function is to calculate new estimate for left_out, |
@@ -3286,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3286 | * connection startup slow start one packet too | 3291 | * connection startup slow start one packet too |
3287 | * quickly. This is severely frowned upon behavior. | 3292 | * quickly. This is severely frowned upon behavior. |
3288 | */ | 3293 | */ |
3289 | if (!(scb->flags & TCPCB_FLAG_SYN)) { | 3294 | if (!(scb->flags & TCPHDR_SYN)) { |
3290 | flag |= FLAG_DATA_ACKED; | 3295 | flag |= FLAG_DATA_ACKED; |
3291 | } else { | 3296 | } else { |
3292 | flag |= FLAG_SYN_ACKED; | 3297 | flag |= FLAG_SYN_ACKED; |
@@ -3858,6 +3863,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
3858 | } | 3863 | } |
3859 | } | 3864 | } |
3860 | } | 3865 | } |
3866 | EXPORT_SYMBOL(tcp_parse_options); | ||
3861 | 3867 | ||
3862 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | 3868 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) |
3863 | { | 3869 | { |
@@ -3924,13 +3930,14 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th) | |||
3924 | if (opsize < 2 || opsize > length) | 3930 | if (opsize < 2 || opsize > length) |
3925 | return NULL; | 3931 | return NULL; |
3926 | if (opcode == TCPOPT_MD5SIG) | 3932 | if (opcode == TCPOPT_MD5SIG) |
3927 | return ptr; | 3933 | return opsize == TCPOLEN_MD5SIG ? ptr : NULL; |
3928 | } | 3934 | } |
3929 | ptr += opsize - 2; | 3935 | ptr += opsize - 2; |
3930 | length -= opsize; | 3936 | length -= opsize; |
3931 | } | 3937 | } |
3932 | return NULL; | 3938 | return NULL; |
3933 | } | 3939 | } |
3940 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | ||
3934 | #endif | 3941 | #endif |
3935 | 3942 | ||
3936 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) | 3943 | static inline void tcp_store_ts_recent(struct tcp_sock *tp) |
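tcp_parse_md5sig_option() now validates the option length as well as the kind, so a TCPOPT_MD5SIG whose opsize is anything but TCPOLEN_MD5SIG is ignored rather than returned as an under-sized digest pointer. The constants involved, per RFC 2385 and include/net/tcp.h:

	#define TCPOPT_MD5SIG	19	/* option kind assigned by RFC 2385 */
	#define TCPOLEN_MD5SIG	18	/* kind(1) + length(1) + 16-byte digest */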
@@ -5432,6 +5439,7 @@ discard: | |||
5432 | __kfree_skb(skb); | 5439 | __kfree_skb(skb); |
5433 | return 0; | 5440 | return 0; |
5434 | } | 5441 | } |
5442 | EXPORT_SYMBOL(tcp_rcv_established); | ||
5435 | 5443 | ||
5436 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | 5444 | static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, |
5437 | struct tcphdr *th, unsigned len) | 5445 | struct tcphdr *th, unsigned len) |
@@ -5931,14 +5939,4 @@ discard: | |||
5931 | } | 5939 | } |
5932 | return 0; | 5940 | return 0; |
5933 | } | 5941 | } |
5934 | |||
5935 | EXPORT_SYMBOL(sysctl_tcp_ecn); | ||
5936 | EXPORT_SYMBOL(sysctl_tcp_reordering); | ||
5937 | EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); | ||
5938 | EXPORT_SYMBOL(tcp_parse_options); | ||
5939 | #ifdef CONFIG_TCP_MD5SIG | ||
5940 | EXPORT_SYMBOL(tcp_parse_md5sig_option); | ||
5941 | #endif | ||
5942 | EXPORT_SYMBOL(tcp_rcv_established); | ||
5943 | EXPORT_SYMBOL(tcp_rcv_state_process); | 5942 | EXPORT_SYMBOL(tcp_rcv_state_process); |
5944 | EXPORT_SYMBOL(tcp_initialize_rcv_mss); | ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe193e53af44..020766292bb0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -84,6 +84,7 @@ | |||
84 | 84 | ||
85 | int sysctl_tcp_tw_reuse __read_mostly; | 85 | int sysctl_tcp_tw_reuse __read_mostly; |
86 | int sysctl_tcp_low_latency __read_mostly; | 86 | int sysctl_tcp_low_latency __read_mostly; |
87 | EXPORT_SYMBOL(sysctl_tcp_low_latency); | ||
87 | 88 | ||
88 | 89 | ||
89 | #ifdef CONFIG_TCP_MD5SIG | 90 | #ifdef CONFIG_TCP_MD5SIG |
@@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) | |||
100 | #endif | 101 | #endif |
101 | 102 | ||
102 | struct inet_hashinfo tcp_hashinfo; | 103 | struct inet_hashinfo tcp_hashinfo; |
104 | EXPORT_SYMBOL(tcp_hashinfo); | ||
103 | 105 | ||
104 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) | 106 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) |
105 | { | 107 | { |
@@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) | |||
139 | 141 | ||
140 | return 0; | 142 | return 0; |
141 | } | 143 | } |
142 | |||
143 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); | 144 | EXPORT_SYMBOL_GPL(tcp_twsk_unique); |
144 | 145 | ||
145 | /* This will initiate an outgoing connection. */ | 146 | /* This will initiate an outgoing connection. */ |
@@ -204,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
204 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, | 205 | * TIME-WAIT * and initialize rx_opt.ts_recent from it, |
205 | * when trying new connection. | 206 | * when trying new connection. |
206 | */ | 207 | */ |
207 | if (peer != NULL && | 208 | if (peer) { |
208 | (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { | 209 | inet_peer_refcheck(peer); |
209 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; | 210 | if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { |
210 | tp->rx_opt.ts_recent = peer->tcp_ts; | 211 | tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; |
212 | tp->rx_opt.ts_recent = peer->tcp_ts; | ||
213 | } | ||
211 | } | 214 | } |
212 | } | 215 | } |
213 | 216 | ||
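Both timestamp-cache readers in this file now call inet_peer_refcheck(peer) before touching peer->tcp_ts and peer->tcp_ts_stamp. The helper itself is not shown in this diff; the assumption here is that it is a debugging assertion of roughly the following shape, flagging readers that hold no reference on the inet_peer entry:

    /* Assumed shape of the helper (it would live in
     * include/net/inetpeer.h, not in this diff): warn once if the
     * cached fields are read without a reference held on the peer.
     */
    static inline void inet_peer_refcheck(const struct inet_peer *p)
    {
            WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
    }
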
@@ -237,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
237 | 240 | ||
238 | /* OK, now commit destination to socket. */ | 241 | /* OK, now commit destination to socket. */ |
239 | sk->sk_gso_type = SKB_GSO_TCPV4; | 242 | sk->sk_gso_type = SKB_GSO_TCPV4; |
240 | sk_setup_caps(sk, &rt->u.dst); | 243 | sk_setup_caps(sk, &rt->dst); |
241 | 244 | ||
242 | if (!tp->write_seq) | 245 | if (!tp->write_seq) |
243 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, | 246 | tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, |
@@ -265,6 +268,7 @@ failure: | |||
265 | inet->inet_dport = 0; | 268 | inet->inet_dport = 0; |
266 | return err; | 269 | return err; |
267 | } | 270 | } |
271 | EXPORT_SYMBOL(tcp_v4_connect); | ||
268 | 272 | ||
269 | /* | 273 | /* |
270 | * This routine does path mtu discovery as defined in RFC1191. | 274 | * This routine does path mtu discovery as defined in RFC1191. |
@@ -543,6 +547,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) | |||
543 | 547 | ||
544 | __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); | 548 | __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); |
545 | } | 549 | } |
550 | EXPORT_SYMBOL(tcp_v4_send_check); | ||
546 | 551 | ||
547 | int tcp_v4_gso_send_check(struct sk_buff *skb) | 552 | int tcp_v4_gso_send_check(struct sk_buff *skb) |
548 | { | 553 | { |
@@ -793,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) | |||
793 | kfree(inet_rsk(req)->opt); | 798 | kfree(inet_rsk(req)->opt); |
794 | } | 799 | } |
795 | 800 | ||
796 | #ifdef CONFIG_SYN_COOKIES | 801 | static void syn_flood_warning(const struct sk_buff *skb) |
797 | static void syn_flood_warning(struct sk_buff *skb) | ||
798 | { | 802 | { |
799 | static unsigned long warntime; | 803 | const char *msg; |
800 | 804 | ||
801 | if (time_after(jiffies, (warntime + HZ * 60))) { | 805 | #ifdef CONFIG_SYN_COOKIES |
802 | warntime = jiffies; | 806 | if (sysctl_tcp_syncookies) |
803 | printk(KERN_INFO | 807 | msg = "Sending cookies"; |
804 | "possible SYN flooding on port %d. Sending cookies.\n", | 808 | else |
805 | ntohs(tcp_hdr(skb)->dest)); | ||
806 | } | ||
807 | } | ||
808 | #endif | 809 | #endif |
810 | msg = "Dropping request"; | ||
811 | |||
812 | pr_info("TCP: Possible SYN flooding on port %d. %s.\n", | ||
813 | ntohs(tcp_hdr(skb)->dest), msg); | ||
814 | } | ||
809 | 815 | ||
810 | /* | 816 | /* |
811 | * Save and compile IPv4 options into the request_sock if needed. | 817 | * Save and compile IPv4 options into the request_sock if needed. |
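The rewritten syn_flood_warning() above drops two pieces of private state: it no longer exists only under CONFIG_SYN_COOKIES, and it no longer keeps its own 60-second "warntime" timestamp. Rate limiting moves to the caller in tcp_v4_conn_request() (further down), which gates the call with the stock net_ratelimit() helper, and the message now reports whether cookies are being sent or the request is simply dropped. The caller-side pattern in isolation, as a sketch with a hypothetical helper name:

    /* Sketch: net_ratelimit() returns nonzero while printing is
     * currently allowed, so the warning function needs no timestamp
     * bookkeeping of its own.
     */
    static void warn_syn_flood(u16 port)    /* hypothetical */
    {
            if (net_ratelimit())
                    pr_info("TCP: possible SYN flooding on port %d.\n",
                            port);
    }
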
@@ -857,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, | |||
857 | { | 863 | { |
858 | return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); | 864 | return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); |
859 | } | 865 | } |
860 | |||
861 | EXPORT_SYMBOL(tcp_v4_md5_lookup); | 866 | EXPORT_SYMBOL(tcp_v4_md5_lookup); |
862 | 867 | ||
863 | static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, | 868 | static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, |
@@ -924,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, | |||
924 | } | 929 | } |
925 | return 0; | 930 | return 0; |
926 | } | 931 | } |
927 | |||
928 | EXPORT_SYMBOL(tcp_v4_md5_do_add); | 932 | EXPORT_SYMBOL(tcp_v4_md5_do_add); |
929 | 933 | ||
930 | static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, | 934 | static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, |
@@ -962,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) | |||
962 | } | 966 | } |
963 | return -ENOENT; | 967 | return -ENOENT; |
964 | } | 968 | } |
965 | |||
966 | EXPORT_SYMBOL(tcp_v4_md5_do_del); | 969 | EXPORT_SYMBOL(tcp_v4_md5_do_del); |
967 | 970 | ||
968 | static void tcp_v4_clear_md5_list(struct sock *sk) | 971 | static void tcp_v4_clear_md5_list(struct sock *sk) |
@@ -1135,7 +1138,6 @@ clear_hash_noput: | |||
1135 | memset(md5_hash, 0, 16); | 1138 | memset(md5_hash, 0, 16); |
1136 | return 1; | 1139 | return 1; |
1137 | } | 1140 | } |
1138 | |||
1139 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); | 1141 | EXPORT_SYMBOL(tcp_v4_md5_hash_skb); |
1140 | 1142 | ||
1141 | static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) | 1143 | static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) |
@@ -1243,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1243 | * evidently real one. | 1245 | * evidently real one. |
1244 | */ | 1246 | */ |
1245 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { | 1247 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
1248 | if (net_ratelimit()) | ||
1249 | syn_flood_warning(skb); | ||
1246 | #ifdef CONFIG_SYN_COOKIES | 1250 | #ifdef CONFIG_SYN_COOKIES |
1247 | if (sysctl_tcp_syncookies) { | 1251 | if (sysctl_tcp_syncookies) { |
1248 | want_cookie = 1; | 1252 | want_cookie = 1; |
@@ -1323,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1323 | if (security_inet_conn_request(sk, skb, req)) | 1327 | if (security_inet_conn_request(sk, skb, req)) |
1324 | goto drop_and_free; | 1328 | goto drop_and_free; |
1325 | 1329 | ||
1326 | if (!want_cookie) | 1330 | if (!want_cookie || tmp_opt.tstamp_ok) |
1327 | TCP_ECN_create_request(req, tcp_hdr(skb)); | 1331 | TCP_ECN_create_request(req, tcp_hdr(skb)); |
1328 | 1332 | ||
1329 | if (want_cookie) { | 1333 | if (want_cookie) { |
1330 | #ifdef CONFIG_SYN_COOKIES | ||
1331 | syn_flood_warning(skb); | ||
1332 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
1333 | #endif | ||
1334 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); | 1334 | isn = cookie_v4_init_sequence(sk, skb, &req->mss); |
1335 | req->cookie_ts = tmp_opt.tstamp_ok; | ||
1335 | } else if (!isn) { | 1336 | } else if (!isn) { |
1336 | struct inet_peer *peer = NULL; | 1337 | struct inet_peer *peer = NULL; |
1337 | 1338 | ||
@@ -1349,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1349 | (dst = inet_csk_route_req(sk, req)) != NULL && | 1350 | (dst = inet_csk_route_req(sk, req)) != NULL && |
1350 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 1351 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
1351 | peer->v4daddr == saddr) { | 1352 | peer->v4daddr == saddr) { |
1353 | inet_peer_refcheck(peer); | ||
1352 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && | 1354 | if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && |
1353 | (s32)(peer->tcp_ts - req->ts_recent) > | 1355 | (s32)(peer->tcp_ts - req->ts_recent) > |
1354 | TCP_PAWS_WINDOW) { | 1356 | TCP_PAWS_WINDOW) { |
@@ -1393,6 +1395,7 @@ drop_and_free: | |||
1393 | drop: | 1395 | drop: |
1394 | return 0; | 1396 | return 0; |
1395 | } | 1397 | } |
1398 | EXPORT_SYMBOL(tcp_v4_conn_request); | ||
1396 | 1399 | ||
1397 | 1400 | ||
1398 | /* | 1401 | /* |
@@ -1478,6 +1481,7 @@ exit: | |||
1478 | dst_release(dst); | 1481 | dst_release(dst); |
1479 | return NULL; | 1482 | return NULL; |
1480 | } | 1483 | } |
1484 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | ||
1481 | 1485 | ||
1482 | static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | 1486 | static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) |
1483 | { | 1487 | { |
@@ -1504,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) | |||
1504 | } | 1508 | } |
1505 | 1509 | ||
1506 | #ifdef CONFIG_SYN_COOKIES | 1510 | #ifdef CONFIG_SYN_COOKIES |
1507 | if (!th->rst && !th->syn && th->ack) | 1511 | if (!th->syn) |
1508 | sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); | 1512 | sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); |
1509 | #endif | 1513 | #endif |
1510 | return sk; | 1514 | return sk; |
@@ -1607,6 +1611,7 @@ csum_err: | |||
1607 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | 1611 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); |
1608 | goto discard; | 1612 | goto discard; |
1609 | } | 1613 | } |
1614 | EXPORT_SYMBOL(tcp_v4_do_rcv); | ||
1610 | 1615 | ||
1611 | /* | 1616 | /* |
1612 | * From tcp_input.c | 1617 | * From tcp_input.c |
@@ -1793,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk) | |||
1793 | 1798 | ||
1794 | return 0; | 1799 | return 0; |
1795 | } | 1800 | } |
1801 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | ||
1796 | 1802 | ||
1797 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | 1803 | int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) |
1798 | { | 1804 | { |
@@ -1832,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { | |||
1832 | .compat_getsockopt = compat_ip_getsockopt, | 1838 | .compat_getsockopt = compat_ip_getsockopt, |
1833 | #endif | 1839 | #endif |
1834 | }; | 1840 | }; |
1841 | EXPORT_SYMBOL(ipv4_specific); | ||
1835 | 1842 | ||
1836 | #ifdef CONFIG_TCP_MD5SIG | 1843 | #ifdef CONFIG_TCP_MD5SIG |
1837 | static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { | 1844 | static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { |
@@ -1960,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk) | |||
1960 | 1967 | ||
1961 | percpu_counter_dec(&tcp_sockets_allocated); | 1968 | percpu_counter_dec(&tcp_sockets_allocated); |
1962 | } | 1969 | } |
1963 | |||
1964 | EXPORT_SYMBOL(tcp_v4_destroy_sock); | 1970 | EXPORT_SYMBOL(tcp_v4_destroy_sock); |
1965 | 1971 | ||
1966 | #ifdef CONFIG_PROC_FS | 1972 | #ifdef CONFIG_PROC_FS |
@@ -1978,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) | |||
1978 | hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; | 1984 | hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; |
1979 | } | 1985 | } |
1980 | 1986 | ||
1987 | /* | ||
1988 | * Get next listener socket follow cur. If cur is NULL, get first socket | ||
1989 | * starting from bucket given in st->bucket; when st->bucket is zero the | ||
1990 | * very first socket in the hash table is returned. | ||
1991 | */ | ||
1981 | static void *listening_get_next(struct seq_file *seq, void *cur) | 1992 | static void *listening_get_next(struct seq_file *seq, void *cur) |
1982 | { | 1993 | { |
1983 | struct inet_connection_sock *icsk; | 1994 | struct inet_connection_sock *icsk; |
@@ -1988,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
1988 | struct net *net = seq_file_net(seq); | 1999 | struct net *net = seq_file_net(seq); |
1989 | 2000 | ||
1990 | if (!sk) { | 2001 | if (!sk) { |
1991 | st->bucket = 0; | 2002 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
1992 | ilb = &tcp_hashinfo.listening_hash[0]; | ||
1993 | spin_lock_bh(&ilb->lock); | 2003 | spin_lock_bh(&ilb->lock); |
1994 | sk = sk_nulls_head(&ilb->head); | 2004 | sk = sk_nulls_head(&ilb->head); |
2005 | st->offset = 0; | ||
1995 | goto get_sk; | 2006 | goto get_sk; |
1996 | } | 2007 | } |
1997 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; | 2008 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
1998 | ++st->num; | 2009 | ++st->num; |
2010 | ++st->offset; | ||
1999 | 2011 | ||
2000 | if (st->state == TCP_SEQ_STATE_OPENREQ) { | 2012 | if (st->state == TCP_SEQ_STATE_OPENREQ) { |
2001 | struct request_sock *req = cur; | 2013 | struct request_sock *req = cur; |
@@ -2010,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) | |||
2010 | } | 2022 | } |
2011 | req = req->dl_next; | 2023 | req = req->dl_next; |
2012 | } | 2024 | } |
2025 | st->offset = 0; | ||
2013 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) | 2026 | if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) |
2014 | break; | 2027 | break; |
2015 | get_req: | 2028 | get_req: |
@@ -2045,6 +2058,7 @@ start_req: | |||
2045 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); | 2058 | read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); |
2046 | } | 2059 | } |
2047 | spin_unlock_bh(&ilb->lock); | 2060 | spin_unlock_bh(&ilb->lock); |
2061 | st->offset = 0; | ||
2048 | if (++st->bucket < INET_LHTABLE_SIZE) { | 2062 | if (++st->bucket < INET_LHTABLE_SIZE) { |
2049 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; | 2063 | ilb = &tcp_hashinfo.listening_hash[st->bucket]; |
2050 | spin_lock_bh(&ilb->lock); | 2064 | spin_lock_bh(&ilb->lock); |
@@ -2058,7 +2072,12 @@ out: | |||
2058 | 2072 | ||
2059 | static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | 2073 | static void *listening_get_idx(struct seq_file *seq, loff_t *pos) |
2060 | { | 2074 | { |
2061 | void *rc = listening_get_next(seq, NULL); | 2075 | struct tcp_iter_state *st = seq->private; |
2076 | void *rc; | ||
2077 | |||
2078 | st->bucket = 0; | ||
2079 | st->offset = 0; | ||
2080 | rc = listening_get_next(seq, NULL); | ||
2062 | 2081 | ||
2063 | while (rc && *pos) { | 2082 | while (rc && *pos) { |
2064 | rc = listening_get_next(seq, rc); | 2083 | rc = listening_get_next(seq, rc); |
@@ -2073,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st) | |||
2073 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | 2092 | hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); |
2074 | } | 2093 | } |
2075 | 2094 | ||
2095 | /* | ||
2096 | * Get first established socket starting from bucket given in st->bucket. | ||
2097 | * If st->bucket is zero, the very first socket in the hash is returned. | ||
2098 | */ | ||
2076 | static void *established_get_first(struct seq_file *seq) | 2099 | static void *established_get_first(struct seq_file *seq) |
2077 | { | 2100 | { |
2078 | struct tcp_iter_state *st = seq->private; | 2101 | struct tcp_iter_state *st = seq->private; |
2079 | struct net *net = seq_file_net(seq); | 2102 | struct net *net = seq_file_net(seq); |
2080 | void *rc = NULL; | 2103 | void *rc = NULL; |
2081 | 2104 | ||
2082 | for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { | 2105 | st->offset = 0; |
2106 | for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { | ||
2083 | struct sock *sk; | 2107 | struct sock *sk; |
2084 | struct hlist_nulls_node *node; | 2108 | struct hlist_nulls_node *node; |
2085 | struct inet_timewait_sock *tw; | 2109 | struct inet_timewait_sock *tw; |
@@ -2124,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) | |||
2124 | struct net *net = seq_file_net(seq); | 2148 | struct net *net = seq_file_net(seq); |
2125 | 2149 | ||
2126 | ++st->num; | 2150 | ++st->num; |
2151 | ++st->offset; | ||
2127 | 2152 | ||
2128 | if (st->state == TCP_SEQ_STATE_TIME_WAIT) { | 2153 | if (st->state == TCP_SEQ_STATE_TIME_WAIT) { |
2129 | tw = cur; | 2154 | tw = cur; |
@@ -2140,6 +2165,7 @@ get_tw: | |||
2140 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2165 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2141 | 2166 | ||
2142 | /* Look for next non empty bucket */ | 2167 | /* Look for next non empty bucket */ |
2168 | st->offset = 0; | ||
2143 | while (++st->bucket <= tcp_hashinfo.ehash_mask && | 2169 | while (++st->bucket <= tcp_hashinfo.ehash_mask && |
2144 | empty_bucket(st)) | 2170 | empty_bucket(st)) |
2145 | ; | 2171 | ; |
@@ -2167,7 +2193,11 @@ out: | |||
2167 | 2193 | ||
2168 | static void *established_get_idx(struct seq_file *seq, loff_t pos) | 2194 | static void *established_get_idx(struct seq_file *seq, loff_t pos) |
2169 | { | 2195 | { |
2170 | void *rc = established_get_first(seq); | 2196 | struct tcp_iter_state *st = seq->private; |
2197 | void *rc; | ||
2198 | |||
2199 | st->bucket = 0; | ||
2200 | rc = established_get_first(seq); | ||
2171 | 2201 | ||
2172 | while (rc && pos) { | 2202 | while (rc && pos) { |
2173 | rc = established_get_next(seq, rc); | 2203 | rc = established_get_next(seq, rc); |
@@ -2192,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) | |||
2192 | return rc; | 2222 | return rc; |
2193 | } | 2223 | } |
2194 | 2224 | ||
2225 | static void *tcp_seek_last_pos(struct seq_file *seq) | ||
2226 | { | ||
2227 | struct tcp_iter_state *st = seq->private; | ||
2228 | int offset = st->offset; | ||
2229 | int orig_num = st->num; | ||
2230 | void *rc = NULL; | ||
2231 | |||
2232 | switch (st->state) { | ||
2233 | case TCP_SEQ_STATE_OPENREQ: | ||
2234 | case TCP_SEQ_STATE_LISTENING: | ||
2235 | if (st->bucket >= INET_LHTABLE_SIZE) | ||
2236 | break; | ||
2237 | st->state = TCP_SEQ_STATE_LISTENING; | ||
2238 | rc = listening_get_next(seq, NULL); | ||
2239 | while (offset-- && rc) | ||
2240 | rc = listening_get_next(seq, rc); | ||
2241 | if (rc) | ||
2242 | break; | ||
2243 | st->bucket = 0; | ||
2244 | /* Fallthrough */ | ||
2245 | case TCP_SEQ_STATE_ESTABLISHED: | ||
2246 | case TCP_SEQ_STATE_TIME_WAIT: | ||
2247 | st->state = TCP_SEQ_STATE_ESTABLISHED; | ||
2248 | if (st->bucket > tcp_hashinfo.ehash_mask) | ||
2249 | break; | ||
2250 | rc = established_get_first(seq); | ||
2251 | while (offset-- && rc) | ||
2252 | rc = established_get_next(seq, rc); | ||
2253 | } | ||
2254 | |||
2255 | st->num = orig_num; | ||
2256 | |||
2257 | return rc; | ||
2258 | } | ||
2259 | |||
2195 | static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) | 2260 | static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) |
2196 | { | 2261 | { |
2197 | struct tcp_iter_state *st = seq->private; | 2262 | struct tcp_iter_state *st = seq->private; |
2263 | void *rc; | ||
2264 | |||
2265 | if (*pos && *pos == st->last_pos) { | ||
2266 | rc = tcp_seek_last_pos(seq); | ||
2267 | if (rc) | ||
2268 | goto out; | ||
2269 | } | ||
2270 | |||
2198 | st->state = TCP_SEQ_STATE_LISTENING; | 2271 | st->state = TCP_SEQ_STATE_LISTENING; |
2199 | st->num = 0; | 2272 | st->num = 0; |
2200 | return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | 2273 | st->bucket = 0; |
2274 | st->offset = 0; | ||
2275 | rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; | ||
2276 | |||
2277 | out: | ||
2278 | st->last_pos = *pos; | ||
2279 | return rc; | ||
2201 | } | 2280 | } |
2202 | 2281 | ||
2203 | static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2282 | static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2204 | { | 2283 | { |
2284 | struct tcp_iter_state *st = seq->private; | ||
2205 | void *rc = NULL; | 2285 | void *rc = NULL; |
2206 | struct tcp_iter_state *st; | ||
2207 | 2286 | ||
2208 | if (v == SEQ_START_TOKEN) { | 2287 | if (v == SEQ_START_TOKEN) { |
2209 | rc = tcp_get_idx(seq, 0); | 2288 | rc = tcp_get_idx(seq, 0); |
2210 | goto out; | 2289 | goto out; |
2211 | } | 2290 | } |
2212 | st = seq->private; | ||
2213 | 2291 | ||
2214 | switch (st->state) { | 2292 | switch (st->state) { |
2215 | case TCP_SEQ_STATE_OPENREQ: | 2293 | case TCP_SEQ_STATE_OPENREQ: |
@@ -2217,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2217 | rc = listening_get_next(seq, v); | 2295 | rc = listening_get_next(seq, v); |
2218 | if (!rc) { | 2296 | if (!rc) { |
2219 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2297 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2298 | st->bucket = 0; | ||
2299 | st->offset = 0; | ||
2220 | rc = established_get_first(seq); | 2300 | rc = established_get_first(seq); |
2221 | } | 2301 | } |
2222 | break; | 2302 | break; |
@@ -2227,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
2227 | } | 2307 | } |
2228 | out: | 2308 | out: |
2229 | ++*pos; | 2309 | ++*pos; |
2310 | st->last_pos = *pos; | ||
2230 | return rc; | 2311 | return rc; |
2231 | } | 2312 | } |
2232 | 2313 | ||
@@ -2265,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file) | |||
2265 | 2346 | ||
2266 | s = ((struct seq_file *)file->private_data)->private; | 2347 | s = ((struct seq_file *)file->private_data)->private; |
2267 | s->family = afinfo->family; | 2348 | s->family = afinfo->family; |
2349 | s->last_pos = 0; | ||
2268 | return 0; | 2350 | return 0; |
2269 | } | 2351 | } |
2270 | 2352 | ||
@@ -2288,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) | |||
2288 | rc = -ENOMEM; | 2370 | rc = -ENOMEM; |
2289 | return rc; | 2371 | return rc; |
2290 | } | 2372 | } |
2373 | EXPORT_SYMBOL(tcp_proc_register); | ||
2291 | 2374 | ||
2292 | void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) | 2375 | void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) |
2293 | { | 2376 | { |
2294 | proc_net_remove(net, afinfo->name); | 2377 | proc_net_remove(net, afinfo->name); |
2295 | } | 2378 | } |
2379 | EXPORT_SYMBOL(tcp_proc_unregister); | ||
2296 | 2380 | ||
2297 | static void get_openreq4(struct sock *sk, struct request_sock *req, | 2381 | static void get_openreq4(struct sock *sk, struct request_sock *req, |
2298 | struct seq_file *f, int i, int uid, int *len) | 2382 | struct seq_file *f, int i, int uid, int *len) |
@@ -2516,6 +2600,8 @@ struct proto tcp_prot = { | |||
2516 | .setsockopt = tcp_setsockopt, | 2600 | .setsockopt = tcp_setsockopt, |
2517 | .getsockopt = tcp_getsockopt, | 2601 | .getsockopt = tcp_getsockopt, |
2518 | .recvmsg = tcp_recvmsg, | 2602 | .recvmsg = tcp_recvmsg, |
2603 | .sendmsg = tcp_sendmsg, | ||
2604 | .sendpage = tcp_sendpage, | ||
2519 | .backlog_rcv = tcp_v4_do_rcv, | 2605 | .backlog_rcv = tcp_v4_do_rcv, |
2520 | .hash = inet_hash, | 2606 | .hash = inet_hash, |
2521 | .unhash = inet_unhash, | 2607 | .unhash = inet_unhash, |
@@ -2534,11 +2620,13 @@ struct proto tcp_prot = { | |||
2534 | .twsk_prot = &tcp_timewait_sock_ops, | 2620 | .twsk_prot = &tcp_timewait_sock_ops, |
2535 | .rsk_prot = &tcp_request_sock_ops, | 2621 | .rsk_prot = &tcp_request_sock_ops, |
2536 | .h.hashinfo = &tcp_hashinfo, | 2622 | .h.hashinfo = &tcp_hashinfo, |
2623 | .no_autobind = true, | ||
2537 | #ifdef CONFIG_COMPAT | 2624 | #ifdef CONFIG_COMPAT |
2538 | .compat_setsockopt = compat_tcp_setsockopt, | 2625 | .compat_setsockopt = compat_tcp_setsockopt, |
2539 | .compat_getsockopt = compat_tcp_getsockopt, | 2626 | .compat_getsockopt = compat_tcp_getsockopt, |
2540 | #endif | 2627 | #endif |
2541 | }; | 2628 | }; |
2629 | EXPORT_SYMBOL(tcp_prot); | ||
2542 | 2630 | ||
2543 | 2631 | ||
2544 | static int __net_init tcp_sk_init(struct net *net) | 2632 | static int __net_init tcp_sk_init(struct net *net) |
@@ -2569,20 +2657,3 @@ void __init tcp_v4_init(void) | |||
2569 | if (register_pernet_subsys(&tcp_sk_ops)) | 2657 | if (register_pernet_subsys(&tcp_sk_ops)) |
2570 | panic("Failed to create the TCP control socket.\n"); | 2658 | panic("Failed to create the TCP control socket.\n"); |
2571 | } | 2659 | } |
2572 | |||
2573 | EXPORT_SYMBOL(ipv4_specific); | ||
2574 | EXPORT_SYMBOL(tcp_hashinfo); | ||
2575 | EXPORT_SYMBOL(tcp_prot); | ||
2576 | EXPORT_SYMBOL(tcp_v4_conn_request); | ||
2577 | EXPORT_SYMBOL(tcp_v4_connect); | ||
2578 | EXPORT_SYMBOL(tcp_v4_do_rcv); | ||
2579 | EXPORT_SYMBOL(tcp_v4_remember_stamp); | ||
2580 | EXPORT_SYMBOL(tcp_v4_send_check); | ||
2581 | EXPORT_SYMBOL(tcp_v4_syn_recv_sock); | ||
2582 | |||
2583 | #ifdef CONFIG_PROC_FS | ||
2584 | EXPORT_SYMBOL(tcp_proc_register); | ||
2585 | EXPORT_SYMBOL(tcp_proc_unregister); | ||
2586 | #endif | ||
2587 | EXPORT_SYMBOL(sysctl_tcp_low_latency); | ||
2588 | |||
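Two quieter changes ride along in tcp_prot above: the table now names .sendmsg and .sendpage handlers directly, and it sets .no_autobind = true. For a connection-oriented protocol the flag is the natural choice: TCP picks its local port explicitly at connect() or listen() time, so a fallback that binds an ephemeral port on first send is dead weight. A toy sketch of how such a flag is consumed (hypothetical types, not struct proto):

    #include <stdbool.h>
    #include <stddef.h>

    struct toy_proto {                      /* hypothetical ops table */
            bool no_autobind;
            int (*sendmsg)(const void *buf, size_t len);
    };

    static int toy_send(const struct toy_proto *p, const void *buf,
                        size_t len)
    {
            if (!p->no_autobind) {
                    /* a datagram-style protocol would grab an
                     * ephemeral source port here before sending */
            }
            return p->sendmsg(buf, len);
    }
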
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 794c2e122a41..f25b56cb85cb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = { | |||
47 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, | 47 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, |
48 | (unsigned long)&tcp_death_row), | 48 | (unsigned long)&tcp_death_row), |
49 | }; | 49 | }; |
50 | |||
51 | EXPORT_SYMBOL_GPL(tcp_death_row); | 50 | EXPORT_SYMBOL_GPL(tcp_death_row); |
52 | 51 | ||
53 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 52 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
@@ -262,6 +261,7 @@ kill: | |||
262 | inet_twsk_put(tw); | 261 | inet_twsk_put(tw); |
263 | return TCP_TW_SUCCESS; | 262 | return TCP_TW_SUCCESS; |
264 | } | 263 | } |
264 | EXPORT_SYMBOL(tcp_timewait_state_process); | ||
265 | 265 | ||
266 | /* | 266 | /* |
267 | * Move a socket to time-wait or dead fin-wait-2 state. | 267 | * Move a socket to time-wait or dead fin-wait-2 state. |
@@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk) | |||
362 | tcp_free_md5sig_pool(); | 362 | tcp_free_md5sig_pool(); |
363 | #endif | 363 | #endif |
364 | } | 364 | } |
365 | |||
366 | EXPORT_SYMBOL_GPL(tcp_twsk_destructor); | 365 | EXPORT_SYMBOL_GPL(tcp_twsk_destructor); |
367 | 366 | ||
368 | static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, | 367 | static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, |
@@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
510 | } | 509 | } |
511 | return newsk; | 510 | return newsk; |
512 | } | 511 | } |
512 | EXPORT_SYMBOL(tcp_create_openreq_child); | ||
513 | 513 | ||
514 | /* | 514 | /* |
515 | * Process an incoming packet for SYN_RECV sockets represented | 515 | * Process an incoming packet for SYN_RECV sockets represented |
@@ -706,6 +706,7 @@ embryonic_reset: | |||
706 | inet_csk_reqsk_queue_drop(sk, req, prev); | 706 | inet_csk_reqsk_queue_drop(sk, req, prev); |
707 | return NULL; | 707 | return NULL; |
708 | } | 708 | } |
709 | EXPORT_SYMBOL(tcp_check_req); | ||
709 | 710 | ||
710 | /* | 711 | /* |
711 | * Queue segment on the new socket if the new socket is active, | 712 | * Queue segment on the new socket if the new socket is active, |
@@ -737,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child, | |||
737 | sock_put(child); | 738 | sock_put(child); |
738 | return ret; | 739 | return ret; |
739 | } | 740 | } |
740 | |||
741 | EXPORT_SYMBOL(tcp_check_req); | ||
742 | EXPORT_SYMBOL(tcp_child_process); | 741 | EXPORT_SYMBOL(tcp_child_process); |
743 | EXPORT_SYMBOL(tcp_create_openreq_child); | ||
744 | EXPORT_SYMBOL(tcp_timewait_state_process); | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7ed9dc1042d1..de3bd8458588 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss, | |||
247 | /* Set the clamp no higher than max representable value */ | 247 | /* Set the clamp no higher than max representable value */ |
248 | (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); | 248 | (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); |
249 | } | 249 | } |
250 | EXPORT_SYMBOL(tcp_select_initial_window); | ||
250 | 251 | ||
251 | /* Chose a new window to advertise, update state in tcp_sock for the | 252 | /* Chose a new window to advertise, update state in tcp_sock for the |
252 | * socket, and return result with RFC1323 scaling applied. The return | 253 | * socket, and return result with RFC1323 scaling applied. The return |
@@ -294,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk) | |||
294 | /* Packet ECN state for a SYN-ACK */ | 295 | /* Packet ECN state for a SYN-ACK */ |
295 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) | 296 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) |
296 | { | 297 | { |
297 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; | 298 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR; |
298 | if (!(tp->ecn_flags & TCP_ECN_OK)) | 299 | if (!(tp->ecn_flags & TCP_ECN_OK)) |
299 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; | 300 | TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE; |
300 | } | 301 | } |
301 | 302 | ||
302 | /* Packet ECN state for a SYN. */ | 303 | /* Packet ECN state for a SYN. */ |
@@ -306,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) | |||
306 | 307 | ||
307 | tp->ecn_flags = 0; | 308 | tp->ecn_flags = 0; |
308 | if (sysctl_tcp_ecn == 1) { | 309 | if (sysctl_tcp_ecn == 1) { |
309 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; | 310 | TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR; |
310 | tp->ecn_flags = TCP_ECN_OK; | 311 | tp->ecn_flags = TCP_ECN_OK; |
311 | } | 312 | } |
312 | } | 313 | } |
@@ -361,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) | |||
361 | skb_shinfo(skb)->gso_type = 0; | 362 | skb_shinfo(skb)->gso_type = 0; |
362 | 363 | ||
363 | TCP_SKB_CB(skb)->seq = seq; | 364 | TCP_SKB_CB(skb)->seq = seq; |
364 | if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) | 365 | if (flags & (TCPHDR_SYN | TCPHDR_FIN)) |
365 | seq++; | 366 | seq++; |
366 | TCP_SKB_CB(skb)->end_seq = seq; | 367 | TCP_SKB_CB(skb)->end_seq = seq; |
367 | } | 368 | } |
@@ -820,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
820 | tcb = TCP_SKB_CB(skb); | 821 | tcb = TCP_SKB_CB(skb); |
821 | memset(&opts, 0, sizeof(opts)); | 822 | memset(&opts, 0, sizeof(opts)); |
822 | 823 | ||
823 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) | 824 | if (unlikely(tcb->flags & TCPHDR_SYN)) |
824 | tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); | 825 | tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); |
825 | else | 826 | else |
826 | tcp_options_size = tcp_established_options(sk, skb, &opts, | 827 | tcp_options_size = tcp_established_options(sk, skb, &opts, |
@@ -843,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
843 | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | | 844 | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | |
844 | tcb->flags); | 845 | tcb->flags); |
845 | 846 | ||
846 | if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { | 847 | if (unlikely(tcb->flags & TCPHDR_SYN)) { |
847 | /* RFC1323: The window in SYN & SYN/ACK segments | 848 | /* RFC1323: The window in SYN & SYN/ACK segments |
848 | * is never scaled. | 849 | * is never scaled. |
849 | */ | 850 | */ |
@@ -866,7 +867,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
866 | } | 867 | } |
867 | 868 | ||
868 | tcp_options_write((__be32 *)(th + 1), tp, &opts); | 869 | tcp_options_write((__be32 *)(th + 1), tp, &opts); |
869 | if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) | 870 | if (likely((tcb->flags & TCPHDR_SYN) == 0)) |
870 | TCP_ECN_send(sk, skb, tcp_header_size); | 871 | TCP_ECN_send(sk, skb, tcp_header_size); |
871 | 872 | ||
872 | #ifdef CONFIG_TCP_MD5SIG | 873 | #ifdef CONFIG_TCP_MD5SIG |
@@ -880,7 +881,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
880 | 881 | ||
881 | icsk->icsk_af_ops->send_check(sk, skb); | 882 | icsk->icsk_af_ops->send_check(sk, skb); |
882 | 883 | ||
883 | if (likely(tcb->flags & TCPCB_FLAG_ACK)) | 884 | if (likely(tcb->flags & TCPHDR_ACK)) |
884 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); | 885 | tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); |
885 | 886 | ||
886 | if (skb->len != tcp_header_size) | 887 | if (skb->len != tcp_header_size) |
@@ -1023,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
1023 | 1024 | ||
1024 | /* PSH and FIN should only be set in the second packet. */ | 1025 | /* PSH and FIN should only be set in the second packet. */ |
1025 | flags = TCP_SKB_CB(skb)->flags; | 1026 | flags = TCP_SKB_CB(skb)->flags; |
1026 | TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); | 1027 | TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); |
1027 | TCP_SKB_CB(buff)->flags = flags; | 1028 | TCP_SKB_CB(buff)->flags = flags; |
1028 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; | 1029 | TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; |
1029 | 1030 | ||
@@ -1189,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk) | |||
1189 | icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); | 1190 | icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); |
1190 | icsk->icsk_mtup.probe_size = 0; | 1191 | icsk->icsk_mtup.probe_size = 0; |
1191 | } | 1192 | } |
1193 | EXPORT_SYMBOL(tcp_mtup_init); | ||
1192 | 1194 | ||
1193 | /* This function synchronize snd mss to current pmtu/exthdr set. | 1195 | /* This function synchronize snd mss to current pmtu/exthdr set. |
1194 | 1196 | ||
@@ -1232,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
1232 | 1234 | ||
1233 | return mss_now; | 1235 | return mss_now; |
1234 | } | 1236 | } |
1237 | EXPORT_SYMBOL(tcp_sync_mss); | ||
1235 | 1238 | ||
1236 | /* Compute the current effective MSS, taking SACKs and IP options, | 1239 | /* Compute the current effective MSS, taking SACKs and IP options, |
1237 | * and even PMTU discovery events into account. | 1240 | * and even PMTU discovery events into account. |
@@ -1328,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, | |||
1328 | u32 in_flight, cwnd; | 1331 | u32 in_flight, cwnd; |
1329 | 1332 | ||
1330 | /* Don't be strict about the congestion window for the final FIN. */ | 1333 | /* Don't be strict about the congestion window for the final FIN. */ |
1331 | if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && | 1334 | if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1) |
1332 | tcp_skb_pcount(skb) == 1) | ||
1333 | return 1; | 1335 | return 1; |
1334 | 1336 | ||
1335 | in_flight = tcp_packets_in_flight(tp); | 1337 | in_flight = tcp_packets_in_flight(tp); |
@@ -1398,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, | |||
1398 | * Nagle can be ignored during F-RTO too (see RFC4138). | 1400 | * Nagle can be ignored during F-RTO too (see RFC4138). |
1399 | */ | 1401 | */ |
1400 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || | 1402 | if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || |
1401 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) | 1403 | (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)) |
1402 | return 1; | 1404 | return 1; |
1403 | 1405 | ||
1404 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) | 1406 | if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) |
@@ -1461,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk) | |||
1461 | * packet has never been sent out before (and thus is not cloned). | 1463 | * packet has never been sent out before (and thus is not cloned). |
1462 | */ | 1464 | */ |
1463 | static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | 1465 | static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, |
1464 | unsigned int mss_now) | 1466 | unsigned int mss_now, gfp_t gfp) |
1465 | { | 1467 | { |
1466 | struct sk_buff *buff; | 1468 | struct sk_buff *buff; |
1467 | int nlen = skb->len - len; | 1469 | int nlen = skb->len - len; |
@@ -1471,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1471 | if (skb->len != skb->data_len) | 1473 | if (skb->len != skb->data_len) |
1472 | return tcp_fragment(sk, skb, len, mss_now); | 1474 | return tcp_fragment(sk, skb, len, mss_now); |
1473 | 1475 | ||
1474 | buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); | 1476 | buff = sk_stream_alloc_skb(sk, 0, gfp); |
1475 | if (unlikely(buff == NULL)) | 1477 | if (unlikely(buff == NULL)) |
1476 | return -ENOMEM; | 1478 | return -ENOMEM; |
1477 | 1479 | ||
@@ -1487,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1487 | 1489 | ||
1488 | /* PSH and FIN should only be set in the second packet. */ | 1490 | /* PSH and FIN should only be set in the second packet. */ |
1489 | flags = TCP_SKB_CB(skb)->flags; | 1491 | flags = TCP_SKB_CB(skb)->flags; |
1490 | TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); | 1492 | TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); |
1491 | TCP_SKB_CB(buff)->flags = flags; | 1493 | TCP_SKB_CB(buff)->flags = flags; |
1492 | 1494 | ||
1493 | /* This packet was never sent out yet, so no SACK bits. */ | 1495 | /* This packet was never sent out yet, so no SACK bits. */ |
@@ -1518,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1518 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1520 | const struct inet_connection_sock *icsk = inet_csk(sk); |
1519 | u32 send_win, cong_win, limit, in_flight; | 1521 | u32 send_win, cong_win, limit, in_flight; |
1520 | 1522 | ||
1521 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) | 1523 | if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) |
1522 | goto send_now; | 1524 | goto send_now; |
1523 | 1525 | ||
1524 | if (icsk->icsk_ca_state != TCP_CA_Open) | 1526 | if (icsk->icsk_ca_state != TCP_CA_Open) |
@@ -1644,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1644 | 1646 | ||
1645 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; | 1647 | TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; |
1646 | TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; | 1648 | TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; |
1647 | TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; | 1649 | TCP_SKB_CB(nskb)->flags = TCPHDR_ACK; |
1648 | TCP_SKB_CB(nskb)->sacked = 0; | 1650 | TCP_SKB_CB(nskb)->sacked = 0; |
1649 | nskb->csum = 0; | 1651 | nskb->csum = 0; |
1650 | nskb->ip_summed = skb->ip_summed; | 1652 | nskb->ip_summed = skb->ip_summed; |
@@ -1669,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1669 | sk_wmem_free_skb(sk, skb); | 1671 | sk_wmem_free_skb(sk, skb); |
1670 | } else { | 1672 | } else { |
1671 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & | 1673 | TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & |
1672 | ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); | 1674 | ~(TCPHDR_FIN|TCPHDR_PSH); |
1673 | if (!skb_shinfo(skb)->nr_frags) { | 1675 | if (!skb_shinfo(skb)->nr_frags) { |
1674 | skb_pull(skb, copy); | 1676 | skb_pull(skb, copy); |
1675 | if (skb->ip_summed != CHECKSUM_PARTIAL) | 1677 | if (skb->ip_summed != CHECKSUM_PARTIAL) |
@@ -1769,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, | |||
1769 | cwnd_quota); | 1771 | cwnd_quota); |
1770 | 1772 | ||
1771 | if (skb->len > limit && | 1773 | if (skb->len > limit && |
1772 | unlikely(tso_fragment(sk, skb, limit, mss_now))) | 1774 | unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) |
1773 | break; | 1775 | break; |
1774 | 1776 | ||
1775 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 1777 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
@@ -2020,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | |||
2020 | 2022 | ||
2021 | if (!sysctl_tcp_retrans_collapse) | 2023 | if (!sysctl_tcp_retrans_collapse) |
2022 | return; | 2024 | return; |
2023 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) | 2025 | if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN) |
2024 | return; | 2026 | return; |
2025 | 2027 | ||
2026 | tcp_for_write_queue_from_safe(skb, tmp, sk) { | 2028 | tcp_for_write_queue_from_safe(skb, tmp, sk) { |
@@ -2112,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
2112 | * since it is cheap to do so and saves bytes on the network. | 2114 | * since it is cheap to do so and saves bytes on the network. |
2113 | */ | 2115 | */ |
2114 | if (skb->len > 0 && | 2116 | if (skb->len > 0 && |
2115 | (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && | 2117 | (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && |
2116 | tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { | 2118 | tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { |
2117 | if (!pskb_trim(skb, 0)) { | 2119 | if (!pskb_trim(skb, 0)) { |
2118 | /* Reuse, even though it does some unnecessary work */ | 2120 | /* Reuse, even though it does some unnecessary work */ |
@@ -2304,7 +2306,7 @@ void tcp_send_fin(struct sock *sk) | |||
2304 | mss_now = tcp_current_mss(sk); | 2306 | mss_now = tcp_current_mss(sk); |
2305 | 2307 | ||
2306 | if (tcp_send_head(sk) != NULL) { | 2308 | if (tcp_send_head(sk) != NULL) { |
2307 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; | 2309 | TCP_SKB_CB(skb)->flags |= TCPHDR_FIN; |
2308 | TCP_SKB_CB(skb)->end_seq++; | 2310 | TCP_SKB_CB(skb)->end_seq++; |
2309 | tp->write_seq++; | 2311 | tp->write_seq++; |
2310 | } else { | 2312 | } else { |
@@ -2321,7 +2323,7 @@ void tcp_send_fin(struct sock *sk) | |||
2321 | skb_reserve(skb, MAX_TCP_HEADER); | 2323 | skb_reserve(skb, MAX_TCP_HEADER); |
2322 | /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ | 2324 | /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ |
2323 | tcp_init_nondata_skb(skb, tp->write_seq, | 2325 | tcp_init_nondata_skb(skb, tp->write_seq, |
2324 | TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); | 2326 | TCPHDR_ACK | TCPHDR_FIN); |
2325 | tcp_queue_skb(sk, skb); | 2327 | tcp_queue_skb(sk, skb); |
2326 | } | 2328 | } |
2327 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); | 2329 | __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); |
@@ -2346,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2346 | /* Reserve space for headers and prepare control bits. */ | 2348 | /* Reserve space for headers and prepare control bits. */ |
2347 | skb_reserve(skb, MAX_TCP_HEADER); | 2349 | skb_reserve(skb, MAX_TCP_HEADER); |
2348 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), | 2350 | tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), |
2349 | TCPCB_FLAG_ACK | TCPCB_FLAG_RST); | 2351 | TCPHDR_ACK | TCPHDR_RST); |
2350 | /* Send it off. */ | 2352 | /* Send it off. */ |
2351 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2353 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2352 | if (tcp_transmit_skb(sk, skb, 0, priority)) | 2354 | if (tcp_transmit_skb(sk, skb, 0, priority)) |
@@ -2366,11 +2368,11 @@ int tcp_send_synack(struct sock *sk) | |||
2366 | struct sk_buff *skb; | 2368 | struct sk_buff *skb; |
2367 | 2369 | ||
2368 | skb = tcp_write_queue_head(sk); | 2370 | skb = tcp_write_queue_head(sk); |
2369 | if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { | 2371 | if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) { |
2370 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); | 2372 | printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); |
2371 | return -EFAULT; | 2373 | return -EFAULT; |
2372 | } | 2374 | } |
2373 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { | 2375 | if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) { |
2374 | if (skb_cloned(skb)) { | 2376 | if (skb_cloned(skb)) { |
2375 | struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); | 2377 | struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); |
2376 | if (nskb == NULL) | 2378 | if (nskb == NULL) |
@@ -2384,7 +2386,7 @@ int tcp_send_synack(struct sock *sk) | |||
2384 | skb = nskb; | 2386 | skb = nskb; |
2385 | } | 2387 | } |
2386 | 2388 | ||
2387 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; | 2389 | TCP_SKB_CB(skb)->flags |= TCPHDR_ACK; |
2388 | TCP_ECN_send_synack(tcp_sk(sk), skb); | 2390 | TCP_ECN_send_synack(tcp_sk(sk), skb); |
2389 | } | 2391 | } |
2390 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2392 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
@@ -2463,7 +2465,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2463 | * not even correctly set) | 2465 | * not even correctly set) |
2464 | */ | 2466 | */ |
2465 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, | 2467 | tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, |
2466 | TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); | 2468 | TCPHDR_SYN | TCPHDR_ACK); |
2467 | 2469 | ||
2468 | if (OPTION_COOKIE_EXTENSION & opts.options) { | 2470 | if (OPTION_COOKIE_EXTENSION & opts.options) { |
2469 | if (s_data_desired) { | 2471 | if (s_data_desired) { |
@@ -2518,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2518 | 2520 | ||
2519 | return skb; | 2521 | return skb; |
2520 | } | 2522 | } |
2523 | EXPORT_SYMBOL(tcp_make_synack); | ||
2521 | 2524 | ||
2522 | /* Do all connect socket setups that can be done AF independent. */ | 2525 | /* Do all connect socket setups that can be done AF independent. */ |
2523 | static void tcp_connect_init(struct sock *sk) | 2526 | static void tcp_connect_init(struct sock *sk) |
@@ -2595,7 +2598,7 @@ int tcp_connect(struct sock *sk) | |||
2595 | skb_reserve(buff, MAX_TCP_HEADER); | 2598 | skb_reserve(buff, MAX_TCP_HEADER); |
2596 | 2599 | ||
2597 | tp->snd_nxt = tp->write_seq; | 2600 | tp->snd_nxt = tp->write_seq; |
2598 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); | 2601 | tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); |
2599 | TCP_ECN_send_syn(sk, buff); | 2602 | TCP_ECN_send_syn(sk, buff); |
2600 | 2603 | ||
2601 | /* Send it off. */ | 2604 | /* Send it off. */ |
@@ -2620,6 +2623,7 @@ int tcp_connect(struct sock *sk) | |||
2620 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | 2623 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); |
2621 | return 0; | 2624 | return 0; |
2622 | } | 2625 | } |
2626 | EXPORT_SYMBOL(tcp_connect); | ||
2623 | 2627 | ||
2624 | /* Send out a delayed ack, the caller does the policy checking | 2628 | /* Send out a delayed ack, the caller does the policy checking |
2625 | * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() | 2629 | * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() |
@@ -2701,7 +2705,7 @@ void tcp_send_ack(struct sock *sk) | |||
2701 | 2705 | ||
2702 | /* Reserve space for headers and prepare control bits. */ | 2706 | /* Reserve space for headers and prepare control bits. */ |
2703 | skb_reserve(buff, MAX_TCP_HEADER); | 2707 | skb_reserve(buff, MAX_TCP_HEADER); |
2704 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); | 2708 | tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); |
2705 | 2709 | ||
2706 | /* Send it off, this clears delayed acks for us. */ | 2710 | /* Send it off, this clears delayed acks for us. */ |
2707 | TCP_SKB_CB(buff)->when = tcp_time_stamp; | 2711 | TCP_SKB_CB(buff)->when = tcp_time_stamp; |
@@ -2735,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
2735 | * end to send an ack. Don't queue or clone SKB, just | 2739 | * end to send an ack. Don't queue or clone SKB, just |
2736 | * send it. | 2740 | * send it. |
2737 | */ | 2741 | */ |
2738 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); | 2742 | tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); |
2739 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2743 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2740 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2744 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2741 | } | 2745 | } |
@@ -2765,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk) | |||
2765 | if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || | 2769 | if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || |
2766 | skb->len > mss) { | 2770 | skb->len > mss) { |
2767 | seg_size = min(seg_size, mss); | 2771 | seg_size = min(seg_size, mss); |
2768 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2772 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
2769 | if (tcp_fragment(sk, skb, seg_size, mss)) | 2773 | if (tcp_fragment(sk, skb, seg_size, mss)) |
2770 | return -1; | 2774 | return -1; |
2771 | } else if (!tcp_skb_pcount(skb)) | 2775 | } else if (!tcp_skb_pcount(skb)) |
2772 | tcp_set_skb_tso_segs(sk, skb, mss); | 2776 | tcp_set_skb_tso_segs(sk, skb, mss); |
2773 | 2777 | ||
2774 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2778 | TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; |
2775 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2779 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |
2776 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2780 | err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2777 | if (!err) | 2781 | if (!err) |
@@ -2824,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk) | |||
2824 | TCP_RTO_MAX); | 2828 | TCP_RTO_MAX); |
2825 | } | 2829 | } |
2826 | } | 2830 | } |
2827 | |||
2828 | EXPORT_SYMBOL(tcp_select_initial_window); | ||
2829 | EXPORT_SYMBOL(tcp_connect); | ||
2830 | EXPORT_SYMBOL(tcp_make_synack); | ||
2831 | EXPORT_SYMBOL(tcp_simple_retransmit); | ||
2832 | EXPORT_SYMBOL(tcp_sync_mss); | ||
2833 | EXPORT_SYMBOL(tcp_mtup_init); | ||
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 440a5c6004f6..808bb920c9f5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk) | |||
41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, | 41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, |
42 | &tcp_keepalive_timer); | 42 | &tcp_keepalive_timer); |
43 | } | 43 | } |
44 | |||
45 | EXPORT_SYMBOL(tcp_init_xmit_timers); | 44 | EXPORT_SYMBOL(tcp_init_xmit_timers); |
46 | 45 | ||
47 | static void tcp_write_err(struct sock *sk) | 46 | static void tcp_write_err(struct sock *sk) |
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index 3b3813cc80b9..59186ca7808a 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c | |||
@@ -48,7 +48,6 @@ err: | |||
48 | 48 | ||
49 | return ret; | 49 | return ret; |
50 | } | 50 | } |
51 | |||
52 | EXPORT_SYMBOL(xfrm4_tunnel_register); | 51 | EXPORT_SYMBOL(xfrm4_tunnel_register); |
53 | 52 | ||
54 | int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) | 53 | int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) |
@@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) | |||
72 | 71 | ||
73 | return ret; | 72 | return ret; |
74 | } | 73 | } |
75 | |||
76 | EXPORT_SYMBOL(xfrm4_tunnel_deregister); | 74 | EXPORT_SYMBOL(xfrm4_tunnel_deregister); |
77 | 75 | ||
78 | static int tunnel4_rcv(struct sk_buff *skb) | 76 | static int tunnel4_rcv(struct sk_buff *skb) |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eec4ff456e33..32e0bef60d0a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
914 | !sock_flag(sk, SOCK_BROADCAST)) | 914 | !sock_flag(sk, SOCK_BROADCAST)) |
915 | goto out; | 915 | goto out; |
916 | if (connected) | 916 | if (connected) |
917 | sk_dst_set(sk, dst_clone(&rt->u.dst)); | 917 | sk_dst_set(sk, dst_clone(&rt->dst)); |
918 | } | 918 | } |
919 | 919 | ||
920 | if (msg->msg_flags&MSG_CONFIRM) | 920 | if (msg->msg_flags&MSG_CONFIRM) |
@@ -978,7 +978,7 @@ out: | |||
978 | return err; | 978 | return err; |
979 | 979 | ||
980 | do_confirm: | 980 | do_confirm: |
981 | dst_confirm(&rt->u.dst); | 981 | dst_confirm(&rt->dst); |
982 | if (!(msg->msg_flags&MSG_PROBE) || len) | 982 | if (!(msg->msg_flags&MSG_PROBE) || len) |
983 | goto back_from_confirm; | 983 | goto back_from_confirm; |
984 | err = 0; | 984 | err = 0; |
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 6610bf76369f..ab76aa928fa9 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c | |||
@@ -58,6 +58,7 @@ struct proto udplite_prot = { | |||
58 | .compat_getsockopt = compat_udp_getsockopt, | 58 | .compat_getsockopt = compat_udp_getsockopt, |
59 | #endif | 59 | #endif |
60 | }; | 60 | }; |
61 | EXPORT_SYMBOL(udplite_prot); | ||
61 | 62 | ||
62 | static struct inet_protosw udplite4_protosw = { | 63 | static struct inet_protosw udplite4_protosw = { |
63 | .type = SOCK_DGRAM, | 64 | .type = SOCK_DGRAM, |
@@ -127,5 +128,3 @@ out_unregister_proto: | |||
127 | out_register_err: | 128 | out_register_err: |
128 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); | 129 | printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); |
129 | } | 130 | } |
130 | |||
131 | EXPORT_SYMBOL(udplite_prot); | ||
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index ad8fbb871aa0..06814b6216dc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c | |||
@@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb) | |||
163 | { | 163 | { |
164 | return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); | 164 | return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); |
165 | } | 165 | } |
166 | |||
167 | EXPORT_SYMBOL(xfrm4_rcv); | 166 | EXPORT_SYMBOL(xfrm4_rcv); |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 23883a48ebfb..869078d4eeb9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, | |||
37 | fl.fl4_src = saddr->a4; | 37 | fl.fl4_src = saddr->a4; |
38 | 38 | ||
39 | err = __ip_route_output_key(net, &rt, &fl); | 39 | err = __ip_route_output_key(net, &rt, &fl); |
40 | dst = &rt->u.dst; | 40 | dst = &rt->dst; |
41 | if (err) | 41 | if (err) |
42 | dst = ERR_PTR(err); | 42 | dst = ERR_PTR(err); |
43 | return dst; | 43 | return dst; |
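Finally, the scattered one-line hunks in tcp_ipv4.c, udp.c and xfrm4_policy.c (rt->u.dst becoming rt->dst) track a rename in struct rtable itself: the embedded dst_entry apparently loses its single-member union wrapper, which shortens every access without changing layout. A sketch of the before/after, with struct dst_entry reduced to a stand-in:

    struct dst_entry { int stand_in; };     /* placeholder; the real
                                             * struct lives in net/dst.h */

    struct rtable_old {                     /* before: one-member union */
            union {
                    struct dst_entry dst;
            } u;
            /* ... routing fields ... */
    };

    struct rtable_new {                     /* after: direct embedding */
            struct dst_entry dst;
            /* ... routing fields ... */
    };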