Diffstat (limited to 'net/ipv4')

 net/ipv4/af_inet.c            | 124
 net/ipv4/arp.c                |   8
 net/ipv4/fib_trie.c           | 101
 net/ipv4/inet_timewait_sock.c |   2
 net/ipv4/ip_gre.c             |   8
 net/ipv4/ipip.c               |   8
 net/ipv4/ipmr.c               |   4
 net/ipv4/route.c              |  22
 net/ipv4/tcp.c                |   6
 net/ipv4/tcp_input.c          |   5
 net/ipv4/tcp_ipv4.c           |  53
 net/ipv4/tcp_minisocks.c      |  23
 net/ipv4/tcp_output.c         |  58
 net/ipv4/tcp_timer.c          |  16
 net/ipv4/udp.c                | 144
 net/ipv4/xfrm4_policy.c       |  37
 16 files changed, 375 insertions(+), 244 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 566ea6c4321d..6c30a73f03f5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw_lock);
 
 struct ipv4_config ipv4_config;
-
 EXPORT_SYMBOL(ipv4_config);
 
 /* New destruction routine */
@@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk)
 	sk_mem_reclaim(sk);
 
 	if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
-		printk("Attempt to release TCP socket in state %d %p\n",
+		pr_err("Attempt to release TCP socket in state %d %p\n",
 		       sk->sk_state, sk);
 		return;
 	}
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		printk("Attempt to release alive inet socket %p\n", sk);
+		pr_err("Attempt to release alive inet socket %p\n", sk);
 		return;
 	}
 
@@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk)
 	dst_release(sk->sk_dst_cache);
 	sk_refcnt_debug_dec(sk);
 }
+EXPORT_SYMBOL(inet_sock_destruct);
 
 /*
  *	The routines beyond this point handle the behaviour of an AF_INET
@@ -219,6 +219,7 @@ out:
 	release_sock(sk);
 	return err;
 }
+EXPORT_SYMBOL(inet_listen);
 
 u32 inet_ehash_secret __read_mostly;
 EXPORT_SYMBOL(inet_ehash_secret);
@@ -435,9 +436,11 @@ int inet_release(struct socket *sock)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(inet_release);
 
 /* It is off by default, see below. */
 int sysctl_ip_nonlocal_bind __read_mostly;
+EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
 
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
@@ -519,6 +522,7 @@ out_release_sock:
 out:
 	return err;
 }
+EXPORT_SYMBOL(inet_bind);
 
 int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 		       int addr_len, int flags)
@@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 		return -EAGAIN;
 	return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
 }
+EXPORT_SYMBOL(inet_dgram_connect);
 
 static long inet_wait_for_connect(struct sock *sk, long timeo)
 {
@@ -641,6 +646,7 @@ sock_error:
 	sock->state = SS_DISCONNECTING;
 	goto out;
 }
+EXPORT_SYMBOL(inet_stream_connect);
 
 /*
  *	Accept a pending connection. The TCP layer now gives BSD semantics.
@@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 do_err:
 	return err;
 }
+EXPORT_SYMBOL(inet_accept);
 
 
 /*
@@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 	*uaddr_len = sizeof(*sin);
 	return 0;
 }
+EXPORT_SYMBOL(inet_getname);
 
 int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		 size_t size)
@@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
 }
+EXPORT_SYMBOL(inet_sendmsg);
 
 
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
+static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+			     size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
@@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how)
 	release_sock(sk);
 	return err;
 }
+EXPORT_SYMBOL(inet_shutdown);
 
 /*
  *	ioctl() calls you can issue on an INET socket. Most of these are
@@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	struct net *net = sock_net(sk);
 
 	switch (cmd) {
-		case SIOCGSTAMP:
-			err = sock_get_timestamp(sk, (struct timeval __user *)arg);
-			break;
-		case SIOCGSTAMPNS:
-			err = sock_get_timestampns(sk, (struct timespec __user *)arg);
-			break;
-		case SIOCADDRT:
-		case SIOCDELRT:
-		case SIOCRTMSG:
-			err = ip_rt_ioctl(net, cmd, (void __user *)arg);
-			break;
-		case SIOCDARP:
-		case SIOCGARP:
-		case SIOCSARP:
-			err = arp_ioctl(net, cmd, (void __user *)arg);
-			break;
-		case SIOCGIFADDR:
-		case SIOCSIFADDR:
-		case SIOCGIFBRDADDR:
-		case SIOCSIFBRDADDR:
-		case SIOCGIFNETMASK:
-		case SIOCSIFNETMASK:
-		case SIOCGIFDSTADDR:
-		case SIOCSIFDSTADDR:
-		case SIOCSIFPFLAGS:
-		case SIOCGIFPFLAGS:
-		case SIOCSIFFLAGS:
-			err = devinet_ioctl(net, cmd, (void __user *)arg);
-			break;
-		default:
-			if (sk->sk_prot->ioctl)
-				err = sk->sk_prot->ioctl(sk, cmd, arg);
-			else
-				err = -ENOIOCTLCMD;
-			break;
+	case SIOCGSTAMP:
+		err = sock_get_timestamp(sk, (struct timeval __user *)arg);
+		break;
+	case SIOCGSTAMPNS:
+		err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+		break;
+	case SIOCADDRT:
+	case SIOCDELRT:
+	case SIOCRTMSG:
+		err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+		break;
+	case SIOCDARP:
+	case SIOCGARP:
+	case SIOCSARP:
+		err = arp_ioctl(net, cmd, (void __user *)arg);
+		break;
+	case SIOCGIFADDR:
+	case SIOCSIFADDR:
+	case SIOCGIFBRDADDR:
+	case SIOCSIFBRDADDR:
+	case SIOCGIFNETMASK:
+	case SIOCSIFNETMASK:
+	case SIOCGIFDSTADDR:
+	case SIOCSIFDSTADDR:
+	case SIOCSIFPFLAGS:
+	case SIOCGIFPFLAGS:
+	case SIOCSIFFLAGS:
+		err = devinet_ioctl(net, cmd, (void __user *)arg);
+		break;
+	default:
+		if (sk->sk_prot->ioctl)
+			err = sk->sk_prot->ioctl(sk, cmd, arg);
+		else
+			err = -ENOIOCTLCMD;
+		break;
 	}
 	return err;
 }
+EXPORT_SYMBOL(inet_ioctl);
 
 const struct proto_ops inet_stream_ops = {
 	.family = PF_INET,
@@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = {
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(inet_stream_ops);
 
 const struct proto_ops inet_dgram_ops = {
 	.family = PF_INET,
@@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = {
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(inet_dgram_ops);
 
 /*
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
@@ -1016,6 +1030,7 @@ out_illegal:
 	       p->type);
 	goto out;
 }
+EXPORT_SYMBOL(inet_register_protosw);
 
 void inet_unregister_protosw(struct inet_protosw *p)
 {
@@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
 		synchronize_net();
 	}
 }
+EXPORT_SYMBOL(inet_unregister_protosw);
 
 /*
  *      Shall we try to damage output packets if routing dev changes?
@@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk)
 
 	return err;
 }
-
 EXPORT_SYMBOL(inet_sk_rebuild_header);
 
 static int inet_gso_send_check(struct sk_buff *skb)
@@ -1187,6 +1202,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	int proto;
 	int ihl;
 	int id;
+	unsigned int offset = 0;
 
 	if (!(features & NETIF_F_V4_CSUM))
 		features &= ~NETIF_F_SG;
@@ -1229,7 +1245,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	skb = segs;
 	do {
 		iph = ip_hdr(skb);
-		iph->id = htons(id++);
+		if (proto == IPPROTO_UDP) {
+			iph->id = htons(id);
+			iph->frag_off = htons(offset >> 3);
+			if (skb->next != NULL)
+				iph->frag_off |= htons(IP_MF);
+			offset += (skb->len - skb->mac_len - iph->ihl * 4);
+		} else
+			iph->id = htons(id++);
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
 		iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
@@ -1361,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 	}
 	return rc;
 }
-
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
 unsigned long snmp_fold_field(void *mib[], int offt)
@@ -1425,6 +1447,8 @@ static struct net_protocol tcp_protocol = {
 static struct net_protocol udp_protocol = {
 	.handler =	udp_rcv,
 	.err_handler =	udp_err,
+	.gso_send_check = udp4_ufo_send_check,
+	.gso_segment = udp4_ufo_fragment,
 	.no_policy =	1,
 	.netns_ok =	1,
 };
@@ -1666,19 +1690,3 @@ static int __init ipv4_proc_init(void)
 
 MODULE_ALIAS_NETPROTO(PF_INET);
 
-EXPORT_SYMBOL(inet_accept);
-EXPORT_SYMBOL(inet_bind);
-EXPORT_SYMBOL(inet_dgram_connect);
-EXPORT_SYMBOL(inet_dgram_ops);
-EXPORT_SYMBOL(inet_getname);
-EXPORT_SYMBOL(inet_ioctl);
-EXPORT_SYMBOL(inet_listen);
-EXPORT_SYMBOL(inet_register_protosw);
-EXPORT_SYMBOL(inet_release);
-EXPORT_SYMBOL(inet_sendmsg);
-EXPORT_SYMBOL(inet_shutdown);
-EXPORT_SYMBOL(inet_sock_destruct);
-EXPORT_SYMBOL(inet_stream_connect);
-EXPORT_SYMBOL(inet_stream_ops);
-EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
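A note on the inet_gso_segment() hunk above: for UDP (UFO), the software segments are IP fragments of one datagram, so each keeps the datagram's IP ID, carries its payload offset in 8-byte units in frag_off, and sets IP_MF on every segment but the last; only TCP segments keep incrementing the ID. A standalone sketch of that offset bookkeeping (hypothetical sizes, not kernel code):

#include <stdio.h>

#define IP_MF 0x2000	/* "more fragments" flag in the IP header */

int main(void)
{
	unsigned int mtu_payload = 1480;	/* hypothetical payload bytes per fragment */
	unsigned int total = 4000;		/* hypothetical UDP datagram size */
	unsigned int offset = 0;

	while (offset < total) {
		unsigned int len = total - offset;
		if (len > mtu_payload)
			len = mtu_payload;
		unsigned int frag_off = offset >> 3;	/* offset in 8-byte units */
		int more = (offset + len < total);	/* IP_MF on all but the last */
		printf("frag_off=%u%s len=%u\n",
		       frag_off, more ? " MF" : "", len);
		offset += len;
	}
	return 0;
}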
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090e9991ac2a..4e80f336c0cf 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static void parp_redo(struct sk_buff *skb);
 
-static struct neigh_ops arp_generic_ops = {
+static const struct neigh_ops arp_generic_ops = {
 	.family = AF_INET,
 	.solicit = arp_solicit,
 	.error_report = arp_error_report,
@@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = {
 	.queue_xmit = dev_queue_xmit,
 };
 
-static struct neigh_ops arp_hh_ops = {
+static const struct neigh_ops arp_hh_ops = {
 	.family = AF_INET,
 	.solicit = arp_solicit,
 	.error_report = arp_error_report,
@@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = {
 	.queue_xmit = dev_queue_xmit,
 };
 
-static struct neigh_ops arp_direct_ops = {
+static const struct neigh_ops arp_direct_ops = {
 	.family = AF_INET,
 	.output = dev_queue_xmit,
 	.connected_output = dev_queue_xmit,
@@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = {
 	.queue_xmit = dev_queue_xmit,
 };
 
-struct neigh_ops arp_broken_ops = {
+const struct neigh_ops arp_broken_ops = {
 	.family = AF_INET,
 	.solicit = arp_solicit,
 	.error_report = arp_error_report,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 63c2fa7b68c4..291bdf50a21f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -48,7 +48,7 @@
  *		Patrick McHardy <kaber@trash.net>
  */
 
-#define VERSION "0.408"
+#define VERSION "0.409"
 
 #include <asm/uaccess.h>
#include <asm/system.h>
@@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn);
 static struct tnode *halve(struct trie *t, struct tnode *tn);
 /* tnodes to free after resize(); protected by RTNL */
 static struct tnode *tnode_free_head;
+static size_t tnode_free_size;
+
+/*
+ * synchronize_rcu after call_rcu for that many pages; it should be especially
+ * useful before resizing the root node with PREEMPT_NONE configs; the value was
+ * obtained experimentally, aiming to avoid visible slowdown.
+ */
+static const int sync_pages = 128;
 
 static struct kmem_cache *fn_alias_kmem __read_mostly;
 static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -317,8 +325,7 @@ static inline void check_tnode(const struct tnode *tn)
 static const int halve_threshold = 25;
 static const int inflate_threshold = 50;
 static const int halve_threshold_root = 15;
-static const int inflate_threshold_root = 25;
-
+static const int inflate_threshold_root = 30;
 
 static void __alias_free_mem(struct rcu_head *head)
 {
@@ -393,6 +400,8 @@ static void tnode_free_safe(struct tnode *tn)
 	BUG_ON(IS_LEAF(tn));
 	tn->tnode_free = tnode_free_head;
 	tnode_free_head = tn;
+	tnode_free_size += sizeof(struct tnode) +
+			   (sizeof(struct node *) << tn->bits);
 }
 
 static void tnode_free_flush(void)
@@ -404,6 +413,11 @@ static void tnode_free_flush(void)
 		tn->tnode_free = NULL;
 		tnode_free(tn);
 	}
+
+	if (tnode_free_size >= PAGE_SIZE * sync_pages) {
+		tnode_free_size = 0;
+		synchronize_rcu();
+	}
 }
 
 static struct leaf *leaf_new(void)
@@ -499,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
 	rcu_assign_pointer(tn->child[i], n);
 }
 
+#define MAX_WORK 10
 static struct node *resize(struct trie *t, struct tnode *tn)
 {
 	int i;
-	int err = 0;
 	struct tnode *old_tn;
 	int inflate_threshold_use;
 	int halve_threshold_use;
-	int max_resize;
+	int max_work;
 
 	if (!tn)
 		return NULL;
@@ -521,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 	}
 	/* One child */
 	if (tn->empty_children == tnode_child_length(tn) - 1)
-		for (i = 0; i < tnode_child_length(tn); i++) {
-			struct node *n;
-
-			n = tn->child[i];
-			if (!n)
-				continue;
-
-			/* compress one level */
-			node_set_parent(n, NULL);
-			tnode_free_safe(tn);
-			return n;
-		}
+		goto one_child;
 	/*
 	 * Double as long as the resulting node has a number of
 	 * nonempty nodes that are above the threshold.
@@ -601,14 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Keep root node larger */
 
-	if (!tn->parent)
+	if (!node_parent((struct node*) tn)) {
 		inflate_threshold_use = inflate_threshold_root;
-	else
+		halve_threshold_use = halve_threshold_root;
+	}
+	else {
 		inflate_threshold_use = inflate_threshold;
+		halve_threshold_use = halve_threshold;
+	}
 
-	err = 0;
-	max_resize = 10;
-	while ((tn->full_children > 0 && max_resize-- &&
+	max_work = MAX_WORK;
+	while ((tn->full_children > 0 && max_work-- &&
 	       50 * (tn->full_children + tnode_child_length(tn)
 	       - tn->empty_children)
 	       >= inflate_threshold_use * tnode_child_length(tn))) {
@@ -625,35 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		}
 	}
 
-	if (max_resize < 0) {
-		if (!tn->parent)
-			pr_warning("Fix inflate_threshold_root."
-				   " Now=%d size=%d bits\n",
-				   inflate_threshold_root, tn->bits);
-		else
-			pr_warning("Fix inflate_threshold."
-				   " Now=%d size=%d bits\n",
-				   inflate_threshold, tn->bits);
-	}
-
 	check_tnode(tn);
 
+	/* Return if at least one inflate is run */
+	if( max_work != MAX_WORK)
+		return (struct node *) tn;
+
 	/*
 	 * Halve as long as the number of empty children in this
 	 * node is above threshold.
 	 */
 
-
-	/* Keep root node larger */
-
-	if (!tn->parent)
-		halve_threshold_use = halve_threshold_root;
-	else
-		halve_threshold_use = halve_threshold;
-
-	err = 0;
-	max_resize = 10;
-	while (tn->bits > 1 && max_resize-- &&
+	max_work = MAX_WORK;
+	while (tn->bits > 1 && max_work-- &&
 	       100 * (tnode_child_length(tn) - tn->empty_children) <
 	       halve_threshold_use * tnode_child_length(tn)) {
 
@@ -668,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		}
 	}
 
-	if (max_resize < 0) {
-		if (!tn->parent)
-			pr_warning("Fix halve_threshold_root."
-				   " Now=%d size=%d bits\n",
-				   halve_threshold_root, tn->bits);
-		else
-			pr_warning("Fix halve_threshold."
-				   " Now=%d size=%d bits\n",
-				   halve_threshold, tn->bits);
-	}
 
 	/* Only one child remains */
-	if (tn->empty_children == tnode_child_length(tn) - 1)
+	if (tn->empty_children == tnode_child_length(tn) - 1) {
+one_child:
 		for (i = 0; i < tnode_child_length(tn); i++) {
 			struct node *n;
 
@@ -694,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 			tnode_free_safe(tn);
 			return n;
 		}
-
+	}
 	return (struct node *) tn;
 }
 
@@ -1435,7 +1416,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
 		cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
 					   pos, bits);
 
-		n = tnode_get_child(pn, cindex);
+		n = tnode_get_child_rcu(pn, cindex);
 
 		if (n == NULL) {
 #ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -1570,7 +1551,7 @@ backtrace:
 		if (chopped_off <= pn->bits) {
 			cindex &= ~(1 << (chopped_off-1));
 		} else {
-			struct tnode *parent = node_parent((struct node *) pn);
+			struct tnode *parent = node_parent_rcu((struct node *) pn);
 			if (!parent)
 				goto failed;
 
@@ -1783,7 +1764,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
 static struct leaf *trie_nextleaf(struct leaf *l)
 {
 	struct node *c = (struct node *) l;
	struct tnode *p = node_parent_rcu(c);
 
 	if (!p)
 		return NULL;	/* trie with just one leaf */
@@ -2391,7 +2372,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
 	}
 }
 
-static const char *rtn_type_names[__RTN_MAX] = {
+static const char *const rtn_type_names[__RTN_MAX] = {
 	[RTN_UNSPEC] = "UNSPEC",
 	[RTN_UNICAST] = "UNICAST",
 	[RTN_LOCAL] = "LOCAL",
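The tnode_free_size/sync_pages machinery above bounds how much memory can sit on the deferred-free list while waiting for an RCU grace period: each queued tnode adds its size to a counter, and the flush forces a grace period once the backlog crosses sync_pages pages. A minimal userspace sketch of that throttling pattern (all names and sizes are stand-ins, not kernel API):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE  4096
#define SYNC_PAGES 128

struct node { struct node *next; size_t size; };

static struct node *free_head;
static size_t free_size;

static void synchronize_readers(void)	/* stands in for synchronize_rcu() */
{
	printf("grace period: backlog drained\n");
}

static void node_free_safe(struct node *n, size_t size)
{
	n->size = size;
	n->next = free_head;	/* queue instead of freeing immediately */
	free_head = n;
	free_size += size;	/* account bytes awaiting a grace period */
}

static void node_free_flush(void)
{
	while (free_head) {
		struct node *n = free_head;
		free_head = n->next;
		free(n);
	}
	/* Bound the backlog: once enough memory has piled up, wait for
	 * readers so the freed memory actually becomes reusable. */
	if (free_size >= PAGE_SIZE * SYNC_PAGES) {
		free_size = 0;
		synchronize_readers();
	}
}

int main(void)
{
	for (int i = 0; i < 2048; i++)
		node_free_safe(malloc(sizeof(struct node)), 1024);
	node_free_flush();
	return 0;
}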
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 61283f928825..13f0781f35cd 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data)
 		/* We purged the entire slot, anything left?  */
 		if (twdr->tw_count)
 			need_timer = 1;
+		twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
 	}
-	twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
 	if (need_timer)
 		mod_timer(&twdr->tw_timer, jiffies + twdr->period);
 out:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 82c11dd10a62..533afaadefd4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -662,7 +662,7 @@ drop_nolock:
 	return(0);
 }
 
-static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->dev->stats;
@@ -821,7 +821,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			stats->tx_dropped++;
 			dev_kfree_skb(skb);
 			tunnel->recursion--;
-			return 0;
+			return NETDEV_TX_OK;
 		}
 		if (skb->sk)
 			skb_set_owner_w(new_skb, skb->sk);
@@ -889,7 +889,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	IPTUNNEL_XMIT();
 	tunnel->recursion--;
-	return 0;
+	return NETDEV_TX_OK;
 
 tx_error_icmp:
 	dst_link_failure(skb);
@@ -898,7 +898,7 @@ tx_error:
 	stats->tx_errors++;
 	dev_kfree_skb(skb);
 	tunnel->recursion--;
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static int ipgre_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 93e2b787da20..62548cb0923c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb)
  *	and that skb is filled properly by that function.
  */
 
-static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->dev->stats;
@@ -486,7 +486,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			stats->tx_dropped++;
 			dev_kfree_skb(skb);
 			tunnel->recursion--;
-			return 0;
+			return NETDEV_TX_OK;
 		}
 		if (skb->sk)
 			skb_set_owner_w(new_skb, skb->sk);
@@ -524,7 +524,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	IPTUNNEL_XMIT();
 	tunnel->recursion--;
-	return 0;
+	return NETDEV_TX_OK;
 
 tx_error_icmp:
 	dst_link_failure(skb);
@@ -532,7 +532,7 @@ tx_error:
 	stats->tx_errors++;
 	dev_kfree_skb(skb);
 	tunnel->recursion--;
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static void ipip_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9a8da5ed92b7..65d421cf5bc7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,7 @@ failure:
 
 #ifdef CONFIG_IP_PIMSM
 
-static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
 
@@ -212,7 +212,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 		       IGMPMSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static const struct net_device_ops reg_vif_netdev_ops = {
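ip_gre.c, ipip.c and ipmr.c above all convert their xmit handlers from int to netdev_tx_t. The contract behind the type: return NETDEV_TX_OK once the skb has been consumed (transmitted or dropped), and NETDEV_TX_BUSY only when the packet was not taken and the core should requeue it. A standalone sketch with stand-in types (not the kernel's definitions from <linux/netdevice.h>):

#include <stdio.h>

typedef enum { NETDEV_TX_OK = 0, NETDEV_TX_BUSY = 16 } netdev_tx_t;

struct sk_buff { int len; };
struct net_device { int queue_full; };

static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (dev->queue_full)
		return NETDEV_TX_BUSY;	/* skb not consumed; core requeues it */

	printf("sent %d bytes\n", skb->len);
	return NETDEV_TX_OK;		/* skb consumed (sent, or dropped on error) */
}

int main(void)
{
	struct net_device dev = { .queue_full = 0 };
	struct sk_buff skb = { .len = 1500 };

	return example_xmit(&skb, &dev) == NETDEV_TX_OK ? 0 : 1;
}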
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 278f46f5011b..91867d3e6328 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 void ip_rt_send_redirect(struct sk_buff *skb)
 {
 	struct rtable *rt = skb_rtable(skb);
-	struct in_device *in_dev = in_dev_get(rt->u.dst.dev);
+	struct in_device *in_dev;
+	int log_martians;
 
-	if (!in_dev)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
+		rcu_read_unlock();
 		return;
-
-	if (!IN_DEV_TX_REDIRECTS(in_dev))
-		goto out;
+	}
+	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
+	rcu_read_unlock();
 
 	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
@@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	 */
 	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
 		rt->u.dst.rate_last = jiffies;
-		goto out;
+		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
@@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 		rt->u.dst.rate_last = jiffies;
 		++rt->u.dst.rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
-		if (IN_DEV_LOG_MARTIANS(in_dev) &&
+		if (log_martians &&
 		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
@@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 			       &rt->rt_dst, &rt->rt_gateway);
 #endif
 	}
-out:
-	in_dev_put(in_dev);
 }
 
 static int ip_error(struct sk_buff *skb)
@@ -3442,7 +3444,7 @@ int __init ip_rt_init(void)
 		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
 	xfrm_init();
-	xfrm4_init();
+	xfrm4_init(ip_rt_max_size);
 #endif
 	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
 
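The ip_rt_send_redirect() rework above trades a reference count (in_dev_get/in_dev_put held across the whole function) for a short RCU read-side section: lock, copy the single flag needed later (log_martians), unlock. A standalone sketch of that copy-under-RCU pattern, with stub lock functions standing in for the real RCU API:

#include <stdio.h>

struct config { int log_martians; };

static struct config *shared_cfg;	/* normally reached via rcu_dereference() */

static void rcu_read_lock(void)   { }	/* stubs standing in for the real API */
static void rcu_read_unlock(void) { }

static void send_redirect(void)
{
	int log_martians;

	rcu_read_lock();
	if (!shared_cfg) {		/* the device may have lost its config */
		rcu_read_unlock();
		return;
	}
	log_martians = shared_cfg->log_martians;	/* copy the flag, keep no pointer */
	rcu_read_unlock();

	/* ... rate limiting and transmit run here, without lock or refcount ... */
	if (log_martians)
		printf("would log the martian source\n");
}

int main(void)
{
	struct config cfg = { .log_martians = 1 };
	shared_cfg = &cfg;
	send_redirect();
	return 0;
}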
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 91145244ea63..59f69a6c5863 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		val = !!(tp->nonagle&TCP_NAGLE_CORK);
 		break;
 	case TCP_KEEPIDLE:
-		val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
+		val = keepalive_time_when(tp) / HZ;
 		break;
 	case TCP_KEEPINTVL:
-		val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ;
+		val = keepalive_intvl_when(tp) / HZ;
 		break;
 	case TCP_KEEPCNT:
-		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
+		val = keepalive_probes(tp);
 		break;
 	case TCP_SYNCNT:
 		val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
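The three getsockopt branches above fold the open-coded "per-socket value if set, otherwise the sysctl default" expressions into named helpers. Reconstructed from the expressions they replace, the helpers presumably reduce to the sketch below (an assumption about include/net/tcp.h, not text quoted from this patch):

static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
	/* per-socket interval if set, otherwise the sysctl default */
	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
}

static inline int keepalive_time_when(const struct tcp_sock *tp)
{
	return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}

static inline int keepalive_probes(const struct tcp_sock *tp)
{
	return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
}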
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bdb0da237e6..af6d6fa00db1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk)
 	 * is invisible. Actually, Linux-2.4 also generates erratic
 	 * ACKs in some circumstances.
 	 */
-	inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
+	inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
 
 	/* 2. Fixups made earlier cannot be right.
 	 *    If we do not estimate RTO correctly without them,
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk)
 	/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
 	 * guarantees that rto is higher.
 	 */
-	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
-		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
+	tcp_bound_rto(sk);
 }
 
 /* Save metrics learned by this TCP session.
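Here the open-coded RTO formula and its clamp move behind helpers so tcp_ipv4.c can reuse them (see the backoff revert in the next file). From the removed lines, the helpers presumably reduce to the sketch below (assumed shapes, not quoted from the patch). tp->srtt is stored left-shifted by 3, so srtt >> 3 is the smoothed RTT and tp->rttvar supplies the variance term of RFC 2988's RTO = SRTT + 4*RTTVAR:

static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
{
	/* per the comment above, this is never below TCP_RTO_MIN */
	return (tp->srtt >> 3) + tp->rttvar;
}

static inline void tcp_bound_rto(struct sock *sk)
{
	if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
		inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
}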
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6d88219c5e22..6755e29a6dd3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -328,26 +328,29 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
  *
  */
 
-void tcp_v4_err(struct sk_buff *skb, u32 info)
+void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 {
-	struct iphdr *iph = (struct iphdr *)skb->data;
-	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
+	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
+	struct inet_connection_sock *icsk;
 	struct tcp_sock *tp;
 	struct inet_sock *inet;
-	const int type = icmp_hdr(skb)->type;
-	const int code = icmp_hdr(skb)->code;
+	const int type = icmp_hdr(icmp_skb)->type;
+	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
+	struct sk_buff *skb;
 	__u32 seq;
+	__u32 remaining;
 	int err;
-	struct net *net = dev_net(skb->dev);
+	struct net *net = dev_net(icmp_skb->dev);
 
-	if (skb->len < (iph->ihl << 2) + 8) {
+	if (icmp_skb->len < (iph->ihl << 2) + 8) {
 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 
 	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
-			 iph->saddr, th->source, inet_iif(skb));
+			 iph->saddr, th->source, inet_iif(icmp_skb));
 	if (!sk) {
 		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
+	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		}
 
 		err = icmp_err_convert[code].errno;
+		/* check if icmp_skb allows revert of backoff
+		 * (see draft-zimmermann-tcp-lcd) */
+		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
+			break;
+		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
+		    !icsk->icsk_backoff)
+			break;
+
+		icsk->icsk_backoff--;
+		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
+					 icsk->icsk_backoff;
+		tcp_bound_rto(sk);
+
+		skb = tcp_write_queue_head(sk);
+		BUG_ON(!skb);
+
+		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
+				tcp_time_stamp - TCP_SKB_CB(skb)->when);
+
+		if (remaining) {
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+						  remaining, TCP_RTO_MAX);
+		} else if (sock_owned_by_user(sk)) {
+			/* RTO revert clocked out retransmission,
+			 * but socket is locked. Will defer. */
+			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+						  HZ/20, TCP_RTO_MAX);
+		} else {
+			/* RTO revert clocked out retransmission.
+			 * Will retransmit now */
+			tcp_retransmit_timer(sk);
+		}
+
 		break;
 	case ICMP_TIME_EXCEEDED:
 		err = EHOSTUNREACH;
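The ICMP_DEST_UNREACH block added above reverts one exponential-backoff step when a net- or host-unreachable error arrives for the first unacknowledged segment (draft-zimmermann-tcp-lcd), then re-arms the retransmit timer with whatever is left of the reverted RTO. A standalone sketch of that arithmetic with hypothetical numbers, using milliseconds instead of HZ-based ticks:

#include <stdio.h>

int main(void)
{
	unsigned int base_rto = 200;	/* __tcp_set_rto() result, in ms */
	unsigned int backoff  = 3;	/* three timeouts so far: RTO<<3 = 1600 ms */
	unsigned int elapsed  = 300;	/* ms since the head skb was timestamped */

	backoff--;					/* undo one backoff step */
	unsigned int rto = base_rto << backoff;		/* 200 << 2 = 800 ms */
	unsigned int remaining = rto - (elapsed < rto ? elapsed : rto);

	if (remaining)
		printf("re-arm retransmit timer in %u ms\n", remaining);	/* 500 ms */
	else
		printf("reverted RTO already expired: retransmit now\n");
	return 0;
}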
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f8d67ccc64f3..6c8b42299d9f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
 	if (child == NULL)
 		goto listen_overflow;
-#ifdef CONFIG_TCP_MD5SIG
-	else {
-		/* Copy over the MD5 key from the original socket */
-		struct tcp_md5sig_key *key;
-		struct tcp_sock *tp = tcp_sk(sk);
-		key = tp->af_specific->md5_lookup(sk, child);
-		if (key != NULL) {
-			/*
-			 * We're using one, so create a matching key on the
-			 * newsk structure. If we fail to get memory then we
-			 * end up not copying the key across. Shucks.
-			 */
-			char *newkey = kmemdup(key->key, key->keylen,
-					       GFP_ATOMIC);
-			if (newkey) {
-				if (!tcp_alloc_md5sig_pool())
-					BUG();
-				tp->af_specific->md5_add(child, child, newkey,
-							 key->keylen);
-			}
-		}
-	}
-#endif
 
 	inet_csk_reqsk_queue_unlink(sk, req, prev);
 	inet_csk_reqsk_queue_removed(sk, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bd62712848fa..4e004424d400 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -59,6 +59,7 @@ int sysctl_tcp_base_mss __read_mostly = 512; | |||
59 | /* By default, RFC2861 behavior. */ | 59 | /* By default, RFC2861 behavior. */ |
60 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 60 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
61 | 61 | ||
62 | /* Account for new data that has been sent to the network. */ | ||
62 | static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) | 63 | static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) |
63 | { | 64 | { |
64 | struct tcp_sock *tp = tcp_sk(sk); | 65 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -142,6 +143,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) | |||
142 | tp->snd_cwnd_used = 0; | 143 | tp->snd_cwnd_used = 0; |
143 | } | 144 | } |
144 | 145 | ||
146 | /* Congestion state accounting after a packet has been sent. */ | ||
145 | static void tcp_event_data_sent(struct tcp_sock *tp, | 147 | static void tcp_event_data_sent(struct tcp_sock *tp, |
146 | struct sk_buff *skb, struct sock *sk) | 148 | struct sk_buff *skb, struct sock *sk) |
147 | { | 149 | { |
@@ -161,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, | |||
161 | icsk->icsk_ack.pingpong = 1; | 163 | icsk->icsk_ack.pingpong = 1; |
162 | } | 164 | } |
163 | 165 | ||
166 | /* Account for an ACK we sent. */ | ||
164 | static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) | 167 | static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) |
165 | { | 168 | { |
166 | tcp_dec_quickack_mode(sk, pkts); | 169 | tcp_dec_quickack_mode(sk, pkts); |
@@ -276,6 +279,7 @@ static u16 tcp_select_window(struct sock *sk) | |||
276 | return new_win; | 279 | return new_win; |
277 | } | 280 | } |
278 | 281 | ||
282 | /* Packet ECN state for a SYN-ACK */ | ||
279 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) | 283 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) |
280 | { | 284 | { |
281 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; | 285 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; |
@@ -283,6 +287,7 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) | |||
283 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; | 287 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; |
284 | } | 288 | } |
285 | 289 | ||
290 | /* Packet ECN state for a SYN. */ | ||
286 | static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) | 291 | static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) |
287 | { | 292 | { |
288 | struct tcp_sock *tp = tcp_sk(sk); | 293 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -301,6 +306,9 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) | |||
301 | th->ece = 1; | 306 | th->ece = 1; |
302 | } | 307 | } |
303 | 308 | ||
309 | /* Set up ECN state for a packet on a ESTABLISHED socket that is about to | ||
310 | * be sent. | ||
311 | */ | ||
304 | static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | 312 | static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, |
305 | int tcp_header_len) | 313 | int tcp_header_len) |
306 | { | 314 | { |
@@ -362,7 +370,9 @@ struct tcp_out_options { | |||
362 | __u32 tsval, tsecr; /* need to include OPTION_TS */ | 370 | __u32 tsval, tsecr; /* need to include OPTION_TS */ |
363 | }; | 371 | }; |
364 | 372 | ||
365 | /* Beware: Something in the Internet is very sensitive to the ordering of | 373 | /* Write previously computed TCP options to the packet. |
374 | * | ||
375 | * Beware: Something in the Internet is very sensitive to the ordering of | ||
366 | * TCP options, we learned this through the hard way, so be careful here. | 376 | * TCP options, we learned this through the hard way, so be careful here. |
367 | * Luckily we can at least blame others for their non-compliance but from | 377 | * Luckily we can at least blame others for their non-compliance but from |
368 | * inter-operatibility perspective it seems that we're somewhat stuck with | 378 | * inter-operatibility perspective it seems that we're somewhat stuck with |
@@ -445,6 +455,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
445 | } | 455 | } |
446 | } | 456 | } |
447 | 457 | ||
458 | /* Compute TCP options for SYN packets. This is not the final | ||
459 | * network wire format yet. | ||
460 | */ | ||
448 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | 461 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, |
449 | struct tcp_out_options *opts, | 462 | struct tcp_out_options *opts, |
450 | struct tcp_md5sig_key **md5) { | 463 | struct tcp_md5sig_key **md5) { |
@@ -493,6 +506,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
493 | return size; | 506 | return size; |
494 | } | 507 | } |
495 | 508 | ||
509 | /* Set up TCP options for SYN-ACKs. */ | ||
496 | static unsigned tcp_synack_options(struct sock *sk, | 510 | static unsigned tcp_synack_options(struct sock *sk, |
497 | struct request_sock *req, | 511 | struct request_sock *req, |
498 | unsigned mss, struct sk_buff *skb, | 512 | unsigned mss, struct sk_buff *skb, |
@@ -541,6 +555,9 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
541 | return size; | 555 | return size; |
542 | } | 556 | } |
543 | 557 | ||
558 | /* Compute TCP options for ESTABLISHED sockets. This is not the | ||
559 | * final wire format yet. | ||
560 | */ | ||
544 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | 561 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, |
545 | struct tcp_out_options *opts, | 562 | struct tcp_out_options *opts, |
546 | struct tcp_md5sig_key **md5) { | 563 | struct tcp_md5sig_key **md5) { |
@@ -705,7 +722,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
705 | return net_xmit_eval(err); | 722 | return net_xmit_eval(err); |
706 | } | 723 | } |
707 | 724 | ||
708 | /* This routine just queue's the buffer | 725 | /* This routine just queues the buffer for sending. |
709 | * | 726 | * |
710 | * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, | 727 | * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, |
711 | * otherwise socket can stall. | 728 | * otherwise socket can stall. |
@@ -722,6 +739,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
722 | sk_mem_charge(sk, skb->truesize); | 739 | sk_mem_charge(sk, skb->truesize); |
723 | } | 740 | } |
724 | 741 | ||
742 | /* Initialize TSO segments for a packet. */ | ||
725 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, | 743 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, |
726 | unsigned int mss_now) | 744 | unsigned int mss_now) |
727 | { | 745 | { |
@@ -909,6 +927,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
909 | skb->len = skb->data_len; | 927 | skb->len = skb->data_len; |
910 | } | 928 | } |
911 | 929 | ||
930 | /* Remove acked data from a packet in the transmit queue. */ | ||
912 | int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | 931 | int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) |
913 | { | 932 | { |
914 | if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) | 933 | if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
@@ -937,7 +956,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
937 | return 0; | 956 | return 0; |
938 | } | 957 | } |
939 | 958 | ||
940 | /* Not accounting for SACKs here. */ | 959 | /* Calculate MSS. Not accounting for SACKs here. */ |
941 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) | 960 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) |
942 | { | 961 | { |
943 | struct tcp_sock *tp = tcp_sk(sk); | 962 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -981,6 +1000,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) | |||
981 | return mtu; | 1000 | return mtu; |
982 | } | 1001 | } |
983 | 1002 | ||
1003 | /* MTU probing init per socket */ | ||
984 | void tcp_mtup_init(struct sock *sk) | 1004 | void tcp_mtup_init(struct sock *sk) |
985 | { | 1005 | { |
986 | struct tcp_sock *tp = tcp_sk(sk); | 1006 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -1143,7 +1163,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, | |||
1143 | return 0; | 1163 | return 0; |
1144 | } | 1164 | } |
1145 | 1165 | ||
1146 | /* This must be invoked the first time we consider transmitting | 1166 | /* Intialize TSO state of a skb. |
1167 | * This must be invoked the first time we consider transmitting | ||
1147 | * SKB onto the wire. | 1168 | * SKB onto the wire. |
1148 | */ | 1169 | */ |
1149 | static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, | 1170 | static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, |
@@ -1158,6 +1179,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, | |||
1158 | return tso_segs; | 1179 | return tso_segs; |
1159 | } | 1180 | } |
1160 | 1181 | ||
1182 | /* Minshall's variant of the Nagle send check. */ | ||
1161 | static inline int tcp_minshall_check(const struct tcp_sock *tp) | 1183 | static inline int tcp_minshall_check(const struct tcp_sock *tp) |
1162 | { | 1184 | { |
1163 | return after(tp->snd_sml, tp->snd_una) && | 1185 | return after(tp->snd_sml, tp->snd_una) && |
@@ -1242,6 +1264,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, | |||
1242 | return cwnd_quota; | 1264 | return cwnd_quota; |
1243 | } | 1265 | } |
1244 | 1266 | ||
1267 | /* Test if sending is allowed right now. */ | ||
1245 | int tcp_may_send_now(struct sock *sk) | 1268 | int tcp_may_send_now(struct sock *sk) |
1246 | { | 1269 | { |
1247 | struct tcp_sock *tp = tcp_sk(sk); | 1270 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -1378,6 +1401,10 @@ send_now: | |||
1378 | } | 1401 | } |
1379 | 1402 | ||
1380 | /* Create a new MTU probe if we are ready. | 1403 | /* Create a new MTU probe if we are ready. |
1404 | * MTU probe is regularly attempting to increase the path MTU by | ||
1405 | * deliberately sending larger packets. This discovers routing | ||
1406 | * changes resulting in larger path MTUs. | ||
1407 | * | ||
1381 | * Returns 0 if we should wait to probe (no cwnd available), | 1408 | * Returns 0 if we should wait to probe (no cwnd available), |
1382 | * 1 if a probe was sent, | 1409 | * 1 if a probe was sent, |
1383 | * -1 otherwise | 1410 | * -1 otherwise |
@@ -1790,6 +1817,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
1790 | sk_wmem_free_skb(sk, next_skb); | 1817 | sk_wmem_free_skb(sk, next_skb); |
1791 | } | 1818 | } |
1792 | 1819 | ||
1820 | /* Check if coalescing SKBs is legal. */ | ||
1793 | static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) | 1821 | static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) |
1794 | { | 1822 | { |
1795 | if (tcp_skb_pcount(skb) > 1) | 1823 | if (tcp_skb_pcount(skb) > 1) |
@@ -1808,6 +1836,9 @@ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) | |||
1808 | return 1; | 1836 | return 1; |
1809 | } | 1837 | } |
1810 | 1838 | ||
1839 | /* Collapse packets in the retransmit queue to make to create | ||
1840 | * less packets on the wire. This is only done on retransmission. | ||
1841 | */ | ||
1811 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | 1842 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, |
1812 | int space) | 1843 | int space) |
1813 | { | 1844 | { |
@@ -1957,6 +1988,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1957 | return err; | 1988 | return err; |
1958 | } | 1989 | } |
1959 | 1990 | ||
1991 | /* Check whether forward retransmissions are possible in the | ||
1992 | * current window/congestion state. | ||
1993 | */ | ||
1960 | static int tcp_can_forward_retransmit(struct sock *sk) | 1994 | static int tcp_can_forward_retransmit(struct sock *sk) |
1961 | { | 1995 | { |
1962 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1996 | const struct inet_connection_sock *icsk = inet_csk(sk); |
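
For reference, the body behind this new comment (just past the hunk edge) gates forward retransmissions on the congestion state. A sketch of the conditions as they appear in kernels of this vintage; the exact set should be checked against the tree:

    static int tcp_can_forward_retransmit_sketch(struct sock *sk)
    {
            const struct inet_connection_sock *icsk = inet_csk(sk);
            struct tcp_sock *tp = tcp_sk(sk);

            /* Forward retransmissions only make sense during Recovery. */
            if (icsk->icsk_ca_state != TCP_CA_Recovery)
                    return 0;

            /* Without SACK there is no reliable hole information to skip. */
            if (tcp_is_reno(tp))
                    return 0;

            /* Prefer new data while the window still allows sending it. */
            if (tcp_may_send_now(sk))
                    return 0;

            return 1;
    }
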
@@ -2145,7 +2179,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2145 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); | 2179 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); |
2146 | } | 2180 | } |
2147 | 2181 | ||
2148 | /* WARNING: This routine must only be called when we have already sent | 2182 | /* Send a crossed SYN-ACK during socket establishment. |
2183 | * WARNING: This routine must only be called when we have already sent | ||
2149 | * a SYN packet that crossed the incoming SYN that caused this routine | 2184 | * a SYN packet that crossed the incoming SYN that caused this routine |
2150 | * to get called. If this assumption fails then the initial rcv_wnd | 2185 | * to get called. If this assumption fails then the initial rcv_wnd |
2151 | * and rcv_wscale values will not be correct. | 2186 | * and rcv_wscale values will not be correct. |
@@ -2180,9 +2215,7 @@ int tcp_send_synack(struct sock *sk) | |||
2180 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2215 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2181 | } | 2216 | } |
2182 | 2217 | ||
2183 | /* | 2218 | /* Prepare a SYN-ACK. */ |
2184 | * Prepare a SYN-ACK. | ||
2185 | */ | ||
2186 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2219 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2187 | struct request_sock *req) | 2220 | struct request_sock *req) |
2188 | { | 2221 | { |
@@ -2269,9 +2302,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2269 | return skb; | 2302 | return skb; |
2270 | } | 2303 | } |
2271 | 2304 | ||
2272 | /* | 2305 | /* Do all connect socket setups that can be done AF-independently. */ |
2273 | * Do all connect socket setups that can be done AF independent. | ||
2274 | */ | ||
2275 | static void tcp_connect_init(struct sock *sk) | 2306 | static void tcp_connect_init(struct sock *sk) |
2276 | { | 2307 | { |
2277 | struct dst_entry *dst = __sk_dst_get(sk); | 2308 | struct dst_entry *dst = __sk_dst_get(sk); |
@@ -2330,9 +2361,7 @@ static void tcp_connect_init(struct sock *sk) | |||
2330 | tcp_clear_retrans(tp); | 2361 | tcp_clear_retrans(tp); |
2331 | } | 2362 | } |
2332 | 2363 | ||
2333 | /* | 2364 | /* Build a SYN and send it off. */ |
2334 | * Build a SYN and send it off. | ||
2335 | */ | ||
2336 | int tcp_connect(struct sock *sk) | 2365 | int tcp_connect(struct sock *sk) |
2337 | { | 2366 | { |
2338 | struct tcp_sock *tp = tcp_sk(sk); | 2367 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2493,6 +2522,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
2493 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2522 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2494 | } | 2523 | } |
2495 | 2524 | ||
2525 | /* Initiate keepalive or window probe from timer. */ | ||
2496 | int tcp_write_wakeup(struct sock *sk) | 2526 | int tcp_write_wakeup(struct sock *sk) |
2497 | { | 2527 | { |
2498 | struct tcp_sock *tp = tcp_sk(sk); | 2528 | struct tcp_sock *tp = tcp_sk(sk); |
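
The "from timer" in the tcp_write_wakeup() comment refers to the zero-window probe path: the probe timer fires, the probe sender calls tcp_write_wakeup(), and if the window is still closed it backs off and rearms. A simplified sketch of that caller, modeled on tcp_send_probe0() with its probes_out bookkeeping abbreviated:

    static void probe0_sketch(struct sock *sk)
    {
            struct inet_connection_sock *icsk = inet_csk(sk);
            int err = tcp_write_wakeup(sk);

            if (err <= 0) {
                    /* Window still closed: back off and rearm the timer. */
                    if (icsk->icsk_backoff < sysctl_tcp_retries2)
                            icsk->icsk_backoff++;
                    icsk->icsk_probes_out++;
                    inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
                                              min(icsk->icsk_rto << icsk->icsk_backoff,
                                                  TCP_RTO_MAX),
                                              TCP_RTO_MAX);
            }
    }
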
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b144a26359bc..cdb2ca7684d4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk) | |||
137 | { | 137 | { |
138 | struct inet_connection_sock *icsk = inet_csk(sk); | 138 | struct inet_connection_sock *icsk = inet_csk(sk); |
139 | int retry_until; | 139 | int retry_until; |
140 | bool do_reset; | ||
140 | 141 | ||
141 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 142 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
142 | if (icsk->icsk_retransmits) | 143 | if (icsk->icsk_retransmits) |
143 | dst_negative_advice(&sk->sk_dst_cache); | 144 | dst_negative_advice(&sk->sk_dst_cache); |
144 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 145 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
145 | } else { | 146 | } else { |
146 | if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { | 147 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { |
147 | /* Black hole detection */ | 148 | /* Black hole detection */ |
148 | tcp_mtu_probing(icsk, sk); | 149 | tcp_mtu_probing(icsk, sk); |
149 | 150 | ||
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk) | |||
155 | const int alive = (icsk->icsk_rto < TCP_RTO_MAX); | 156 | const int alive = (icsk->icsk_rto < TCP_RTO_MAX); |
156 | 157 | ||
157 | retry_until = tcp_orphan_retries(sk, alive); | 158 | retry_until = tcp_orphan_retries(sk, alive); |
159 | do_reset = alive || | ||
160 | !retransmits_timed_out(sk, retry_until); | ||
158 | 161 | ||
159 | if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) | 162 | if (tcp_out_of_resources(sk, do_reset)) |
160 | return 1; | 163 | return 1; |
161 | } | 164 | } |
162 | } | 165 | } |
163 | 166 | ||
164 | if (icsk->icsk_retransmits >= retry_until) { | 167 | if (retransmits_timed_out(sk, retry_until)) { |
165 | /* Has it gone just too far? */ | 168 | /* Has it gone just too far? */ |
166 | tcp_write_err(sk); | 169 | tcp_write_err(sk); |
167 | return 1; | 170 | return 1; |
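
The switch from the raw counter test (icsk_retransmits >= boundary) to retransmits_timed_out(sk, boundary) makes the write timeout a function of elapsed time: the connection is only declared dead after the wall-clock duration that 'boundary' exponential backoffs would take. A sketch of the idea, assuming the retrans_stamp bookkeeping the kernel version relies on:

    /* Has this connection been retransmitting for at least as long as
     * 'boundary' backoffs of the minimum RTO would take? The RTO doubles
     * until capped at TCP_RTO_MAX and accumulates linearly after that.
     */
    static bool retransmits_timed_out_sketch(struct sock *sk,
                                             unsigned int boundary)
    {
            unsigned int linear_thresh, timeout;

            if (!inet_csk(sk)->icsk_retransmits)
                    return false;

            linear_thresh = ilog2(TCP_RTO_MAX / TCP_RTO_MIN);
            if (boundary <= linear_thresh)
                    timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
            else
                    timeout = ((2 << linear_thresh) - 1) * TCP_RTO_MIN +
                              (boundary - linear_thresh) * TCP_RTO_MAX;

            return tcp_time_stamp - tcp_sk(sk)->retrans_stamp >= timeout;
    }
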
@@ -279,7 +282,7 @@ static void tcp_probe_timer(struct sock *sk) | |||
279 | * The TCP retransmit timer. | 282 | * The TCP retransmit timer. |
280 | */ | 283 | */ |
281 | 284 | ||
282 | static void tcp_retransmit_timer(struct sock *sk) | 285 | void tcp_retransmit_timer(struct sock *sk) |
283 | { | 286 | { |
284 | struct tcp_sock *tp = tcp_sk(sk); | 287 | struct tcp_sock *tp = tcp_sk(sk); |
285 | struct inet_connection_sock *icsk = inet_csk(sk); | 288 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -385,7 +388,7 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
385 | out_reset_timer: | 388 | out_reset_timer: |
386 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 389 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); |
387 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 390 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
388 | if (icsk->icsk_retransmits > sysctl_tcp_retries1) | 391 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) |
389 | __sk_dst_reset(sk); | 392 | __sk_dst_reset(sk); |
390 | 393 | ||
391 | out:; | 394 | out:; |
@@ -499,8 +502,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
499 | elapsed = tcp_time_stamp - tp->rcv_tstamp; | 502 | elapsed = tcp_time_stamp - tp->rcv_tstamp; |
500 | 503 | ||
501 | if (elapsed >= keepalive_time_when(tp)) { | 504 | if (elapsed >= keepalive_time_when(tp)) { |
502 | if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || | 505 | if (icsk->icsk_probes_out >= keepalive_probes(tp)) { |
503 | (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { | ||
504 | tcp_send_active_reset(sk, GFP_ATOMIC); | 506 | tcp_send_active_reset(sk, GFP_ATOMIC); |
505 | tcp_write_err(sk); | 507 | tcp_write_err(sk); |
506 | goto out; | 508 | goto out; |
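
The collapsed condition relies on a keepalive_probes() accessor, presumably defined alongside keepalive_time_when() with the usual pattern: the per-socket value wins when set, otherwise the sysctl default applies. A sketch:

    /* Per-socket keepalive probe count, falling back to the
     * tcp_keepalive_probes sysctl when the socket has not set one.
     */
    static inline int keepalive_probes(const struct tcp_sock *tp)
    {
            return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
    }
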
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 80e3812837ad..29ebb0d27a1e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -110,11 +110,12 @@ struct udp_table udp_table; | |||
110 | EXPORT_SYMBOL(udp_table); | 110 | EXPORT_SYMBOL(udp_table); |
111 | 111 | ||
112 | int sysctl_udp_mem[3] __read_mostly; | 112 | int sysctl_udp_mem[3] __read_mostly; |
113 | int sysctl_udp_rmem_min __read_mostly; | ||
114 | int sysctl_udp_wmem_min __read_mostly; | ||
115 | |||
116 | EXPORT_SYMBOL(sysctl_udp_mem); | 113 | EXPORT_SYMBOL(sysctl_udp_mem); |
114 | |||
115 | int sysctl_udp_rmem_min __read_mostly; | ||
117 | EXPORT_SYMBOL(sysctl_udp_rmem_min); | 116 | EXPORT_SYMBOL(sysctl_udp_rmem_min); |
117 | |||
118 | int sysctl_udp_wmem_min __read_mostly; | ||
118 | EXPORT_SYMBOL(sysctl_udp_wmem_min); | 119 | EXPORT_SYMBOL(sysctl_udp_wmem_min); |
119 | 120 | ||
120 | atomic_t udp_memory_allocated; | 121 | atomic_t udp_memory_allocated; |
@@ -158,7 +159,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, | |||
158 | */ | 159 | */ |
159 | int udp_lib_get_port(struct sock *sk, unsigned short snum, | 160 | int udp_lib_get_port(struct sock *sk, unsigned short snum, |
160 | int (*saddr_comp)(const struct sock *sk1, | 161 | int (*saddr_comp)(const struct sock *sk1, |
161 | const struct sock *sk2 ) ) | 162 | const struct sock *sk2)) |
162 | { | 163 | { |
163 | struct udp_hslot *hslot; | 164 | struct udp_hslot *hslot; |
164 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 165 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
@@ -221,14 +222,15 @@ fail_unlock: | |||
221 | fail: | 222 | fail: |
222 | return error; | 223 | return error; |
223 | } | 224 | } |
225 | EXPORT_SYMBOL(udp_lib_get_port); | ||
224 | 226 | ||
225 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 227 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) |
226 | { | 228 | { |
227 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 229 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
228 | 230 | ||
229 | return ( !ipv6_only_sock(sk2) && | 231 | return (!ipv6_only_sock(sk2) && |
230 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | 232 | (!inet1->rcv_saddr || !inet2->rcv_saddr || |
231 | inet1->rcv_saddr == inet2->rcv_saddr )); | 233 | inet1->rcv_saddr == inet2->rcv_saddr)); |
232 | } | 234 | } |
233 | 235 | ||
234 | int udp_v4_get_port(struct sock *sk, unsigned short snum) | 236 | int udp_v4_get_port(struct sock *sk, unsigned short snum) |
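
ipv4_rcv_saddr_equal() above is the address-conflict comparator that udp_lib_get_port() takes as a callback; udp_v4_get_port(), whose opening line closes this hunk, presumably just forwards it. A sketch of that plumbing:

    int udp_v4_get_port_sketch(struct sock *sk, unsigned short snum)
    {
            /* Bind using the IPv4 source-address conflict test above. */
            return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
    }
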
@@ -383,8 +385,8 @@ found: | |||
383 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | 385 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
384 | { | 386 | { |
385 | struct inet_sock *inet; | 387 | struct inet_sock *inet; |
386 | struct iphdr *iph = (struct iphdr*)skb->data; | 388 | struct iphdr *iph = (struct iphdr *)skb->data; |
387 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | 389 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); |
388 | const int type = icmp_hdr(skb)->type; | 390 | const int type = icmp_hdr(skb)->type; |
389 | const int code = icmp_hdr(skb)->code; | 391 | const int code = icmp_hdr(skb)->code; |
390 | struct sock *sk; | 392 | struct sock *sk; |
@@ -439,7 +441,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
439 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | 441 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
440 | goto out; | 442 | goto out; |
441 | } else { | 443 | } else { |
442 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); | 444 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); |
443 | } | 445 | } |
444 | sk->sk_err = err; | 446 | sk->sk_err = err; |
445 | sk->sk_error_report(sk); | 447 | sk->sk_error_report(sk); |
@@ -474,7 +476,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames); | |||
474 | * (checksum field must be zeroed out) | 476 | * (checksum field must be zeroed out) |
475 | */ | 477 | */ |
476 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | 478 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, |
477 | __be32 src, __be32 dst, int len ) | 479 | __be32 src, __be32 dst, int len) |
478 | { | 480 | { |
479 | unsigned int offset; | 481 | unsigned int offset; |
480 | struct udphdr *uh = udp_hdr(skb); | 482 | struct udphdr *uh = udp_hdr(skb); |
@@ -545,7 +547,7 @@ static int udp_push_pending_frames(struct sock *sk) | |||
545 | 547 | ||
546 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 548 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
547 | 549 | ||
548 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); | 550 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); |
549 | goto send; | 551 | goto send; |
550 | 552 | ||
551 | } else /* `normal' UDP */ | 553 | } else /* `normal' UDP */ |
@@ -553,7 +555,7 @@ static int udp_push_pending_frames(struct sock *sk) | |||
553 | 555 | ||
554 | /* add protocol-dependent pseudo-header */ | 556 | /* add protocol-dependent pseudo-header */ |
555 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | 557 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, |
556 | sk->sk_protocol, csum ); | 558 | sk->sk_protocol, csum); |
557 | if (uh->check == 0) | 559 | if (uh->check == 0) |
558 | uh->check = CSUM_MANGLED_0; | 560 | uh->check = CSUM_MANGLED_0; |
559 | 561 | ||
@@ -592,7 +594,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
592 | * Check the flags. | 594 | * Check the flags. |
593 | */ | 595 | */ |
594 | 596 | ||
595 | if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ | 597 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ |
596 | return -EOPNOTSUPP; | 598 | return -EOPNOTSUPP; |
597 | 599 | ||
598 | ipc.opt = NULL; | 600 | ipc.opt = NULL; |
@@ -619,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
619 | * Get and verify the address. | 621 | * Get and verify the address. |
620 | */ | 622 | */ |
621 | if (msg->msg_name) { | 623 | if (msg->msg_name) { |
622 | struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; | 624 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; |
623 | if (msg->msg_namelen < sizeof(*usin)) | 625 | if (msg->msg_namelen < sizeof(*usin)) |
624 | return -EINVAL; | 626 | return -EINVAL; |
625 | if (usin->sin_family != AF_INET) { | 627 | if (usin->sin_family != AF_INET) { |
@@ -684,7 +686,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
684 | } | 686 | } |
685 | 687 | ||
686 | if (connected) | 688 | if (connected) |
687 | rt = (struct rtable*)sk_dst_check(sk, 0); | 689 | rt = (struct rtable *)sk_dst_check(sk, 0); |
688 | 690 | ||
689 | if (rt == NULL) { | 691 | if (rt == NULL) { |
690 | struct flowi fl = { .oif = ipc.oif, | 692 | struct flowi fl = { .oif = ipc.oif, |
@@ -782,6 +784,7 @@ do_confirm: | |||
782 | err = 0; | 784 | err = 0; |
783 | goto out; | 785 | goto out; |
784 | } | 786 | } |
787 | EXPORT_SYMBOL(udp_sendmsg); | ||
785 | 788 | ||
786 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | 789 | int udp_sendpage(struct sock *sk, struct page *page, int offset, |
787 | size_t size, int flags) | 790 | size_t size, int flags) |
@@ -871,6 +874,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
871 | 874 | ||
872 | return 0; | 875 | return 0; |
873 | } | 876 | } |
877 | EXPORT_SYMBOL(udp_ioctl); | ||
874 | 878 | ||
875 | /* | 879 | /* |
876 | * This should be easy, if there is something there we | 880 | * This should be easy, if there is something there we |
@@ -892,7 +896,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
892 | * Check any passed addresses | 896 | * Check any passed addresses |
893 | */ | 897 | */ |
894 | if (addr_len) | 898 | if (addr_len) |
895 | *addr_len=sizeof(*sin); | 899 | *addr_len = sizeof(*sin); |
896 | 900 | ||
897 | if (flags & MSG_ERRQUEUE) | 901 | if (flags & MSG_ERRQUEUE) |
898 | return ip_recv_error(sk, msg, len); | 902 | return ip_recv_error(sk, msg, len); |
@@ -923,9 +927,11 @@ try_again: | |||
923 | 927 | ||
924 | if (skb_csum_unnecessary(skb)) | 928 | if (skb_csum_unnecessary(skb)) |
925 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 929 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
926 | msg->msg_iov, copied ); | 930 | msg->msg_iov, copied); |
927 | else { | 931 | else { |
928 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | 932 | err = skb_copy_and_csum_datagram_iovec(skb, |
933 | sizeof(struct udphdr), | ||
934 | msg->msg_iov); | ||
929 | 935 | ||
930 | if (err == -EINVAL) | 936 | if (err == -EINVAL) |
931 | goto csum_copy_err; | 937 | goto csum_copy_err; |
@@ -941,8 +947,7 @@ try_again: | |||
941 | sock_recv_timestamp(msg, sk, skb); | 947 | sock_recv_timestamp(msg, sk, skb); |
942 | 948 | ||
943 | /* Copy the address. */ | 949 | /* Copy the address. */ |
944 | if (sin) | 950 | if (sin) { |
945 | { | ||
946 | sin->sin_family = AF_INET; | 951 | sin->sin_family = AF_INET; |
947 | sin->sin_port = udp_hdr(skb)->source; | 952 | sin->sin_port = udp_hdr(skb)->source; |
948 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | 953 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
@@ -995,6 +1000,7 @@ int udp_disconnect(struct sock *sk, int flags) | |||
995 | sk_dst_reset(sk); | 1000 | sk_dst_reset(sk); |
996 | return 0; | 1001 | return 0; |
997 | } | 1002 | } |
1003 | EXPORT_SYMBOL(udp_disconnect); | ||
998 | 1004 | ||
999 | void udp_lib_unhash(struct sock *sk) | 1005 | void udp_lib_unhash(struct sock *sk) |
1000 | { | 1006 | { |
@@ -1044,7 +1050,7 @@ drop: | |||
1044 | * Note that in the success and error cases, the skb is assumed to | 1050 | * Note that in the success and error cases, the skb is assumed to |
1045 | * have either been requeued or freed. | 1051 | * have either been requeued or freed. |
1046 | */ | 1052 | */ |
1047 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 1053 | int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
1048 | { | 1054 | { |
1049 | struct udp_sock *up = udp_sk(sk); | 1055 | struct udp_sock *up = udp_sk(sk); |
1050 | int rc; | 1056 | int rc; |
@@ -1214,7 +1220,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1214 | if (uh->check == 0) { | 1220 | if (uh->check == 0) { |
1215 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1221 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1216 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1222 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1217 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | 1223 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, |
1218 | proto, skb->csum)) | 1224 | proto, skb->csum)) |
1219 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1225 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1220 | } | 1226 | } |
@@ -1355,7 +1361,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1355 | int err = 0; | 1361 | int err = 0; |
1356 | int is_udplite = IS_UDPLITE(sk); | 1362 | int is_udplite = IS_UDPLITE(sk); |
1357 | 1363 | ||
1358 | if (optlen<sizeof(int)) | 1364 | if (optlen < sizeof(int)) |
1359 | return -EINVAL; | 1365 | return -EINVAL; |
1360 | 1366 | ||
1361 | if (get_user(val, (int __user *)optval)) | 1367 | if (get_user(val, (int __user *)optval)) |
@@ -1426,6 +1432,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1426 | 1432 | ||
1427 | return err; | 1433 | return err; |
1428 | } | 1434 | } |
1435 | EXPORT_SYMBOL(udp_lib_setsockopt); | ||
1429 | 1436 | ||
1430 | int udp_setsockopt(struct sock *sk, int level, int optname, | 1437 | int udp_setsockopt(struct sock *sk, int level, int optname, |
1431 | char __user *optval, int optlen) | 1438 | char __user *optval, int optlen) |
@@ -1453,7 +1460,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1453 | struct udp_sock *up = udp_sk(sk); | 1460 | struct udp_sock *up = udp_sk(sk); |
1454 | int val, len; | 1461 | int val, len; |
1455 | 1462 | ||
1456 | if (get_user(len,optlen)) | 1463 | if (get_user(len, optlen)) |
1457 | return -EFAULT; | 1464 | return -EFAULT; |
1458 | 1465 | ||
1459 | len = min_t(unsigned int, len, sizeof(int)); | 1466 | len = min_t(unsigned int, len, sizeof(int)); |
@@ -1486,10 +1493,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1486 | 1493 | ||
1487 | if (put_user(len, optlen)) | 1494 | if (put_user(len, optlen)) |
1488 | return -EFAULT; | 1495 | return -EFAULT; |
1489 | if (copy_to_user(optval, &val,len)) | 1496 | if (copy_to_user(optval, &val, len)) |
1490 | return -EFAULT; | 1497 | return -EFAULT; |
1491 | return 0; | 1498 | return 0; |
1492 | } | 1499 | } |
1500 | EXPORT_SYMBOL(udp_lib_getsockopt); | ||
1493 | 1501 | ||
1494 | int udp_getsockopt(struct sock *sk, int level, int optname, | 1502 | int udp_getsockopt(struct sock *sk, int level, int optname, |
1495 | char __user *optval, int __user *optlen) | 1503 | char __user *optval, int __user *optlen) |
@@ -1528,9 +1536,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1528 | int is_lite = IS_UDPLITE(sk); | 1536 | int is_lite = IS_UDPLITE(sk); |
1529 | 1537 | ||
1530 | /* Check for false positives due to checksum errors */ | 1538 | /* Check for false positives due to checksum errors */ |
1531 | if ( (mask & POLLRDNORM) && | 1539 | if ((mask & POLLRDNORM) && |
1532 | !(file->f_flags & O_NONBLOCK) && | 1540 | !(file->f_flags & O_NONBLOCK) && |
1533 | !(sk->sk_shutdown & RCV_SHUTDOWN)){ | 1541 | !(sk->sk_shutdown & RCV_SHUTDOWN)) { |
1534 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; | 1542 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; |
1535 | struct sk_buff *skb; | 1543 | struct sk_buff *skb; |
1536 | 1544 | ||
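
The false positive being guarded against: a queued datagram can turn out to have a bad checksum, and the socket must not be reported readable on its account. Kernels of this era walk the receive queue and discard such packets before answering the poll; a hedged sketch of that loop (the real code also bumps the UDP error counters):

    /* Drop queued datagrams whose deferred checksum turns out bad, so
     * poll() does not signal readability for data recvmsg() will reject.
     */
    spin_lock_bh(&rcvq->lock);
    while ((skb = skb_peek(rcvq)) != NULL &&
           udp_lib_checksum_complete(skb)) {
            __skb_unlink(skb, rcvq);
            kfree_skb(skb);
    }
    spin_unlock_bh(&rcvq->lock);
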
@@ -1552,6 +1560,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1552 | return mask; | 1560 | return mask; |
1553 | 1561 | ||
1554 | } | 1562 | } |
1563 | EXPORT_SYMBOL(udp_poll); | ||
1555 | 1564 | ||
1556 | struct proto udp_prot = { | 1565 | struct proto udp_prot = { |
1557 | .name = "UDP", | 1566 | .name = "UDP", |
@@ -1582,6 +1591,7 @@ struct proto udp_prot = { | |||
1582 | .compat_getsockopt = compat_udp_getsockopt, | 1591 | .compat_getsockopt = compat_udp_getsockopt, |
1583 | #endif | 1592 | #endif |
1584 | }; | 1593 | }; |
1594 | EXPORT_SYMBOL(udp_prot); | ||
1585 | 1595 | ||
1586 | /* ------------------------------------------------------------------------ */ | 1596 | /* ------------------------------------------------------------------------ */ |
1587 | #ifdef CONFIG_PROC_FS | 1597 | #ifdef CONFIG_PROC_FS |
@@ -1703,11 +1713,13 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) | |||
1703 | rc = -ENOMEM; | 1713 | rc = -ENOMEM; |
1704 | return rc; | 1714 | return rc; |
1705 | } | 1715 | } |
1716 | EXPORT_SYMBOL(udp_proc_register); | ||
1706 | 1717 | ||
1707 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) | 1718 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) |
1708 | { | 1719 | { |
1709 | proc_net_remove(net, afinfo->name); | 1720 | proc_net_remove(net, afinfo->name); |
1710 | } | 1721 | } |
1722 | EXPORT_SYMBOL(udp_proc_unregister); | ||
1711 | 1723 | ||
1712 | /* ------------------------------------------------------------------------ */ | 1724 | /* ------------------------------------------------------------------------ */ |
1713 | static void udp4_format_sock(struct sock *sp, struct seq_file *f, | 1725 | static void udp4_format_sock(struct sock *sp, struct seq_file *f, |
@@ -1741,7 +1753,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) | |||
1741 | int len; | 1753 | int len; |
1742 | 1754 | ||
1743 | udp4_format_sock(v, seq, state->bucket, &len); | 1755 | udp4_format_sock(v, seq, state->bucket, &len); |
1744 | seq_printf(seq, "%*s\n", 127 - len ,""); | 1756 | seq_printf(seq, "%*s\n", 127 - len, ""); |
1745 | } | 1757 | } |
1746 | return 0; | 1758 | return 0; |
1747 | } | 1759 | } |
@@ -1816,16 +1828,64 @@ void __init udp_init(void) | |||
1816 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; | 1828 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; |
1817 | } | 1829 | } |
1818 | 1830 | ||
1819 | EXPORT_SYMBOL(udp_disconnect); | 1831 | int udp4_ufo_send_check(struct sk_buff *skb) |
1820 | EXPORT_SYMBOL(udp_ioctl); | 1832 | { |
1821 | EXPORT_SYMBOL(udp_prot); | 1833 | const struct iphdr *iph; |
1822 | EXPORT_SYMBOL(udp_sendmsg); | 1834 | struct udphdr *uh; |
1823 | EXPORT_SYMBOL(udp_lib_getsockopt); | 1835 | |
1824 | EXPORT_SYMBOL(udp_lib_setsockopt); | 1836 | if (!pskb_may_pull(skb, sizeof(*uh))) |
1825 | EXPORT_SYMBOL(udp_poll); | 1837 | return -EINVAL; |
1826 | EXPORT_SYMBOL(udp_lib_get_port); | 1838 | |
1839 | iph = ip_hdr(skb); | ||
1840 | uh = udp_hdr(skb); | ||
1841 | |||
1842 | uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
1843 | IPPROTO_UDP, 0); | ||
1844 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
1845 | skb->csum_offset = offsetof(struct udphdr, check); | ||
1846 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
1847 | return 0; | ||
1848 | } | ||
1849 | |||
1850 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | ||
1851 | { | ||
1852 | struct sk_buff *segs = ERR_PTR(-EINVAL); | ||
1853 | unsigned int mss; | ||
1854 | int offset; | ||
1855 | __wsum csum; | ||
1856 | |||
1857 | mss = skb_shinfo(skb)->gso_size; | ||
1858 | if (unlikely(skb->len <= mss)) | ||
1859 | goto out; | ||
1860 | |||
1861 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | ||
1862 | /* Packet is from an untrusted source, reset gso_segs. */ | ||
1863 | int type = skb_shinfo(skb)->gso_type; | ||
1864 | |||
1865 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || | ||
1866 | !(type & (SKB_GSO_UDP)))) | ||
1867 | goto out; | ||
1868 | |||
1869 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | ||
1870 | |||
1871 | segs = NULL; | ||
1872 | goto out; | ||
1873 | } | ||
1874 | |||
1875 | /* Do software UFO. Complete and fill in the UDP checksum, as HW cannot | ||
1876 | * checksum UDP packets sent as multiple IP fragments. | ||
1877 | */ | ||
1878 | offset = skb->csum_start - skb_headroom(skb); | ||
1879 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
1880 | offset += skb->csum_offset; | ||
1881 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | ||
1882 | skb->ip_summed = CHECKSUM_NONE; | ||
1883 | |||
1884 | /* Fragment the skb. IP headers of the fragments are updated in | ||
1885 | * inet_gso_segment() | ||
1886 | */ | ||
1887 | segs = skb_segment(skb, features); | ||
1888 | out: | ||
1889 | return segs; | ||
1890 | } | ||
1827 | 1891 | ||
1828 | #ifdef CONFIG_PROC_FS | ||
1829 | EXPORT_SYMBOL(udp_proc_register); | ||
1830 | EXPORT_SYMBOL(udp_proc_unregister); | ||
1831 | #endif | ||
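
udp4_ufo_send_check() and udp4_ufo_fragment() only take effect once they are hooked into the IPv4 GSO path; the af_inet.c portion of this change (outside this section) is where that registration lands. A sketch of the wiring, assuming the net_protocol offload fields of this kernel generation, not a verbatim excerpt:

    /* Illustrative: the IPv4 protocol entry for UDP with the UFO
     * callbacks attached.
     */
    static const struct net_protocol udp_protocol = {
            .handler        = udp_rcv,
            .err_handler    = udp_err,
            .gso_send_check = udp4_ufo_send_check,
            .gso_segment    = udp4_ufo_fragment,
            .no_policy      = 1,
            .netns_ok       = 1,
    };
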
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0071ee6f441f..74fb2eb833ec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -264,6 +264,22 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
264 | .fill_dst = xfrm4_fill_dst, | 264 | .fill_dst = xfrm4_fill_dst, |
265 | }; | 265 | }; |
266 | 266 | ||
267 | #ifdef CONFIG_SYSCTL | ||
268 | static struct ctl_table xfrm4_policy_table[] = { | ||
269 | { | ||
270 | .ctl_name = CTL_UNNUMBERED, | ||
271 | .procname = "xfrm4_gc_thresh", | ||
272 | .data = &xfrm4_dst_ops.gc_thresh, | ||
273 | .maxlen = sizeof(int), | ||
274 | .mode = 0644, | ||
275 | .proc_handler = proc_dointvec, | ||
276 | }, | ||
277 | { } | ||
278 | }; | ||
279 | |||
280 | static struct ctl_table_header *sysctl_hdr; | ||
281 | #endif | ||
282 | |||
267 | static void __init xfrm4_policy_init(void) | 283 | static void __init xfrm4_policy_init(void) |
268 | { | 284 | { |
269 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); | 285 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); |
@@ -271,12 +287,31 @@ static void __init xfrm4_policy_init(void) | |||
271 | 287 | ||
272 | static void __exit xfrm4_policy_fini(void) | 288 | static void __exit xfrm4_policy_fini(void) |
273 | { | 289 | { |
290 | #ifdef CONFIG_SYSCTL | ||
291 | if (sysctl_hdr) | ||
292 | unregister_net_sysctl_table(sysctl_hdr); | ||
293 | #endif | ||
274 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); | 294 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); |
275 | } | 295 | } |
276 | 296 | ||
277 | void __init xfrm4_init(void) | 297 | void __init xfrm4_init(int rt_max_size) |
278 | { | 298 | { |
279 | xfrm4_state_init(); | 299 | xfrm4_state_init(); |
280 | xfrm4_policy_init(); | 300 | xfrm4_policy_init(); |
301 | /* | ||
302 | * Select a default value for gc_thresh based on the main route | ||
303 | * table hash size. The worst-case scenario is IPsec operating | ||
304 | * in transport mode, where we create a dst_entry per socket. | ||
305 | * The xfrm gc algorithm starts trying to remove entries at | ||
306 | * gc_thresh and refuses new allocations at 2*gc_thresh, so set | ||
307 | * the initial xfrm gc_thresh value to rt_max_size/2. That will | ||
308 | * let us store one IPsec connection per route table entry and | ||
309 | * start cleaning when we are half full. | ||
310 | */ | ||
311 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | ||
312 | #ifdef CONFIG_SYSCTL | ||
313 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | ||
314 | xfrm4_policy_table); | ||
315 | #endif | ||
281 | } | 316 | } |
282 | 317 | ||
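
Once the table is registered, the threshold is tunable at runtime through /proc/sys/net/ipv4/xfrm4_gc_thresh. Its consumer is the xfrm4 garbage collector, which compares the live dst count against the threshold roughly as sketched below; the factor of two is the hard allocation stop the comment in xfrm4_init() refers to. Names follow xfrm4_garbage_collect(); the body is illustrative:

    static int xfrm4_garbage_collect_sketch(struct dst_ops *ops)
    {
            /* Start cleaning past gc_thresh; refuse new dst allocations
             * once the entry count passes twice the threshold.
             */
            xfrm4_policy_afinfo.garbage_collect(&init_net);
            return atomic_read(&ops->entries) > ops->gc_thresh * 2;
    }
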