aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c124
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/fib_trie.c101
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c8
-rw-r--r--net/ipv4/ipip.c8
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/route.c22
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c53
-rw-r--r--net/ipv4/tcp_minisocks.c23
-rw-r--r--net/ipv4/tcp_output.c58
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/udp.c144
-rw-r--r--net/ipv4/xfrm4_policy.c37
16 files changed, 375 insertions, 244 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 566ea6c4321d..6c30a73f03f5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX];
124static DEFINE_SPINLOCK(inetsw_lock); 124static DEFINE_SPINLOCK(inetsw_lock);
125 125
126struct ipv4_config ipv4_config; 126struct ipv4_config ipv4_config;
127
128EXPORT_SYMBOL(ipv4_config); 127EXPORT_SYMBOL(ipv4_config);
129 128
130/* New destruction routine */ 129/* New destruction routine */
@@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk)
139 sk_mem_reclaim(sk); 138 sk_mem_reclaim(sk);
140 139
141 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { 140 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
142 printk("Attempt to release TCP socket in state %d %p\n", 141 pr_err("Attempt to release TCP socket in state %d %p\n",
143 sk->sk_state, sk); 142 sk->sk_state, sk);
144 return; 143 return;
145 } 144 }
146 if (!sock_flag(sk, SOCK_DEAD)) { 145 if (!sock_flag(sk, SOCK_DEAD)) {
147 printk("Attempt to release alive inet socket %p\n", sk); 146 pr_err("Attempt to release alive inet socket %p\n", sk);
148 return; 147 return;
149 } 148 }
150 149
@@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk)
157 dst_release(sk->sk_dst_cache); 156 dst_release(sk->sk_dst_cache);
158 sk_refcnt_debug_dec(sk); 157 sk_refcnt_debug_dec(sk);
159} 158}
159EXPORT_SYMBOL(inet_sock_destruct);
160 160
161/* 161/*
162 * The routines beyond this point handle the behaviour of an AF_INET 162 * The routines beyond this point handle the behaviour of an AF_INET
@@ -219,6 +219,7 @@ out:
219 release_sock(sk); 219 release_sock(sk);
220 return err; 220 return err;
221} 221}
222EXPORT_SYMBOL(inet_listen);
222 223
223u32 inet_ehash_secret __read_mostly; 224u32 inet_ehash_secret __read_mostly;
224EXPORT_SYMBOL(inet_ehash_secret); 225EXPORT_SYMBOL(inet_ehash_secret);
@@ -435,9 +436,11 @@ int inet_release(struct socket *sock)
435 } 436 }
436 return 0; 437 return 0;
437} 438}
439EXPORT_SYMBOL(inet_release);
438 440
439/* It is off by default, see below. */ 441/* It is off by default, see below. */
440int sysctl_ip_nonlocal_bind __read_mostly; 442int sysctl_ip_nonlocal_bind __read_mostly;
443EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
441 444
442int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 445int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
443{ 446{
@@ -519,6 +522,7 @@ out_release_sock:
519out: 522out:
520 return err; 523 return err;
521} 524}
525EXPORT_SYMBOL(inet_bind);
522 526
523int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, 527int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
524 int addr_len, int flags) 528 int addr_len, int flags)
@@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
532 return -EAGAIN; 536 return -EAGAIN;
533 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); 537 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
534} 538}
539EXPORT_SYMBOL(inet_dgram_connect);
535 540
536static long inet_wait_for_connect(struct sock *sk, long timeo) 541static long inet_wait_for_connect(struct sock *sk, long timeo)
537{ 542{
@@ -641,6 +646,7 @@ sock_error:
641 sock->state = SS_DISCONNECTING; 646 sock->state = SS_DISCONNECTING;
642 goto out; 647 goto out;
643} 648}
649EXPORT_SYMBOL(inet_stream_connect);
644 650
645/* 651/*
646 * Accept a pending connection. The TCP layer now gives BSD semantics. 652 * Accept a pending connection. The TCP layer now gives BSD semantics.
@@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
668do_err: 674do_err:
669 return err; 675 return err;
670} 676}
677EXPORT_SYMBOL(inet_accept);
671 678
672 679
673/* 680/*
@@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
699 *uaddr_len = sizeof(*sin); 706 *uaddr_len = sizeof(*sin);
700 return 0; 707 return 0;
701} 708}
709EXPORT_SYMBOL(inet_getname);
702 710
703int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 711int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
704 size_t size) 712 size_t size)
@@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
711 719
712 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 720 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
713} 721}
722EXPORT_SYMBOL(inet_sendmsg);
714 723
715 724
716static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 725static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
726 size_t size, int flags)
717{ 727{
718 struct sock *sk = sock->sk; 728 struct sock *sk = sock->sk;
719 729
@@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how)
780 release_sock(sk); 790 release_sock(sk);
781 return err; 791 return err;
782} 792}
793EXPORT_SYMBOL(inet_shutdown);
783 794
784/* 795/*
785 * ioctl() calls you can issue on an INET socket. Most of these are 796 * ioctl() calls you can issue on an INET socket. Most of these are
@@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
798 struct net *net = sock_net(sk); 809 struct net *net = sock_net(sk);
799 810
800 switch (cmd) { 811 switch (cmd) {
801 case SIOCGSTAMP: 812 case SIOCGSTAMP:
802 err = sock_get_timestamp(sk, (struct timeval __user *)arg); 813 err = sock_get_timestamp(sk, (struct timeval __user *)arg);
803 break; 814 break;
804 case SIOCGSTAMPNS: 815 case SIOCGSTAMPNS:
805 err = sock_get_timestampns(sk, (struct timespec __user *)arg); 816 err = sock_get_timestampns(sk, (struct timespec __user *)arg);
806 break; 817 break;
807 case SIOCADDRT: 818 case SIOCADDRT:
808 case SIOCDELRT: 819 case SIOCDELRT:
809 case SIOCRTMSG: 820 case SIOCRTMSG:
810 err = ip_rt_ioctl(net, cmd, (void __user *)arg); 821 err = ip_rt_ioctl(net, cmd, (void __user *)arg);
811 break; 822 break;
812 case SIOCDARP: 823 case SIOCDARP:
813 case SIOCGARP: 824 case SIOCGARP:
814 case SIOCSARP: 825 case SIOCSARP:
815 err = arp_ioctl(net, cmd, (void __user *)arg); 826 err = arp_ioctl(net, cmd, (void __user *)arg);
816 break; 827 break;
817 case SIOCGIFADDR: 828 case SIOCGIFADDR:
818 case SIOCSIFADDR: 829 case SIOCSIFADDR:
819 case SIOCGIFBRDADDR: 830 case SIOCGIFBRDADDR:
820 case SIOCSIFBRDADDR: 831 case SIOCSIFBRDADDR:
821 case SIOCGIFNETMASK: 832 case SIOCGIFNETMASK:
822 case SIOCSIFNETMASK: 833 case SIOCSIFNETMASK:
823 case SIOCGIFDSTADDR: 834 case SIOCGIFDSTADDR:
824 case SIOCSIFDSTADDR: 835 case SIOCSIFDSTADDR:
825 case SIOCSIFPFLAGS: 836 case SIOCSIFPFLAGS:
826 case SIOCGIFPFLAGS: 837 case SIOCGIFPFLAGS:
827 case SIOCSIFFLAGS: 838 case SIOCSIFFLAGS:
828 err = devinet_ioctl(net, cmd, (void __user *)arg); 839 err = devinet_ioctl(net, cmd, (void __user *)arg);
829 break; 840 break;
830 default: 841 default:
831 if (sk->sk_prot->ioctl) 842 if (sk->sk_prot->ioctl)
832 err = sk->sk_prot->ioctl(sk, cmd, arg); 843 err = sk->sk_prot->ioctl(sk, cmd, arg);
833 else 844 else
834 err = -ENOIOCTLCMD; 845 err = -ENOIOCTLCMD;
835 break; 846 break;
836 } 847 }
837 return err; 848 return err;
838} 849}
850EXPORT_SYMBOL(inet_ioctl);
839 851
840const struct proto_ops inet_stream_ops = { 852const struct proto_ops inet_stream_ops = {
841 .family = PF_INET, 853 .family = PF_INET,
@@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = {
862 .compat_getsockopt = compat_sock_common_getsockopt, 874 .compat_getsockopt = compat_sock_common_getsockopt,
863#endif 875#endif
864}; 876};
877EXPORT_SYMBOL(inet_stream_ops);
865 878
866const struct proto_ops inet_dgram_ops = { 879const struct proto_ops inet_dgram_ops = {
867 .family = PF_INET, 880 .family = PF_INET,
@@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = {
887 .compat_getsockopt = compat_sock_common_getsockopt, 900 .compat_getsockopt = compat_sock_common_getsockopt,
888#endif 901#endif
889}; 902};
903EXPORT_SYMBOL(inet_dgram_ops);
890 904
891/* 905/*
892 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 906 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
@@ -1016,6 +1030,7 @@ out_illegal:
1016 p->type); 1030 p->type);
1017 goto out; 1031 goto out;
1018} 1032}
1033EXPORT_SYMBOL(inet_register_protosw);
1019 1034
1020void inet_unregister_protosw(struct inet_protosw *p) 1035void inet_unregister_protosw(struct inet_protosw *p)
1021{ 1036{
@@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
1031 synchronize_net(); 1046 synchronize_net();
1032 } 1047 }
1033} 1048}
1049EXPORT_SYMBOL(inet_unregister_protosw);
1034 1050
1035/* 1051/*
1036 * Shall we try to damage output packets if routing dev changes? 1052 * Shall we try to damage output packets if routing dev changes?
@@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk)
1141 1157
1142 return err; 1158 return err;
1143} 1159}
1144
1145EXPORT_SYMBOL(inet_sk_rebuild_header); 1160EXPORT_SYMBOL(inet_sk_rebuild_header);
1146 1161
1147static int inet_gso_send_check(struct sk_buff *skb) 1162static int inet_gso_send_check(struct sk_buff *skb)
@@ -1187,6 +1202,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
1187 int proto; 1202 int proto;
1188 int ihl; 1203 int ihl;
1189 int id; 1204 int id;
1205 unsigned int offset = 0;
1190 1206
1191 if (!(features & NETIF_F_V4_CSUM)) 1207 if (!(features & NETIF_F_V4_CSUM))
1192 features &= ~NETIF_F_SG; 1208 features &= ~NETIF_F_SG;
@@ -1229,7 +1245,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
1229 skb = segs; 1245 skb = segs;
1230 do { 1246 do {
1231 iph = ip_hdr(skb); 1247 iph = ip_hdr(skb);
1232 iph->id = htons(id++); 1248 if (proto == IPPROTO_UDP) {
1249 iph->id = htons(id);
1250 iph->frag_off = htons(offset >> 3);
1251 if (skb->next != NULL)
1252 iph->frag_off |= htons(IP_MF);
1253 offset += (skb->len - skb->mac_len - iph->ihl * 4);
1254 } else
1255 iph->id = htons(id++);
1233 iph->tot_len = htons(skb->len - skb->mac_len); 1256 iph->tot_len = htons(skb->len - skb->mac_len);
1234 iph->check = 0; 1257 iph->check = 0;
1235 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); 1258 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
@@ -1361,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1361 } 1384 }
1362 return rc; 1385 return rc;
1363} 1386}
1364
1365EXPORT_SYMBOL_GPL(inet_ctl_sock_create); 1387EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1366 1388
1367unsigned long snmp_fold_field(void *mib[], int offt) 1389unsigned long snmp_fold_field(void *mib[], int offt)
@@ -1425,6 +1447,8 @@ static struct net_protocol tcp_protocol = {
1425static struct net_protocol udp_protocol = { 1447static struct net_protocol udp_protocol = {
1426 .handler = udp_rcv, 1448 .handler = udp_rcv,
1427 .err_handler = udp_err, 1449 .err_handler = udp_err,
1450 .gso_send_check = udp4_ufo_send_check,
1451 .gso_segment = udp4_ufo_fragment,
1428 .no_policy = 1, 1452 .no_policy = 1,
1429 .netns_ok = 1, 1453 .netns_ok = 1,
1430}; 1454};
@@ -1666,19 +1690,3 @@ static int __init ipv4_proc_init(void)
1666 1690
1667MODULE_ALIAS_NETPROTO(PF_INET); 1691MODULE_ALIAS_NETPROTO(PF_INET);
1668 1692
1669EXPORT_SYMBOL(inet_accept);
1670EXPORT_SYMBOL(inet_bind);
1671EXPORT_SYMBOL(inet_dgram_connect);
1672EXPORT_SYMBOL(inet_dgram_ops);
1673EXPORT_SYMBOL(inet_getname);
1674EXPORT_SYMBOL(inet_ioctl);
1675EXPORT_SYMBOL(inet_listen);
1676EXPORT_SYMBOL(inet_register_protosw);
1677EXPORT_SYMBOL(inet_release);
1678EXPORT_SYMBOL(inet_sendmsg);
1679EXPORT_SYMBOL(inet_shutdown);
1680EXPORT_SYMBOL(inet_sock_destruct);
1681EXPORT_SYMBOL(inet_stream_connect);
1682EXPORT_SYMBOL(inet_stream_ops);
1683EXPORT_SYMBOL(inet_unregister_protosw);
1684EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090e9991ac2a..4e80f336c0cf 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
130static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); 130static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
131static void parp_redo(struct sk_buff *skb); 131static void parp_redo(struct sk_buff *skb);
132 132
133static struct neigh_ops arp_generic_ops = { 133static const struct neigh_ops arp_generic_ops = {
134 .family = AF_INET, 134 .family = AF_INET,
135 .solicit = arp_solicit, 135 .solicit = arp_solicit,
136 .error_report = arp_error_report, 136 .error_report = arp_error_report,
@@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = {
140 .queue_xmit = dev_queue_xmit, 140 .queue_xmit = dev_queue_xmit,
141}; 141};
142 142
143static struct neigh_ops arp_hh_ops = { 143static const struct neigh_ops arp_hh_ops = {
144 .family = AF_INET, 144 .family = AF_INET,
145 .solicit = arp_solicit, 145 .solicit = arp_solicit,
146 .error_report = arp_error_report, 146 .error_report = arp_error_report,
@@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = {
150 .queue_xmit = dev_queue_xmit, 150 .queue_xmit = dev_queue_xmit,
151}; 151};
152 152
153static struct neigh_ops arp_direct_ops = { 153static const struct neigh_ops arp_direct_ops = {
154 .family = AF_INET, 154 .family = AF_INET,
155 .output = dev_queue_xmit, 155 .output = dev_queue_xmit,
156 .connected_output = dev_queue_xmit, 156 .connected_output = dev_queue_xmit,
@@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = {
158 .queue_xmit = dev_queue_xmit, 158 .queue_xmit = dev_queue_xmit,
159}; 159};
160 160
161struct neigh_ops arp_broken_ops = { 161const struct neigh_ops arp_broken_ops = {
162 .family = AF_INET, 162 .family = AF_INET,
163 .solicit = arp_solicit, 163 .solicit = arp_solicit,
164 .error_report = arp_error_report, 164 .error_report = arp_error_report,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 63c2fa7b68c4..291bdf50a21f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -48,7 +48,7 @@
48 * Patrick McHardy <kaber@trash.net> 48 * Patrick McHardy <kaber@trash.net>
49 */ 49 */
50 50
51#define VERSION "0.408" 51#define VERSION "0.409"
52 52
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/system.h> 54#include <asm/system.h>
@@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn); 164static struct tnode *halve(struct trie *t, struct tnode *tn);
165/* tnodes to free after resize(); protected by RTNL */ 165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head; 166static struct tnode *tnode_free_head;
167static size_t tnode_free_size;
168
169/*
170 * synchronize_rcu after call_rcu for that many pages; it should be especially
171 * useful before resizing the root node with PREEMPT_NONE configs; the value was
172 * obtained experimentally, aiming to avoid visible slowdown.
173 */
174static const int sync_pages = 128;
167 175
168static struct kmem_cache *fn_alias_kmem __read_mostly; 176static struct kmem_cache *fn_alias_kmem __read_mostly;
169static struct kmem_cache *trie_leaf_kmem __read_mostly; 177static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -317,8 +325,7 @@ static inline void check_tnode(const struct tnode *tn)
317static const int halve_threshold = 25; 325static const int halve_threshold = 25;
318static const int inflate_threshold = 50; 326static const int inflate_threshold = 50;
319static const int halve_threshold_root = 15; 327static const int halve_threshold_root = 15;
320static const int inflate_threshold_root = 25; 328static const int inflate_threshold_root = 30;
321
322 329
323static void __alias_free_mem(struct rcu_head *head) 330static void __alias_free_mem(struct rcu_head *head)
324{ 331{
@@ -393,6 +400,8 @@ static void tnode_free_safe(struct tnode *tn)
393 BUG_ON(IS_LEAF(tn)); 400 BUG_ON(IS_LEAF(tn));
394 tn->tnode_free = tnode_free_head; 401 tn->tnode_free = tnode_free_head;
395 tnode_free_head = tn; 402 tnode_free_head = tn;
403 tnode_free_size += sizeof(struct tnode) +
404 (sizeof(struct node *) << tn->bits);
396} 405}
397 406
398static void tnode_free_flush(void) 407static void tnode_free_flush(void)
@@ -404,6 +413,11 @@ static void tnode_free_flush(void)
404 tn->tnode_free = NULL; 413 tn->tnode_free = NULL;
405 tnode_free(tn); 414 tnode_free(tn);
406 } 415 }
416
417 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
418 tnode_free_size = 0;
419 synchronize_rcu();
420 }
407} 421}
408 422
409static struct leaf *leaf_new(void) 423static struct leaf *leaf_new(void)
@@ -499,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
499 rcu_assign_pointer(tn->child[i], n); 513 rcu_assign_pointer(tn->child[i], n);
500} 514}
501 515
516#define MAX_WORK 10
502static struct node *resize(struct trie *t, struct tnode *tn) 517static struct node *resize(struct trie *t, struct tnode *tn)
503{ 518{
504 int i; 519 int i;
505 int err = 0;
506 struct tnode *old_tn; 520 struct tnode *old_tn;
507 int inflate_threshold_use; 521 int inflate_threshold_use;
508 int halve_threshold_use; 522 int halve_threshold_use;
509 int max_resize; 523 int max_work;
510 524
511 if (!tn) 525 if (!tn)
512 return NULL; 526 return NULL;
@@ -521,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
521 } 535 }
522 /* One child */ 536 /* One child */
523 if (tn->empty_children == tnode_child_length(tn) - 1) 537 if (tn->empty_children == tnode_child_length(tn) - 1)
524 for (i = 0; i < tnode_child_length(tn); i++) { 538 goto one_child;
525 struct node *n;
526
527 n = tn->child[i];
528 if (!n)
529 continue;
530
531 /* compress one level */
532 node_set_parent(n, NULL);
533 tnode_free_safe(tn);
534 return n;
535 }
536 /* 539 /*
537 * Double as long as the resulting node has a number of 540 * Double as long as the resulting node has a number of
538 * nonempty nodes that are above the threshold. 541 * nonempty nodes that are above the threshold.
@@ -601,14 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
601 604
602 /* Keep root node larger */ 605 /* Keep root node larger */
603 606
604 if (!tn->parent) 607 if (!node_parent((struct node*) tn)) {
605 inflate_threshold_use = inflate_threshold_root; 608 inflate_threshold_use = inflate_threshold_root;
606 else 609 halve_threshold_use = halve_threshold_root;
610 }
611 else {
607 inflate_threshold_use = inflate_threshold; 612 inflate_threshold_use = inflate_threshold;
613 halve_threshold_use = halve_threshold;
614 }
608 615
609 err = 0; 616 max_work = MAX_WORK;
610 max_resize = 10; 617 while ((tn->full_children > 0 && max_work-- &&
611 while ((tn->full_children > 0 && max_resize-- &&
612 50 * (tn->full_children + tnode_child_length(tn) 618 50 * (tn->full_children + tnode_child_length(tn)
613 - tn->empty_children) 619 - tn->empty_children)
614 >= inflate_threshold_use * tnode_child_length(tn))) { 620 >= inflate_threshold_use * tnode_child_length(tn))) {
@@ -625,35 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn)
625 } 631 }
626 } 632 }
627 633
628 if (max_resize < 0) {
629 if (!tn->parent)
630 pr_warning("Fix inflate_threshold_root."
631 " Now=%d size=%d bits\n",
632 inflate_threshold_root, tn->bits);
633 else
634 pr_warning("Fix inflate_threshold."
635 " Now=%d size=%d bits\n",
636 inflate_threshold, tn->bits);
637 }
638
639 check_tnode(tn); 634 check_tnode(tn);
640 635
636 /* Return if at least one inflate is run */
637 if( max_work != MAX_WORK)
638 return (struct node *) tn;
639
641 /* 640 /*
642 * Halve as long as the number of empty children in this 641 * Halve as long as the number of empty children in this
643 * node is above threshold. 642 * node is above threshold.
644 */ 643 */
645 644
646 645 max_work = MAX_WORK;
647 /* Keep root node larger */ 646 while (tn->bits > 1 && max_work-- &&
648
649 if (!tn->parent)
650 halve_threshold_use = halve_threshold_root;
651 else
652 halve_threshold_use = halve_threshold;
653
654 err = 0;
655 max_resize = 10;
656 while (tn->bits > 1 && max_resize-- &&
657 100 * (tnode_child_length(tn) - tn->empty_children) < 647 100 * (tnode_child_length(tn) - tn->empty_children) <
658 halve_threshold_use * tnode_child_length(tn)) { 648 halve_threshold_use * tnode_child_length(tn)) {
659 649
@@ -668,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
668 } 658 }
669 } 659 }
670 660
671 if (max_resize < 0) {
672 if (!tn->parent)
673 pr_warning("Fix halve_threshold_root."
674 " Now=%d size=%d bits\n",
675 halve_threshold_root, tn->bits);
676 else
677 pr_warning("Fix halve_threshold."
678 " Now=%d size=%d bits\n",
679 halve_threshold, tn->bits);
680 }
681 661
682 /* Only one child remains */ 662 /* Only one child remains */
683 if (tn->empty_children == tnode_child_length(tn) - 1) 663 if (tn->empty_children == tnode_child_length(tn) - 1) {
664one_child:
684 for (i = 0; i < tnode_child_length(tn); i++) { 665 for (i = 0; i < tnode_child_length(tn); i++) {
685 struct node *n; 666 struct node *n;
686 667
@@ -694,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
694 tnode_free_safe(tn); 675 tnode_free_safe(tn);
695 return n; 676 return n;
696 } 677 }
697 678 }
698 return (struct node *) tn; 679 return (struct node *) tn;
699} 680}
700 681
@@ -1435,7 +1416,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1435 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1416 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1436 pos, bits); 1417 pos, bits);
1437 1418
1438 n = tnode_get_child(pn, cindex); 1419 n = tnode_get_child_rcu(pn, cindex);
1439 1420
1440 if (n == NULL) { 1421 if (n == NULL) {
1441#ifdef CONFIG_IP_FIB_TRIE_STATS 1422#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -1570,7 +1551,7 @@ backtrace:
1570 if (chopped_off <= pn->bits) { 1551 if (chopped_off <= pn->bits) {
1571 cindex &= ~(1 << (chopped_off-1)); 1552 cindex &= ~(1 << (chopped_off-1));
1572 } else { 1553 } else {
1573 struct tnode *parent = node_parent((struct node *) pn); 1554 struct tnode *parent = node_parent_rcu((struct node *) pn);
1574 if (!parent) 1555 if (!parent)
1575 goto failed; 1556 goto failed;
1576 1557
@@ -1783,7 +1764,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
1783static struct leaf *trie_nextleaf(struct leaf *l) 1764static struct leaf *trie_nextleaf(struct leaf *l)
1784{ 1765{
1785 struct node *c = (struct node *) l; 1766 struct node *c = (struct node *) l;
1786 struct tnode *p = node_parent(c); 1767 struct tnode *p = node_parent_rcu(c);
1787 1768
1788 if (!p) 1769 if (!p)
1789 return NULL; /* trie with just one leaf */ 1770 return NULL; /* trie with just one leaf */
@@ -2391,7 +2372,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2391 } 2372 }
2392} 2373}
2393 2374
2394static const char *rtn_type_names[__RTN_MAX] = { 2375static const char *const rtn_type_names[__RTN_MAX] = {
2395 [RTN_UNSPEC] = "UNSPEC", 2376 [RTN_UNSPEC] = "UNSPEC",
2396 [RTN_UNICAST] = "UNICAST", 2377 [RTN_UNICAST] = "UNICAST",
2397 [RTN_LOCAL] = "LOCAL", 2378 [RTN_LOCAL] = "LOCAL",
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 61283f928825..13f0781f35cd 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data)
218 /* We purged the entire slot, anything left? */ 218 /* We purged the entire slot, anything left? */
219 if (twdr->tw_count) 219 if (twdr->tw_count)
220 need_timer = 1; 220 need_timer = 1;
221 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
221 } 222 }
222 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
223 if (need_timer) 223 if (need_timer)
224 mod_timer(&twdr->tw_timer, jiffies + twdr->period); 224 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
225out: 225out:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 82c11dd10a62..533afaadefd4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -662,7 +662,7 @@ drop_nolock:
662 return(0); 662 return(0);
663} 663}
664 664
665static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 665static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
666{ 666{
667 struct ip_tunnel *tunnel = netdev_priv(dev); 667 struct ip_tunnel *tunnel = netdev_priv(dev);
668 struct net_device_stats *stats = &tunnel->dev->stats; 668 struct net_device_stats *stats = &tunnel->dev->stats;
@@ -821,7 +821,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
821 stats->tx_dropped++; 821 stats->tx_dropped++;
822 dev_kfree_skb(skb); 822 dev_kfree_skb(skb);
823 tunnel->recursion--; 823 tunnel->recursion--;
824 return 0; 824 return NETDEV_TX_OK;
825 } 825 }
826 if (skb->sk) 826 if (skb->sk)
827 skb_set_owner_w(new_skb, skb->sk); 827 skb_set_owner_w(new_skb, skb->sk);
@@ -889,7 +889,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
889 889
890 IPTUNNEL_XMIT(); 890 IPTUNNEL_XMIT();
891 tunnel->recursion--; 891 tunnel->recursion--;
892 return 0; 892 return NETDEV_TX_OK;
893 893
894tx_error_icmp: 894tx_error_icmp:
895 dst_link_failure(skb); 895 dst_link_failure(skb);
@@ -898,7 +898,7 @@ tx_error:
898 stats->tx_errors++; 898 stats->tx_errors++;
899 dev_kfree_skb(skb); 899 dev_kfree_skb(skb);
900 tunnel->recursion--; 900 tunnel->recursion--;
901 return 0; 901 return NETDEV_TX_OK;
902} 902}
903 903
904static int ipgre_tunnel_bind_dev(struct net_device *dev) 904static int ipgre_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 93e2b787da20..62548cb0923c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb)
387 * and that skb is filled properly by that function. 387 * and that skb is filled properly by that function.
388 */ 388 */
389 389
390static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 390static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
391{ 391{
392 struct ip_tunnel *tunnel = netdev_priv(dev); 392 struct ip_tunnel *tunnel = netdev_priv(dev);
393 struct net_device_stats *stats = &tunnel->dev->stats; 393 struct net_device_stats *stats = &tunnel->dev->stats;
@@ -486,7 +486,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
486 stats->tx_dropped++; 486 stats->tx_dropped++;
487 dev_kfree_skb(skb); 487 dev_kfree_skb(skb);
488 tunnel->recursion--; 488 tunnel->recursion--;
489 return 0; 489 return NETDEV_TX_OK;
490 } 490 }
491 if (skb->sk) 491 if (skb->sk)
492 skb_set_owner_w(new_skb, skb->sk); 492 skb_set_owner_w(new_skb, skb->sk);
@@ -524,7 +524,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
524 524
525 IPTUNNEL_XMIT(); 525 IPTUNNEL_XMIT();
526 tunnel->recursion--; 526 tunnel->recursion--;
527 return 0; 527 return NETDEV_TX_OK;
528 528
529tx_error_icmp: 529tx_error_icmp:
530 dst_link_failure(skb); 530 dst_link_failure(skb);
@@ -532,7 +532,7 @@ tx_error:
532 stats->tx_errors++; 532 stats->tx_errors++;
533 dev_kfree_skb(skb); 533 dev_kfree_skb(skb);
534 tunnel->recursion--; 534 tunnel->recursion--;
535 return 0; 535 return NETDEV_TX_OK;
536} 536}
537 537
538static void ipip_tunnel_bind_dev(struct net_device *dev) 538static void ipip_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9a8da5ed92b7..65d421cf5bc7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,7 @@ failure:
201 201
202#ifdef CONFIG_IP_PIMSM 202#ifdef CONFIG_IP_PIMSM
203 203
204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 204static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205{ 205{
206 struct net *net = dev_net(dev); 206 struct net *net = dev_net(dev);
207 207
@@ -212,7 +212,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
212 IGMPMSG_WHOLEPKT); 212 IGMPMSG_WHOLEPKT);
213 read_unlock(&mrt_lock); 213 read_unlock(&mrt_lock);
214 kfree_skb(skb); 214 kfree_skb(skb);
215 return 0; 215 return NETDEV_TX_OK;
216} 216}
217 217
218static const struct net_device_ops reg_vif_netdev_ops = { 218static const struct net_device_ops reg_vif_netdev_ops = {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 278f46f5011b..91867d3e6328 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1514void ip_rt_send_redirect(struct sk_buff *skb) 1514void ip_rt_send_redirect(struct sk_buff *skb)
1515{ 1515{
1516 struct rtable *rt = skb_rtable(skb); 1516 struct rtable *rt = skb_rtable(skb);
1517 struct in_device *in_dev = in_dev_get(rt->u.dst.dev); 1517 struct in_device *in_dev;
1518 int log_martians;
1518 1519
1519 if (!in_dev) 1520 rcu_read_lock();
1521 in_dev = __in_dev_get_rcu(rt->u.dst.dev);
1522 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1523 rcu_read_unlock();
1520 return; 1524 return;
1521 1525 }
1522 if (!IN_DEV_TX_REDIRECTS(in_dev)) 1526 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1523 goto out; 1527 rcu_read_unlock();
1524 1528
1525 /* No redirected packets during ip_rt_redirect_silence; 1529 /* No redirected packets during ip_rt_redirect_silence;
1526 * reset the algorithm. 1530 * reset the algorithm.
@@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1533 */ 1537 */
1534 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1538 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
1535 rt->u.dst.rate_last = jiffies; 1539 rt->u.dst.rate_last = jiffies;
1536 goto out; 1540 return;
1537 } 1541 }
1538 1542
1539 /* Check for load limit; set rate_last to the latest sent 1543 /* Check for load limit; set rate_last to the latest sent
@@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1547 rt->u.dst.rate_last = jiffies; 1551 rt->u.dst.rate_last = jiffies;
1548 ++rt->u.dst.rate_tokens; 1552 ++rt->u.dst.rate_tokens;
1549#ifdef CONFIG_IP_ROUTE_VERBOSE 1553#ifdef CONFIG_IP_ROUTE_VERBOSE
1550 if (IN_DEV_LOG_MARTIANS(in_dev) && 1554 if (log_martians &&
1551 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1555 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1552 net_ratelimit()) 1556 net_ratelimit())
1553 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1557 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
@@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1555 &rt->rt_dst, &rt->rt_gateway); 1559 &rt->rt_dst, &rt->rt_gateway);
1556#endif 1560#endif
1557 } 1561 }
1558out:
1559 in_dev_put(in_dev);
1560} 1562}
1561 1563
1562static int ip_error(struct sk_buff *skb) 1564static int ip_error(struct sk_buff *skb)
@@ -3442,7 +3444,7 @@ int __init ip_rt_init(void)
3442 printk(KERN_ERR "Unable to create route proc files\n"); 3444 printk(KERN_ERR "Unable to create route proc files\n");
3443#ifdef CONFIG_XFRM 3445#ifdef CONFIG_XFRM
3444 xfrm_init(); 3446 xfrm_init();
3445 xfrm4_init(); 3447 xfrm4_init(ip_rt_max_size);
3446#endif 3448#endif
3447 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); 3449 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
3448 3450
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 91145244ea63..59f69a6c5863 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2336 val = !!(tp->nonagle&TCP_NAGLE_CORK); 2336 val = !!(tp->nonagle&TCP_NAGLE_CORK);
2337 break; 2337 break;
2338 case TCP_KEEPIDLE: 2338 case TCP_KEEPIDLE:
2339 val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; 2339 val = keepalive_time_when(tp) / HZ;
2340 break; 2340 break;
2341 case TCP_KEEPINTVL: 2341 case TCP_KEEPINTVL:
2342 val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ; 2342 val = keepalive_intvl_when(tp) / HZ;
2343 break; 2343 break;
2344 case TCP_KEEPCNT: 2344 case TCP_KEEPCNT:
2345 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; 2345 val = keepalive_probes(tp);
2346 break; 2346 break;
2347 case TCP_SYNCNT: 2347 case TCP_SYNCNT:
2348 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 2348 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bdb0da237e6..af6d6fa00db1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk)
685 * is invisible. Actually, Linux-2.4 also generates erratic 685 * is invisible. Actually, Linux-2.4 also generates erratic
686 * ACKs in some circumstances. 686 * ACKs in some circumstances.
687 */ 687 */
688 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; 688 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
689 689
690 /* 2. Fixups made earlier cannot be right. 690 /* 2. Fixups made earlier cannot be right.
691 * If we do not estimate RTO correctly without them, 691 * If we do not estimate RTO correctly without them,
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk)
696 /* NOTE: clamping at TCP_RTO_MIN is not required, current algo 696 /* NOTE: clamping at TCP_RTO_MIN is not required, current algo
697 * guarantees that rto is higher. 697 * guarantees that rto is higher.
698 */ 698 */
699 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) 699 tcp_bound_rto(sk);
700 inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
701} 700}
702 701
703/* Save metrics learned by this TCP session. 702/* Save metrics learned by this TCP session.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6d88219c5e22..6755e29a6dd3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -328,26 +328,29 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
328 * 328 *
329 */ 329 */
330 330
331void tcp_v4_err(struct sk_buff *skb, u32 info) 331void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
332{ 332{
333 struct iphdr *iph = (struct iphdr *)skb->data; 333 struct iphdr *iph = (struct iphdr *)icmp_skb->data;
334 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); 334 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
335 struct inet_connection_sock *icsk;
335 struct tcp_sock *tp; 336 struct tcp_sock *tp;
336 struct inet_sock *inet; 337 struct inet_sock *inet;
337 const int type = icmp_hdr(skb)->type; 338 const int type = icmp_hdr(icmp_skb)->type;
338 const int code = icmp_hdr(skb)->code; 339 const int code = icmp_hdr(icmp_skb)->code;
339 struct sock *sk; 340 struct sock *sk;
341 struct sk_buff *skb;
340 __u32 seq; 342 __u32 seq;
343 __u32 remaining;
341 int err; 344 int err;
342 struct net *net = dev_net(skb->dev); 345 struct net *net = dev_net(icmp_skb->dev);
343 346
344 if (skb->len < (iph->ihl << 2) + 8) { 347 if (icmp_skb->len < (iph->ihl << 2) + 8) {
345 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 348 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
346 return; 349 return;
347 } 350 }
348 351
349 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, 352 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
350 iph->saddr, th->source, inet_iif(skb)); 353 iph->saddr, th->source, inet_iif(icmp_skb));
351 if (!sk) { 354 if (!sk) {
352 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 355 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
353 return; 356 return;
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
367 if (sk->sk_state == TCP_CLOSE) 370 if (sk->sk_state == TCP_CLOSE)
368 goto out; 371 goto out;
369 372
373 icsk = inet_csk(sk);
370 tp = tcp_sk(sk); 374 tp = tcp_sk(sk);
371 seq = ntohl(th->seq); 375 seq = ntohl(th->seq);
372 if (sk->sk_state != TCP_LISTEN && 376 if (sk->sk_state != TCP_LISTEN &&
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
393 } 397 }
394 398
395 err = icmp_err_convert[code].errno; 399 err = icmp_err_convert[code].errno;
400 /* check if icmp_skb allows revert of backoff
401 * (see draft-zimmermann-tcp-lcd) */
402 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
403 break;
404 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
405 !icsk->icsk_backoff)
406 break;
407
408 icsk->icsk_backoff--;
409 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
410 icsk->icsk_backoff;
411 tcp_bound_rto(sk);
412
413 skb = tcp_write_queue_head(sk);
414 BUG_ON(!skb);
415
416 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
417 tcp_time_stamp - TCP_SKB_CB(skb)->when);
418
419 if (remaining) {
420 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
421 remaining, TCP_RTO_MAX);
422 } else if (sock_owned_by_user(sk)) {
423 /* RTO revert clocked out retransmission,
424 * but socket is locked. Will defer. */
425 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
426 HZ/20, TCP_RTO_MAX);
427 } else {
428 /* RTO revert clocked out retransmission.
429 * Will retransmit now */
430 tcp_retransmit_timer(sk);
431 }
432
396 break; 433 break;
397 case ICMP_TIME_EXCEEDED: 434 case ICMP_TIME_EXCEEDED:
398 err = EHOSTUNREACH; 435 err = EHOSTUNREACH;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f8d67ccc64f3..6c8b42299d9f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
657 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 657 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
658 if (child == NULL) 658 if (child == NULL)
659 goto listen_overflow; 659 goto listen_overflow;
660#ifdef CONFIG_TCP_MD5SIG
661 else {
662 /* Copy over the MD5 key from the original socket */
663 struct tcp_md5sig_key *key;
664 struct tcp_sock *tp = tcp_sk(sk);
665 key = tp->af_specific->md5_lookup(sk, child);
666 if (key != NULL) {
667 /*
668 * We're using one, so create a matching key on the
669 * newsk structure. If we fail to get memory then we
670 * end up not copying the key across. Shucks.
671 */
672 char *newkey = kmemdup(key->key, key->keylen,
673 GFP_ATOMIC);
674 if (newkey) {
675 if (!tcp_alloc_md5sig_pool())
676 BUG();
677 tp->af_specific->md5_add(child, child, newkey,
678 key->keylen);
679 }
680 }
681 }
682#endif
683 660
684 inet_csk_reqsk_queue_unlink(sk, req, prev); 661 inet_csk_reqsk_queue_unlink(sk, req, prev);
685 inet_csk_reqsk_queue_removed(sk, req); 662 inet_csk_reqsk_queue_removed(sk, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bd62712848fa..4e004424d400 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,7 @@ int sysctl_tcp_base_mss __read_mostly = 512;
59/* By default, RFC2861 behavior. */ 59/* By default, RFC2861 behavior. */
60int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 60int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
61 61
62/* Account for new data that has been sent to the network. */
62static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) 63static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
63{ 64{
64 struct tcp_sock *tp = tcp_sk(sk); 65 struct tcp_sock *tp = tcp_sk(sk);
@@ -142,6 +143,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
142 tp->snd_cwnd_used = 0; 143 tp->snd_cwnd_used = 0;
143} 144}
144 145
146/* Congestion state accounting after a packet has been sent. */
145static void tcp_event_data_sent(struct tcp_sock *tp, 147static void tcp_event_data_sent(struct tcp_sock *tp,
146 struct sk_buff *skb, struct sock *sk) 148 struct sk_buff *skb, struct sock *sk)
147{ 149{
@@ -161,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
161 icsk->icsk_ack.pingpong = 1; 163 icsk->icsk_ack.pingpong = 1;
162} 164}
163 165
166/* Account for an ACK we sent. */
164static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 167static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
165{ 168{
166 tcp_dec_quickack_mode(sk, pkts); 169 tcp_dec_quickack_mode(sk, pkts);
@@ -276,6 +279,7 @@ static u16 tcp_select_window(struct sock *sk)
276 return new_win; 279 return new_win;
277} 280}
278 281
282/* Packet ECN state for a SYN-ACK */
279static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) 283static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
280{ 284{
281 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 285 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
@@ -283,6 +287,7 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
283 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 287 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
284} 288}
285 289
290/* Packet ECN state for a SYN. */
286static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) 291static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
287{ 292{
288 struct tcp_sock *tp = tcp_sk(sk); 293 struct tcp_sock *tp = tcp_sk(sk);
@@ -301,6 +306,9 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
301 th->ece = 1; 306 th->ece = 1;
302} 307}
303 308
309/* Set up ECN state for a packet on an ESTABLISHED socket that is about to
310 * be sent.
311 */
304static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, 312static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
305 int tcp_header_len) 313 int tcp_header_len)
306{ 314{
@@ -362,7 +370,9 @@ struct tcp_out_options {
362 __u32 tsval, tsecr; /* need to include OPTION_TS */ 370 __u32 tsval, tsecr; /* need to include OPTION_TS */
363}; 371};
364 372
365/* Beware: Something in the Internet is very sensitive to the ordering of 373/* Write previously computed TCP options to the packet.
374 *
375 * Beware: Something in the Internet is very sensitive to the ordering of
366 * TCP options, we learned this through the hard way, so be careful here. 376 * TCP options, we learned this through the hard way, so be careful here.
367 * Luckily we can at least blame others for their non-compliance but from 377 * Luckily we can at least blame others for their non-compliance but from
368 * inter-operatibility perspective it seems that we're somewhat stuck with 378 * inter-operatibility perspective it seems that we're somewhat stuck with
@@ -445,6 +455,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
445 } 455 }
446} 456}
447 457
458/* Compute TCP options for SYN packets. This is not the final
459 * network wire format yet.
460 */
448static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, 461static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
449 struct tcp_out_options *opts, 462 struct tcp_out_options *opts,
450 struct tcp_md5sig_key **md5) { 463 struct tcp_md5sig_key **md5) {
@@ -493,6 +506,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
493 return size; 506 return size;
494} 507}
495 508
509/* Set up TCP options for SYN-ACKs. */
496static unsigned tcp_synack_options(struct sock *sk, 510static unsigned tcp_synack_options(struct sock *sk,
497 struct request_sock *req, 511 struct request_sock *req,
498 unsigned mss, struct sk_buff *skb, 512 unsigned mss, struct sk_buff *skb,
@@ -541,6 +555,9 @@ static unsigned tcp_synack_options(struct sock *sk,
541 return size; 555 return size;
542} 556}
543 557
558/* Compute TCP options for ESTABLISHED sockets. This is not the
559 * final wire format yet.
560 */
544static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, 561static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
545 struct tcp_out_options *opts, 562 struct tcp_out_options *opts,
546 struct tcp_md5sig_key **md5) { 563 struct tcp_md5sig_key **md5) {
@@ -705,7 +722,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
705 return net_xmit_eval(err); 722 return net_xmit_eval(err);
706} 723}
707 724
708/* This routine just queue's the buffer 725/* This routine just queues the buffer for sending.
709 * 726 *
710 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, 727 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
711 * otherwise socket can stall. 728 * otherwise socket can stall.
@@ -722,6 +739,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
722 sk_mem_charge(sk, skb->truesize); 739 sk_mem_charge(sk, skb->truesize);
723} 740}
724 741
742/* Initialize TSO segments for a packet. */
725static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, 743static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
726 unsigned int mss_now) 744 unsigned int mss_now)
727{ 745{
@@ -909,6 +927,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
909 skb->len = skb->data_len; 927 skb->len = skb->data_len;
910} 928}
911 929
930/* Remove acked data from a packet in the transmit queue. */
912int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) 931int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
913{ 932{
914 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 933 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -937,7 +956,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
937 return 0; 956 return 0;
938} 957}
939 958
940/* Not accounting for SACKs here. */ 959/* Calculate MSS. Not accounting for SACKs here. */
941int tcp_mtu_to_mss(struct sock *sk, int pmtu) 960int tcp_mtu_to_mss(struct sock *sk, int pmtu)
942{ 961{
943 struct tcp_sock *tp = tcp_sk(sk); 962 struct tcp_sock *tp = tcp_sk(sk);
@@ -981,6 +1000,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
981 return mtu; 1000 return mtu;
982} 1001}
983 1002
1003/* MTU probing init per socket */
984void tcp_mtup_init(struct sock *sk) 1004void tcp_mtup_init(struct sock *sk)
985{ 1005{
986 struct tcp_sock *tp = tcp_sk(sk); 1006 struct tcp_sock *tp = tcp_sk(sk);
@@ -1143,7 +1163,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1143 return 0; 1163 return 0;
1144} 1164}
1145 1165
1146/* This must be invoked the first time we consider transmitting 1166/* Initialize TSO state of a skb.
1167 * This must be invoked the first time we consider transmitting
1147 * SKB onto the wire. 1168 * SKB onto the wire.
1148 */ 1169 */
1149static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, 1170static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
@@ -1158,6 +1179,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
1158 return tso_segs; 1179 return tso_segs;
1159} 1180}
1160 1181
1182/* Minshall's variant of the Nagle send check. */
1161static inline int tcp_minshall_check(const struct tcp_sock *tp) 1183static inline int tcp_minshall_check(const struct tcp_sock *tp)
1162{ 1184{
1163 return after(tp->snd_sml, tp->snd_una) && 1185 return after(tp->snd_sml, tp->snd_una) &&
@@ -1242,6 +1264,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1242 return cwnd_quota; 1264 return cwnd_quota;
1243} 1265}
1244 1266
1267/* Test if sending is allowed right now. */
1245int tcp_may_send_now(struct sock *sk) 1268int tcp_may_send_now(struct sock *sk)
1246{ 1269{
1247 struct tcp_sock *tp = tcp_sk(sk); 1270 struct tcp_sock *tp = tcp_sk(sk);
@@ -1378,6 +1401,10 @@ send_now:
1378} 1401}
1379 1402
1380/* Create a new MTU probe if we are ready. 1403/* Create a new MTU probe if we are ready.
1404 * MTU probe is regularly attempting to increase the path MTU by
1405 * deliberately sending larger packets. This discovers routing
1406 * changes resulting in larger path MTUs.
1407 *
1381 * Returns 0 if we should wait to probe (no cwnd available), 1408 * Returns 0 if we should wait to probe (no cwnd available),
1382 * 1 if a probe was sent, 1409 * 1 if a probe was sent,
1383 * -1 otherwise 1410 * -1 otherwise
@@ -1790,6 +1817,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1790 sk_wmem_free_skb(sk, next_skb); 1817 sk_wmem_free_skb(sk, next_skb);
1791} 1818}
1792 1819
1820/* Check if coalescing SKBs is legal. */
1793static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) 1821static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
1794{ 1822{
1795 if (tcp_skb_pcount(skb) > 1) 1823 if (tcp_skb_pcount(skb) > 1)
@@ -1808,6 +1836,9 @@ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
1808 return 1; 1836 return 1;
1809} 1837}
1810 1838
1839/* Collapse packets in the retransmit queue to create
1840 * fewer packets on the wire. This is only done on retransmission.
1841 */
1811static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, 1842static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
1812 int space) 1843 int space)
1813{ 1844{
@@ -1957,6 +1988,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1957 return err; 1988 return err;
1958} 1989}
1959 1990
1991/* Check if forward retransmits are possible in the current
1992 * window/congestion state.
1993 */
1960static int tcp_can_forward_retransmit(struct sock *sk) 1994static int tcp_can_forward_retransmit(struct sock *sk)
1961{ 1995{
1962 const struct inet_connection_sock *icsk = inet_csk(sk); 1996 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2145,7 +2179,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2145 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); 2179 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2146} 2180}
2147 2181
2148/* WARNING: This routine must only be called when we have already sent 2182/* Send a crossed SYN-ACK during socket establishment.
2183 * WARNING: This routine must only be called when we have already sent
2149 * a SYN packet that crossed the incoming SYN that caused this routine 2184 * a SYN packet that crossed the incoming SYN that caused this routine
2150 * to get called. If this assumption fails then the initial rcv_wnd 2185 * to get called. If this assumption fails then the initial rcv_wnd
2151 * and rcv_wscale values will not be correct. 2186 * and rcv_wscale values will not be correct.
@@ -2180,9 +2215,7 @@ int tcp_send_synack(struct sock *sk)
2180 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2215 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2181} 2216}
2182 2217
2183/* 2218/* Prepare a SYN-ACK. */
2184 * Prepare a SYN-ACK.
2185 */
2186struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2219struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2187 struct request_sock *req) 2220 struct request_sock *req)
2188{ 2221{
@@ -2269,9 +2302,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2269 return skb; 2302 return skb;
2270} 2303}
2271 2304
2272/* 2305/* Do all connect socket setups that can be done AF independent. */
2273 * Do all connect socket setups that can be done AF independent.
2274 */
2275static void tcp_connect_init(struct sock *sk) 2306static void tcp_connect_init(struct sock *sk)
2276{ 2307{
2277 struct dst_entry *dst = __sk_dst_get(sk); 2308 struct dst_entry *dst = __sk_dst_get(sk);
@@ -2330,9 +2361,7 @@ static void tcp_connect_init(struct sock *sk)
2330 tcp_clear_retrans(tp); 2361 tcp_clear_retrans(tp);
2331} 2362}
2332 2363
2333/* 2364/* Build a SYN and send it off. */
2334 * Build a SYN and send it off.
2335 */
2336int tcp_connect(struct sock *sk) 2365int tcp_connect(struct sock *sk)
2337{ 2366{
2338 struct tcp_sock *tp = tcp_sk(sk); 2367 struct tcp_sock *tp = tcp_sk(sk);
@@ -2493,6 +2522,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2493 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2522 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2494} 2523}
2495 2524
2525/* Initiate keepalive or window probe from timer. */
2496int tcp_write_wakeup(struct sock *sk) 2526int tcp_write_wakeup(struct sock *sk)
2497{ 2527{
2498 struct tcp_sock *tp = tcp_sk(sk); 2528 struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b144a26359bc..cdb2ca7684d4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk)
137{ 137{
138 struct inet_connection_sock *icsk = inet_csk(sk); 138 struct inet_connection_sock *icsk = inet_csk(sk);
139 int retry_until; 139 int retry_until;
140 bool do_reset;
140 141
141 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
142 if (icsk->icsk_retransmits) 143 if (icsk->icsk_retransmits)
143 dst_negative_advice(&sk->sk_dst_cache); 144 dst_negative_advice(&sk->sk_dst_cache);
144 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
145 } else { 146 } else {
146 if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { 147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
147 /* Black hole detection */ 148 /* Black hole detection */
148 tcp_mtu_probing(icsk, sk); 149 tcp_mtu_probing(icsk, sk);
149 150
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk)
155 const int alive = (icsk->icsk_rto < TCP_RTO_MAX); 156 const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
156 157
157 retry_until = tcp_orphan_retries(sk, alive); 158 retry_until = tcp_orphan_retries(sk, alive);
159 do_reset = alive ||
160 !retransmits_timed_out(sk, retry_until);
158 161
159 if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) 162 if (tcp_out_of_resources(sk, do_reset))
160 return 1; 163 return 1;
161 } 164 }
162 } 165 }
163 166
164 if (icsk->icsk_retransmits >= retry_until) { 167 if (retransmits_timed_out(sk, retry_until)) {
165 /* Has it gone just too far? */ 168 /* Has it gone just too far? */
166 tcp_write_err(sk); 169 tcp_write_err(sk);
167 return 1; 170 return 1;
@@ -279,7 +282,7 @@ static void tcp_probe_timer(struct sock *sk)
279 * The TCP retransmit timer. 282 * The TCP retransmit timer.
280 */ 283 */
281 284
282static void tcp_retransmit_timer(struct sock *sk) 285void tcp_retransmit_timer(struct sock *sk)
283{ 286{
284 struct tcp_sock *tp = tcp_sk(sk); 287 struct tcp_sock *tp = tcp_sk(sk);
285 struct inet_connection_sock *icsk = inet_csk(sk); 288 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -385,7 +388,7 @@ static void tcp_retransmit_timer(struct sock *sk)
385out_reset_timer: 388out_reset_timer:
386 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 389 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
387 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 390 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
388 if (icsk->icsk_retransmits > sysctl_tcp_retries1) 391 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
389 __sk_dst_reset(sk); 392 __sk_dst_reset(sk);
390 393
391out:; 394out:;
@@ -499,8 +502,7 @@ static void tcp_keepalive_timer (unsigned long data)
499 elapsed = tcp_time_stamp - tp->rcv_tstamp; 502 elapsed = tcp_time_stamp - tp->rcv_tstamp;
500 503
501 if (elapsed >= keepalive_time_when(tp)) { 504 if (elapsed >= keepalive_time_when(tp)) {
502 if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || 505 if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
503 (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
504 tcp_send_active_reset(sk, GFP_ATOMIC); 506 tcp_send_active_reset(sk, GFP_ATOMIC);
505 tcp_write_err(sk); 507 tcp_write_err(sk);
506 goto out; 508 goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 80e3812837ad..29ebb0d27a1e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,11 +110,12 @@ struct udp_table udp_table;
110EXPORT_SYMBOL(udp_table); 110EXPORT_SYMBOL(udp_table);
111 111
112int sysctl_udp_mem[3] __read_mostly; 112int sysctl_udp_mem[3] __read_mostly;
113int sysctl_udp_rmem_min __read_mostly;
114int sysctl_udp_wmem_min __read_mostly;
115
116EXPORT_SYMBOL(sysctl_udp_mem); 113EXPORT_SYMBOL(sysctl_udp_mem);
114
115int sysctl_udp_rmem_min __read_mostly;
117EXPORT_SYMBOL(sysctl_udp_rmem_min); 116EXPORT_SYMBOL(sysctl_udp_rmem_min);
117
118int sysctl_udp_wmem_min __read_mostly;
118EXPORT_SYMBOL(sysctl_udp_wmem_min); 119EXPORT_SYMBOL(sysctl_udp_wmem_min);
119 120
120atomic_t udp_memory_allocated; 121atomic_t udp_memory_allocated;
@@ -158,7 +159,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
158 */ 159 */
159int udp_lib_get_port(struct sock *sk, unsigned short snum, 160int udp_lib_get_port(struct sock *sk, unsigned short snum,
160 int (*saddr_comp)(const struct sock *sk1, 161 int (*saddr_comp)(const struct sock *sk1,
161 const struct sock *sk2 ) ) 162 const struct sock *sk2))
162{ 163{
163 struct udp_hslot *hslot; 164 struct udp_hslot *hslot;
164 struct udp_table *udptable = sk->sk_prot->h.udp_table; 165 struct udp_table *udptable = sk->sk_prot->h.udp_table;
@@ -221,14 +222,15 @@ fail_unlock:
221fail: 222fail:
222 return error; 223 return error;
223} 224}
225EXPORT_SYMBOL(udp_lib_get_port);
224 226
225static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 227static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
226{ 228{
227 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 229 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
228 230
229 return ( !ipv6_only_sock(sk2) && 231 return (!ipv6_only_sock(sk2) &&
230 (!inet1->rcv_saddr || !inet2->rcv_saddr || 232 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
231 inet1->rcv_saddr == inet2->rcv_saddr )); 233 inet1->rcv_saddr == inet2->rcv_saddr));
232} 234}
233 235
234int udp_v4_get_port(struct sock *sk, unsigned short snum) 236int udp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -383,8 +385,8 @@ found:
383void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 385void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
384{ 386{
385 struct inet_sock *inet; 387 struct inet_sock *inet;
386 struct iphdr *iph = (struct iphdr*)skb->data; 388 struct iphdr *iph = (struct iphdr *)skb->data;
387 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); 389 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
388 const int type = icmp_hdr(skb)->type; 390 const int type = icmp_hdr(skb)->type;
389 const int code = icmp_hdr(skb)->code; 391 const int code = icmp_hdr(skb)->code;
390 struct sock *sk; 392 struct sock *sk;
@@ -439,7 +441,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
439 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 441 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
440 goto out; 442 goto out;
441 } else { 443 } else {
442 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); 444 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
443 } 445 }
444 sk->sk_err = err; 446 sk->sk_err = err;
445 sk->sk_error_report(sk); 447 sk->sk_error_report(sk);
@@ -474,7 +476,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
474 * (checksum field must be zeroed out) 476 * (checksum field must be zeroed out)
475 */ 477 */
476static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, 478static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
477 __be32 src, __be32 dst, int len ) 479 __be32 src, __be32 dst, int len)
478{ 480{
479 unsigned int offset; 481 unsigned int offset;
480 struct udphdr *uh = udp_hdr(skb); 482 struct udphdr *uh = udp_hdr(skb);
@@ -545,7 +547,7 @@ static int udp_push_pending_frames(struct sock *sk)
545 547
546 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 548 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
547 549
548 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); 550 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len);
549 goto send; 551 goto send;
550 552
551 } else /* `normal' UDP */ 553 } else /* `normal' UDP */
@@ -553,7 +555,7 @@ static int udp_push_pending_frames(struct sock *sk)
553 555
554 /* add protocol-dependent pseudo-header */ 556 /* add protocol-dependent pseudo-header */
555 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, 557 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
556 sk->sk_protocol, csum ); 558 sk->sk_protocol, csum);
557 if (uh->check == 0) 559 if (uh->check == 0)
558 uh->check = CSUM_MANGLED_0; 560 uh->check = CSUM_MANGLED_0;
559 561
@@ -592,7 +594,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
592 * Check the flags. 594 * Check the flags.
593 */ 595 */
594 596
595 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ 597 if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
596 return -EOPNOTSUPP; 598 return -EOPNOTSUPP;
597 599
598 ipc.opt = NULL; 600 ipc.opt = NULL;
@@ -619,7 +621,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
619 * Get and verify the address. 621 * Get and verify the address.
620 */ 622 */
621 if (msg->msg_name) { 623 if (msg->msg_name) {
622 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; 624 struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name;
623 if (msg->msg_namelen < sizeof(*usin)) 625 if (msg->msg_namelen < sizeof(*usin))
624 return -EINVAL; 626 return -EINVAL;
625 if (usin->sin_family != AF_INET) { 627 if (usin->sin_family != AF_INET) {
@@ -684,7 +686,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
684 } 686 }
685 687
686 if (connected) 688 if (connected)
687 rt = (struct rtable*)sk_dst_check(sk, 0); 689 rt = (struct rtable *)sk_dst_check(sk, 0);
688 690
689 if (rt == NULL) { 691 if (rt == NULL) {
690 struct flowi fl = { .oif = ipc.oif, 692 struct flowi fl = { .oif = ipc.oif,
@@ -782,6 +784,7 @@ do_confirm:
782 err = 0; 784 err = 0;
783 goto out; 785 goto out;
784} 786}
787EXPORT_SYMBOL(udp_sendmsg);
785 788
786int udp_sendpage(struct sock *sk, struct page *page, int offset, 789int udp_sendpage(struct sock *sk, struct page *page, int offset,
787 size_t size, int flags) 790 size_t size, int flags)
@@ -871,6 +874,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
871 874
872 return 0; 875 return 0;
873} 876}
877EXPORT_SYMBOL(udp_ioctl);
874 878
875/* 879/*
876 * This should be easy, if there is something there we 880 * This should be easy, if there is something there we
@@ -892,7 +896,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
892 * Check any passed addresses 896 * Check any passed addresses
893 */ 897 */
894 if (addr_len) 898 if (addr_len)
895 *addr_len=sizeof(*sin); 899 *addr_len = sizeof(*sin);
896 900
897 if (flags & MSG_ERRQUEUE) 901 if (flags & MSG_ERRQUEUE)
898 return ip_recv_error(sk, msg, len); 902 return ip_recv_error(sk, msg, len);
@@ -923,9 +927,11 @@ try_again:
923 927
924 if (skb_csum_unnecessary(skb)) 928 if (skb_csum_unnecessary(skb))
925 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 929 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
926 msg->msg_iov, copied ); 930 msg->msg_iov, copied);
927 else { 931 else {
928 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); 932 err = skb_copy_and_csum_datagram_iovec(skb,
933 sizeof(struct udphdr),
934 msg->msg_iov);
929 935
930 if (err == -EINVAL) 936 if (err == -EINVAL)
931 goto csum_copy_err; 937 goto csum_copy_err;
@@ -941,8 +947,7 @@ try_again:
941 sock_recv_timestamp(msg, sk, skb); 947 sock_recv_timestamp(msg, sk, skb);
942 948
943 /* Copy the address. */ 949 /* Copy the address. */
944 if (sin) 950 if (sin) {
945 {
946 sin->sin_family = AF_INET; 951 sin->sin_family = AF_INET;
947 sin->sin_port = udp_hdr(skb)->source; 952 sin->sin_port = udp_hdr(skb)->source;
948 sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 953 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
@@ -995,6 +1000,7 @@ int udp_disconnect(struct sock *sk, int flags)
995 sk_dst_reset(sk); 1000 sk_dst_reset(sk);
996 return 0; 1001 return 0;
997} 1002}
1003EXPORT_SYMBOL(udp_disconnect);
998 1004
999void udp_lib_unhash(struct sock *sk) 1005void udp_lib_unhash(struct sock *sk)
1000{ 1006{
@@ -1044,7 +1050,7 @@ drop:
1044 * Note that in the success and error cases, the skb is assumed to 1050 * Note that in the success and error cases, the skb is assumed to
1045 * have either been requeued or freed. 1051 * have either been requeued or freed.
1046 */ 1052 */
1047int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 1053int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1048{ 1054{
1049 struct udp_sock *up = udp_sk(sk); 1055 struct udp_sock *up = udp_sk(sk);
1050 int rc; 1056 int rc;
@@ -1214,7 +1220,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1214 if (uh->check == 0) { 1220 if (uh->check == 0) {
1215 skb->ip_summed = CHECKSUM_UNNECESSARY; 1221 skb->ip_summed = CHECKSUM_UNNECESSARY;
1216 } else if (skb->ip_summed == CHECKSUM_COMPLETE) { 1222 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1217 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 1223 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1218 proto, skb->csum)) 1224 proto, skb->csum))
1219 skb->ip_summed = CHECKSUM_UNNECESSARY; 1225 skb->ip_summed = CHECKSUM_UNNECESSARY;
1220 } 1226 }
@@ -1355,7 +1361,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1355 int err = 0; 1361 int err = 0;
1356 int is_udplite = IS_UDPLITE(sk); 1362 int is_udplite = IS_UDPLITE(sk);
1357 1363
1358 if (optlen<sizeof(int)) 1364 if (optlen < sizeof(int))
1359 return -EINVAL; 1365 return -EINVAL;
1360 1366
1361 if (get_user(val, (int __user *)optval)) 1367 if (get_user(val, (int __user *)optval))
@@ -1426,6 +1432,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1426 1432
1427 return err; 1433 return err;
1428} 1434}
1435EXPORT_SYMBOL(udp_lib_setsockopt);
1429 1436
1430int udp_setsockopt(struct sock *sk, int level, int optname, 1437int udp_setsockopt(struct sock *sk, int level, int optname,
1431 char __user *optval, int optlen) 1438 char __user *optval, int optlen)
@@ -1453,7 +1460,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1453 struct udp_sock *up = udp_sk(sk); 1460 struct udp_sock *up = udp_sk(sk);
1454 int val, len; 1461 int val, len;
1455 1462
1456 if (get_user(len,optlen)) 1463 if (get_user(len, optlen))
1457 return -EFAULT; 1464 return -EFAULT;
1458 1465
1459 len = min_t(unsigned int, len, sizeof(int)); 1466 len = min_t(unsigned int, len, sizeof(int));
@@ -1486,10 +1493,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1486 1493
1487 if (put_user(len, optlen)) 1494 if (put_user(len, optlen))
1488 return -EFAULT; 1495 return -EFAULT;
1489 if (copy_to_user(optval, &val,len)) 1496 if (copy_to_user(optval, &val, len))
1490 return -EFAULT; 1497 return -EFAULT;
1491 return 0; 1498 return 0;
1492} 1499}
1500EXPORT_SYMBOL(udp_lib_getsockopt);
1493 1501
1494int udp_getsockopt(struct sock *sk, int level, int optname, 1502int udp_getsockopt(struct sock *sk, int level, int optname,
1495 char __user *optval, int __user *optlen) 1503 char __user *optval, int __user *optlen)
@@ -1528,9 +1536,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1528 int is_lite = IS_UDPLITE(sk); 1536 int is_lite = IS_UDPLITE(sk);
1529 1537
1530 /* Check for false positives due to checksum errors */ 1538 /* Check for false positives due to checksum errors */
1531 if ( (mask & POLLRDNORM) && 1539 if ((mask & POLLRDNORM) &&
1532 !(file->f_flags & O_NONBLOCK) && 1540 !(file->f_flags & O_NONBLOCK) &&
1533 !(sk->sk_shutdown & RCV_SHUTDOWN)){ 1541 !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1534 struct sk_buff_head *rcvq = &sk->sk_receive_queue; 1542 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
1535 struct sk_buff *skb; 1543 struct sk_buff *skb;
1536 1544
@@ -1552,6 +1560,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1552 return mask; 1560 return mask;
1553 1561
1554} 1562}
1563EXPORT_SYMBOL(udp_poll);
1555 1564
1556struct proto udp_prot = { 1565struct proto udp_prot = {
1557 .name = "UDP", 1566 .name = "UDP",
@@ -1582,6 +1591,7 @@ struct proto udp_prot = {
1582 .compat_getsockopt = compat_udp_getsockopt, 1591 .compat_getsockopt = compat_udp_getsockopt,
1583#endif 1592#endif
1584}; 1593};
1594EXPORT_SYMBOL(udp_prot);
1585 1595
1586/* ------------------------------------------------------------------------ */ 1596/* ------------------------------------------------------------------------ */
1587#ifdef CONFIG_PROC_FS 1597#ifdef CONFIG_PROC_FS
@@ -1703,11 +1713,13 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
1703 rc = -ENOMEM; 1713 rc = -ENOMEM;
1704 return rc; 1714 return rc;
1705} 1715}
1716EXPORT_SYMBOL(udp_proc_register);
1706 1717
1707void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) 1718void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
1708{ 1719{
1709 proc_net_remove(net, afinfo->name); 1720 proc_net_remove(net, afinfo->name);
1710} 1721}
1722EXPORT_SYMBOL(udp_proc_unregister);
1711 1723
1712/* ------------------------------------------------------------------------ */ 1724/* ------------------------------------------------------------------------ */
1713static void udp4_format_sock(struct sock *sp, struct seq_file *f, 1725static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -1741,7 +1753,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
1741 int len; 1753 int len;
1742 1754
1743 udp4_format_sock(v, seq, state->bucket, &len); 1755 udp4_format_sock(v, seq, state->bucket, &len);
1744 seq_printf(seq, "%*s\n", 127 - len ,""); 1756 seq_printf(seq, "%*s\n", 127 - len, "");
1745 } 1757 }
1746 return 0; 1758 return 0;
1747} 1759}
@@ -1816,16 +1828,64 @@ void __init udp_init(void)
1816 sysctl_udp_wmem_min = SK_MEM_QUANTUM; 1828 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
1817} 1829}
1818 1830
1819EXPORT_SYMBOL(udp_disconnect); 1831int udp4_ufo_send_check(struct sk_buff *skb)
1820EXPORT_SYMBOL(udp_ioctl); 1832{
1821EXPORT_SYMBOL(udp_prot); 1833 const struct iphdr *iph;
1822EXPORT_SYMBOL(udp_sendmsg); 1834 struct udphdr *uh;
1823EXPORT_SYMBOL(udp_lib_getsockopt); 1835
1824EXPORT_SYMBOL(udp_lib_setsockopt); 1836 if (!pskb_may_pull(skb, sizeof(*uh)))
1825EXPORT_SYMBOL(udp_poll); 1837 return -EINVAL;
1826EXPORT_SYMBOL(udp_lib_get_port); 1838
1839 iph = ip_hdr(skb);
1840 uh = udp_hdr(skb);
1841
1842 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1843 IPPROTO_UDP, 0);
1844 skb->csum_start = skb_transport_header(skb) - skb->head;
1845 skb->csum_offset = offsetof(struct udphdr, check);
1846 skb->ip_summed = CHECKSUM_PARTIAL;
1847 return 0;
1848}
1849
1850struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
1851{
1852 struct sk_buff *segs = ERR_PTR(-EINVAL);
1853 unsigned int mss;
1854 int offset;
1855 __wsum csum;
1856
1857 mss = skb_shinfo(skb)->gso_size;
1858 if (unlikely(skb->len <= mss))
1859 goto out;
1860
1861 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
1862 /* Packet is from an untrusted source, reset gso_segs. */
1863 int type = skb_shinfo(skb)->gso_type;
1864
1865 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
1866 !(type & (SKB_GSO_UDP))))
1867 goto out;
1868
1869 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
1870
1871 segs = NULL;
1872 goto out;
1873 }
1874
1875 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
1876 * do checksum of UDP packets sent as multiple IP fragments.
1877 */
1878 offset = skb->csum_start - skb_headroom(skb);
1879 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1880 offset += skb->csum_offset;
1881 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1882 skb->ip_summed = CHECKSUM_NONE;
1883
1884 /* Fragment the skb. IP headers of the fragments are updated in
1885 * inet_gso_segment()
1886 */
1887 segs = skb_segment(skb, features);
1888out:
1889 return segs;
1890}
1827 1891
1828#ifdef CONFIG_PROC_FS
1829EXPORT_SYMBOL(udp_proc_register);
1830EXPORT_SYMBOL(udp_proc_unregister);
1831#endif
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0071ee6f441f..74fb2eb833ec 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -264,6 +264,22 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
264 .fill_dst = xfrm4_fill_dst, 264 .fill_dst = xfrm4_fill_dst,
265}; 265};
266 266
267#ifdef CONFIG_SYSCTL
268static struct ctl_table xfrm4_policy_table[] = {
269 {
270 .ctl_name = CTL_UNNUMBERED,
271 .procname = "xfrm4_gc_thresh",
272 .data = &xfrm4_dst_ops.gc_thresh,
273 .maxlen = sizeof(int),
274 .mode = 0644,
275 .proc_handler = proc_dointvec,
276 },
277 { }
278};
279
280static struct ctl_table_header *sysctl_hdr;
281#endif
282
267static void __init xfrm4_policy_init(void) 283static void __init xfrm4_policy_init(void)
268{ 284{
269 xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); 285 xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
@@ -271,12 +287,31 @@ static void __init xfrm4_policy_init(void)
271 287
272static void __exit xfrm4_policy_fini(void) 288static void __exit xfrm4_policy_fini(void)
273{ 289{
290#ifdef CONFIG_SYSCTL
291 if (sysctl_hdr)
292 unregister_net_sysctl_table(sysctl_hdr);
293#endif
274 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); 294 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
275} 295}
276 296
277void __init xfrm4_init(void) 297void __init xfrm4_init(int rt_max_size)
278{ 298{
279 xfrm4_state_init(); 299 xfrm4_state_init();
280 xfrm4_policy_init(); 300 xfrm4_policy_init();
301 /*
302 * Select a default value for the gc_thresh based on the main route
303 * table hash size. It seems to me the worst case scenario is when
304 * we have ipsec operating in transport mode, in which we create a
305 * dst_entry per socket. The xfrm gc algorithm starts trying to remove
306 * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
307 * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
308 * That will let us store an ipsec connection per route table entry,
309 * and start cleaning when were 1/2 full
310 */
311 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
312#ifdef CONFIG_SYSCTL
313 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
314 xfrm4_policy_table);
315#endif
281} 316}
282 317