aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 13:37:28 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 13:37:28 -0400
commitd7e9660ad9d5e0845f52848bce31bcf5cdcdea6b (patch)
treec6c67d145771187b194d79d603742b31090a59d6 /net/ipv4
parentb8cb48aae1b8c50b37dcb7710363aa69a7a0d9ca (diff)
parent13af7a6ea502fcdd4c0e3d7de6e332b102309491 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1623 commits) netxen: update copyright netxen: fix tx timeout recovery netxen: fix file firmware leak netxen: improve pci memory access netxen: change firmware write size tg3: Fix return ring size breakage netxen: build fix for INET=n cdc-phonet: autoconfigure Phonet address Phonet: back-end for autoconfigured addresses Phonet: fix netlink address dump error handling ipv6: Add IFA_F_DADFAILED flag net: Add DEVTYPE support for Ethernet based devices mv643xx_eth.c: remove unused txq_set_wrr() ucc_geth: Fix hangs after switching from full to half duplex ucc_geth: Rearrange some code to avoid forward declarations phy/marvell: Make non-aneg speed/duplex forcing work for 88E1111 PHYs drivers/net/phy: introduce missing kfree drivers/net/wan: introduce missing kfree net: force bridge module(s) to be GPL Subject: [PATCH] appletalk: Fix skb leak when ipddp interface is not loaded ... Fixed up trivial conflicts: - arch/x86/include/asm/socket.h converted to <asm-generic/socket.h> in the x86 tree. The generic header has the same new #define's, so that works out fine. - drivers/net/tun.c fix conflict between 89f56d1e9 ("tun: reuse struct sock fields") that switched over to using 'tun->socket.sk' instead of the redundantly available (and thus removed) 'tun->sk', and 2b980dbd ("lsm: Add hooks to the TUN driver") which added a new 'tun->sk' use. Noted in 'next' by Stephen Rothwell.
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c124
-rw-r--r--net/ipv4/arp.c8
-rw-r--r--net/ipv4/fib_trie.c101
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_gre.c8
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ipip.c8
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/arp_tables.c47
-rw-r--r--net/ipv4/netfilter/arptable_filter.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c51
-rw-r--r--net/ipv4/netfilter/iptable_filter.c10
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c16
-rw-r--r--net/ipv4/netfilter/iptable_raw.c10
-rw-r--r--net/ipv4/netfilter/iptable_security.c12
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c22
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c8
-rw-r--r--net/ipv4/protocol.c19
-rw-r--r--net/ipv4/raw.c9
-rw-r--r--net/ipv4/route.c22
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_input.c5
-rw-r--r--net/ipv4/tcp_ipv4.c66
-rw-r--r--net/ipv4/tcp_minisocks.c25
-rw-r--r--net/ipv4/tcp_output.c63
-rw-r--r--net/ipv4/tcp_timer.c16
-rw-r--r--net/ipv4/udp.c156
-rw-r--r--net/ipv4/xfrm4_policy.c37
30 files changed, 526 insertions, 359 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 566ea6c4321d..6c30a73f03f5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX];
124static DEFINE_SPINLOCK(inetsw_lock); 124static DEFINE_SPINLOCK(inetsw_lock);
125 125
126struct ipv4_config ipv4_config; 126struct ipv4_config ipv4_config;
127
128EXPORT_SYMBOL(ipv4_config); 127EXPORT_SYMBOL(ipv4_config);
129 128
130/* New destruction routine */ 129/* New destruction routine */
@@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk)
139 sk_mem_reclaim(sk); 138 sk_mem_reclaim(sk);
140 139
141 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { 140 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
142 printk("Attempt to release TCP socket in state %d %p\n", 141 pr_err("Attempt to release TCP socket in state %d %p\n",
143 sk->sk_state, sk); 142 sk->sk_state, sk);
144 return; 143 return;
145 } 144 }
146 if (!sock_flag(sk, SOCK_DEAD)) { 145 if (!sock_flag(sk, SOCK_DEAD)) {
147 printk("Attempt to release alive inet socket %p\n", sk); 146 pr_err("Attempt to release alive inet socket %p\n", sk);
148 return; 147 return;
149 } 148 }
150 149
@@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk)
157 dst_release(sk->sk_dst_cache); 156 dst_release(sk->sk_dst_cache);
158 sk_refcnt_debug_dec(sk); 157 sk_refcnt_debug_dec(sk);
159} 158}
159EXPORT_SYMBOL(inet_sock_destruct);
160 160
161/* 161/*
162 * The routines beyond this point handle the behaviour of an AF_INET 162 * The routines beyond this point handle the behaviour of an AF_INET
@@ -219,6 +219,7 @@ out:
219 release_sock(sk); 219 release_sock(sk);
220 return err; 220 return err;
221} 221}
222EXPORT_SYMBOL(inet_listen);
222 223
223u32 inet_ehash_secret __read_mostly; 224u32 inet_ehash_secret __read_mostly;
224EXPORT_SYMBOL(inet_ehash_secret); 225EXPORT_SYMBOL(inet_ehash_secret);
@@ -435,9 +436,11 @@ int inet_release(struct socket *sock)
435 } 436 }
436 return 0; 437 return 0;
437} 438}
439EXPORT_SYMBOL(inet_release);
438 440
439/* It is off by default, see below. */ 441/* It is off by default, see below. */
440int sysctl_ip_nonlocal_bind __read_mostly; 442int sysctl_ip_nonlocal_bind __read_mostly;
443EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
441 444
442int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 445int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
443{ 446{
@@ -519,6 +522,7 @@ out_release_sock:
519out: 522out:
520 return err; 523 return err;
521} 524}
525EXPORT_SYMBOL(inet_bind);
522 526
523int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, 527int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
524 int addr_len, int flags) 528 int addr_len, int flags)
@@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
532 return -EAGAIN; 536 return -EAGAIN;
533 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); 537 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
534} 538}
539EXPORT_SYMBOL(inet_dgram_connect);
535 540
536static long inet_wait_for_connect(struct sock *sk, long timeo) 541static long inet_wait_for_connect(struct sock *sk, long timeo)
537{ 542{
@@ -641,6 +646,7 @@ sock_error:
641 sock->state = SS_DISCONNECTING; 646 sock->state = SS_DISCONNECTING;
642 goto out; 647 goto out;
643} 648}
649EXPORT_SYMBOL(inet_stream_connect);
644 650
645/* 651/*
646 * Accept a pending connection. The TCP layer now gives BSD semantics. 652 * Accept a pending connection. The TCP layer now gives BSD semantics.
@@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
668do_err: 674do_err:
669 return err; 675 return err;
670} 676}
677EXPORT_SYMBOL(inet_accept);
671 678
672 679
673/* 680/*
@@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
699 *uaddr_len = sizeof(*sin); 706 *uaddr_len = sizeof(*sin);
700 return 0; 707 return 0;
701} 708}
709EXPORT_SYMBOL(inet_getname);
702 710
703int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 711int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
704 size_t size) 712 size_t size)
@@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
711 719
712 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 720 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
713} 721}
722EXPORT_SYMBOL(inet_sendmsg);
714 723
715 724
716static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 725static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
726 size_t size, int flags)
717{ 727{
718 struct sock *sk = sock->sk; 728 struct sock *sk = sock->sk;
719 729
@@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how)
780 release_sock(sk); 790 release_sock(sk);
781 return err; 791 return err;
782} 792}
793EXPORT_SYMBOL(inet_shutdown);
783 794
784/* 795/*
785 * ioctl() calls you can issue on an INET socket. Most of these are 796 * ioctl() calls you can issue on an INET socket. Most of these are
@@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
798 struct net *net = sock_net(sk); 809 struct net *net = sock_net(sk);
799 810
800 switch (cmd) { 811 switch (cmd) {
801 case SIOCGSTAMP: 812 case SIOCGSTAMP:
802 err = sock_get_timestamp(sk, (struct timeval __user *)arg); 813 err = sock_get_timestamp(sk, (struct timeval __user *)arg);
803 break; 814 break;
804 case SIOCGSTAMPNS: 815 case SIOCGSTAMPNS:
805 err = sock_get_timestampns(sk, (struct timespec __user *)arg); 816 err = sock_get_timestampns(sk, (struct timespec __user *)arg);
806 break; 817 break;
807 case SIOCADDRT: 818 case SIOCADDRT:
808 case SIOCDELRT: 819 case SIOCDELRT:
809 case SIOCRTMSG: 820 case SIOCRTMSG:
810 err = ip_rt_ioctl(net, cmd, (void __user *)arg); 821 err = ip_rt_ioctl(net, cmd, (void __user *)arg);
811 break; 822 break;
812 case SIOCDARP: 823 case SIOCDARP:
813 case SIOCGARP: 824 case SIOCGARP:
814 case SIOCSARP: 825 case SIOCSARP:
815 err = arp_ioctl(net, cmd, (void __user *)arg); 826 err = arp_ioctl(net, cmd, (void __user *)arg);
816 break; 827 break;
817 case SIOCGIFADDR: 828 case SIOCGIFADDR:
818 case SIOCSIFADDR: 829 case SIOCSIFADDR:
819 case SIOCGIFBRDADDR: 830 case SIOCGIFBRDADDR:
820 case SIOCSIFBRDADDR: 831 case SIOCSIFBRDADDR:
821 case SIOCGIFNETMASK: 832 case SIOCGIFNETMASK:
822 case SIOCSIFNETMASK: 833 case SIOCSIFNETMASK:
823 case SIOCGIFDSTADDR: 834 case SIOCGIFDSTADDR:
824 case SIOCSIFDSTADDR: 835 case SIOCSIFDSTADDR:
825 case SIOCSIFPFLAGS: 836 case SIOCSIFPFLAGS:
826 case SIOCGIFPFLAGS: 837 case SIOCGIFPFLAGS:
827 case SIOCSIFFLAGS: 838 case SIOCSIFFLAGS:
828 err = devinet_ioctl(net, cmd, (void __user *)arg); 839 err = devinet_ioctl(net, cmd, (void __user *)arg);
829 break; 840 break;
830 default: 841 default:
831 if (sk->sk_prot->ioctl) 842 if (sk->sk_prot->ioctl)
832 err = sk->sk_prot->ioctl(sk, cmd, arg); 843 err = sk->sk_prot->ioctl(sk, cmd, arg);
833 else 844 else
834 err = -ENOIOCTLCMD; 845 err = -ENOIOCTLCMD;
835 break; 846 break;
836 } 847 }
837 return err; 848 return err;
838} 849}
850EXPORT_SYMBOL(inet_ioctl);
839 851
840const struct proto_ops inet_stream_ops = { 852const struct proto_ops inet_stream_ops = {
841 .family = PF_INET, 853 .family = PF_INET,
@@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = {
862 .compat_getsockopt = compat_sock_common_getsockopt, 874 .compat_getsockopt = compat_sock_common_getsockopt,
863#endif 875#endif
864}; 876};
877EXPORT_SYMBOL(inet_stream_ops);
865 878
866const struct proto_ops inet_dgram_ops = { 879const struct proto_ops inet_dgram_ops = {
867 .family = PF_INET, 880 .family = PF_INET,
@@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = {
887 .compat_getsockopt = compat_sock_common_getsockopt, 900 .compat_getsockopt = compat_sock_common_getsockopt,
888#endif 901#endif
889}; 902};
903EXPORT_SYMBOL(inet_dgram_ops);
890 904
891/* 905/*
892 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 906 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
@@ -1016,6 +1030,7 @@ out_illegal:
1016 p->type); 1030 p->type);
1017 goto out; 1031 goto out;
1018} 1032}
1033EXPORT_SYMBOL(inet_register_protosw);
1019 1034
1020void inet_unregister_protosw(struct inet_protosw *p) 1035void inet_unregister_protosw(struct inet_protosw *p)
1021{ 1036{
@@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p)
1031 synchronize_net(); 1046 synchronize_net();
1032 } 1047 }
1033} 1048}
1049EXPORT_SYMBOL(inet_unregister_protosw);
1034 1050
1035/* 1051/*
1036 * Shall we try to damage output packets if routing dev changes? 1052 * Shall we try to damage output packets if routing dev changes?
@@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk)
1141 1157
1142 return err; 1158 return err;
1143} 1159}
1144
1145EXPORT_SYMBOL(inet_sk_rebuild_header); 1160EXPORT_SYMBOL(inet_sk_rebuild_header);
1146 1161
1147static int inet_gso_send_check(struct sk_buff *skb) 1162static int inet_gso_send_check(struct sk_buff *skb)
@@ -1187,6 +1202,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
1187 int proto; 1202 int proto;
1188 int ihl; 1203 int ihl;
1189 int id; 1204 int id;
1205 unsigned int offset = 0;
1190 1206
1191 if (!(features & NETIF_F_V4_CSUM)) 1207 if (!(features & NETIF_F_V4_CSUM))
1192 features &= ~NETIF_F_SG; 1208 features &= ~NETIF_F_SG;
@@ -1229,7 +1245,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
1229 skb = segs; 1245 skb = segs;
1230 do { 1246 do {
1231 iph = ip_hdr(skb); 1247 iph = ip_hdr(skb);
1232 iph->id = htons(id++); 1248 if (proto == IPPROTO_UDP) {
1249 iph->id = htons(id);
1250 iph->frag_off = htons(offset >> 3);
1251 if (skb->next != NULL)
1252 iph->frag_off |= htons(IP_MF);
1253 offset += (skb->len - skb->mac_len - iph->ihl * 4);
1254 } else
1255 iph->id = htons(id++);
1233 iph->tot_len = htons(skb->len - skb->mac_len); 1256 iph->tot_len = htons(skb->len - skb->mac_len);
1234 iph->check = 0; 1257 iph->check = 0;
1235 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); 1258 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
@@ -1361,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
1361 } 1384 }
1362 return rc; 1385 return rc;
1363} 1386}
1364
1365EXPORT_SYMBOL_GPL(inet_ctl_sock_create); 1387EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
1366 1388
1367unsigned long snmp_fold_field(void *mib[], int offt) 1389unsigned long snmp_fold_field(void *mib[], int offt)
@@ -1425,6 +1447,8 @@ static struct net_protocol tcp_protocol = {
1425static struct net_protocol udp_protocol = { 1447static struct net_protocol udp_protocol = {
1426 .handler = udp_rcv, 1448 .handler = udp_rcv,
1427 .err_handler = udp_err, 1449 .err_handler = udp_err,
1450 .gso_send_check = udp4_ufo_send_check,
1451 .gso_segment = udp4_ufo_fragment,
1428 .no_policy = 1, 1452 .no_policy = 1,
1429 .netns_ok = 1, 1453 .netns_ok = 1,
1430}; 1454};
@@ -1666,19 +1690,3 @@ static int __init ipv4_proc_init(void)
1666 1690
1667MODULE_ALIAS_NETPROTO(PF_INET); 1691MODULE_ALIAS_NETPROTO(PF_INET);
1668 1692
1669EXPORT_SYMBOL(inet_accept);
1670EXPORT_SYMBOL(inet_bind);
1671EXPORT_SYMBOL(inet_dgram_connect);
1672EXPORT_SYMBOL(inet_dgram_ops);
1673EXPORT_SYMBOL(inet_getname);
1674EXPORT_SYMBOL(inet_ioctl);
1675EXPORT_SYMBOL(inet_listen);
1676EXPORT_SYMBOL(inet_register_protosw);
1677EXPORT_SYMBOL(inet_release);
1678EXPORT_SYMBOL(inet_sendmsg);
1679EXPORT_SYMBOL(inet_shutdown);
1680EXPORT_SYMBOL(inet_sock_destruct);
1681EXPORT_SYMBOL(inet_stream_connect);
1682EXPORT_SYMBOL(inet_stream_ops);
1683EXPORT_SYMBOL(inet_unregister_protosw);
1684EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 090e9991ac2a..4e80f336c0cf 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
130static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); 130static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
131static void parp_redo(struct sk_buff *skb); 131static void parp_redo(struct sk_buff *skb);
132 132
133static struct neigh_ops arp_generic_ops = { 133static const struct neigh_ops arp_generic_ops = {
134 .family = AF_INET, 134 .family = AF_INET,
135 .solicit = arp_solicit, 135 .solicit = arp_solicit,
136 .error_report = arp_error_report, 136 .error_report = arp_error_report,
@@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = {
140 .queue_xmit = dev_queue_xmit, 140 .queue_xmit = dev_queue_xmit,
141}; 141};
142 142
143static struct neigh_ops arp_hh_ops = { 143static const struct neigh_ops arp_hh_ops = {
144 .family = AF_INET, 144 .family = AF_INET,
145 .solicit = arp_solicit, 145 .solicit = arp_solicit,
146 .error_report = arp_error_report, 146 .error_report = arp_error_report,
@@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = {
150 .queue_xmit = dev_queue_xmit, 150 .queue_xmit = dev_queue_xmit,
151}; 151};
152 152
153static struct neigh_ops arp_direct_ops = { 153static const struct neigh_ops arp_direct_ops = {
154 .family = AF_INET, 154 .family = AF_INET,
155 .output = dev_queue_xmit, 155 .output = dev_queue_xmit,
156 .connected_output = dev_queue_xmit, 156 .connected_output = dev_queue_xmit,
@@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = {
158 .queue_xmit = dev_queue_xmit, 158 .queue_xmit = dev_queue_xmit,
159}; 159};
160 160
161struct neigh_ops arp_broken_ops = { 161const struct neigh_ops arp_broken_ops = {
162 .family = AF_INET, 162 .family = AF_INET,
163 .solicit = arp_solicit, 163 .solicit = arp_solicit,
164 .error_report = arp_error_report, 164 .error_report = arp_error_report,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 63c2fa7b68c4..291bdf50a21f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -48,7 +48,7 @@
48 * Patrick McHardy <kaber@trash.net> 48 * Patrick McHardy <kaber@trash.net>
49 */ 49 */
50 50
51#define VERSION "0.408" 51#define VERSION "0.409"
52 52
53#include <asm/uaccess.h> 53#include <asm/uaccess.h>
54#include <asm/system.h> 54#include <asm/system.h>
@@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn); 164static struct tnode *halve(struct trie *t, struct tnode *tn);
165/* tnodes to free after resize(); protected by RTNL */ 165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head; 166static struct tnode *tnode_free_head;
167static size_t tnode_free_size;
168
169/*
170 * synchronize_rcu after call_rcu for that many pages; it should be especially
171 * useful before resizing the root node with PREEMPT_NONE configs; the value was
172 * obtained experimentally, aiming to avoid visible slowdown.
173 */
174static const int sync_pages = 128;
167 175
168static struct kmem_cache *fn_alias_kmem __read_mostly; 176static struct kmem_cache *fn_alias_kmem __read_mostly;
169static struct kmem_cache *trie_leaf_kmem __read_mostly; 177static struct kmem_cache *trie_leaf_kmem __read_mostly;
@@ -317,8 +325,7 @@ static inline void check_tnode(const struct tnode *tn)
317static const int halve_threshold = 25; 325static const int halve_threshold = 25;
318static const int inflate_threshold = 50; 326static const int inflate_threshold = 50;
319static const int halve_threshold_root = 15; 327static const int halve_threshold_root = 15;
320static const int inflate_threshold_root = 25; 328static const int inflate_threshold_root = 30;
321
322 329
323static void __alias_free_mem(struct rcu_head *head) 330static void __alias_free_mem(struct rcu_head *head)
324{ 331{
@@ -393,6 +400,8 @@ static void tnode_free_safe(struct tnode *tn)
393 BUG_ON(IS_LEAF(tn)); 400 BUG_ON(IS_LEAF(tn));
394 tn->tnode_free = tnode_free_head; 401 tn->tnode_free = tnode_free_head;
395 tnode_free_head = tn; 402 tnode_free_head = tn;
403 tnode_free_size += sizeof(struct tnode) +
404 (sizeof(struct node *) << tn->bits);
396} 405}
397 406
398static void tnode_free_flush(void) 407static void tnode_free_flush(void)
@@ -404,6 +413,11 @@ static void tnode_free_flush(void)
404 tn->tnode_free = NULL; 413 tn->tnode_free = NULL;
405 tnode_free(tn); 414 tnode_free(tn);
406 } 415 }
416
417 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
418 tnode_free_size = 0;
419 synchronize_rcu();
420 }
407} 421}
408 422
409static struct leaf *leaf_new(void) 423static struct leaf *leaf_new(void)
@@ -499,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
499 rcu_assign_pointer(tn->child[i], n); 513 rcu_assign_pointer(tn->child[i], n);
500} 514}
501 515
516#define MAX_WORK 10
502static struct node *resize(struct trie *t, struct tnode *tn) 517static struct node *resize(struct trie *t, struct tnode *tn)
503{ 518{
504 int i; 519 int i;
505 int err = 0;
506 struct tnode *old_tn; 520 struct tnode *old_tn;
507 int inflate_threshold_use; 521 int inflate_threshold_use;
508 int halve_threshold_use; 522 int halve_threshold_use;
509 int max_resize; 523 int max_work;
510 524
511 if (!tn) 525 if (!tn)
512 return NULL; 526 return NULL;
@@ -521,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
521 } 535 }
522 /* One child */ 536 /* One child */
523 if (tn->empty_children == tnode_child_length(tn) - 1) 537 if (tn->empty_children == tnode_child_length(tn) - 1)
524 for (i = 0; i < tnode_child_length(tn); i++) { 538 goto one_child;
525 struct node *n;
526
527 n = tn->child[i];
528 if (!n)
529 continue;
530
531 /* compress one level */
532 node_set_parent(n, NULL);
533 tnode_free_safe(tn);
534 return n;
535 }
536 /* 539 /*
537 * Double as long as the resulting node has a number of 540 * Double as long as the resulting node has a number of
538 * nonempty nodes that are above the threshold. 541 * nonempty nodes that are above the threshold.
@@ -601,14 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn)
601 604
602 /* Keep root node larger */ 605 /* Keep root node larger */
603 606
604 if (!tn->parent) 607 if (!node_parent((struct node*) tn)) {
605 inflate_threshold_use = inflate_threshold_root; 608 inflate_threshold_use = inflate_threshold_root;
606 else 609 halve_threshold_use = halve_threshold_root;
610 }
611 else {
607 inflate_threshold_use = inflate_threshold; 612 inflate_threshold_use = inflate_threshold;
613 halve_threshold_use = halve_threshold;
614 }
608 615
609 err = 0; 616 max_work = MAX_WORK;
610 max_resize = 10; 617 while ((tn->full_children > 0 && max_work-- &&
611 while ((tn->full_children > 0 && max_resize-- &&
612 50 * (tn->full_children + tnode_child_length(tn) 618 50 * (tn->full_children + tnode_child_length(tn)
613 - tn->empty_children) 619 - tn->empty_children)
614 >= inflate_threshold_use * tnode_child_length(tn))) { 620 >= inflate_threshold_use * tnode_child_length(tn))) {
@@ -625,35 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn)
625 } 631 }
626 } 632 }
627 633
628 if (max_resize < 0) {
629 if (!tn->parent)
630 pr_warning("Fix inflate_threshold_root."
631 " Now=%d size=%d bits\n",
632 inflate_threshold_root, tn->bits);
633 else
634 pr_warning("Fix inflate_threshold."
635 " Now=%d size=%d bits\n",
636 inflate_threshold, tn->bits);
637 }
638
639 check_tnode(tn); 634 check_tnode(tn);
640 635
636 /* Return if at least one inflate is run */
637 if( max_work != MAX_WORK)
638 return (struct node *) tn;
639
641 /* 640 /*
642 * Halve as long as the number of empty children in this 641 * Halve as long as the number of empty children in this
643 * node is above threshold. 642 * node is above threshold.
644 */ 643 */
645 644
646 645 max_work = MAX_WORK;
647 /* Keep root node larger */ 646 while (tn->bits > 1 && max_work-- &&
648
649 if (!tn->parent)
650 halve_threshold_use = halve_threshold_root;
651 else
652 halve_threshold_use = halve_threshold;
653
654 err = 0;
655 max_resize = 10;
656 while (tn->bits > 1 && max_resize-- &&
657 100 * (tnode_child_length(tn) - tn->empty_children) < 647 100 * (tnode_child_length(tn) - tn->empty_children) <
658 halve_threshold_use * tnode_child_length(tn)) { 648 halve_threshold_use * tnode_child_length(tn)) {
659 649
@@ -668,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
668 } 658 }
669 } 659 }
670 660
671 if (max_resize < 0) {
672 if (!tn->parent)
673 pr_warning("Fix halve_threshold_root."
674 " Now=%d size=%d bits\n",
675 halve_threshold_root, tn->bits);
676 else
677 pr_warning("Fix halve_threshold."
678 " Now=%d size=%d bits\n",
679 halve_threshold, tn->bits);
680 }
681 661
682 /* Only one child remains */ 662 /* Only one child remains */
683 if (tn->empty_children == tnode_child_length(tn) - 1) 663 if (tn->empty_children == tnode_child_length(tn) - 1) {
664one_child:
684 for (i = 0; i < tnode_child_length(tn); i++) { 665 for (i = 0; i < tnode_child_length(tn); i++) {
685 struct node *n; 666 struct node *n;
686 667
@@ -694,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
694 tnode_free_safe(tn); 675 tnode_free_safe(tn);
695 return n; 676 return n;
696 } 677 }
697 678 }
698 return (struct node *) tn; 679 return (struct node *) tn;
699} 680}
700 681
@@ -1435,7 +1416,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1435 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), 1416 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1436 pos, bits); 1417 pos, bits);
1437 1418
1438 n = tnode_get_child(pn, cindex); 1419 n = tnode_get_child_rcu(pn, cindex);
1439 1420
1440 if (n == NULL) { 1421 if (n == NULL) {
1441#ifdef CONFIG_IP_FIB_TRIE_STATS 1422#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -1570,7 +1551,7 @@ backtrace:
1570 if (chopped_off <= pn->bits) { 1551 if (chopped_off <= pn->bits) {
1571 cindex &= ~(1 << (chopped_off-1)); 1552 cindex &= ~(1 << (chopped_off-1));
1572 } else { 1553 } else {
1573 struct tnode *parent = node_parent((struct node *) pn); 1554 struct tnode *parent = node_parent_rcu((struct node *) pn);
1574 if (!parent) 1555 if (!parent)
1575 goto failed; 1556 goto failed;
1576 1557
@@ -1783,7 +1764,7 @@ static struct leaf *trie_firstleaf(struct trie *t)
1783static struct leaf *trie_nextleaf(struct leaf *l) 1764static struct leaf *trie_nextleaf(struct leaf *l)
1784{ 1765{
1785 struct node *c = (struct node *) l; 1766 struct node *c = (struct node *) l;
1786 struct tnode *p = node_parent(c); 1767 struct tnode *p = node_parent_rcu(c);
1787 1768
1788 if (!p) 1769 if (!p)
1789 return NULL; /* trie with just one leaf */ 1770 return NULL; /* trie with just one leaf */
@@ -2391,7 +2372,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
2391 } 2372 }
2392} 2373}
2393 2374
2394static const char *rtn_type_names[__RTN_MAX] = { 2375static const char *const rtn_type_names[__RTN_MAX] = {
2395 [RTN_UNSPEC] = "UNSPEC", 2376 [RTN_UNSPEC] = "UNSPEC",
2396 [RTN_UNICAST] = "UNICAST", 2377 [RTN_UNICAST] = "UNICAST",
2397 [RTN_LOCAL] = "LOCAL", 2378 [RTN_LOCAL] = "LOCAL",
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 61283f928825..13f0781f35cd 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data)
218 /* We purged the entire slot, anything left? */ 218 /* We purged the entire slot, anything left? */
219 if (twdr->tw_count) 219 if (twdr->tw_count)
220 need_timer = 1; 220 need_timer = 1;
221 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
221 } 222 }
222 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
223 if (need_timer) 223 if (need_timer)
224 mod_timer(&twdr->tw_timer, jiffies + twdr->period); 224 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
225out: 225out:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 82c11dd10a62..533afaadefd4 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -662,7 +662,7 @@ drop_nolock:
662 return(0); 662 return(0);
663} 663}
664 664
665static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 665static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
666{ 666{
667 struct ip_tunnel *tunnel = netdev_priv(dev); 667 struct ip_tunnel *tunnel = netdev_priv(dev);
668 struct net_device_stats *stats = &tunnel->dev->stats; 668 struct net_device_stats *stats = &tunnel->dev->stats;
@@ -821,7 +821,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
821 stats->tx_dropped++; 821 stats->tx_dropped++;
822 dev_kfree_skb(skb); 822 dev_kfree_skb(skb);
823 tunnel->recursion--; 823 tunnel->recursion--;
824 return 0; 824 return NETDEV_TX_OK;
825 } 825 }
826 if (skb->sk) 826 if (skb->sk)
827 skb_set_owner_w(new_skb, skb->sk); 827 skb_set_owner_w(new_skb, skb->sk);
@@ -889,7 +889,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
889 889
890 IPTUNNEL_XMIT(); 890 IPTUNNEL_XMIT();
891 tunnel->recursion--; 891 tunnel->recursion--;
892 return 0; 892 return NETDEV_TX_OK;
893 893
894tx_error_icmp: 894tx_error_icmp:
895 dst_link_failure(skb); 895 dst_link_failure(skb);
@@ -898,7 +898,7 @@ tx_error:
898 stats->tx_errors++; 898 stats->tx_errors++;
899 dev_kfree_skb(skb); 899 dev_kfree_skb(skb);
900 tunnel->recursion--; 900 tunnel->recursion--;
901 return 0; 901 return NETDEV_TX_OK;
902} 902}
903 903
904static int ipgre_tunnel_bind_dev(struct net_device *dev) 904static int ipgre_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7ffcd96fe591..9fe5d7b81580 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1304,7 +1304,7 @@ int ip_push_pending_frames(struct sock *sk)
1304 err = ip_local_out(skb); 1304 err = ip_local_out(skb);
1305 if (err) { 1305 if (err) {
1306 if (err > 0) 1306 if (err > 0)
1307 err = inet->recverr ? net_xmit_errno(err) : 0; 1307 err = net_xmit_errno(err);
1308 if (err) 1308 if (err)
1309 goto error; 1309 goto error;
1310 } 1310 }
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 93e2b787da20..62548cb0923c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb)
387 * and that skb is filled properly by that function. 387 * and that skb is filled properly by that function.
388 */ 388 */
389 389
390static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 390static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
391{ 391{
392 struct ip_tunnel *tunnel = netdev_priv(dev); 392 struct ip_tunnel *tunnel = netdev_priv(dev);
393 struct net_device_stats *stats = &tunnel->dev->stats; 393 struct net_device_stats *stats = &tunnel->dev->stats;
@@ -486,7 +486,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
486 stats->tx_dropped++; 486 stats->tx_dropped++;
487 dev_kfree_skb(skb); 487 dev_kfree_skb(skb);
488 tunnel->recursion--; 488 tunnel->recursion--;
489 return 0; 489 return NETDEV_TX_OK;
490 } 490 }
491 if (skb->sk) 491 if (skb->sk)
492 skb_set_owner_w(new_skb, skb->sk); 492 skb_set_owner_w(new_skb, skb->sk);
@@ -524,7 +524,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
524 524
525 IPTUNNEL_XMIT(); 525 IPTUNNEL_XMIT();
526 tunnel->recursion--; 526 tunnel->recursion--;
527 return 0; 527 return NETDEV_TX_OK;
528 528
529tx_error_icmp: 529tx_error_icmp:
530 dst_link_failure(skb); 530 dst_link_failure(skb);
@@ -532,7 +532,7 @@ tx_error:
532 stats->tx_errors++; 532 stats->tx_errors++;
533 dev_kfree_skb(skb); 533 dev_kfree_skb(skb);
534 tunnel->recursion--; 534 tunnel->recursion--;
535 return 0; 535 return NETDEV_TX_OK;
536} 536}
537 537
538static void ipip_tunnel_bind_dev(struct net_device *dev) 538static void ipip_tunnel_bind_dev(struct net_device *dev)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9a8da5ed92b7..65d421cf5bc7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,7 @@ failure:
201 201
202#ifdef CONFIG_IP_PIMSM 202#ifdef CONFIG_IP_PIMSM
203 203
204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 204static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
205{ 205{
206 struct net *net = dev_net(dev); 206 struct net *net = dev_net(dev);
207 207
@@ -212,7 +212,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
212 IGMPMSG_WHOLEPKT); 212 IGMPMSG_WHOLEPKT);
213 read_unlock(&mrt_lock); 213 read_unlock(&mrt_lock);
214 kfree_skb(skb); 214 kfree_skb(skb);
215 return 0; 215 return NETDEV_TX_OK;
216} 216}
217 217
218static const struct net_device_ops reg_vif_netdev_ops = { 218static const struct net_device_ops reg_vif_netdev_ops = {
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7505dff4ffdf..27774c99d888 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -8,7 +8,7 @@
8 * Copyright (C) 2002 David S. Miller (davem@redhat.com) 8 * Copyright (C) 2002 David S. Miller (davem@redhat.com)
9 * 9 *
10 */ 10 */
11 11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/skbuff.h> 13#include <linux/skbuff.h>
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
@@ -341,15 +341,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
341} 341}
342 342
343/* All zeroes == unconditional rule. */ 343/* All zeroes == unconditional rule. */
344static inline int unconditional(const struct arpt_arp *arp) 344static inline bool unconditional(const struct arpt_arp *arp)
345{ 345{
346 unsigned int i; 346 static const struct arpt_arp uncond;
347 347
348 for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++) 348 return memcmp(arp, &uncond, sizeof(uncond)) == 0;
349 if (((__u32 *)arp)[i])
350 return 0;
351
352 return 1;
353} 349}
354 350
355/* Figures out from what hook each rule can be called: returns 0 if 351/* Figures out from what hook each rule can be called: returns 0 if
@@ -537,12 +533,28 @@ out:
537 return ret; 533 return ret;
538} 534}
539 535
536static bool check_underflow(struct arpt_entry *e)
537{
538 const struct arpt_entry_target *t;
539 unsigned int verdict;
540
541 if (!unconditional(&e->arp))
542 return false;
543 t = arpt_get_target(e);
544 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
545 return false;
546 verdict = ((struct arpt_standard_target *)t)->verdict;
547 verdict = -verdict - 1;
548 return verdict == NF_DROP || verdict == NF_ACCEPT;
549}
550
540static inline int check_entry_size_and_hooks(struct arpt_entry *e, 551static inline int check_entry_size_and_hooks(struct arpt_entry *e,
541 struct xt_table_info *newinfo, 552 struct xt_table_info *newinfo,
542 unsigned char *base, 553 unsigned char *base,
543 unsigned char *limit, 554 unsigned char *limit,
544 const unsigned int *hook_entries, 555 const unsigned int *hook_entries,
545 const unsigned int *underflows, 556 const unsigned int *underflows,
557 unsigned int valid_hooks,
546 unsigned int *i) 558 unsigned int *i)
547{ 559{
548 unsigned int h; 560 unsigned int h;
@@ -562,15 +574,21 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
562 574
563 /* Check hooks & underflows */ 575 /* Check hooks & underflows */
564 for (h = 0; h < NF_ARP_NUMHOOKS; h++) { 576 for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
577 if (!(valid_hooks & (1 << h)))
578 continue;
565 if ((unsigned char *)e - base == hook_entries[h]) 579 if ((unsigned char *)e - base == hook_entries[h])
566 newinfo->hook_entry[h] = hook_entries[h]; 580 newinfo->hook_entry[h] = hook_entries[h];
567 if ((unsigned char *)e - base == underflows[h]) 581 if ((unsigned char *)e - base == underflows[h]) {
582 if (!check_underflow(e)) {
583 pr_err("Underflows must be unconditional and "
584 "use the STANDARD target with "
585 "ACCEPT/DROP\n");
586 return -EINVAL;
587 }
568 newinfo->underflow[h] = underflows[h]; 588 newinfo->underflow[h] = underflows[h];
589 }
569 } 590 }
570 591
571 /* FIXME: underflows must be unconditional, standard verdicts
572 < 0 (not ARPT_RETURN). --RR */
573
574 /* Clear counters and comefrom */ 592 /* Clear counters and comefrom */
575 e->counters = ((struct xt_counters) { 0, 0 }); 593 e->counters = ((struct xt_counters) { 0, 0 });
576 e->comefrom = 0; 594 e->comefrom = 0;
@@ -630,7 +648,7 @@ static int translate_table(const char *name,
630 newinfo, 648 newinfo,
631 entry0, 649 entry0,
632 entry0 + size, 650 entry0 + size,
633 hook_entries, underflows, &i); 651 hook_entries, underflows, valid_hooks, &i);
634 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); 652 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
635 if (ret != 0) 653 if (ret != 0)
636 return ret; 654 return ret;
@@ -1760,7 +1778,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
1760 return ret; 1778 return ret;
1761} 1779}
1762 1780
1763struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, 1781struct xt_table *arpt_register_table(struct net *net,
1782 const struct xt_table *table,
1764 const struct arpt_replace *repl) 1783 const struct arpt_replace *repl)
1765{ 1784{
1766 int ret; 1785 int ret;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6ecfdae7c589..97337601827a 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -15,7 +15,7 @@ MODULE_DESCRIPTION("arptables filter table");
15#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ 15#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
16 (1 << NF_ARP_FORWARD)) 16 (1 << NF_ARP_FORWARD))
17 17
18static struct 18static const struct
19{ 19{
20 struct arpt_replace repl; 20 struct arpt_replace repl;
21 struct arpt_standard entries[3]; 21 struct arpt_standard entries[3];
@@ -45,7 +45,7 @@ static struct
45 .term = ARPT_ERROR_INIT, 45 .term = ARPT_ERROR_INIT,
46}; 46};
47 47
48static struct xt_table packet_filter = { 48static const struct xt_table packet_filter = {
49 .name = "filter", 49 .name = "filter",
50 .valid_hooks = FILTER_VALID_HOOKS, 50 .valid_hooks = FILTER_VALID_HOOKS,
51 .me = THIS_MODULE, 51 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index fdefae6b5dfc..cde755d5eeab 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -8,6 +8,7 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/cache.h> 12#include <linux/cache.h>
12#include <linux/capability.h> 13#include <linux/capability.h>
13#include <linux/skbuff.h> 14#include <linux/skbuff.h>
@@ -190,16 +191,11 @@ get_entry(void *base, unsigned int offset)
190 191
191/* All zeroes == unconditional rule. */ 192/* All zeroes == unconditional rule. */
192/* Mildly perf critical (only if packet tracing is on) */ 193/* Mildly perf critical (only if packet tracing is on) */
193static inline int 194static inline bool unconditional(const struct ipt_ip *ip)
194unconditional(const struct ipt_ip *ip)
195{ 195{
196 unsigned int i; 196 static const struct ipt_ip uncond;
197
198 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
199 if (((__u32 *)ip)[i])
200 return 0;
201 197
202 return 1; 198 return memcmp(ip, &uncond, sizeof(uncond)) == 0;
203#undef FWINV 199#undef FWINV
204} 200}
205 201
@@ -315,7 +311,6 @@ ipt_do_table(struct sk_buff *skb,
315 311
316 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 312 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
317 const struct iphdr *ip; 313 const struct iphdr *ip;
318 u_int16_t datalen;
319 bool hotdrop = false; 314 bool hotdrop = false;
320 /* Initializing verdict to NF_DROP keeps gcc happy. */ 315 /* Initializing verdict to NF_DROP keeps gcc happy. */
321 unsigned int verdict = NF_DROP; 316 unsigned int verdict = NF_DROP;
@@ -328,7 +323,6 @@ ipt_do_table(struct sk_buff *skb,
328 323
329 /* Initialization */ 324 /* Initialization */
330 ip = ip_hdr(skb); 325 ip = ip_hdr(skb);
331 datalen = skb->len - ip->ihl * 4;
332 indev = in ? in->name : nulldevname; 326 indev = in ? in->name : nulldevname;
333 outdev = out ? out->name : nulldevname; 327 outdev = out ? out->name : nulldevname;
334 /* We handle fragments by dealing with the first fragment as 328 /* We handle fragments by dealing with the first fragment as
@@ -427,8 +421,6 @@ ipt_do_table(struct sk_buff *skb,
427#endif 421#endif
428 /* Target might have changed stuff. */ 422 /* Target might have changed stuff. */
429 ip = ip_hdr(skb); 423 ip = ip_hdr(skb);
430 datalen = skb->len - ip->ihl * 4;
431
432 if (verdict == IPT_CONTINUE) 424 if (verdict == IPT_CONTINUE)
433 e = ipt_next_entry(e); 425 e = ipt_next_entry(e);
434 else 426 else
@@ -716,6 +708,21 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
716 return ret; 708 return ret;
717} 709}
718 710
711static bool check_underflow(struct ipt_entry *e)
712{
713 const struct ipt_entry_target *t;
714 unsigned int verdict;
715
716 if (!unconditional(&e->ip))
717 return false;
718 t = ipt_get_target(e);
719 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
720 return false;
721 verdict = ((struct ipt_standard_target *)t)->verdict;
722 verdict = -verdict - 1;
723 return verdict == NF_DROP || verdict == NF_ACCEPT;
724}
725
719static int 726static int
720check_entry_size_and_hooks(struct ipt_entry *e, 727check_entry_size_and_hooks(struct ipt_entry *e,
721 struct xt_table_info *newinfo, 728 struct xt_table_info *newinfo,
@@ -723,6 +730,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
723 unsigned char *limit, 730 unsigned char *limit,
724 const unsigned int *hook_entries, 731 const unsigned int *hook_entries,
725 const unsigned int *underflows, 732 const unsigned int *underflows,
733 unsigned int valid_hooks,
726 unsigned int *i) 734 unsigned int *i)
727{ 735{
728 unsigned int h; 736 unsigned int h;
@@ -742,15 +750,21 @@ check_entry_size_and_hooks(struct ipt_entry *e,
742 750
743 /* Check hooks & underflows */ 751 /* Check hooks & underflows */
744 for (h = 0; h < NF_INET_NUMHOOKS; h++) { 752 for (h = 0; h < NF_INET_NUMHOOKS; h++) {
753 if (!(valid_hooks & (1 << h)))
754 continue;
745 if ((unsigned char *)e - base == hook_entries[h]) 755 if ((unsigned char *)e - base == hook_entries[h])
746 newinfo->hook_entry[h] = hook_entries[h]; 756 newinfo->hook_entry[h] = hook_entries[h];
747 if ((unsigned char *)e - base == underflows[h]) 757 if ((unsigned char *)e - base == underflows[h]) {
758 if (!check_underflow(e)) {
759 pr_err("Underflows must be unconditional and "
760 "use the STANDARD target with "
761 "ACCEPT/DROP\n");
762 return -EINVAL;
763 }
748 newinfo->underflow[h] = underflows[h]; 764 newinfo->underflow[h] = underflows[h];
765 }
749 } 766 }
750 767
751 /* FIXME: underflows must be unconditional, standard verdicts
752 < 0 (not IPT_RETURN). --RR */
753
754 /* Clear counters and comefrom */ 768 /* Clear counters and comefrom */
755 e->counters = ((struct xt_counters) { 0, 0 }); 769 e->counters = ((struct xt_counters) { 0, 0 });
756 e->comefrom = 0; 770 e->comefrom = 0;
@@ -813,7 +827,7 @@ translate_table(const char *name,
813 newinfo, 827 newinfo,
814 entry0, 828 entry0,
815 entry0 + size, 829 entry0 + size,
816 hook_entries, underflows, &i); 830 hook_entries, underflows, valid_hooks, &i);
817 if (ret != 0) 831 if (ret != 0)
818 return ret; 832 return ret;
819 833
@@ -2051,7 +2065,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2051 return ret; 2065 return ret;
2052} 2066}
2053 2067
2054struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, 2068struct xt_table *ipt_register_table(struct net *net,
2069 const struct xt_table *table,
2055 const struct ipt_replace *repl) 2070 const struct ipt_replace *repl)
2056{ 2071{
2057 int ret; 2072 int ret;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index c30a969724f8..df566cbd68e5 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -53,11 +53,11 @@ static struct
53 .term = IPT_ERROR_INIT, /* ERROR */ 53 .term = IPT_ERROR_INIT, /* ERROR */
54}; 54};
55 55
56static struct xt_table packet_filter = { 56static const struct xt_table packet_filter = {
57 .name = "filter", 57 .name = "filter",
58 .valid_hooks = FILTER_VALID_HOOKS, 58 .valid_hooks = FILTER_VALID_HOOKS,
59 .me = THIS_MODULE, 59 .me = THIS_MODULE,
60 .af = AF_INET, 60 .af = NFPROTO_IPV4,
61}; 61};
62 62
63/* The work comes in here from netfilter.c. */ 63/* The work comes in here from netfilter.c. */
@@ -102,21 +102,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
102 { 102 {
103 .hook = ipt_local_in_hook, 103 .hook = ipt_local_in_hook,
104 .owner = THIS_MODULE, 104 .owner = THIS_MODULE,
105 .pf = PF_INET, 105 .pf = NFPROTO_IPV4,
106 .hooknum = NF_INET_LOCAL_IN, 106 .hooknum = NF_INET_LOCAL_IN,
107 .priority = NF_IP_PRI_FILTER, 107 .priority = NF_IP_PRI_FILTER,
108 }, 108 },
109 { 109 {
110 .hook = ipt_hook, 110 .hook = ipt_hook,
111 .owner = THIS_MODULE, 111 .owner = THIS_MODULE,
112 .pf = PF_INET, 112 .pf = NFPROTO_IPV4,
113 .hooknum = NF_INET_FORWARD, 113 .hooknum = NF_INET_FORWARD,
114 .priority = NF_IP_PRI_FILTER, 114 .priority = NF_IP_PRI_FILTER,
115 }, 115 },
116 { 116 {
117 .hook = ipt_local_out_hook, 117 .hook = ipt_local_out_hook,
118 .owner = THIS_MODULE, 118 .owner = THIS_MODULE,
119 .pf = PF_INET, 119 .pf = NFPROTO_IPV4,
120 .hooknum = NF_INET_LOCAL_OUT, 120 .hooknum = NF_INET_LOCAL_OUT,
121 .priority = NF_IP_PRI_FILTER, 121 .priority = NF_IP_PRI_FILTER,
122 }, 122 },
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 4087614d9519..036047f9b0f2 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -28,7 +28,7 @@ MODULE_DESCRIPTION("iptables mangle table");
28 (1 << NF_INET_POST_ROUTING)) 28 (1 << NF_INET_POST_ROUTING))
29 29
30/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ 30/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
31static struct 31static const struct
32{ 32{
33 struct ipt_replace repl; 33 struct ipt_replace repl;
34 struct ipt_standard entries[5]; 34 struct ipt_standard entries[5];
@@ -64,11 +64,11 @@ static struct
64 .term = IPT_ERROR_INIT, /* ERROR */ 64 .term = IPT_ERROR_INIT, /* ERROR */
65}; 65};
66 66
67static struct xt_table packet_mangler = { 67static const struct xt_table packet_mangler = {
68 .name = "mangle", 68 .name = "mangle",
69 .valid_hooks = MANGLE_VALID_HOOKS, 69 .valid_hooks = MANGLE_VALID_HOOKS,
70 .me = THIS_MODULE, 70 .me = THIS_MODULE,
71 .af = AF_INET, 71 .af = NFPROTO_IPV4,
72}; 72};
73 73
74/* The work comes in here from netfilter.c. */ 74/* The work comes in here from netfilter.c. */
@@ -162,35 +162,35 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
162 { 162 {
163 .hook = ipt_pre_routing_hook, 163 .hook = ipt_pre_routing_hook,
164 .owner = THIS_MODULE, 164 .owner = THIS_MODULE,
165 .pf = PF_INET, 165 .pf = NFPROTO_IPV4,
166 .hooknum = NF_INET_PRE_ROUTING, 166 .hooknum = NF_INET_PRE_ROUTING,
167 .priority = NF_IP_PRI_MANGLE, 167 .priority = NF_IP_PRI_MANGLE,
168 }, 168 },
169 { 169 {
170 .hook = ipt_local_in_hook, 170 .hook = ipt_local_in_hook,
171 .owner = THIS_MODULE, 171 .owner = THIS_MODULE,
172 .pf = PF_INET, 172 .pf = NFPROTO_IPV4,
173 .hooknum = NF_INET_LOCAL_IN, 173 .hooknum = NF_INET_LOCAL_IN,
174 .priority = NF_IP_PRI_MANGLE, 174 .priority = NF_IP_PRI_MANGLE,
175 }, 175 },
176 { 176 {
177 .hook = ipt_forward_hook, 177 .hook = ipt_forward_hook,
178 .owner = THIS_MODULE, 178 .owner = THIS_MODULE,
179 .pf = PF_INET, 179 .pf = NFPROTO_IPV4,
180 .hooknum = NF_INET_FORWARD, 180 .hooknum = NF_INET_FORWARD,
181 .priority = NF_IP_PRI_MANGLE, 181 .priority = NF_IP_PRI_MANGLE,
182 }, 182 },
183 { 183 {
184 .hook = ipt_local_hook, 184 .hook = ipt_local_hook,
185 .owner = THIS_MODULE, 185 .owner = THIS_MODULE,
186 .pf = PF_INET, 186 .pf = NFPROTO_IPV4,
187 .hooknum = NF_INET_LOCAL_OUT, 187 .hooknum = NF_INET_LOCAL_OUT,
188 .priority = NF_IP_PRI_MANGLE, 188 .priority = NF_IP_PRI_MANGLE,
189 }, 189 },
190 { 190 {
191 .hook = ipt_post_routing_hook, 191 .hook = ipt_post_routing_hook,
192 .owner = THIS_MODULE, 192 .owner = THIS_MODULE,
193 .pf = PF_INET, 193 .pf = NFPROTO_IPV4,
194 .hooknum = NF_INET_POST_ROUTING, 194 .hooknum = NF_INET_POST_ROUTING,
195 .priority = NF_IP_PRI_MANGLE, 195 .priority = NF_IP_PRI_MANGLE,
196 }, 196 },
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index e5356da1fb54..993edc23be09 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -9,7 +9,7 @@
9 9
10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 10#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
11 11
12static struct 12static const struct
13{ 13{
14 struct ipt_replace repl; 14 struct ipt_replace repl;
15 struct ipt_standard entries[2]; 15 struct ipt_standard entries[2];
@@ -36,11 +36,11 @@ static struct
36 .term = IPT_ERROR_INIT, /* ERROR */ 36 .term = IPT_ERROR_INIT, /* ERROR */
37}; 37};
38 38
39static struct xt_table packet_raw = { 39static const struct xt_table packet_raw = {
40 .name = "raw", 40 .name = "raw",
41 .valid_hooks = RAW_VALID_HOOKS, 41 .valid_hooks = RAW_VALID_HOOKS,
42 .me = THIS_MODULE, 42 .me = THIS_MODULE,
43 .af = AF_INET, 43 .af = NFPROTO_IPV4,
44}; 44};
45 45
46/* The work comes in here from netfilter.c. */ 46/* The work comes in here from netfilter.c. */
@@ -74,14 +74,14 @@ ipt_local_hook(unsigned int hook,
74static struct nf_hook_ops ipt_ops[] __read_mostly = { 74static struct nf_hook_ops ipt_ops[] __read_mostly = {
75 { 75 {
76 .hook = ipt_hook, 76 .hook = ipt_hook,
77 .pf = PF_INET, 77 .pf = NFPROTO_IPV4,
78 .hooknum = NF_INET_PRE_ROUTING, 78 .hooknum = NF_INET_PRE_ROUTING,
79 .priority = NF_IP_PRI_RAW, 79 .priority = NF_IP_PRI_RAW,
80 .owner = THIS_MODULE, 80 .owner = THIS_MODULE,
81 }, 81 },
82 { 82 {
83 .hook = ipt_local_hook, 83 .hook = ipt_local_hook,
84 .pf = PF_INET, 84 .pf = NFPROTO_IPV4,
85 .hooknum = NF_INET_LOCAL_OUT, 85 .hooknum = NF_INET_LOCAL_OUT,
86 .priority = NF_IP_PRI_RAW, 86 .priority = NF_IP_PRI_RAW,
87 .owner = THIS_MODULE, 87 .owner = THIS_MODULE,
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 29ab630f240a..99eb76c65d25 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
27 (1 << NF_INET_FORWARD) | \ 27 (1 << NF_INET_FORWARD) | \
28 (1 << NF_INET_LOCAL_OUT) 28 (1 << NF_INET_LOCAL_OUT)
29 29
30static struct 30static const struct
31{ 31{
32 struct ipt_replace repl; 32 struct ipt_replace repl;
33 struct ipt_standard entries[3]; 33 struct ipt_standard entries[3];
@@ -57,11 +57,11 @@ static struct
57 .term = IPT_ERROR_INIT, /* ERROR */ 57 .term = IPT_ERROR_INIT, /* ERROR */
58}; 58};
59 59
60static struct xt_table security_table = { 60static const struct xt_table security_table = {
61 .name = "security", 61 .name = "security",
62 .valid_hooks = SECURITY_VALID_HOOKS, 62 .valid_hooks = SECURITY_VALID_HOOKS,
63 .me = THIS_MODULE, 63 .me = THIS_MODULE,
64 .af = AF_INET, 64 .af = NFPROTO_IPV4,
65}; 65};
66 66
67static unsigned int 67static unsigned int
@@ -105,21 +105,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = {
105 { 105 {
106 .hook = ipt_local_in_hook, 106 .hook = ipt_local_in_hook,
107 .owner = THIS_MODULE, 107 .owner = THIS_MODULE,
108 .pf = PF_INET, 108 .pf = NFPROTO_IPV4,
109 .hooknum = NF_INET_LOCAL_IN, 109 .hooknum = NF_INET_LOCAL_IN,
110 .priority = NF_IP_PRI_SECURITY, 110 .priority = NF_IP_PRI_SECURITY,
111 }, 111 },
112 { 112 {
113 .hook = ipt_forward_hook, 113 .hook = ipt_forward_hook,
114 .owner = THIS_MODULE, 114 .owner = THIS_MODULE,
115 .pf = PF_INET, 115 .pf = NFPROTO_IPV4,
116 .hooknum = NF_INET_FORWARD, 116 .hooknum = NF_INET_FORWARD,
117 .priority = NF_IP_PRI_SECURITY, 117 .priority = NF_IP_PRI_SECURITY,
118 }, 118 },
119 { 119 {
120 .hook = ipt_local_out_hook, 120 .hook = ipt_local_out_hook,
121 .owner = THIS_MODULE, 121 .owner = THIS_MODULE,
122 .pf = PF_INET, 122 .pf = NFPROTO_IPV4,
123 .hooknum = NF_INET_LOCAL_OUT, 123 .hooknum = NF_INET_LOCAL_OUT,
124 .priority = NF_IP_PRI_SECURITY, 124 .priority = NF_IP_PRI_SECURITY,
125 }, 125 },
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7d2ead7228ac..aa95bb82ee6c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -26,6 +26,7 @@
26#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 26#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
27#include <net/netfilter/nf_nat_helper.h> 27#include <net/netfilter/nf_nat_helper.h>
28#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 28#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
29#include <net/netfilter/nf_log.h>
29 30
30int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, 31int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
31 struct nf_conn *ct, 32 struct nf_conn *ct,
@@ -113,8 +114,11 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
113 114
114 ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 115 ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
115 ct, ctinfo); 116 ct, ctinfo);
116 if (ret != NF_ACCEPT) 117 if (ret != NF_ACCEPT) {
118 nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL,
119 "nf_ct_%s: dropping packet", helper->name);
117 return ret; 120 return ret;
121 }
118 122
119 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { 123 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
120 typeof(nf_nat_seq_adjust_hook) seq_adjust; 124 typeof(nf_nat_seq_adjust_hook) seq_adjust;
@@ -158,28 +162,28 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
158 { 162 {
159 .hook = ipv4_conntrack_in, 163 .hook = ipv4_conntrack_in,
160 .owner = THIS_MODULE, 164 .owner = THIS_MODULE,
161 .pf = PF_INET, 165 .pf = NFPROTO_IPV4,
162 .hooknum = NF_INET_PRE_ROUTING, 166 .hooknum = NF_INET_PRE_ROUTING,
163 .priority = NF_IP_PRI_CONNTRACK, 167 .priority = NF_IP_PRI_CONNTRACK,
164 }, 168 },
165 { 169 {
166 .hook = ipv4_conntrack_local, 170 .hook = ipv4_conntrack_local,
167 .owner = THIS_MODULE, 171 .owner = THIS_MODULE,
168 .pf = PF_INET, 172 .pf = NFPROTO_IPV4,
169 .hooknum = NF_INET_LOCAL_OUT, 173 .hooknum = NF_INET_LOCAL_OUT,
170 .priority = NF_IP_PRI_CONNTRACK, 174 .priority = NF_IP_PRI_CONNTRACK,
171 }, 175 },
172 { 176 {
173 .hook = ipv4_confirm, 177 .hook = ipv4_confirm,
174 .owner = THIS_MODULE, 178 .owner = THIS_MODULE,
175 .pf = PF_INET, 179 .pf = NFPROTO_IPV4,
176 .hooknum = NF_INET_POST_ROUTING, 180 .hooknum = NF_INET_POST_ROUTING,
177 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 181 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
178 }, 182 },
179 { 183 {
180 .hook = ipv4_confirm, 184 .hook = ipv4_confirm,
181 .owner = THIS_MODULE, 185 .owner = THIS_MODULE,
182 .pf = PF_INET, 186 .pf = NFPROTO_IPV4,
183 .hooknum = NF_INET_LOCAL_IN, 187 .hooknum = NF_INET_LOCAL_IN,
184 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 188 .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
185 }, 189 },
@@ -256,11 +260,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
256 tuple.dst.u3.ip = inet->daddr; 260 tuple.dst.u3.ip = inet->daddr;
257 tuple.dst.u.tcp.port = inet->dport; 261 tuple.dst.u.tcp.port = inet->dport;
258 tuple.src.l3num = PF_INET; 262 tuple.src.l3num = PF_INET;
259 tuple.dst.protonum = IPPROTO_TCP; 263 tuple.dst.protonum = sk->sk_protocol;
260 264
261 /* We only do TCP at the moment: is there a better way? */ 265 /* We only do TCP and SCTP at the moment: is there a better way? */
262 if (strcmp(sk->sk_prot->name, "TCP")) { 266 if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
263 pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n"); 267 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
264 return -ENOPROTOOPT; 268 return -ENOPROTOOPT;
265 } 269 }
266 270
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 3229e0a81ba6..68afc6ecd343 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -212,7 +212,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
212 maxip = ntohl(range->max_ip); 212 maxip = ntohl(range->max_ip);
213 j = jhash_2words((__force u32)tuple->src.u3.ip, 213 j = jhash_2words((__force u32)tuple->src.u3.ip,
214 range->flags & IP_NAT_RANGE_PERSISTENT ? 214 range->flags & IP_NAT_RANGE_PERSISTENT ?
215 (__force u32)tuple->dst.u3.ip : 0, 0); 215 0 : (__force u32)tuple->dst.u3.ip, 0);
216 j = ((u64)j * (maxip - minip + 1)) >> 32; 216 j = ((u64)j * (maxip - minip + 1)) >> 32;
217 *var_ipp = htonl(minip + j); 217 *var_ipp = htonl(minip + j);
218} 218}
@@ -620,7 +620,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
620}; 620};
621 621
622static int 622static int
623nfnetlink_parse_nat(struct nlattr *nat, 623nfnetlink_parse_nat(const struct nlattr *nat,
624 const struct nf_conn *ct, struct nf_nat_range *range) 624 const struct nf_conn *ct, struct nf_nat_range *range)
625{ 625{
626 struct nlattr *tb[CTA_NAT_MAX+1]; 626 struct nlattr *tb[CTA_NAT_MAX+1];
@@ -656,7 +656,7 @@ nfnetlink_parse_nat(struct nlattr *nat,
656static int 656static int
657nfnetlink_parse_nat_setup(struct nf_conn *ct, 657nfnetlink_parse_nat_setup(struct nf_conn *ct,
658 enum nf_nat_manip_type manip, 658 enum nf_nat_manip_type manip,
659 struct nlattr *attr) 659 const struct nlattr *attr)
660{ 660{
661 struct nf_nat_range range; 661 struct nf_nat_range range;
662 662
@@ -671,7 +671,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
671static int 671static int
672nfnetlink_parse_nat_setup(struct nf_conn *ct, 672nfnetlink_parse_nat_setup(struct nf_conn *ct,
673 enum nf_nat_manip_type manip, 673 enum nf_nat_manip_type manip,
674 struct nlattr *attr) 674 const struct nlattr *attr)
675{ 675{
676 return -EOPNOTSUPP; 676 return -EOPNOTSUPP;
677} 677}
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 6348a793936e..9e81e0dfb4ec 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,7 @@
28 (1 << NF_INET_POST_ROUTING) | \ 28 (1 << NF_INET_POST_ROUTING) | \
29 (1 << NF_INET_LOCAL_OUT)) 29 (1 << NF_INET_LOCAL_OUT))
30 30
31static struct 31static const struct
32{ 32{
33 struct ipt_replace repl; 33 struct ipt_replace repl;
34 struct ipt_standard entries[3]; 34 struct ipt_standard entries[3];
@@ -58,11 +58,11 @@ static struct
58 .term = IPT_ERROR_INIT, /* ERROR */ 58 .term = IPT_ERROR_INIT, /* ERROR */
59}; 59};
60 60
61static struct xt_table nat_table = { 61static const struct xt_table nat_table = {
62 .name = "nat", 62 .name = "nat",
63 .valid_hooks = NAT_VALID_HOOKS, 63 .valid_hooks = NAT_VALID_HOOKS,
64 .me = THIS_MODULE, 64 .me = THIS_MODULE,
65 .af = AF_INET, 65 .af = NFPROTO_IPV4,
66}; 66};
67 67
68/* Source NAT */ 68/* Source NAT */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 5567bd0d0750..5f41d017ddd8 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -251,7 +251,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
251 { 251 {
252 .hook = nf_nat_in, 252 .hook = nf_nat_in,
253 .owner = THIS_MODULE, 253 .owner = THIS_MODULE,
254 .pf = PF_INET, 254 .pf = NFPROTO_IPV4,
255 .hooknum = NF_INET_PRE_ROUTING, 255 .hooknum = NF_INET_PRE_ROUTING,
256 .priority = NF_IP_PRI_NAT_DST, 256 .priority = NF_IP_PRI_NAT_DST,
257 }, 257 },
@@ -259,7 +259,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
259 { 259 {
260 .hook = nf_nat_out, 260 .hook = nf_nat_out,
261 .owner = THIS_MODULE, 261 .owner = THIS_MODULE,
262 .pf = PF_INET, 262 .pf = NFPROTO_IPV4,
263 .hooknum = NF_INET_POST_ROUTING, 263 .hooknum = NF_INET_POST_ROUTING,
264 .priority = NF_IP_PRI_NAT_SRC, 264 .priority = NF_IP_PRI_NAT_SRC,
265 }, 265 },
@@ -267,7 +267,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
267 { 267 {
268 .hook = nf_nat_local_fn, 268 .hook = nf_nat_local_fn,
269 .owner = THIS_MODULE, 269 .owner = THIS_MODULE,
270 .pf = PF_INET, 270 .pf = NFPROTO_IPV4,
271 .hooknum = NF_INET_LOCAL_OUT, 271 .hooknum = NF_INET_LOCAL_OUT,
272 .priority = NF_IP_PRI_NAT_DST, 272 .priority = NF_IP_PRI_NAT_DST,
273 }, 273 },
@@ -275,7 +275,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
275 { 275 {
276 .hook = nf_nat_fn, 276 .hook = nf_nat_fn,
277 .owner = THIS_MODULE, 277 .owner = THIS_MODULE,
278 .pf = PF_INET, 278 .pf = NFPROTO_IPV4,
279 .hooknum = NF_INET_LOCAL_IN, 279 .hooknum = NF_INET_LOCAL_IN,
280 .priority = NF_IP_PRI_NAT_SRC, 280 .priority = NF_IP_PRI_NAT_SRC,
281 }, 281 },
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index ea50da0649fd..a2e5fc0a15e1 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -22,26 +22,11 @@
22 * as published by the Free Software Foundation; either version 22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version. 23 * 2 of the License, or (at your option) any later version.
24 */ 24 */
25 25#include <linux/cache.h>
26#include <asm/uaccess.h>
27#include <asm/system.h>
28#include <linux/module.h> 26#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/kernel.h>
31#include <linux/string.h>
32#include <linux/socket.h>
33#include <linux/in.h>
34#include <linux/inet.h>
35#include <linux/netdevice.h> 27#include <linux/netdevice.h>
36#include <linux/timer.h> 28#include <linux/spinlock.h>
37#include <net/ip.h>
38#include <net/protocol.h> 29#include <net/protocol.h>
39#include <linux/skbuff.h>
40#include <net/sock.h>
41#include <net/icmp.h>
42#include <net/udp.h>
43#include <net/ipip.h>
44#include <linux/igmp.h>
45 30
46struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; 31struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
47static DEFINE_SPINLOCK(inet_proto_lock); 32static DEFINE_SPINLOCK(inet_proto_lock);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2979f14bb188..ebb1e5848bc6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
375 err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 375 err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
376 dst_output); 376 dst_output);
377 if (err > 0) 377 if (err > 0)
378 err = inet->recverr ? net_xmit_errno(err) : 0; 378 err = net_xmit_errno(err);
379 if (err) 379 if (err)
380 goto error; 380 goto error;
381out: 381out:
@@ -386,6 +386,8 @@ error_fault:
386 kfree_skb(skb); 386 kfree_skb(skb);
387error: 387error:
388 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); 388 IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
389 if (err == -ENOBUFS && !inet->recverr)
390 err = 0;
389 return err; 391 return err;
390} 392}
391 393
@@ -576,8 +578,11 @@ back_from_confirm:
576 &ipc, &rt, msg->msg_flags); 578 &ipc, &rt, msg->msg_flags);
577 if (err) 579 if (err)
578 ip_flush_pending_frames(sk); 580 ip_flush_pending_frames(sk);
579 else if (!(msg->msg_flags & MSG_MORE)) 581 else if (!(msg->msg_flags & MSG_MORE)) {
580 err = ip_push_pending_frames(sk); 582 err = ip_push_pending_frames(sk);
583 if (err == -ENOBUFS && !inet->recverr)
584 err = 0;
585 }
581 release_sock(sk); 586 release_sock(sk);
582 } 587 }
583done: 588done:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 278f46f5011b..91867d3e6328 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1514void ip_rt_send_redirect(struct sk_buff *skb) 1514void ip_rt_send_redirect(struct sk_buff *skb)
1515{ 1515{
1516 struct rtable *rt = skb_rtable(skb); 1516 struct rtable *rt = skb_rtable(skb);
1517 struct in_device *in_dev = in_dev_get(rt->u.dst.dev); 1517 struct in_device *in_dev;
1518 int log_martians;
1518 1519
1519 if (!in_dev) 1520 rcu_read_lock();
1521 in_dev = __in_dev_get_rcu(rt->u.dst.dev);
1522 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1523 rcu_read_unlock();
1520 return; 1524 return;
1521 1525 }
1522 if (!IN_DEV_TX_REDIRECTS(in_dev)) 1526 log_martians = IN_DEV_LOG_MARTIANS(in_dev);
1523 goto out; 1527 rcu_read_unlock();
1524 1528
1525 /* No redirected packets during ip_rt_redirect_silence; 1529 /* No redirected packets during ip_rt_redirect_silence;
1526 * reset the algorithm. 1530 * reset the algorithm.
@@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1533 */ 1537 */
1534 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1538 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
1535 rt->u.dst.rate_last = jiffies; 1539 rt->u.dst.rate_last = jiffies;
1536 goto out; 1540 return;
1537 } 1541 }
1538 1542
1539 /* Check for load limit; set rate_last to the latest sent 1543 /* Check for load limit; set rate_last to the latest sent
@@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1547 rt->u.dst.rate_last = jiffies; 1551 rt->u.dst.rate_last = jiffies;
1548 ++rt->u.dst.rate_tokens; 1552 ++rt->u.dst.rate_tokens;
1549#ifdef CONFIG_IP_ROUTE_VERBOSE 1553#ifdef CONFIG_IP_ROUTE_VERBOSE
1550 if (IN_DEV_LOG_MARTIANS(in_dev) && 1554 if (log_martians &&
1551 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1555 rt->u.dst.rate_tokens == ip_rt_redirect_number &&
1552 net_ratelimit()) 1556 net_ratelimit())
1553 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1557 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
@@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1555 &rt->rt_dst, &rt->rt_gateway); 1559 &rt->rt_dst, &rt->rt_gateway);
1556#endif 1560#endif
1557 } 1561 }
1558out:
1559 in_dev_put(in_dev);
1560} 1562}
1561 1563
1562static int ip_error(struct sk_buff *skb) 1564static int ip_error(struct sk_buff *skb)
@@ -3442,7 +3444,7 @@ int __init ip_rt_init(void)
3442 printk(KERN_ERR "Unable to create route proc files\n"); 3444 printk(KERN_ERR "Unable to create route proc files\n");
3443#ifdef CONFIG_XFRM 3445#ifdef CONFIG_XFRM
3444 xfrm_init(); 3446 xfrm_init();
3445 xfrm4_init(); 3447 xfrm4_init(ip_rt_max_size);
3446#endif 3448#endif
3447 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); 3449 rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
3448 3450
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 91145244ea63..edeea060db44 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1839,7 +1839,7 @@ void tcp_close(struct sock *sk, long timeout)
1839 /* Unread data was tossed, zap the connection. */ 1839 /* Unread data was tossed, zap the connection. */
1840 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); 1840 NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
1841 tcp_set_state(sk, TCP_CLOSE); 1841 tcp_set_state(sk, TCP_CLOSE);
1842 tcp_send_active_reset(sk, GFP_KERNEL); 1842 tcp_send_active_reset(sk, sk->sk_allocation);
1843 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 1843 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
1844 /* Check zero linger _after_ checking for unread data. */ 1844 /* Check zero linger _after_ checking for unread data. */
1845 sk->sk_prot->disconnect(sk, 0); 1845 sk->sk_prot->disconnect(sk, 0);
@@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2336 val = !!(tp->nonagle&TCP_NAGLE_CORK); 2336 val = !!(tp->nonagle&TCP_NAGLE_CORK);
2337 break; 2337 break;
2338 case TCP_KEEPIDLE: 2338 case TCP_KEEPIDLE:
2339 val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; 2339 val = keepalive_time_when(tp) / HZ;
2340 break; 2340 break;
2341 case TCP_KEEPINTVL: 2341 case TCP_KEEPINTVL:
2342 val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ; 2342 val = keepalive_intvl_when(tp) / HZ;
2343 break; 2343 break;
2344 case TCP_KEEPCNT: 2344 case TCP_KEEPCNT:
2345 val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; 2345 val = keepalive_probes(tp);
2346 break; 2346 break;
2347 case TCP_SYNCNT: 2347 case TCP_SYNCNT:
2348 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 2348 val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
@@ -2658,7 +2658,7 @@ void tcp_free_md5sig_pool(void)
2658 2658
2659EXPORT_SYMBOL(tcp_free_md5sig_pool); 2659EXPORT_SYMBOL(tcp_free_md5sig_pool);
2660 2660
2661static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) 2661static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk)
2662{ 2662{
2663 int cpu; 2663 int cpu;
2664 struct tcp_md5sig_pool **pool; 2664 struct tcp_md5sig_pool **pool;
@@ -2671,7 +2671,7 @@ static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
2671 struct tcp_md5sig_pool *p; 2671 struct tcp_md5sig_pool *p;
2672 struct crypto_hash *hash; 2672 struct crypto_hash *hash;
2673 2673
2674 p = kzalloc(sizeof(*p), GFP_KERNEL); 2674 p = kzalloc(sizeof(*p), sk->sk_allocation);
2675 if (!p) 2675 if (!p)
2676 goto out_free; 2676 goto out_free;
2677 *per_cpu_ptr(pool, cpu) = p; 2677 *per_cpu_ptr(pool, cpu) = p;
@@ -2688,7 +2688,7 @@ out_free:
2688 return NULL; 2688 return NULL;
2689} 2689}
2690 2690
2691struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void) 2691struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk)
2692{ 2692{
2693 struct tcp_md5sig_pool **pool; 2693 struct tcp_md5sig_pool **pool;
2694 int alloc = 0; 2694 int alloc = 0;
@@ -2709,7 +2709,7 @@ retry:
2709 2709
2710 if (alloc) { 2710 if (alloc) {
2711 /* we cannot hold spinlock here because this may sleep. */ 2711 /* we cannot hold spinlock here because this may sleep. */
2712 struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(); 2712 struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk);
2713 spin_lock_bh(&tcp_md5sig_pool_lock); 2713 spin_lock_bh(&tcp_md5sig_pool_lock);
2714 if (!p) { 2714 if (!p) {
2715 tcp_md5sig_users--; 2715 tcp_md5sig_users--;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bdb0da237e6..af6d6fa00db1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk)
685 * is invisible. Actually, Linux-2.4 also generates erratic 685 * is invisible. Actually, Linux-2.4 also generates erratic
686 * ACKs in some circumstances. 686 * ACKs in some circumstances.
687 */ 687 */
688 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; 688 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
689 689
690 /* 2. Fixups made earlier cannot be right. 690 /* 2. Fixups made earlier cannot be right.
691 * If we do not estimate RTO correctly without them, 691 * If we do not estimate RTO correctly without them,
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk)
696 /* NOTE: clamping at TCP_RTO_MIN is not required, current algo 696 /* NOTE: clamping at TCP_RTO_MIN is not required, current algo
697 * guarantees that rto is higher. 697 * guarantees that rto is higher.
698 */ 698 */
699 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) 699 tcp_bound_rto(sk);
700 inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
701} 700}
702 701
703/* Save metrics learned by this TCP session. 702/* Save metrics learned by this TCP session.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6d88219c5e22..0543561da999 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -328,26 +328,29 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
328 * 328 *
329 */ 329 */
330 330
331void tcp_v4_err(struct sk_buff *skb, u32 info) 331void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
332{ 332{
333 struct iphdr *iph = (struct iphdr *)skb->data; 333 struct iphdr *iph = (struct iphdr *)icmp_skb->data;
334 struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); 334 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
335 struct inet_connection_sock *icsk;
335 struct tcp_sock *tp; 336 struct tcp_sock *tp;
336 struct inet_sock *inet; 337 struct inet_sock *inet;
337 const int type = icmp_hdr(skb)->type; 338 const int type = icmp_hdr(icmp_skb)->type;
338 const int code = icmp_hdr(skb)->code; 339 const int code = icmp_hdr(icmp_skb)->code;
339 struct sock *sk; 340 struct sock *sk;
341 struct sk_buff *skb;
340 __u32 seq; 342 __u32 seq;
343 __u32 remaining;
341 int err; 344 int err;
342 struct net *net = dev_net(skb->dev); 345 struct net *net = dev_net(icmp_skb->dev);
343 346
344 if (skb->len < (iph->ihl << 2) + 8) { 347 if (icmp_skb->len < (iph->ihl << 2) + 8) {
345 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 348 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
346 return; 349 return;
347 } 350 }
348 351
349 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, 352 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
350 iph->saddr, th->source, inet_iif(skb)); 353 iph->saddr, th->source, inet_iif(icmp_skb));
351 if (!sk) { 354 if (!sk) {
352 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 355 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
353 return; 356 return;
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
367 if (sk->sk_state == TCP_CLOSE) 370 if (sk->sk_state == TCP_CLOSE)
368 goto out; 371 goto out;
369 372
373 icsk = inet_csk(sk);
370 tp = tcp_sk(sk); 374 tp = tcp_sk(sk);
371 seq = ntohl(th->seq); 375 seq = ntohl(th->seq);
372 if (sk->sk_state != TCP_LISTEN && 376 if (sk->sk_state != TCP_LISTEN &&
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
393 } 397 }
394 398
395 err = icmp_err_convert[code].errno; 399 err = icmp_err_convert[code].errno;
400 /* check if icmp_skb allows revert of backoff
401 * (see draft-zimmermann-tcp-lcd) */
402 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
403 break;
404 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
405 !icsk->icsk_backoff)
406 break;
407
408 icsk->icsk_backoff--;
409 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
410 icsk->icsk_backoff;
411 tcp_bound_rto(sk);
412
413 skb = tcp_write_queue_head(sk);
414 BUG_ON(!skb);
415
416 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
417 tcp_time_stamp - TCP_SKB_CB(skb)->when);
418
419 if (remaining) {
420 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
421 remaining, TCP_RTO_MAX);
422 } else if (sock_owned_by_user(sk)) {
423 /* RTO revert clocked out retransmission,
424 * but socket is locked. Will defer. */
425 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
426 HZ/20, TCP_RTO_MAX);
427 } else {
428 /* RTO revert clocked out retransmission.
429 * Will retransmit now */
430 tcp_retransmit_timer(sk);
431 }
432
396 break; 433 break;
397 case ICMP_TIME_EXCEEDED: 434 case ICMP_TIME_EXCEEDED:
398 err = EHOSTUNREACH; 435 err = EHOSTUNREACH;
@@ -849,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
849 } 886 }
850 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 887 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
851 } 888 }
852 if (tcp_alloc_md5sig_pool() == NULL) { 889 if (tcp_alloc_md5sig_pool(sk) == NULL) {
853 kfree(newkey); 890 kfree(newkey);
854 return -ENOMEM; 891 return -ENOMEM;
855 } 892 }
@@ -970,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
970 1007
971 if (!tcp_sk(sk)->md5sig_info) { 1008 if (!tcp_sk(sk)->md5sig_info) {
972 struct tcp_sock *tp = tcp_sk(sk); 1009 struct tcp_sock *tp = tcp_sk(sk);
973 struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); 1010 struct tcp_md5sig_info *p;
974 1011
1012 p = kzalloc(sizeof(*p), sk->sk_allocation);
975 if (!p) 1013 if (!p)
976 return -EINVAL; 1014 return -EINVAL;
977 1015
@@ -979,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
979 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1017 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
980 } 1018 }
981 1019
982 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 1020 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
983 if (!newkey) 1021 if (!newkey)
984 return -ENOMEM; 1022 return -ENOMEM;
985 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, 1023 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
@@ -1158,7 +1196,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1158}; 1196};
1159 1197
1160#ifdef CONFIG_TCP_MD5SIG 1198#ifdef CONFIG_TCP_MD5SIG
1161static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { 1199static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1162 .md5_lookup = tcp_v4_reqsk_md5_lookup, 1200 .md5_lookup = tcp_v4_reqsk_md5_lookup,
1163 .calc_md5_hash = tcp_v4_md5_hash_skb, 1201 .calc_md5_hash = tcp_v4_md5_hash_skb,
1164}; 1202};
@@ -1717,7 +1755,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1717 return 0; 1755 return 0;
1718} 1756}
1719 1757
1720struct inet_connection_sock_af_ops ipv4_specific = { 1758const struct inet_connection_sock_af_ops ipv4_specific = {
1721 .queue_xmit = ip_queue_xmit, 1759 .queue_xmit = ip_queue_xmit,
1722 .send_check = tcp_v4_send_check, 1760 .send_check = tcp_v4_send_check,
1723 .rebuild_header = inet_sk_rebuild_header, 1761 .rebuild_header = inet_sk_rebuild_header,
@@ -1737,7 +1775,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
1737}; 1775};
1738 1776
1739#ifdef CONFIG_TCP_MD5SIG 1777#ifdef CONFIG_TCP_MD5SIG
1740static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1778static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1741 .md5_lookup = tcp_v4_md5_lookup, 1779 .md5_lookup = tcp_v4_md5_lookup,
1742 .calc_md5_hash = tcp_v4_md5_hash_skb, 1780 .calc_md5_hash = tcp_v4_md5_hash_skb,
1743 .md5_add = tcp_v4_md5_add_func, 1781 .md5_add = tcp_v4_md5_add_func,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f8d67ccc64f3..e48c37d74d77 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -322,7 +322,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
322 if (key != NULL) { 322 if (key != NULL) {
323 memcpy(&tcptw->tw_md5_key, key->key, key->keylen); 323 memcpy(&tcptw->tw_md5_key, key->key, key->keylen);
324 tcptw->tw_md5_keylen = key->keylen; 324 tcptw->tw_md5_keylen = key->keylen;
325 if (tcp_alloc_md5sig_pool() == NULL) 325 if (tcp_alloc_md5sig_pool(sk) == NULL)
326 BUG(); 326 BUG();
327 } 327 }
328 } while (0); 328 } while (0);
@@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
657 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); 657 child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
658 if (child == NULL) 658 if (child == NULL)
659 goto listen_overflow; 659 goto listen_overflow;
660#ifdef CONFIG_TCP_MD5SIG
661 else {
662 /* Copy over the MD5 key from the original socket */
663 struct tcp_md5sig_key *key;
664 struct tcp_sock *tp = tcp_sk(sk);
665 key = tp->af_specific->md5_lookup(sk, child);
666 if (key != NULL) {
667 /*
668 * We're using one, so create a matching key on the
669 * newsk structure. If we fail to get memory then we
670 * end up not copying the key across. Shucks.
671 */
672 char *newkey = kmemdup(key->key, key->keylen,
673 GFP_ATOMIC);
674 if (newkey) {
675 if (!tcp_alloc_md5sig_pool())
676 BUG();
677 tp->af_specific->md5_add(child, child, newkey,
678 key->keylen);
679 }
680 }
681 }
682#endif
683 660
684 inet_csk_reqsk_queue_unlink(sk, req, prev); 661 inet_csk_reqsk_queue_unlink(sk, req, prev);
685 inet_csk_reqsk_queue_removed(sk, req); 662 inet_csk_reqsk_queue_removed(sk, req);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bd62712848fa..5200aab0ca97 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,6 +59,7 @@ int sysctl_tcp_base_mss __read_mostly = 512;
59/* By default, RFC2861 behavior. */ 59/* By default, RFC2861 behavior. */
60int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 60int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
61 61
62/* Account for new data that has been sent to the network. */
62static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) 63static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
63{ 64{
64 struct tcp_sock *tp = tcp_sk(sk); 65 struct tcp_sock *tp = tcp_sk(sk);
@@ -142,6 +143,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
142 tp->snd_cwnd_used = 0; 143 tp->snd_cwnd_used = 0;
143} 144}
144 145
146/* Congestion state accounting after a packet has been sent. */
145static void tcp_event_data_sent(struct tcp_sock *tp, 147static void tcp_event_data_sent(struct tcp_sock *tp,
146 struct sk_buff *skb, struct sock *sk) 148 struct sk_buff *skb, struct sock *sk)
147{ 149{
@@ -161,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
161 icsk->icsk_ack.pingpong = 1; 163 icsk->icsk_ack.pingpong = 1;
162} 164}
163 165
166/* Account for an ACK we sent. */
164static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 167static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
165{ 168{
166 tcp_dec_quickack_mode(sk, pkts); 169 tcp_dec_quickack_mode(sk, pkts);
@@ -276,6 +279,7 @@ static u16 tcp_select_window(struct sock *sk)
276 return new_win; 279 return new_win;
277} 280}
278 281
282/* Packet ECN state for a SYN-ACK */
279static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) 283static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
280{ 284{
281 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 285 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
@@ -283,6 +287,7 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
283 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 287 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
284} 288}
285 289
290/* Packet ECN state for a SYN. */
286static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) 291static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
287{ 292{
288 struct tcp_sock *tp = tcp_sk(sk); 293 struct tcp_sock *tp = tcp_sk(sk);
@@ -301,6 +306,9 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
301 th->ece = 1; 306 th->ece = 1;
302} 307}
303 308
309/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
310 * be sent.
311 */
304static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, 312static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
305 int tcp_header_len) 313 int tcp_header_len)
306{ 314{
@@ -362,7 +370,9 @@ struct tcp_out_options {
362 __u32 tsval, tsecr; /* need to include OPTION_TS */ 370 __u32 tsval, tsecr; /* need to include OPTION_TS */
363}; 371};
364 372
365/* Beware: Something in the Internet is very sensitive to the ordering of 373/* Write previously computed TCP options to the packet.
374 *
375 * Beware: Something in the Internet is very sensitive to the ordering of
366 * TCP options, we learned this through the hard way, so be careful here. 376 * TCP options, we learned this through the hard way, so be careful here.
367 * Luckily we can at least blame others for their non-compliance but from 377 * Luckily we can at least blame others for their non-compliance but from
368 * inter-operatibility perspective it seems that we're somewhat stuck with 378 * inter-operatibility perspective it seems that we're somewhat stuck with
@@ -445,6 +455,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
445 } 455 }
446} 456}
447 457
458/* Compute TCP options for SYN packets. This is not the final
459 * network wire format yet.
460 */
448static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, 461static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
449 struct tcp_out_options *opts, 462 struct tcp_out_options *opts,
450 struct tcp_md5sig_key **md5) { 463 struct tcp_md5sig_key **md5) {
@@ -493,6 +506,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
493 return size; 506 return size;
494} 507}
495 508
509/* Set up TCP options for SYN-ACKs. */
496static unsigned tcp_synack_options(struct sock *sk, 510static unsigned tcp_synack_options(struct sock *sk,
497 struct request_sock *req, 511 struct request_sock *req,
498 unsigned mss, struct sk_buff *skb, 512 unsigned mss, struct sk_buff *skb,
@@ -541,6 +555,9 @@ static unsigned tcp_synack_options(struct sock *sk,
541 return size; 555 return size;
542} 556}
543 557
558/* Compute TCP options for ESTABLISHED sockets. This is not the
559 * final wire format yet.
560 */
544static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, 561static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
545 struct tcp_out_options *opts, 562 struct tcp_out_options *opts,
546 struct tcp_md5sig_key **md5) { 563 struct tcp_md5sig_key **md5) {
@@ -705,7 +722,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
705 return net_xmit_eval(err); 722 return net_xmit_eval(err);
706} 723}
707 724
708/* This routine just queue's the buffer 725/* This routine just queues the buffer for sending.
709 * 726 *
710 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, 727 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
711 * otherwise socket can stall. 728 * otherwise socket can stall.
@@ -722,6 +739,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
722 sk_mem_charge(sk, skb->truesize); 739 sk_mem_charge(sk, skb->truesize);
723} 740}
724 741
742/* Initialize TSO segments for a packet. */
725static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, 743static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
726 unsigned int mss_now) 744 unsigned int mss_now)
727{ 745{
@@ -909,6 +927,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
909 skb->len = skb->data_len; 927 skb->len = skb->data_len;
910} 928}
911 929
930/* Remove acked data from a packet in the transmit queue. */
912int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) 931int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
913{ 932{
914 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 933 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -937,7 +956,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
937 return 0; 956 return 0;
938} 957}
939 958
940/* Not accounting for SACKs here. */ 959/* Calculate MSS. Not accounting for SACKs here. */
941int tcp_mtu_to_mss(struct sock *sk, int pmtu) 960int tcp_mtu_to_mss(struct sock *sk, int pmtu)
942{ 961{
943 struct tcp_sock *tp = tcp_sk(sk); 962 struct tcp_sock *tp = tcp_sk(sk);
@@ -981,6 +1000,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
981 return mtu; 1000 return mtu;
982} 1001}
983 1002
1003/* MTU probing init per socket */
984void tcp_mtup_init(struct sock *sk) 1004void tcp_mtup_init(struct sock *sk)
985{ 1005{
986 struct tcp_sock *tp = tcp_sk(sk); 1006 struct tcp_sock *tp = tcp_sk(sk);
@@ -1143,7 +1163,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1143 return 0; 1163 return 0;
1144} 1164}
1145 1165
1146/* This must be invoked the first time we consider transmitting 1166/* Intialize TSO state of a skb.
1167 * This must be invoked the first time we consider transmitting
1147 * SKB onto the wire. 1168 * SKB onto the wire.
1148 */ 1169 */
1149static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, 1170static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
@@ -1158,6 +1179,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
1158 return tso_segs; 1179 return tso_segs;
1159} 1180}
1160 1181
1182/* Minshall's variant of the Nagle send check. */
1161static inline int tcp_minshall_check(const struct tcp_sock *tp) 1183static inline int tcp_minshall_check(const struct tcp_sock *tp)
1162{ 1184{
1163 return after(tp->snd_sml, tp->snd_una) && 1185 return after(tp->snd_sml, tp->snd_una) &&
@@ -1242,6 +1264,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1242 return cwnd_quota; 1264 return cwnd_quota;
1243} 1265}
1244 1266
1267/* Test if sending is allowed right now. */
1245int tcp_may_send_now(struct sock *sk) 1268int tcp_may_send_now(struct sock *sk)
1246{ 1269{
1247 struct tcp_sock *tp = tcp_sk(sk); 1270 struct tcp_sock *tp = tcp_sk(sk);
@@ -1378,6 +1401,10 @@ send_now:
1378} 1401}
1379 1402
1380/* Create a new MTU probe if we are ready. 1403/* Create a new MTU probe if we are ready.
1404 * MTU probe is regularly attempting to increase the path MTU by
1405 * deliberately sending larger packets. This discovers routing
1406 * changes resulting in larger path MTUs.
1407 *
1381 * Returns 0 if we should wait to probe (no cwnd available), 1408 * Returns 0 if we should wait to probe (no cwnd available),
1382 * 1 if a probe was sent, 1409 * 1 if a probe was sent,
1383 * -1 otherwise 1410 * -1 otherwise
@@ -1790,6 +1817,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1790 sk_wmem_free_skb(sk, next_skb); 1817 sk_wmem_free_skb(sk, next_skb);
1791} 1818}
1792 1819
1820/* Check if coalescing SKBs is legal. */
1793static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) 1821static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
1794{ 1822{
1795 if (tcp_skb_pcount(skb) > 1) 1823 if (tcp_skb_pcount(skb) > 1)
@@ -1808,6 +1836,9 @@ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
1808 return 1; 1836 return 1;
1809} 1837}
1810 1838
1839/* Collapse packets in the retransmit queue to make to create
1840 * less packets on the wire. This is only done on retransmission.
1841 */
1811static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, 1842static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
1812 int space) 1843 int space)
1813{ 1844{
@@ -1957,6 +1988,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1957 return err; 1988 return err;
1958} 1989}
1959 1990
1991/* Check if we forward retransmits are possible in the current
1992 * window/congestion state.
1993 */
1960static int tcp_can_forward_retransmit(struct sock *sk) 1994static int tcp_can_forward_retransmit(struct sock *sk)
1961{ 1995{
1962 const struct inet_connection_sock *icsk = inet_csk(sk); 1996 const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2101,7 +2135,8 @@ void tcp_send_fin(struct sock *sk)
2101 } else { 2135 } else {
2102 /* Socket is locked, keep trying until memory is available. */ 2136 /* Socket is locked, keep trying until memory is available. */
2103 for (;;) { 2137 for (;;) {
2104 skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); 2138 skb = alloc_skb_fclone(MAX_TCP_HEADER,
2139 sk->sk_allocation);
2105 if (skb) 2140 if (skb)
2106 break; 2141 break;
2107 yield(); 2142 yield();
@@ -2145,7 +2180,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2145 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); 2180 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
2146} 2181}
2147 2182
2148/* WARNING: This routine must only be called when we have already sent 2183/* Send a crossed SYN-ACK during socket establishment.
2184 * WARNING: This routine must only be called when we have already sent
2149 * a SYN packet that crossed the incoming SYN that caused this routine 2185 * a SYN packet that crossed the incoming SYN that caused this routine
2150 * to get called. If this assumption fails then the initial rcv_wnd 2186 * to get called. If this assumption fails then the initial rcv_wnd
2151 * and rcv_wscale values will not be correct. 2187 * and rcv_wscale values will not be correct.
@@ -2180,9 +2216,7 @@ int tcp_send_synack(struct sock *sk)
2180 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2216 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2181} 2217}
2182 2218
2183/* 2219/* Prepare a SYN-ACK. */
2184 * Prepare a SYN-ACK.
2185 */
2186struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2220struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2187 struct request_sock *req) 2221 struct request_sock *req)
2188{ 2222{
@@ -2269,9 +2303,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2269 return skb; 2303 return skb;
2270} 2304}
2271 2305
2272/* 2306/* Do all connect socket setups that can be done AF independent. */
2273 * Do all connect socket setups that can be done AF independent.
2274 */
2275static void tcp_connect_init(struct sock *sk) 2307static void tcp_connect_init(struct sock *sk)
2276{ 2308{
2277 struct dst_entry *dst = __sk_dst_get(sk); 2309 struct dst_entry *dst = __sk_dst_get(sk);
@@ -2330,9 +2362,7 @@ static void tcp_connect_init(struct sock *sk)
2330 tcp_clear_retrans(tp); 2362 tcp_clear_retrans(tp);
2331} 2363}
2332 2364
2333/* 2365/* Build a SYN and send it off. */
2334 * Build a SYN and send it off.
2335 */
2336int tcp_connect(struct sock *sk) 2366int tcp_connect(struct sock *sk)
2337{ 2367{
2338 struct tcp_sock *tp = tcp_sk(sk); 2368 struct tcp_sock *tp = tcp_sk(sk);
@@ -2359,7 +2389,7 @@ int tcp_connect(struct sock *sk)
2359 sk->sk_wmem_queued += buff->truesize; 2389 sk->sk_wmem_queued += buff->truesize;
2360 sk_mem_charge(sk, buff->truesize); 2390 sk_mem_charge(sk, buff->truesize);
2361 tp->packets_out += tcp_skb_pcount(buff); 2391 tp->packets_out += tcp_skb_pcount(buff);
2362 tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); 2392 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
2363 2393
2364 /* We change tp->snd_nxt after the tcp_transmit_skb() call 2394 /* We change tp->snd_nxt after the tcp_transmit_skb() call
2365 * in order to make this packet get counted in tcpOutSegs. 2395 * in order to make this packet get counted in tcpOutSegs.
@@ -2493,6 +2523,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2493 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2523 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2494} 2524}
2495 2525
2526/* Initiate keepalive or window probe from timer. */
2496int tcp_write_wakeup(struct sock *sk) 2527int tcp_write_wakeup(struct sock *sk)
2497{ 2528{
2498 struct tcp_sock *tp = tcp_sk(sk); 2529 struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b144a26359bc..cdb2ca7684d4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk)
137{ 137{
138 struct inet_connection_sock *icsk = inet_csk(sk); 138 struct inet_connection_sock *icsk = inet_csk(sk);
139 int retry_until; 139 int retry_until;
140 bool do_reset;
140 141
141 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 142 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
142 if (icsk->icsk_retransmits) 143 if (icsk->icsk_retransmits)
143 dst_negative_advice(&sk->sk_dst_cache); 144 dst_negative_advice(&sk->sk_dst_cache);
144 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 145 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
145 } else { 146 } else {
146 if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { 147 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
147 /* Black hole detection */ 148 /* Black hole detection */
148 tcp_mtu_probing(icsk, sk); 149 tcp_mtu_probing(icsk, sk);
149 150
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk)
155 const int alive = (icsk->icsk_rto < TCP_RTO_MAX); 156 const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
156 157
157 retry_until = tcp_orphan_retries(sk, alive); 158 retry_until = tcp_orphan_retries(sk, alive);
159 do_reset = alive ||
160 !retransmits_timed_out(sk, retry_until);
158 161
159 if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) 162 if (tcp_out_of_resources(sk, do_reset))
160 return 1; 163 return 1;
161 } 164 }
162 } 165 }
163 166
164 if (icsk->icsk_retransmits >= retry_until) { 167 if (retransmits_timed_out(sk, retry_until)) {
165 /* Has it gone just too far? */ 168 /* Has it gone just too far? */
166 tcp_write_err(sk); 169 tcp_write_err(sk);
167 return 1; 170 return 1;
@@ -279,7 +282,7 @@ static void tcp_probe_timer(struct sock *sk)
279 * The TCP retransmit timer. 282 * The TCP retransmit timer.
280 */ 283 */
281 284
282static void tcp_retransmit_timer(struct sock *sk) 285void tcp_retransmit_timer(struct sock *sk)
283{ 286{
284 struct tcp_sock *tp = tcp_sk(sk); 287 struct tcp_sock *tp = tcp_sk(sk);
285 struct inet_connection_sock *icsk = inet_csk(sk); 288 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -385,7 +388,7 @@ static void tcp_retransmit_timer(struct sock *sk)
385out_reset_timer: 388out_reset_timer:
386 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 389 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
387 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 390 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
388 if (icsk->icsk_retransmits > sysctl_tcp_retries1) 391 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
389 __sk_dst_reset(sk); 392 __sk_dst_reset(sk);
390 393
391out:; 394out:;
@@ -499,8 +502,7 @@ static void tcp_keepalive_timer (unsigned long data)
499 elapsed = tcp_time_stamp - tp->rcv_tstamp; 502 elapsed = tcp_time_stamp - tp->rcv_tstamp;
500 503
501 if (elapsed >= keepalive_time_when(tp)) { 504 if (elapsed >= keepalive_time_when(tp)) {
502 if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || 505 if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
503 (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
504 tcp_send_active_reset(sk, GFP_ATOMIC); 506 tcp_send_active_reset(sk, GFP_ATOMIC);
505 tcp_write_err(sk); 507 tcp_write_err(sk);
506 goto out; 508 goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 80e3812837ad..ebaaa7f973d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -110,11 +110,12 @@ struct udp_table udp_table;
110EXPORT_SYMBOL(udp_table); 110EXPORT_SYMBOL(udp_table);
111 111
112int sysctl_udp_mem[3] __read_mostly; 112int sysctl_udp_mem[3] __read_mostly;
113int sysctl_udp_rmem_min __read_mostly;
114int sysctl_udp_wmem_min __read_mostly;
115
116EXPORT_SYMBOL(sysctl_udp_mem); 113EXPORT_SYMBOL(sysctl_udp_mem);
114
115int sysctl_udp_rmem_min __read_mostly;
117EXPORT_SYMBOL(sysctl_udp_rmem_min); 116EXPORT_SYMBOL(sysctl_udp_rmem_min);
117
118int sysctl_udp_wmem_min __read_mostly;
118EXPORT_SYMBOL(sysctl_udp_wmem_min); 119EXPORT_SYMBOL(sysctl_udp_wmem_min);
119 120
120atomic_t udp_memory_allocated; 121atomic_t udp_memory_allocated;
@@ -158,7 +159,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
158 */ 159 */
159int udp_lib_get_port(struct sock *sk, unsigned short snum, 160int udp_lib_get_port(struct sock *sk, unsigned short snum,
160 int (*saddr_comp)(const struct sock *sk1, 161 int (*saddr_comp)(const struct sock *sk1,
161 const struct sock *sk2 ) ) 162 const struct sock *sk2))
162{ 163{
163 struct udp_hslot *hslot; 164 struct udp_hslot *hslot;
164 struct udp_table *udptable = sk->sk_prot->h.udp_table; 165 struct udp_table *udptable = sk->sk_prot->h.udp_table;
@@ -221,14 +222,15 @@ fail_unlock:
221fail: 222fail:
222 return error; 223 return error;
223} 224}
225EXPORT_SYMBOL(udp_lib_get_port);
224 226
225static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 227static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
226{ 228{
227 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 229 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
228 230
229 return ( !ipv6_only_sock(sk2) && 231 return (!ipv6_only_sock(sk2) &&
230 (!inet1->rcv_saddr || !inet2->rcv_saddr || 232 (!inet1->rcv_saddr || !inet2->rcv_saddr ||
231 inet1->rcv_saddr == inet2->rcv_saddr )); 233 inet1->rcv_saddr == inet2->rcv_saddr));
232} 234}
233 235
234int udp_v4_get_port(struct sock *sk, unsigned short snum) 236int udp_v4_get_port(struct sock *sk, unsigned short snum)
@@ -383,8 +385,8 @@ found:
383void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 385void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
384{ 386{
385 struct inet_sock *inet; 387 struct inet_sock *inet;
386 struct iphdr *iph = (struct iphdr*)skb->data; 388 struct iphdr *iph = (struct iphdr *)skb->data;
387 struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); 389 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
388 const int type = icmp_hdr(skb)->type; 390 const int type = icmp_hdr(skb)->type;
389 const int code = icmp_hdr(skb)->code; 391 const int code = icmp_hdr(skb)->code;
390 struct sock *sk; 392 struct sock *sk;
@@ -439,7 +441,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
439 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 441 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
440 goto out; 442 goto out;
441 } else { 443 } else {
442 ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); 444 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
443 } 445 }
444 sk->sk_err = err; 446 sk->sk_err = err;
445 sk->sk_error_report(sk); 447 sk->sk_error_report(sk);
@@ -474,7 +476,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
474 * (checksum field must be zeroed out) 476 * (checksum field must be zeroed out)
475 */ 477 */
476static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, 478static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
477 __be32 src, __be32 dst, int len ) 479 __be32 src, __be32 dst, int len)
478{ 480{
479 unsigned int offset; 481 unsigned int offset;
480 struct udphdr *uh = udp_hdr(skb); 482 struct udphdr *uh = udp_hdr(skb);
@@ -545,7 +547,7 @@ static int udp_push_pending_frames(struct sock *sk)
545 547
546 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 548 } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
547 549
548 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); 550 udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len);
549 goto send; 551 goto send;
550 552
551 } else /* `normal' UDP */ 553 } else /* `normal' UDP */
@@ -553,18 +555,24 @@ static int udp_push_pending_frames(struct sock *sk)
553 555
554 /* add protocol-dependent pseudo-header */ 556 /* add protocol-dependent pseudo-header */
555 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, 557 uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
556 sk->sk_protocol, csum ); 558 sk->sk_protocol, csum);
557 if (uh->check == 0) 559 if (uh->check == 0)
558 uh->check = CSUM_MANGLED_0; 560 uh->check = CSUM_MANGLED_0;
559 561
560send: 562send:
561 err = ip_push_pending_frames(sk); 563 err = ip_push_pending_frames(sk);
564 if (err) {
565 if (err == -ENOBUFS && !inet->recverr) {
566 UDP_INC_STATS_USER(sock_net(sk),
567 UDP_MIB_SNDBUFERRORS, is_udplite);
568 err = 0;
569 }
570 } else
571 UDP_INC_STATS_USER(sock_net(sk),
572 UDP_MIB_OUTDATAGRAMS, is_udplite);
562out: 573out:
563 up->len = 0; 574 up->len = 0;
564 up->pending = 0; 575 up->pending = 0;
565 if (!err)
566 UDP_INC_STATS_USER(sock_net(sk),
567 UDP_MIB_OUTDATAGRAMS, is_udplite);
568 return err; 576 return err;
569} 577}
570 578
@@ -592,7 +600,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
592 * Check the flags. 600 * Check the flags.
593 */ 601 */
594 602
595 if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ 603 if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
596 return -EOPNOTSUPP; 604 return -EOPNOTSUPP;
597 605
598 ipc.opt = NULL; 606 ipc.opt = NULL;
@@ -619,7 +627,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
619 * Get and verify the address. 627 * Get and verify the address.
620 */ 628 */
621 if (msg->msg_name) { 629 if (msg->msg_name) {
622 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; 630 struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name;
623 if (msg->msg_namelen < sizeof(*usin)) 631 if (msg->msg_namelen < sizeof(*usin))
624 return -EINVAL; 632 return -EINVAL;
625 if (usin->sin_family != AF_INET) { 633 if (usin->sin_family != AF_INET) {
@@ -684,7 +692,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
684 } 692 }
685 693
686 if (connected) 694 if (connected)
687 rt = (struct rtable*)sk_dst_check(sk, 0); 695 rt = (struct rtable *)sk_dst_check(sk, 0);
688 696
689 if (rt == NULL) { 697 if (rt == NULL) {
690 struct flowi fl = { .oif = ipc.oif, 698 struct flowi fl = { .oif = ipc.oif,
@@ -782,6 +790,7 @@ do_confirm:
782 err = 0; 790 err = 0;
783 goto out; 791 goto out;
784} 792}
793EXPORT_SYMBOL(udp_sendmsg);
785 794
786int udp_sendpage(struct sock *sk, struct page *page, int offset, 795int udp_sendpage(struct sock *sk, struct page *page, int offset,
787 size_t size, int flags) 796 size_t size, int flags)
@@ -871,6 +880,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
871 880
872 return 0; 881 return 0;
873} 882}
883EXPORT_SYMBOL(udp_ioctl);
874 884
875/* 885/*
876 * This should be easy, if there is something there we 886 * This should be easy, if there is something there we
@@ -892,7 +902,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
892 * Check any passed addresses 902 * Check any passed addresses
893 */ 903 */
894 if (addr_len) 904 if (addr_len)
895 *addr_len=sizeof(*sin); 905 *addr_len = sizeof(*sin);
896 906
897 if (flags & MSG_ERRQUEUE) 907 if (flags & MSG_ERRQUEUE)
898 return ip_recv_error(sk, msg, len); 908 return ip_recv_error(sk, msg, len);
@@ -923,9 +933,11 @@ try_again:
923 933
924 if (skb_csum_unnecessary(skb)) 934 if (skb_csum_unnecessary(skb))
925 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 935 err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
926 msg->msg_iov, copied ); 936 msg->msg_iov, copied);
927 else { 937 else {
928 err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); 938 err = skb_copy_and_csum_datagram_iovec(skb,
939 sizeof(struct udphdr),
940 msg->msg_iov);
929 941
930 if (err == -EINVAL) 942 if (err == -EINVAL)
931 goto csum_copy_err; 943 goto csum_copy_err;
@@ -941,8 +953,7 @@ try_again:
941 sock_recv_timestamp(msg, sk, skb); 953 sock_recv_timestamp(msg, sk, skb);
942 954
943 /* Copy the address. */ 955 /* Copy the address. */
944 if (sin) 956 if (sin) {
945 {
946 sin->sin_family = AF_INET; 957 sin->sin_family = AF_INET;
947 sin->sin_port = udp_hdr(skb)->source; 958 sin->sin_port = udp_hdr(skb)->source;
948 sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 959 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
@@ -995,6 +1006,7 @@ int udp_disconnect(struct sock *sk, int flags)
995 sk_dst_reset(sk); 1006 sk_dst_reset(sk);
996 return 0; 1007 return 0;
997} 1008}
1009EXPORT_SYMBOL(udp_disconnect);
998 1010
999void udp_lib_unhash(struct sock *sk) 1011void udp_lib_unhash(struct sock *sk)
1000{ 1012{
@@ -1044,7 +1056,7 @@ drop:
1044 * Note that in the success and error cases, the skb is assumed to 1056 * Note that in the success and error cases, the skb is assumed to
1045 * have either been requeued or freed. 1057 * have either been requeued or freed.
1046 */ 1058 */
1047int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) 1059int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1048{ 1060{
1049 struct udp_sock *up = udp_sk(sk); 1061 struct udp_sock *up = udp_sk(sk);
1050 int rc; 1062 int rc;
@@ -1214,7 +1226,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
1214 if (uh->check == 0) { 1226 if (uh->check == 0) {
1215 skb->ip_summed = CHECKSUM_UNNECESSARY; 1227 skb->ip_summed = CHECKSUM_UNNECESSARY;
1216 } else if (skb->ip_summed == CHECKSUM_COMPLETE) { 1228 } else if (skb->ip_summed == CHECKSUM_COMPLETE) {
1217 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 1229 if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1218 proto, skb->csum)) 1230 proto, skb->csum))
1219 skb->ip_summed = CHECKSUM_UNNECESSARY; 1231 skb->ip_summed = CHECKSUM_UNNECESSARY;
1220 } 1232 }
@@ -1355,7 +1367,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1355 int err = 0; 1367 int err = 0;
1356 int is_udplite = IS_UDPLITE(sk); 1368 int is_udplite = IS_UDPLITE(sk);
1357 1369
1358 if (optlen<sizeof(int)) 1370 if (optlen < sizeof(int))
1359 return -EINVAL; 1371 return -EINVAL;
1360 1372
1361 if (get_user(val, (int __user *)optval)) 1373 if (get_user(val, (int __user *)optval))
@@ -1426,6 +1438,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1426 1438
1427 return err; 1439 return err;
1428} 1440}
1441EXPORT_SYMBOL(udp_lib_setsockopt);
1429 1442
1430int udp_setsockopt(struct sock *sk, int level, int optname, 1443int udp_setsockopt(struct sock *sk, int level, int optname,
1431 char __user *optval, int optlen) 1444 char __user *optval, int optlen)
@@ -1453,7 +1466,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1453 struct udp_sock *up = udp_sk(sk); 1466 struct udp_sock *up = udp_sk(sk);
1454 int val, len; 1467 int val, len;
1455 1468
1456 if (get_user(len,optlen)) 1469 if (get_user(len, optlen))
1457 return -EFAULT; 1470 return -EFAULT;
1458 1471
1459 len = min_t(unsigned int, len, sizeof(int)); 1472 len = min_t(unsigned int, len, sizeof(int));
@@ -1486,10 +1499,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
1486 1499
1487 if (put_user(len, optlen)) 1500 if (put_user(len, optlen))
1488 return -EFAULT; 1501 return -EFAULT;
1489 if (copy_to_user(optval, &val,len)) 1502 if (copy_to_user(optval, &val, len))
1490 return -EFAULT; 1503 return -EFAULT;
1491 return 0; 1504 return 0;
1492} 1505}
1506EXPORT_SYMBOL(udp_lib_getsockopt);
1493 1507
1494int udp_getsockopt(struct sock *sk, int level, int optname, 1508int udp_getsockopt(struct sock *sk, int level, int optname,
1495 char __user *optval, int __user *optlen) 1509 char __user *optval, int __user *optlen)
@@ -1528,9 +1542,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1528 int is_lite = IS_UDPLITE(sk); 1542 int is_lite = IS_UDPLITE(sk);
1529 1543
1530 /* Check for false positives due to checksum errors */ 1544 /* Check for false positives due to checksum errors */
1531 if ( (mask & POLLRDNORM) && 1545 if ((mask & POLLRDNORM) &&
1532 !(file->f_flags & O_NONBLOCK) && 1546 !(file->f_flags & O_NONBLOCK) &&
1533 !(sk->sk_shutdown & RCV_SHUTDOWN)){ 1547 !(sk->sk_shutdown & RCV_SHUTDOWN)) {
1534 struct sk_buff_head *rcvq = &sk->sk_receive_queue; 1548 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
1535 struct sk_buff *skb; 1549 struct sk_buff *skb;
1536 1550
@@ -1552,6 +1566,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
1552 return mask; 1566 return mask;
1553 1567
1554} 1568}
1569EXPORT_SYMBOL(udp_poll);
1555 1570
1556struct proto udp_prot = { 1571struct proto udp_prot = {
1557 .name = "UDP", 1572 .name = "UDP",
@@ -1582,6 +1597,7 @@ struct proto udp_prot = {
1582 .compat_getsockopt = compat_udp_getsockopt, 1597 .compat_getsockopt = compat_udp_getsockopt,
1583#endif 1598#endif
1584}; 1599};
1600EXPORT_SYMBOL(udp_prot);
1585 1601
1586/* ------------------------------------------------------------------------ */ 1602/* ------------------------------------------------------------------------ */
1587#ifdef CONFIG_PROC_FS 1603#ifdef CONFIG_PROC_FS
@@ -1703,11 +1719,13 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
1703 rc = -ENOMEM; 1719 rc = -ENOMEM;
1704 return rc; 1720 return rc;
1705} 1721}
1722EXPORT_SYMBOL(udp_proc_register);
1706 1723
1707void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) 1724void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
1708{ 1725{
1709 proc_net_remove(net, afinfo->name); 1726 proc_net_remove(net, afinfo->name);
1710} 1727}
1728EXPORT_SYMBOL(udp_proc_unregister);
1711 1729
1712/* ------------------------------------------------------------------------ */ 1730/* ------------------------------------------------------------------------ */
1713static void udp4_format_sock(struct sock *sp, struct seq_file *f, 1731static void udp4_format_sock(struct sock *sp, struct seq_file *f,
@@ -1741,7 +1759,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
1741 int len; 1759 int len;
1742 1760
1743 udp4_format_sock(v, seq, state->bucket, &len); 1761 udp4_format_sock(v, seq, state->bucket, &len);
1744 seq_printf(seq, "%*s\n", 127 - len ,""); 1762 seq_printf(seq, "%*s\n", 127 - len, "");
1745 } 1763 }
1746 return 0; 1764 return 0;
1747} 1765}
@@ -1816,16 +1834,64 @@ void __init udp_init(void)
1816 sysctl_udp_wmem_min = SK_MEM_QUANTUM; 1834 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
1817} 1835}
1818 1836
1819EXPORT_SYMBOL(udp_disconnect); 1837int udp4_ufo_send_check(struct sk_buff *skb)
1820EXPORT_SYMBOL(udp_ioctl); 1838{
1821EXPORT_SYMBOL(udp_prot); 1839 const struct iphdr *iph;
1822EXPORT_SYMBOL(udp_sendmsg); 1840 struct udphdr *uh;
1823EXPORT_SYMBOL(udp_lib_getsockopt); 1841
1824EXPORT_SYMBOL(udp_lib_setsockopt); 1842 if (!pskb_may_pull(skb, sizeof(*uh)))
1825EXPORT_SYMBOL(udp_poll); 1843 return -EINVAL;
1826EXPORT_SYMBOL(udp_lib_get_port); 1844
1845 iph = ip_hdr(skb);
1846 uh = udp_hdr(skb);
1847
1848 uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
1849 IPPROTO_UDP, 0);
1850 skb->csum_start = skb_transport_header(skb) - skb->head;
1851 skb->csum_offset = offsetof(struct udphdr, check);
1852 skb->ip_summed = CHECKSUM_PARTIAL;
1853 return 0;
1854}
1855
1856struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features)
1857{
1858 struct sk_buff *segs = ERR_PTR(-EINVAL);
1859 unsigned int mss;
1860 int offset;
1861 __wsum csum;
1862
1863 mss = skb_shinfo(skb)->gso_size;
1864 if (unlikely(skb->len <= mss))
1865 goto out;
1866
1867 if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
1868 /* Packet is from an untrusted source, reset gso_segs. */
1869 int type = skb_shinfo(skb)->gso_type;
1870
1871 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) ||
1872 !(type & (SKB_GSO_UDP))))
1873 goto out;
1874
1875 skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);
1876
1877 segs = NULL;
1878 goto out;
1879 }
1880
1881 /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
1882 * do checksum of UDP packets sent as multiple IP fragments.
1883 */
1884 offset = skb->csum_start - skb_headroom(skb);
1885 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1886 offset += skb->csum_offset;
1887 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
1888 skb->ip_summed = CHECKSUM_NONE;
1889
1890 /* Fragment the skb. IP headers of the fragments are updated in
1891 * inet_gso_segment()
1892 */
1893 segs = skb_segment(skb, features);
1894out:
1895 return segs;
1896}
1827 1897
1828#ifdef CONFIG_PROC_FS
1829EXPORT_SYMBOL(udp_proc_register);
1830EXPORT_SYMBOL(udp_proc_unregister);
1831#endif
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 0071ee6f441f..74fb2eb833ec 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -264,6 +264,22 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
264 .fill_dst = xfrm4_fill_dst, 264 .fill_dst = xfrm4_fill_dst,
265}; 265};
266 266
267#ifdef CONFIG_SYSCTL
268static struct ctl_table xfrm4_policy_table[] = {
269 {
270 .ctl_name = CTL_UNNUMBERED,
271 .procname = "xfrm4_gc_thresh",
272 .data = &xfrm4_dst_ops.gc_thresh,
273 .maxlen = sizeof(int),
274 .mode = 0644,
275 .proc_handler = proc_dointvec,
276 },
277 { }
278};
279
280static struct ctl_table_header *sysctl_hdr;
281#endif
282
267static void __init xfrm4_policy_init(void) 283static void __init xfrm4_policy_init(void)
268{ 284{
269 xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); 285 xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
@@ -271,12 +287,31 @@ static void __init xfrm4_policy_init(void)
271 287
272static void __exit xfrm4_policy_fini(void) 288static void __exit xfrm4_policy_fini(void)
273{ 289{
290#ifdef CONFIG_SYSCTL
291 if (sysctl_hdr)
292 unregister_net_sysctl_table(sysctl_hdr);
293#endif
274 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); 294 xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
275} 295}
276 296
277void __init xfrm4_init(void) 297void __init xfrm4_init(int rt_max_size)
278{ 298{
279 xfrm4_state_init(); 299 xfrm4_state_init();
280 xfrm4_policy_init(); 300 xfrm4_policy_init();
301 /*
302 * Select a default value for the gc_thresh based on the main route
303 * table hash size. It seems to me the worst case scenario is when
304 * we have ipsec operating in transport mode, in which we create a
305 * dst_entry per socket. The xfrm gc algorithm starts trying to remove
306 * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
307 * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
308 * That will let us store an ipsec connection per route table entry,
309 * and start cleaning when were 1/2 full
310 */
311 xfrm4_dst_ops.gc_thresh = rt_max_size/2;
312#ifdef CONFIG_SYSCTL
313 sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
314 xfrm4_policy_table);
315#endif
281} 316}
282 317