diff options
Diffstat (limited to 'net/ipv4')
31 files changed, 531 insertions, 362 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 566ea6c4321d..6c30a73f03f5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -124,7 +124,6 @@ static struct list_head inetsw[SOCK_MAX]; | |||
124 | static DEFINE_SPINLOCK(inetsw_lock); | 124 | static DEFINE_SPINLOCK(inetsw_lock); |
125 | 125 | ||
126 | struct ipv4_config ipv4_config; | 126 | struct ipv4_config ipv4_config; |
127 | |||
128 | EXPORT_SYMBOL(ipv4_config); | 127 | EXPORT_SYMBOL(ipv4_config); |
129 | 128 | ||
130 | /* New destruction routine */ | 129 | /* New destruction routine */ |
@@ -139,12 +138,12 @@ void inet_sock_destruct(struct sock *sk) | |||
139 | sk_mem_reclaim(sk); | 138 | sk_mem_reclaim(sk); |
140 | 139 | ||
141 | if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { | 140 | if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { |
142 | printk("Attempt to release TCP socket in state %d %p\n", | 141 | pr_err("Attempt to release TCP socket in state %d %p\n", |
143 | sk->sk_state, sk); | 142 | sk->sk_state, sk); |
144 | return; | 143 | return; |
145 | } | 144 | } |
146 | if (!sock_flag(sk, SOCK_DEAD)) { | 145 | if (!sock_flag(sk, SOCK_DEAD)) { |
147 | printk("Attempt to release alive inet socket %p\n", sk); | 146 | pr_err("Attempt to release alive inet socket %p\n", sk); |
148 | return; | 147 | return; |
149 | } | 148 | } |
150 | 149 | ||
@@ -157,6 +156,7 @@ void inet_sock_destruct(struct sock *sk) | |||
157 | dst_release(sk->sk_dst_cache); | 156 | dst_release(sk->sk_dst_cache); |
158 | sk_refcnt_debug_dec(sk); | 157 | sk_refcnt_debug_dec(sk); |
159 | } | 158 | } |
159 | EXPORT_SYMBOL(inet_sock_destruct); | ||
160 | 160 | ||
161 | /* | 161 | /* |
162 | * The routines beyond this point handle the behaviour of an AF_INET | 162 | * The routines beyond this point handle the behaviour of an AF_INET |
@@ -219,6 +219,7 @@ out: | |||
219 | release_sock(sk); | 219 | release_sock(sk); |
220 | return err; | 220 | return err; |
221 | } | 221 | } |
222 | EXPORT_SYMBOL(inet_listen); | ||
222 | 223 | ||
223 | u32 inet_ehash_secret __read_mostly; | 224 | u32 inet_ehash_secret __read_mostly; |
224 | EXPORT_SYMBOL(inet_ehash_secret); | 225 | EXPORT_SYMBOL(inet_ehash_secret); |
@@ -435,9 +436,11 @@ int inet_release(struct socket *sock) | |||
435 | } | 436 | } |
436 | return 0; | 437 | return 0; |
437 | } | 438 | } |
439 | EXPORT_SYMBOL(inet_release); | ||
438 | 440 | ||
439 | /* It is off by default, see below. */ | 441 | /* It is off by default, see below. */ |
440 | int sysctl_ip_nonlocal_bind __read_mostly; | 442 | int sysctl_ip_nonlocal_bind __read_mostly; |
443 | EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); | ||
441 | 444 | ||
442 | int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | 445 | int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) |
443 | { | 446 | { |
@@ -519,6 +522,7 @@ out_release_sock: | |||
519 | out: | 522 | out: |
520 | return err; | 523 | return err; |
521 | } | 524 | } |
525 | EXPORT_SYMBOL(inet_bind); | ||
522 | 526 | ||
523 | int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, | 527 | int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, |
524 | int addr_len, int flags) | 528 | int addr_len, int flags) |
@@ -532,6 +536,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, | |||
532 | return -EAGAIN; | 536 | return -EAGAIN; |
533 | return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); | 537 | return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); |
534 | } | 538 | } |
539 | EXPORT_SYMBOL(inet_dgram_connect); | ||
535 | 540 | ||
536 | static long inet_wait_for_connect(struct sock *sk, long timeo) | 541 | static long inet_wait_for_connect(struct sock *sk, long timeo) |
537 | { | 542 | { |
@@ -641,6 +646,7 @@ sock_error: | |||
641 | sock->state = SS_DISCONNECTING; | 646 | sock->state = SS_DISCONNECTING; |
642 | goto out; | 647 | goto out; |
643 | } | 648 | } |
649 | EXPORT_SYMBOL(inet_stream_connect); | ||
644 | 650 | ||
645 | /* | 651 | /* |
646 | * Accept a pending connection. The TCP layer now gives BSD semantics. | 652 | * Accept a pending connection. The TCP layer now gives BSD semantics. |
@@ -668,6 +674,7 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) | |||
668 | do_err: | 674 | do_err: |
669 | return err; | 675 | return err; |
670 | } | 676 | } |
677 | EXPORT_SYMBOL(inet_accept); | ||
671 | 678 | ||
672 | 679 | ||
673 | /* | 680 | /* |
@@ -699,6 +706,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr, | |||
699 | *uaddr_len = sizeof(*sin); | 706 | *uaddr_len = sizeof(*sin); |
700 | return 0; | 707 | return 0; |
701 | } | 708 | } |
709 | EXPORT_SYMBOL(inet_getname); | ||
702 | 710 | ||
703 | int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | 711 | int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, |
704 | size_t size) | 712 | size_t size) |
@@ -711,9 +719,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
711 | 719 | ||
712 | return sk->sk_prot->sendmsg(iocb, sk, msg, size); | 720 | return sk->sk_prot->sendmsg(iocb, sk, msg, size); |
713 | } | 721 | } |
722 | EXPORT_SYMBOL(inet_sendmsg); | ||
714 | 723 | ||
715 | 724 | ||
716 | static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) | 725 | static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, |
726 | size_t size, int flags) | ||
717 | { | 727 | { |
718 | struct sock *sk = sock->sk; | 728 | struct sock *sk = sock->sk; |
719 | 729 | ||
@@ -780,6 +790,7 @@ int inet_shutdown(struct socket *sock, int how) | |||
780 | release_sock(sk); | 790 | release_sock(sk); |
781 | return err; | 791 | return err; |
782 | } | 792 | } |
793 | EXPORT_SYMBOL(inet_shutdown); | ||
783 | 794 | ||
784 | /* | 795 | /* |
785 | * ioctl() calls you can issue on an INET socket. Most of these are | 796 | * ioctl() calls you can issue on an INET socket. Most of these are |
@@ -798,44 +809,45 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
798 | struct net *net = sock_net(sk); | 809 | struct net *net = sock_net(sk); |
799 | 810 | ||
800 | switch (cmd) { | 811 | switch (cmd) { |
801 | case SIOCGSTAMP: | 812 | case SIOCGSTAMP: |
802 | err = sock_get_timestamp(sk, (struct timeval __user *)arg); | 813 | err = sock_get_timestamp(sk, (struct timeval __user *)arg); |
803 | break; | 814 | break; |
804 | case SIOCGSTAMPNS: | 815 | case SIOCGSTAMPNS: |
805 | err = sock_get_timestampns(sk, (struct timespec __user *)arg); | 816 | err = sock_get_timestampns(sk, (struct timespec __user *)arg); |
806 | break; | 817 | break; |
807 | case SIOCADDRT: | 818 | case SIOCADDRT: |
808 | case SIOCDELRT: | 819 | case SIOCDELRT: |
809 | case SIOCRTMSG: | 820 | case SIOCRTMSG: |
810 | err = ip_rt_ioctl(net, cmd, (void __user *)arg); | 821 | err = ip_rt_ioctl(net, cmd, (void __user *)arg); |
811 | break; | 822 | break; |
812 | case SIOCDARP: | 823 | case SIOCDARP: |
813 | case SIOCGARP: | 824 | case SIOCGARP: |
814 | case SIOCSARP: | 825 | case SIOCSARP: |
815 | err = arp_ioctl(net, cmd, (void __user *)arg); | 826 | err = arp_ioctl(net, cmd, (void __user *)arg); |
816 | break; | 827 | break; |
817 | case SIOCGIFADDR: | 828 | case SIOCGIFADDR: |
818 | case SIOCSIFADDR: | 829 | case SIOCSIFADDR: |
819 | case SIOCGIFBRDADDR: | 830 | case SIOCGIFBRDADDR: |
820 | case SIOCSIFBRDADDR: | 831 | case SIOCSIFBRDADDR: |
821 | case SIOCGIFNETMASK: | 832 | case SIOCGIFNETMASK: |
822 | case SIOCSIFNETMASK: | 833 | case SIOCSIFNETMASK: |
823 | case SIOCGIFDSTADDR: | 834 | case SIOCGIFDSTADDR: |
824 | case SIOCSIFDSTADDR: | 835 | case SIOCSIFDSTADDR: |
825 | case SIOCSIFPFLAGS: | 836 | case SIOCSIFPFLAGS: |
826 | case SIOCGIFPFLAGS: | 837 | case SIOCGIFPFLAGS: |
827 | case SIOCSIFFLAGS: | 838 | case SIOCSIFFLAGS: |
828 | err = devinet_ioctl(net, cmd, (void __user *)arg); | 839 | err = devinet_ioctl(net, cmd, (void __user *)arg); |
829 | break; | 840 | break; |
830 | default: | 841 | default: |
831 | if (sk->sk_prot->ioctl) | 842 | if (sk->sk_prot->ioctl) |
832 | err = sk->sk_prot->ioctl(sk, cmd, arg); | 843 | err = sk->sk_prot->ioctl(sk, cmd, arg); |
833 | else | 844 | else |
834 | err = -ENOIOCTLCMD; | 845 | err = -ENOIOCTLCMD; |
835 | break; | 846 | break; |
836 | } | 847 | } |
837 | return err; | 848 | return err; |
838 | } | 849 | } |
850 | EXPORT_SYMBOL(inet_ioctl); | ||
839 | 851 | ||
840 | const struct proto_ops inet_stream_ops = { | 852 | const struct proto_ops inet_stream_ops = { |
841 | .family = PF_INET, | 853 | .family = PF_INET, |
@@ -862,6 +874,7 @@ const struct proto_ops inet_stream_ops = { | |||
862 | .compat_getsockopt = compat_sock_common_getsockopt, | 874 | .compat_getsockopt = compat_sock_common_getsockopt, |
863 | #endif | 875 | #endif |
864 | }; | 876 | }; |
877 | EXPORT_SYMBOL(inet_stream_ops); | ||
865 | 878 | ||
866 | const struct proto_ops inet_dgram_ops = { | 879 | const struct proto_ops inet_dgram_ops = { |
867 | .family = PF_INET, | 880 | .family = PF_INET, |
@@ -887,6 +900,7 @@ const struct proto_ops inet_dgram_ops = { | |||
887 | .compat_getsockopt = compat_sock_common_getsockopt, | 900 | .compat_getsockopt = compat_sock_common_getsockopt, |
888 | #endif | 901 | #endif |
889 | }; | 902 | }; |
903 | EXPORT_SYMBOL(inet_dgram_ops); | ||
890 | 904 | ||
891 | /* | 905 | /* |
892 | * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without | 906 | * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without |
@@ -1016,6 +1030,7 @@ out_illegal: | |||
1016 | p->type); | 1030 | p->type); |
1017 | goto out; | 1031 | goto out; |
1018 | } | 1032 | } |
1033 | EXPORT_SYMBOL(inet_register_protosw); | ||
1019 | 1034 | ||
1020 | void inet_unregister_protosw(struct inet_protosw *p) | 1035 | void inet_unregister_protosw(struct inet_protosw *p) |
1021 | { | 1036 | { |
@@ -1031,6 +1046,7 @@ void inet_unregister_protosw(struct inet_protosw *p) | |||
1031 | synchronize_net(); | 1046 | synchronize_net(); |
1032 | } | 1047 | } |
1033 | } | 1048 | } |
1049 | EXPORT_SYMBOL(inet_unregister_protosw); | ||
1034 | 1050 | ||
1035 | /* | 1051 | /* |
1036 | * Shall we try to damage output packets if routing dev changes? | 1052 | * Shall we try to damage output packets if routing dev changes? |
@@ -1141,7 +1157,6 @@ int inet_sk_rebuild_header(struct sock *sk) | |||
1141 | 1157 | ||
1142 | return err; | 1158 | return err; |
1143 | } | 1159 | } |
1144 | |||
1145 | EXPORT_SYMBOL(inet_sk_rebuild_header); | 1160 | EXPORT_SYMBOL(inet_sk_rebuild_header); |
1146 | 1161 | ||
1147 | static int inet_gso_send_check(struct sk_buff *skb) | 1162 | static int inet_gso_send_check(struct sk_buff *skb) |
@@ -1187,6 +1202,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) | |||
1187 | int proto; | 1202 | int proto; |
1188 | int ihl; | 1203 | int ihl; |
1189 | int id; | 1204 | int id; |
1205 | unsigned int offset = 0; | ||
1190 | 1206 | ||
1191 | if (!(features & NETIF_F_V4_CSUM)) | 1207 | if (!(features & NETIF_F_V4_CSUM)) |
1192 | features &= ~NETIF_F_SG; | 1208 | features &= ~NETIF_F_SG; |
@@ -1229,7 +1245,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) | |||
1229 | skb = segs; | 1245 | skb = segs; |
1230 | do { | 1246 | do { |
1231 | iph = ip_hdr(skb); | 1247 | iph = ip_hdr(skb); |
1232 | iph->id = htons(id++); | 1248 | if (proto == IPPROTO_UDP) { |
1249 | iph->id = htons(id); | ||
1250 | iph->frag_off = htons(offset >> 3); | ||
1251 | if (skb->next != NULL) | ||
1252 | iph->frag_off |= htons(IP_MF); | ||
1253 | offset += (skb->len - skb->mac_len - iph->ihl * 4); | ||
1254 | } else | ||
1255 | iph->id = htons(id++); | ||
1233 | iph->tot_len = htons(skb->len - skb->mac_len); | 1256 | iph->tot_len = htons(skb->len - skb->mac_len); |
1234 | iph->check = 0; | 1257 | iph->check = 0; |
1235 | iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); | 1258 | iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); |
@@ -1361,7 +1384,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family, | |||
1361 | } | 1384 | } |
1362 | return rc; | 1385 | return rc; |
1363 | } | 1386 | } |
1364 | |||
1365 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); | 1387 | EXPORT_SYMBOL_GPL(inet_ctl_sock_create); |
1366 | 1388 | ||
1367 | unsigned long snmp_fold_field(void *mib[], int offt) | 1389 | unsigned long snmp_fold_field(void *mib[], int offt) |
@@ -1425,6 +1447,8 @@ static struct net_protocol tcp_protocol = { | |||
1425 | static struct net_protocol udp_protocol = { | 1447 | static struct net_protocol udp_protocol = { |
1426 | .handler = udp_rcv, | 1448 | .handler = udp_rcv, |
1427 | .err_handler = udp_err, | 1449 | .err_handler = udp_err, |
1450 | .gso_send_check = udp4_ufo_send_check, | ||
1451 | .gso_segment = udp4_ufo_fragment, | ||
1428 | .no_policy = 1, | 1452 | .no_policy = 1, |
1429 | .netns_ok = 1, | 1453 | .netns_ok = 1, |
1430 | }; | 1454 | }; |
@@ -1666,19 +1690,3 @@ static int __init ipv4_proc_init(void) | |||
1666 | 1690 | ||
1667 | MODULE_ALIAS_NETPROTO(PF_INET); | 1691 | MODULE_ALIAS_NETPROTO(PF_INET); |
1668 | 1692 | ||
1669 | EXPORT_SYMBOL(inet_accept); | ||
1670 | EXPORT_SYMBOL(inet_bind); | ||
1671 | EXPORT_SYMBOL(inet_dgram_connect); | ||
1672 | EXPORT_SYMBOL(inet_dgram_ops); | ||
1673 | EXPORT_SYMBOL(inet_getname); | ||
1674 | EXPORT_SYMBOL(inet_ioctl); | ||
1675 | EXPORT_SYMBOL(inet_listen); | ||
1676 | EXPORT_SYMBOL(inet_register_protosw); | ||
1677 | EXPORT_SYMBOL(inet_release); | ||
1678 | EXPORT_SYMBOL(inet_sendmsg); | ||
1679 | EXPORT_SYMBOL(inet_shutdown); | ||
1680 | EXPORT_SYMBOL(inet_sock_destruct); | ||
1681 | EXPORT_SYMBOL(inet_stream_connect); | ||
1682 | EXPORT_SYMBOL(inet_stream_ops); | ||
1683 | EXPORT_SYMBOL(inet_unregister_protosw); | ||
1684 | EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); | ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 090e9991ac2a..4e80f336c0cf 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); | |||
130 | static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); | 130 | static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); |
131 | static void parp_redo(struct sk_buff *skb); | 131 | static void parp_redo(struct sk_buff *skb); |
132 | 132 | ||
133 | static struct neigh_ops arp_generic_ops = { | 133 | static const struct neigh_ops arp_generic_ops = { |
134 | .family = AF_INET, | 134 | .family = AF_INET, |
135 | .solicit = arp_solicit, | 135 | .solicit = arp_solicit, |
136 | .error_report = arp_error_report, | 136 | .error_report = arp_error_report, |
@@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = { | |||
140 | .queue_xmit = dev_queue_xmit, | 140 | .queue_xmit = dev_queue_xmit, |
141 | }; | 141 | }; |
142 | 142 | ||
143 | static struct neigh_ops arp_hh_ops = { | 143 | static const struct neigh_ops arp_hh_ops = { |
144 | .family = AF_INET, | 144 | .family = AF_INET, |
145 | .solicit = arp_solicit, | 145 | .solicit = arp_solicit, |
146 | .error_report = arp_error_report, | 146 | .error_report = arp_error_report, |
@@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = { | |||
150 | .queue_xmit = dev_queue_xmit, | 150 | .queue_xmit = dev_queue_xmit, |
151 | }; | 151 | }; |
152 | 152 | ||
153 | static struct neigh_ops arp_direct_ops = { | 153 | static const struct neigh_ops arp_direct_ops = { |
154 | .family = AF_INET, | 154 | .family = AF_INET, |
155 | .output = dev_queue_xmit, | 155 | .output = dev_queue_xmit, |
156 | .connected_output = dev_queue_xmit, | 156 | .connected_output = dev_queue_xmit, |
@@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = { | |||
158 | .queue_xmit = dev_queue_xmit, | 158 | .queue_xmit = dev_queue_xmit, |
159 | }; | 159 | }; |
160 | 160 | ||
161 | struct neigh_ops arp_broken_ops = { | 161 | const struct neigh_ops arp_broken_ops = { |
162 | .family = AF_INET, | 162 | .family = AF_INET, |
163 | .solicit = arp_solicit, | 163 | .solicit = arp_solicit, |
164 | .error_report = arp_error_report, | 164 | .error_report = arp_error_report, |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 63c2fa7b68c4..291bdf50a21f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -48,7 +48,7 @@ | |||
48 | * Patrick McHardy <kaber@trash.net> | 48 | * Patrick McHardy <kaber@trash.net> |
49 | */ | 49 | */ |
50 | 50 | ||
51 | #define VERSION "0.408" | 51 | #define VERSION "0.409" |
52 | 52 | ||
53 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
54 | #include <asm/system.h> | 54 | #include <asm/system.h> |
@@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); | |||
164 | static struct tnode *halve(struct trie *t, struct tnode *tn); | 164 | static struct tnode *halve(struct trie *t, struct tnode *tn); |
165 | /* tnodes to free after resize(); protected by RTNL */ | 165 | /* tnodes to free after resize(); protected by RTNL */ |
166 | static struct tnode *tnode_free_head; | 166 | static struct tnode *tnode_free_head; |
167 | static size_t tnode_free_size; | ||
168 | |||
169 | /* | ||
170 | * synchronize_rcu after call_rcu for that many pages; it should be especially | ||
171 | * useful before resizing the root node with PREEMPT_NONE configs; the value was | ||
172 | * obtained experimentally, aiming to avoid visible slowdown. | ||
173 | */ | ||
174 | static const int sync_pages = 128; | ||
167 | 175 | ||
168 | static struct kmem_cache *fn_alias_kmem __read_mostly; | 176 | static struct kmem_cache *fn_alias_kmem __read_mostly; |
169 | static struct kmem_cache *trie_leaf_kmem __read_mostly; | 177 | static struct kmem_cache *trie_leaf_kmem __read_mostly; |
@@ -317,8 +325,7 @@ static inline void check_tnode(const struct tnode *tn) | |||
317 | static const int halve_threshold = 25; | 325 | static const int halve_threshold = 25; |
318 | static const int inflate_threshold = 50; | 326 | static const int inflate_threshold = 50; |
319 | static const int halve_threshold_root = 15; | 327 | static const int halve_threshold_root = 15; |
320 | static const int inflate_threshold_root = 25; | 328 | static const int inflate_threshold_root = 30; |
321 | |||
322 | 329 | ||
323 | static void __alias_free_mem(struct rcu_head *head) | 330 | static void __alias_free_mem(struct rcu_head *head) |
324 | { | 331 | { |
@@ -393,6 +400,8 @@ static void tnode_free_safe(struct tnode *tn) | |||
393 | BUG_ON(IS_LEAF(tn)); | 400 | BUG_ON(IS_LEAF(tn)); |
394 | tn->tnode_free = tnode_free_head; | 401 | tn->tnode_free = tnode_free_head; |
395 | tnode_free_head = tn; | 402 | tnode_free_head = tn; |
403 | tnode_free_size += sizeof(struct tnode) + | ||
404 | (sizeof(struct node *) << tn->bits); | ||
396 | } | 405 | } |
397 | 406 | ||
398 | static void tnode_free_flush(void) | 407 | static void tnode_free_flush(void) |
@@ -404,6 +413,11 @@ static void tnode_free_flush(void) | |||
404 | tn->tnode_free = NULL; | 413 | tn->tnode_free = NULL; |
405 | tnode_free(tn); | 414 | tnode_free(tn); |
406 | } | 415 | } |
416 | |||
417 | if (tnode_free_size >= PAGE_SIZE * sync_pages) { | ||
418 | tnode_free_size = 0; | ||
419 | synchronize_rcu(); | ||
420 | } | ||
407 | } | 421 | } |
408 | 422 | ||
409 | static struct leaf *leaf_new(void) | 423 | static struct leaf *leaf_new(void) |
@@ -499,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, | |||
499 | rcu_assign_pointer(tn->child[i], n); | 513 | rcu_assign_pointer(tn->child[i], n); |
500 | } | 514 | } |
501 | 515 | ||
516 | #define MAX_WORK 10 | ||
502 | static struct node *resize(struct trie *t, struct tnode *tn) | 517 | static struct node *resize(struct trie *t, struct tnode *tn) |
503 | { | 518 | { |
504 | int i; | 519 | int i; |
505 | int err = 0; | ||
506 | struct tnode *old_tn; | 520 | struct tnode *old_tn; |
507 | int inflate_threshold_use; | 521 | int inflate_threshold_use; |
508 | int halve_threshold_use; | 522 | int halve_threshold_use; |
509 | int max_resize; | 523 | int max_work; |
510 | 524 | ||
511 | if (!tn) | 525 | if (!tn) |
512 | return NULL; | 526 | return NULL; |
@@ -521,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
521 | } | 535 | } |
522 | /* One child */ | 536 | /* One child */ |
523 | if (tn->empty_children == tnode_child_length(tn) - 1) | 537 | if (tn->empty_children == tnode_child_length(tn) - 1) |
524 | for (i = 0; i < tnode_child_length(tn); i++) { | 538 | goto one_child; |
525 | struct node *n; | ||
526 | |||
527 | n = tn->child[i]; | ||
528 | if (!n) | ||
529 | continue; | ||
530 | |||
531 | /* compress one level */ | ||
532 | node_set_parent(n, NULL); | ||
533 | tnode_free_safe(tn); | ||
534 | return n; | ||
535 | } | ||
536 | /* | 539 | /* |
537 | * Double as long as the resulting node has a number of | 540 | * Double as long as the resulting node has a number of |
538 | * nonempty nodes that are above the threshold. | 541 | * nonempty nodes that are above the threshold. |
@@ -601,14 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
601 | 604 | ||
602 | /* Keep root node larger */ | 605 | /* Keep root node larger */ |
603 | 606 | ||
604 | if (!tn->parent) | 607 | if (!node_parent((struct node*) tn)) { |
605 | inflate_threshold_use = inflate_threshold_root; | 608 | inflate_threshold_use = inflate_threshold_root; |
606 | else | 609 | halve_threshold_use = halve_threshold_root; |
610 | } | ||
611 | else { | ||
607 | inflate_threshold_use = inflate_threshold; | 612 | inflate_threshold_use = inflate_threshold; |
613 | halve_threshold_use = halve_threshold; | ||
614 | } | ||
608 | 615 | ||
609 | err = 0; | 616 | max_work = MAX_WORK; |
610 | max_resize = 10; | 617 | while ((tn->full_children > 0 && max_work-- && |
611 | while ((tn->full_children > 0 && max_resize-- && | ||
612 | 50 * (tn->full_children + tnode_child_length(tn) | 618 | 50 * (tn->full_children + tnode_child_length(tn) |
613 | - tn->empty_children) | 619 | - tn->empty_children) |
614 | >= inflate_threshold_use * tnode_child_length(tn))) { | 620 | >= inflate_threshold_use * tnode_child_length(tn))) { |
@@ -625,35 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
625 | } | 631 | } |
626 | } | 632 | } |
627 | 633 | ||
628 | if (max_resize < 0) { | ||
629 | if (!tn->parent) | ||
630 | pr_warning("Fix inflate_threshold_root." | ||
631 | " Now=%d size=%d bits\n", | ||
632 | inflate_threshold_root, tn->bits); | ||
633 | else | ||
634 | pr_warning("Fix inflate_threshold." | ||
635 | " Now=%d size=%d bits\n", | ||
636 | inflate_threshold, tn->bits); | ||
637 | } | ||
638 | |||
639 | check_tnode(tn); | 634 | check_tnode(tn); |
640 | 635 | ||
636 | /* Return if at least one inflate is run */ | ||
637 | if( max_work != MAX_WORK) | ||
638 | return (struct node *) tn; | ||
639 | |||
641 | /* | 640 | /* |
642 | * Halve as long as the number of empty children in this | 641 | * Halve as long as the number of empty children in this |
643 | * node is above threshold. | 642 | * node is above threshold. |
644 | */ | 643 | */ |
645 | 644 | ||
646 | 645 | max_work = MAX_WORK; | |
647 | /* Keep root node larger */ | 646 | while (tn->bits > 1 && max_work-- && |
648 | |||
649 | if (!tn->parent) | ||
650 | halve_threshold_use = halve_threshold_root; | ||
651 | else | ||
652 | halve_threshold_use = halve_threshold; | ||
653 | |||
654 | err = 0; | ||
655 | max_resize = 10; | ||
656 | while (tn->bits > 1 && max_resize-- && | ||
657 | 100 * (tnode_child_length(tn) - tn->empty_children) < | 647 | 100 * (tnode_child_length(tn) - tn->empty_children) < |
658 | halve_threshold_use * tnode_child_length(tn)) { | 648 | halve_threshold_use * tnode_child_length(tn)) { |
659 | 649 | ||
@@ -668,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
668 | } | 658 | } |
669 | } | 659 | } |
670 | 660 | ||
671 | if (max_resize < 0) { | ||
672 | if (!tn->parent) | ||
673 | pr_warning("Fix halve_threshold_root." | ||
674 | " Now=%d size=%d bits\n", | ||
675 | halve_threshold_root, tn->bits); | ||
676 | else | ||
677 | pr_warning("Fix halve_threshold." | ||
678 | " Now=%d size=%d bits\n", | ||
679 | halve_threshold, tn->bits); | ||
680 | } | ||
681 | 661 | ||
682 | /* Only one child remains */ | 662 | /* Only one child remains */ |
683 | if (tn->empty_children == tnode_child_length(tn) - 1) | 663 | if (tn->empty_children == tnode_child_length(tn) - 1) { |
664 | one_child: | ||
684 | for (i = 0; i < tnode_child_length(tn); i++) { | 665 | for (i = 0; i < tnode_child_length(tn); i++) { |
685 | struct node *n; | 666 | struct node *n; |
686 | 667 | ||
@@ -694,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) | |||
694 | tnode_free_safe(tn); | 675 | tnode_free_safe(tn); |
695 | return n; | 676 | return n; |
696 | } | 677 | } |
697 | 678 | } | |
698 | return (struct node *) tn; | 679 | return (struct node *) tn; |
699 | } | 680 | } |
700 | 681 | ||
@@ -1435,7 +1416,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, | |||
1435 | cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), | 1416 | cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), |
1436 | pos, bits); | 1417 | pos, bits); |
1437 | 1418 | ||
1438 | n = tnode_get_child(pn, cindex); | 1419 | n = tnode_get_child_rcu(pn, cindex); |
1439 | 1420 | ||
1440 | if (n == NULL) { | 1421 | if (n == NULL) { |
1441 | #ifdef CONFIG_IP_FIB_TRIE_STATS | 1422 | #ifdef CONFIG_IP_FIB_TRIE_STATS |
@@ -1570,7 +1551,7 @@ backtrace: | |||
1570 | if (chopped_off <= pn->bits) { | 1551 | if (chopped_off <= pn->bits) { |
1571 | cindex &= ~(1 << (chopped_off-1)); | 1552 | cindex &= ~(1 << (chopped_off-1)); |
1572 | } else { | 1553 | } else { |
1573 | struct tnode *parent = node_parent((struct node *) pn); | 1554 | struct tnode *parent = node_parent_rcu((struct node *) pn); |
1574 | if (!parent) | 1555 | if (!parent) |
1575 | goto failed; | 1556 | goto failed; |
1576 | 1557 | ||
@@ -1783,7 +1764,7 @@ static struct leaf *trie_firstleaf(struct trie *t) | |||
1783 | static struct leaf *trie_nextleaf(struct leaf *l) | 1764 | static struct leaf *trie_nextleaf(struct leaf *l) |
1784 | { | 1765 | { |
1785 | struct node *c = (struct node *) l; | 1766 | struct node *c = (struct node *) l; |
1786 | struct tnode *p = node_parent(c); | 1767 | struct tnode *p = node_parent_rcu(c); |
1787 | 1768 | ||
1788 | if (!p) | 1769 | if (!p) |
1789 | return NULL; /* trie with just one leaf */ | 1770 | return NULL; /* trie with just one leaf */ |
@@ -2391,7 +2372,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) | |||
2391 | } | 2372 | } |
2392 | } | 2373 | } |
2393 | 2374 | ||
2394 | static const char *rtn_type_names[__RTN_MAX] = { | 2375 | static const char *const rtn_type_names[__RTN_MAX] = { |
2395 | [RTN_UNSPEC] = "UNSPEC", | 2376 | [RTN_UNSPEC] = "UNSPEC", |
2396 | [RTN_UNICAST] = "UNICAST", | 2377 | [RTN_UNICAST] = "UNICAST", |
2397 | [RTN_LOCAL] = "LOCAL", | 2378 | [RTN_LOCAL] = "LOCAL", |
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 61283f928825..13f0781f35cd 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c | |||
@@ -218,8 +218,8 @@ void inet_twdr_hangman(unsigned long data) | |||
218 | /* We purged the entire slot, anything left? */ | 218 | /* We purged the entire slot, anything left? */ |
219 | if (twdr->tw_count) | 219 | if (twdr->tw_count) |
220 | need_timer = 1; | 220 | need_timer = 1; |
221 | twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); | ||
221 | } | 222 | } |
222 | twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); | ||
223 | if (need_timer) | 223 | if (need_timer) |
224 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); | 224 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); |
225 | out: | 225 | out: |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index cb4a0f4bd5e5..533afaadefd4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -662,7 +662,7 @@ drop_nolock: | |||
662 | return(0); | 662 | return(0); |
663 | } | 663 | } |
664 | 664 | ||
665 | static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 665 | static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
666 | { | 666 | { |
667 | struct ip_tunnel *tunnel = netdev_priv(dev); | 667 | struct ip_tunnel *tunnel = netdev_priv(dev); |
668 | struct net_device_stats *stats = &tunnel->dev->stats; | 668 | struct net_device_stats *stats = &tunnel->dev->stats; |
@@ -821,7 +821,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
821 | stats->tx_dropped++; | 821 | stats->tx_dropped++; |
822 | dev_kfree_skb(skb); | 822 | dev_kfree_skb(skb); |
823 | tunnel->recursion--; | 823 | tunnel->recursion--; |
824 | return 0; | 824 | return NETDEV_TX_OK; |
825 | } | 825 | } |
826 | if (skb->sk) | 826 | if (skb->sk) |
827 | skb_set_owner_w(new_skb, skb->sk); | 827 | skb_set_owner_w(new_skb, skb->sk); |
@@ -889,7 +889,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
889 | 889 | ||
890 | IPTUNNEL_XMIT(); | 890 | IPTUNNEL_XMIT(); |
891 | tunnel->recursion--; | 891 | tunnel->recursion--; |
892 | return 0; | 892 | return NETDEV_TX_OK; |
893 | 893 | ||
894 | tx_error_icmp: | 894 | tx_error_icmp: |
895 | dst_link_failure(skb); | 895 | dst_link_failure(skb); |
@@ -898,7 +898,7 @@ tx_error: | |||
898 | stats->tx_errors++; | 898 | stats->tx_errors++; |
899 | dev_kfree_skb(skb); | 899 | dev_kfree_skb(skb); |
900 | tunnel->recursion--; | 900 | tunnel->recursion--; |
901 | return 0; | 901 | return NETDEV_TX_OK; |
902 | } | 902 | } |
903 | 903 | ||
904 | static int ipgre_tunnel_bind_dev(struct net_device *dev) | 904 | static int ipgre_tunnel_bind_dev(struct net_device *dev) |
@@ -951,7 +951,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) | |||
951 | addend += 4; | 951 | addend += 4; |
952 | } | 952 | } |
953 | dev->needed_headroom = addend + hlen; | 953 | dev->needed_headroom = addend + hlen; |
954 | mtu -= dev->hard_header_len - addend; | 954 | mtu -= dev->hard_header_len + addend; |
955 | 955 | ||
956 | if (mtu < 68) | 956 | if (mtu < 68) |
957 | mtu = 68; | 957 | mtu = 68; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7d0821054729..9fe5d7b81580 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -813,6 +813,8 @@ int ip_append_data(struct sock *sk, | |||
813 | inet->cork.addr = ipc->addr; | 813 | inet->cork.addr = ipc->addr; |
814 | } | 814 | } |
815 | rt = *rtp; | 815 | rt = *rtp; |
816 | if (unlikely(!rt)) | ||
817 | return -EFAULT; | ||
816 | /* | 818 | /* |
817 | * We steal reference to this route, caller should not release it | 819 | * We steal reference to this route, caller should not release it |
818 | */ | 820 | */ |
@@ -1302,7 +1304,7 @@ int ip_push_pending_frames(struct sock *sk) | |||
1302 | err = ip_local_out(skb); | 1304 | err = ip_local_out(skb); |
1303 | if (err) { | 1305 | if (err) { |
1304 | if (err > 0) | 1306 | if (err > 0) |
1305 | err = inet->recverr ? net_xmit_errno(err) : 0; | 1307 | err = net_xmit_errno(err); |
1306 | if (err) | 1308 | if (err) |
1307 | goto error; | 1309 | goto error; |
1308 | } | 1310 | } |
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 93e2b787da20..62548cb0923c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -387,7 +387,7 @@ static int ipip_rcv(struct sk_buff *skb) | |||
387 | * and that skb is filled properly by that function. | 387 | * and that skb is filled properly by that function. |
388 | */ | 388 | */ |
389 | 389 | ||
390 | static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | 390 | static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) |
391 | { | 391 | { |
392 | struct ip_tunnel *tunnel = netdev_priv(dev); | 392 | struct ip_tunnel *tunnel = netdev_priv(dev); |
393 | struct net_device_stats *stats = &tunnel->dev->stats; | 393 | struct net_device_stats *stats = &tunnel->dev->stats; |
@@ -486,7 +486,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
486 | stats->tx_dropped++; | 486 | stats->tx_dropped++; |
487 | dev_kfree_skb(skb); | 487 | dev_kfree_skb(skb); |
488 | tunnel->recursion--; | 488 | tunnel->recursion--; |
489 | return 0; | 489 | return NETDEV_TX_OK; |
490 | } | 490 | } |
491 | if (skb->sk) | 491 | if (skb->sk) |
492 | skb_set_owner_w(new_skb, skb->sk); | 492 | skb_set_owner_w(new_skb, skb->sk); |
@@ -524,7 +524,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
524 | 524 | ||
525 | IPTUNNEL_XMIT(); | 525 | IPTUNNEL_XMIT(); |
526 | tunnel->recursion--; | 526 | tunnel->recursion--; |
527 | return 0; | 527 | return NETDEV_TX_OK; |
528 | 528 | ||
529 | tx_error_icmp: | 529 | tx_error_icmp: |
530 | dst_link_failure(skb); | 530 | dst_link_failure(skb); |
@@ -532,7 +532,7 @@ tx_error: | |||
532 | stats->tx_errors++; | 532 | stats->tx_errors++; |
533 | dev_kfree_skb(skb); | 533 | dev_kfree_skb(skb); |
534 | tunnel->recursion--; | 534 | tunnel->recursion--; |
535 | return 0; | 535 | return NETDEV_TX_OK; |
536 | } | 536 | } |
537 | 537 | ||
538 | static void ipip_tunnel_bind_dev(struct net_device *dev) | 538 | static void ipip_tunnel_bind_dev(struct net_device *dev) |
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9a8da5ed92b7..65d421cf5bc7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c | |||
@@ -201,7 +201,7 @@ failure: | |||
201 | 201 | ||
202 | #ifdef CONFIG_IP_PIMSM | 202 | #ifdef CONFIG_IP_PIMSM |
203 | 203 | ||
204 | static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | 204 | static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) |
205 | { | 205 | { |
206 | struct net *net = dev_net(dev); | 206 | struct net *net = dev_net(dev); |
207 | 207 | ||
@@ -212,7 +212,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | |||
212 | IGMPMSG_WHOLEPKT); | 212 | IGMPMSG_WHOLEPKT); |
213 | read_unlock(&mrt_lock); | 213 | read_unlock(&mrt_lock); |
214 | kfree_skb(skb); | 214 | kfree_skb(skb); |
215 | return 0; | 215 | return NETDEV_TX_OK; |
216 | } | 216 | } |
217 | 217 | ||
218 | static const struct net_device_ops reg_vif_netdev_ops = { | 218 | static const struct net_device_ops reg_vif_netdev_ops = { |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7505dff4ffdf..27774c99d888 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -8,7 +8,7 @@ | |||
8 | * Copyright (C) 2002 David S. Miller (davem@redhat.com) | 8 | * Copyright (C) 2002 David S. Miller (davem@redhat.com) |
9 | * | 9 | * |
10 | */ | 10 | */ |
11 | 11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/skbuff.h> | 13 | #include <linux/skbuff.h> |
14 | #include <linux/netdevice.h> | 14 | #include <linux/netdevice.h> |
@@ -341,15 +341,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
341 | } | 341 | } |
342 | 342 | ||
343 | /* All zeroes == unconditional rule. */ | 343 | /* All zeroes == unconditional rule. */ |
344 | static inline int unconditional(const struct arpt_arp *arp) | 344 | static inline bool unconditional(const struct arpt_arp *arp) |
345 | { | 345 | { |
346 | unsigned int i; | 346 | static const struct arpt_arp uncond; |
347 | 347 | ||
348 | for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++) | 348 | return memcmp(arp, &uncond, sizeof(uncond)) == 0; |
349 | if (((__u32 *)arp)[i]) | ||
350 | return 0; | ||
351 | |||
352 | return 1; | ||
353 | } | 349 | } |
354 | 350 | ||
355 | /* Figures out from what hook each rule can be called: returns 0 if | 351 | /* Figures out from what hook each rule can be called: returns 0 if |
@@ -537,12 +533,28 @@ out: | |||
537 | return ret; | 533 | return ret; |
538 | } | 534 | } |
539 | 535 | ||
536 | static bool check_underflow(struct arpt_entry *e) | ||
537 | { | ||
538 | const struct arpt_entry_target *t; | ||
539 | unsigned int verdict; | ||
540 | |||
541 | if (!unconditional(&e->arp)) | ||
542 | return false; | ||
543 | t = arpt_get_target(e); | ||
544 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) | ||
545 | return false; | ||
546 | verdict = ((struct arpt_standard_target *)t)->verdict; | ||
547 | verdict = -verdict - 1; | ||
548 | return verdict == NF_DROP || verdict == NF_ACCEPT; | ||
549 | } | ||
550 | |||
540 | static inline int check_entry_size_and_hooks(struct arpt_entry *e, | 551 | static inline int check_entry_size_and_hooks(struct arpt_entry *e, |
541 | struct xt_table_info *newinfo, | 552 | struct xt_table_info *newinfo, |
542 | unsigned char *base, | 553 | unsigned char *base, |
543 | unsigned char *limit, | 554 | unsigned char *limit, |
544 | const unsigned int *hook_entries, | 555 | const unsigned int *hook_entries, |
545 | const unsigned int *underflows, | 556 | const unsigned int *underflows, |
557 | unsigned int valid_hooks, | ||
546 | unsigned int *i) | 558 | unsigned int *i) |
547 | { | 559 | { |
548 | unsigned int h; | 560 | unsigned int h; |
@@ -562,15 +574,21 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, | |||
562 | 574 | ||
563 | /* Check hooks & underflows */ | 575 | /* Check hooks & underflows */ |
564 | for (h = 0; h < NF_ARP_NUMHOOKS; h++) { | 576 | for (h = 0; h < NF_ARP_NUMHOOKS; h++) { |
577 | if (!(valid_hooks & (1 << h))) | ||
578 | continue; | ||
565 | if ((unsigned char *)e - base == hook_entries[h]) | 579 | if ((unsigned char *)e - base == hook_entries[h]) |
566 | newinfo->hook_entry[h] = hook_entries[h]; | 580 | newinfo->hook_entry[h] = hook_entries[h]; |
567 | if ((unsigned char *)e - base == underflows[h]) | 581 | if ((unsigned char *)e - base == underflows[h]) { |
582 | if (!check_underflow(e)) { | ||
583 | pr_err("Underflows must be unconditional and " | ||
584 | "use the STANDARD target with " | ||
585 | "ACCEPT/DROP\n"); | ||
586 | return -EINVAL; | ||
587 | } | ||
568 | newinfo->underflow[h] = underflows[h]; | 588 | newinfo->underflow[h] = underflows[h]; |
589 | } | ||
569 | } | 590 | } |
570 | 591 | ||
571 | /* FIXME: underflows must be unconditional, standard verdicts | ||
572 | < 0 (not ARPT_RETURN). --RR */ | ||
573 | |||
574 | /* Clear counters and comefrom */ | 592 | /* Clear counters and comefrom */ |
575 | e->counters = ((struct xt_counters) { 0, 0 }); | 593 | e->counters = ((struct xt_counters) { 0, 0 }); |
576 | e->comefrom = 0; | 594 | e->comefrom = 0; |
@@ -630,7 +648,7 @@ static int translate_table(const char *name, | |||
630 | newinfo, | 648 | newinfo, |
631 | entry0, | 649 | entry0, |
632 | entry0 + size, | 650 | entry0 + size, |
633 | hook_entries, underflows, &i); | 651 | hook_entries, underflows, valid_hooks, &i); |
634 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); | 652 | duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); |
635 | if (ret != 0) | 653 | if (ret != 0) |
636 | return ret; | 654 | return ret; |
@@ -1760,7 +1778,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len | |||
1760 | return ret; | 1778 | return ret; |
1761 | } | 1779 | } |
1762 | 1780 | ||
1763 | struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, | 1781 | struct xt_table *arpt_register_table(struct net *net, |
1782 | const struct xt_table *table, | ||
1764 | const struct arpt_replace *repl) | 1783 | const struct arpt_replace *repl) |
1765 | { | 1784 | { |
1766 | int ret; | 1785 | int ret; |
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 6ecfdae7c589..97337601827a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -15,7 +15,7 @@ MODULE_DESCRIPTION("arptables filter table"); | |||
15 | #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ | 15 | #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ |
16 | (1 << NF_ARP_FORWARD)) | 16 | (1 << NF_ARP_FORWARD)) |
17 | 17 | ||
18 | static struct | 18 | static const struct |
19 | { | 19 | { |
20 | struct arpt_replace repl; | 20 | struct arpt_replace repl; |
21 | struct arpt_standard entries[3]; | 21 | struct arpt_standard entries[3]; |
@@ -45,7 +45,7 @@ static struct | |||
45 | .term = ARPT_ERROR_INIT, | 45 | .term = ARPT_ERROR_INIT, |
46 | }; | 46 | }; |
47 | 47 | ||
48 | static struct xt_table packet_filter = { | 48 | static const struct xt_table packet_filter = { |
49 | .name = "filter", | 49 | .name = "filter", |
50 | .valid_hooks = FILTER_VALID_HOOKS, | 50 | .valid_hooks = FILTER_VALID_HOOKS, |
51 | .me = THIS_MODULE, | 51 | .me = THIS_MODULE, |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index fdefae6b5dfc..cde755d5eeab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 9 | * published by the Free Software Foundation. |
10 | */ | 10 | */ |
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | #include <linux/cache.h> | 12 | #include <linux/cache.h> |
12 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
13 | #include <linux/skbuff.h> | 14 | #include <linux/skbuff.h> |
@@ -190,16 +191,11 @@ get_entry(void *base, unsigned int offset) | |||
190 | 191 | ||
191 | /* All zeroes == unconditional rule. */ | 192 | /* All zeroes == unconditional rule. */ |
192 | /* Mildly perf critical (only if packet tracing is on) */ | 193 | /* Mildly perf critical (only if packet tracing is on) */ |
193 | static inline int | 194 | static inline bool unconditional(const struct ipt_ip *ip) |
194 | unconditional(const struct ipt_ip *ip) | ||
195 | { | 195 | { |
196 | unsigned int i; | 196 | static const struct ipt_ip uncond; |
197 | |||
198 | for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) | ||
199 | if (((__u32 *)ip)[i]) | ||
200 | return 0; | ||
201 | 197 | ||
202 | return 1; | 198 | return memcmp(ip, &uncond, sizeof(uncond)) == 0; |
203 | #undef FWINV | 199 | #undef FWINV |
204 | } | 200 | } |
205 | 201 | ||
@@ -315,7 +311,6 @@ ipt_do_table(struct sk_buff *skb, | |||
315 | 311 | ||
316 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); | 312 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
317 | const struct iphdr *ip; | 313 | const struct iphdr *ip; |
318 | u_int16_t datalen; | ||
319 | bool hotdrop = false; | 314 | bool hotdrop = false; |
320 | /* Initializing verdict to NF_DROP keeps gcc happy. */ | 315 | /* Initializing verdict to NF_DROP keeps gcc happy. */ |
321 | unsigned int verdict = NF_DROP; | 316 | unsigned int verdict = NF_DROP; |
@@ -328,7 +323,6 @@ ipt_do_table(struct sk_buff *skb, | |||
328 | 323 | ||
329 | /* Initialization */ | 324 | /* Initialization */ |
330 | ip = ip_hdr(skb); | 325 | ip = ip_hdr(skb); |
331 | datalen = skb->len - ip->ihl * 4; | ||
332 | indev = in ? in->name : nulldevname; | 326 | indev = in ? in->name : nulldevname; |
333 | outdev = out ? out->name : nulldevname; | 327 | outdev = out ? out->name : nulldevname; |
334 | /* We handle fragments by dealing with the first fragment as | 328 | /* We handle fragments by dealing with the first fragment as |
@@ -427,8 +421,6 @@ ipt_do_table(struct sk_buff *skb, | |||
427 | #endif | 421 | #endif |
428 | /* Target might have changed stuff. */ | 422 | /* Target might have changed stuff. */ |
429 | ip = ip_hdr(skb); | 423 | ip = ip_hdr(skb); |
430 | datalen = skb->len - ip->ihl * 4; | ||
431 | |||
432 | if (verdict == IPT_CONTINUE) | 424 | if (verdict == IPT_CONTINUE) |
433 | e = ipt_next_entry(e); | 425 | e = ipt_next_entry(e); |
434 | else | 426 | else |
@@ -716,6 +708,21 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, | |||
716 | return ret; | 708 | return ret; |
717 | } | 709 | } |
718 | 710 | ||
711 | static bool check_underflow(struct ipt_entry *e) | ||
712 | { | ||
713 | const struct ipt_entry_target *t; | ||
714 | unsigned int verdict; | ||
715 | |||
716 | if (!unconditional(&e->ip)) | ||
717 | return false; | ||
718 | t = ipt_get_target(e); | ||
719 | if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) | ||
720 | return false; | ||
721 | verdict = ((struct ipt_standard_target *)t)->verdict; | ||
722 | verdict = -verdict - 1; | ||
723 | return verdict == NF_DROP || verdict == NF_ACCEPT; | ||
724 | } | ||
725 | |||
719 | static int | 726 | static int |
720 | check_entry_size_and_hooks(struct ipt_entry *e, | 727 | check_entry_size_and_hooks(struct ipt_entry *e, |
721 | struct xt_table_info *newinfo, | 728 | struct xt_table_info *newinfo, |
@@ -723,6 +730,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, | |||
723 | unsigned char *limit, | 730 | unsigned char *limit, |
724 | const unsigned int *hook_entries, | 731 | const unsigned int *hook_entries, |
725 | const unsigned int *underflows, | 732 | const unsigned int *underflows, |
733 | unsigned int valid_hooks, | ||
726 | unsigned int *i) | 734 | unsigned int *i) |
727 | { | 735 | { |
728 | unsigned int h; | 736 | unsigned int h; |
@@ -742,15 +750,21 @@ check_entry_size_and_hooks(struct ipt_entry *e, | |||
742 | 750 | ||
743 | /* Check hooks & underflows */ | 751 | /* Check hooks & underflows */ |
744 | for (h = 0; h < NF_INET_NUMHOOKS; h++) { | 752 | for (h = 0; h < NF_INET_NUMHOOKS; h++) { |
753 | if (!(valid_hooks & (1 << h))) | ||
754 | continue; | ||
745 | if ((unsigned char *)e - base == hook_entries[h]) | 755 | if ((unsigned char *)e - base == hook_entries[h]) |
746 | newinfo->hook_entry[h] = hook_entries[h]; | 756 | newinfo->hook_entry[h] = hook_entries[h]; |
747 | if ((unsigned char *)e - base == underflows[h]) | 757 | if ((unsigned char *)e - base == underflows[h]) { |
758 | if (!check_underflow(e)) { | ||
759 | pr_err("Underflows must be unconditional and " | ||
760 | "use the STANDARD target with " | ||
761 | "ACCEPT/DROP\n"); | ||
762 | return -EINVAL; | ||
763 | } | ||
748 | newinfo->underflow[h] = underflows[h]; | 764 | newinfo->underflow[h] = underflows[h]; |
765 | } | ||
749 | } | 766 | } |
750 | 767 | ||
751 | /* FIXME: underflows must be unconditional, standard verdicts | ||
752 | < 0 (not IPT_RETURN). --RR */ | ||
753 | |||
754 | /* Clear counters and comefrom */ | 768 | /* Clear counters and comefrom */ |
755 | e->counters = ((struct xt_counters) { 0, 0 }); | 769 | e->counters = ((struct xt_counters) { 0, 0 }); |
756 | e->comefrom = 0; | 770 | e->comefrom = 0; |
@@ -813,7 +827,7 @@ translate_table(const char *name, | |||
813 | newinfo, | 827 | newinfo, |
814 | entry0, | 828 | entry0, |
815 | entry0 + size, | 829 | entry0 + size, |
816 | hook_entries, underflows, &i); | 830 | hook_entries, underflows, valid_hooks, &i); |
817 | if (ret != 0) | 831 | if (ret != 0) |
818 | return ret; | 832 | return ret; |
819 | 833 | ||
@@ -2051,7 +2065,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) | |||
2051 | return ret; | 2065 | return ret; |
2052 | } | 2066 | } |
2053 | 2067 | ||
2054 | struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, | 2068 | struct xt_table *ipt_register_table(struct net *net, |
2069 | const struct xt_table *table, | ||
2055 | const struct ipt_replace *repl) | 2070 | const struct ipt_replace *repl) |
2056 | { | 2071 | { |
2057 | int ret; | 2072 | int ret; |
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c30a969724f8..df566cbd68e5 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -53,11 +53,11 @@ static struct | |||
53 | .term = IPT_ERROR_INIT, /* ERROR */ | 53 | .term = IPT_ERROR_INIT, /* ERROR */ |
54 | }; | 54 | }; |
55 | 55 | ||
56 | static struct xt_table packet_filter = { | 56 | static const struct xt_table packet_filter = { |
57 | .name = "filter", | 57 | .name = "filter", |
58 | .valid_hooks = FILTER_VALID_HOOKS, | 58 | .valid_hooks = FILTER_VALID_HOOKS, |
59 | .me = THIS_MODULE, | 59 | .me = THIS_MODULE, |
60 | .af = AF_INET, | 60 | .af = NFPROTO_IPV4, |
61 | }; | 61 | }; |
62 | 62 | ||
63 | /* The work comes in here from netfilter.c. */ | 63 | /* The work comes in here from netfilter.c. */ |
@@ -102,21 +102,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
102 | { | 102 | { |
103 | .hook = ipt_local_in_hook, | 103 | .hook = ipt_local_in_hook, |
104 | .owner = THIS_MODULE, | 104 | .owner = THIS_MODULE, |
105 | .pf = PF_INET, | 105 | .pf = NFPROTO_IPV4, |
106 | .hooknum = NF_INET_LOCAL_IN, | 106 | .hooknum = NF_INET_LOCAL_IN, |
107 | .priority = NF_IP_PRI_FILTER, | 107 | .priority = NF_IP_PRI_FILTER, |
108 | }, | 108 | }, |
109 | { | 109 | { |
110 | .hook = ipt_hook, | 110 | .hook = ipt_hook, |
111 | .owner = THIS_MODULE, | 111 | .owner = THIS_MODULE, |
112 | .pf = PF_INET, | 112 | .pf = NFPROTO_IPV4, |
113 | .hooknum = NF_INET_FORWARD, | 113 | .hooknum = NF_INET_FORWARD, |
114 | .priority = NF_IP_PRI_FILTER, | 114 | .priority = NF_IP_PRI_FILTER, |
115 | }, | 115 | }, |
116 | { | 116 | { |
117 | .hook = ipt_local_out_hook, | 117 | .hook = ipt_local_out_hook, |
118 | .owner = THIS_MODULE, | 118 | .owner = THIS_MODULE, |
119 | .pf = PF_INET, | 119 | .pf = NFPROTO_IPV4, |
120 | .hooknum = NF_INET_LOCAL_OUT, | 120 | .hooknum = NF_INET_LOCAL_OUT, |
121 | .priority = NF_IP_PRI_FILTER, | 121 | .priority = NF_IP_PRI_FILTER, |
122 | }, | 122 | }, |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4087614d9519..036047f9b0f2 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -28,7 +28,7 @@ MODULE_DESCRIPTION("iptables mangle table"); | |||
28 | (1 << NF_INET_POST_ROUTING)) | 28 | (1 << NF_INET_POST_ROUTING)) |
29 | 29 | ||
30 | /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ | 30 | /* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ |
31 | static struct | 31 | static const struct |
32 | { | 32 | { |
33 | struct ipt_replace repl; | 33 | struct ipt_replace repl; |
34 | struct ipt_standard entries[5]; | 34 | struct ipt_standard entries[5]; |
@@ -64,11 +64,11 @@ static struct | |||
64 | .term = IPT_ERROR_INIT, /* ERROR */ | 64 | .term = IPT_ERROR_INIT, /* ERROR */ |
65 | }; | 65 | }; |
66 | 66 | ||
67 | static struct xt_table packet_mangler = { | 67 | static const struct xt_table packet_mangler = { |
68 | .name = "mangle", | 68 | .name = "mangle", |
69 | .valid_hooks = MANGLE_VALID_HOOKS, | 69 | .valid_hooks = MANGLE_VALID_HOOKS, |
70 | .me = THIS_MODULE, | 70 | .me = THIS_MODULE, |
71 | .af = AF_INET, | 71 | .af = NFPROTO_IPV4, |
72 | }; | 72 | }; |
73 | 73 | ||
74 | /* The work comes in here from netfilter.c. */ | 74 | /* The work comes in here from netfilter.c. */ |
@@ -162,35 +162,35 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
162 | { | 162 | { |
163 | .hook = ipt_pre_routing_hook, | 163 | .hook = ipt_pre_routing_hook, |
164 | .owner = THIS_MODULE, | 164 | .owner = THIS_MODULE, |
165 | .pf = PF_INET, | 165 | .pf = NFPROTO_IPV4, |
166 | .hooknum = NF_INET_PRE_ROUTING, | 166 | .hooknum = NF_INET_PRE_ROUTING, |
167 | .priority = NF_IP_PRI_MANGLE, | 167 | .priority = NF_IP_PRI_MANGLE, |
168 | }, | 168 | }, |
169 | { | 169 | { |
170 | .hook = ipt_local_in_hook, | 170 | .hook = ipt_local_in_hook, |
171 | .owner = THIS_MODULE, | 171 | .owner = THIS_MODULE, |
172 | .pf = PF_INET, | 172 | .pf = NFPROTO_IPV4, |
173 | .hooknum = NF_INET_LOCAL_IN, | 173 | .hooknum = NF_INET_LOCAL_IN, |
174 | .priority = NF_IP_PRI_MANGLE, | 174 | .priority = NF_IP_PRI_MANGLE, |
175 | }, | 175 | }, |
176 | { | 176 | { |
177 | .hook = ipt_forward_hook, | 177 | .hook = ipt_forward_hook, |
178 | .owner = THIS_MODULE, | 178 | .owner = THIS_MODULE, |
179 | .pf = PF_INET, | 179 | .pf = NFPROTO_IPV4, |
180 | .hooknum = NF_INET_FORWARD, | 180 | .hooknum = NF_INET_FORWARD, |
181 | .priority = NF_IP_PRI_MANGLE, | 181 | .priority = NF_IP_PRI_MANGLE, |
182 | }, | 182 | }, |
183 | { | 183 | { |
184 | .hook = ipt_local_hook, | 184 | .hook = ipt_local_hook, |
185 | .owner = THIS_MODULE, | 185 | .owner = THIS_MODULE, |
186 | .pf = PF_INET, | 186 | .pf = NFPROTO_IPV4, |
187 | .hooknum = NF_INET_LOCAL_OUT, | 187 | .hooknum = NF_INET_LOCAL_OUT, |
188 | .priority = NF_IP_PRI_MANGLE, | 188 | .priority = NF_IP_PRI_MANGLE, |
189 | }, | 189 | }, |
190 | { | 190 | { |
191 | .hook = ipt_post_routing_hook, | 191 | .hook = ipt_post_routing_hook, |
192 | .owner = THIS_MODULE, | 192 | .owner = THIS_MODULE, |
193 | .pf = PF_INET, | 193 | .pf = NFPROTO_IPV4, |
194 | .hooknum = NF_INET_POST_ROUTING, | 194 | .hooknum = NF_INET_POST_ROUTING, |
195 | .priority = NF_IP_PRI_MANGLE, | 195 | .priority = NF_IP_PRI_MANGLE, |
196 | }, | 196 | }, |
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index e5356da1fb54..993edc23be09 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -9,7 +9,7 @@ | |||
9 | 9 | ||
10 | #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) | 10 | #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) |
11 | 11 | ||
12 | static struct | 12 | static const struct |
13 | { | 13 | { |
14 | struct ipt_replace repl; | 14 | struct ipt_replace repl; |
15 | struct ipt_standard entries[2]; | 15 | struct ipt_standard entries[2]; |
@@ -36,11 +36,11 @@ static struct | |||
36 | .term = IPT_ERROR_INIT, /* ERROR */ | 36 | .term = IPT_ERROR_INIT, /* ERROR */ |
37 | }; | 37 | }; |
38 | 38 | ||
39 | static struct xt_table packet_raw = { | 39 | static const struct xt_table packet_raw = { |
40 | .name = "raw", | 40 | .name = "raw", |
41 | .valid_hooks = RAW_VALID_HOOKS, | 41 | .valid_hooks = RAW_VALID_HOOKS, |
42 | .me = THIS_MODULE, | 42 | .me = THIS_MODULE, |
43 | .af = AF_INET, | 43 | .af = NFPROTO_IPV4, |
44 | }; | 44 | }; |
45 | 45 | ||
46 | /* The work comes in here from netfilter.c. */ | 46 | /* The work comes in here from netfilter.c. */ |
@@ -74,14 +74,14 @@ ipt_local_hook(unsigned int hook, | |||
74 | static struct nf_hook_ops ipt_ops[] __read_mostly = { | 74 | static struct nf_hook_ops ipt_ops[] __read_mostly = { |
75 | { | 75 | { |
76 | .hook = ipt_hook, | 76 | .hook = ipt_hook, |
77 | .pf = PF_INET, | 77 | .pf = NFPROTO_IPV4, |
78 | .hooknum = NF_INET_PRE_ROUTING, | 78 | .hooknum = NF_INET_PRE_ROUTING, |
79 | .priority = NF_IP_PRI_RAW, | 79 | .priority = NF_IP_PRI_RAW, |
80 | .owner = THIS_MODULE, | 80 | .owner = THIS_MODULE, |
81 | }, | 81 | }, |
82 | { | 82 | { |
83 | .hook = ipt_local_hook, | 83 | .hook = ipt_local_hook, |
84 | .pf = PF_INET, | 84 | .pf = NFPROTO_IPV4, |
85 | .hooknum = NF_INET_LOCAL_OUT, | 85 | .hooknum = NF_INET_LOCAL_OUT, |
86 | .priority = NF_IP_PRI_RAW, | 86 | .priority = NF_IP_PRI_RAW, |
87 | .owner = THIS_MODULE, | 87 | .owner = THIS_MODULE, |
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 29ab630f240a..99eb76c65d25 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c | |||
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); | |||
27 | (1 << NF_INET_FORWARD) | \ | 27 | (1 << NF_INET_FORWARD) | \ |
28 | (1 << NF_INET_LOCAL_OUT) | 28 | (1 << NF_INET_LOCAL_OUT) |
29 | 29 | ||
30 | static struct | 30 | static const struct |
31 | { | 31 | { |
32 | struct ipt_replace repl; | 32 | struct ipt_replace repl; |
33 | struct ipt_standard entries[3]; | 33 | struct ipt_standard entries[3]; |
@@ -57,11 +57,11 @@ static struct | |||
57 | .term = IPT_ERROR_INIT, /* ERROR */ | 57 | .term = IPT_ERROR_INIT, /* ERROR */ |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static struct xt_table security_table = { | 60 | static const struct xt_table security_table = { |
61 | .name = "security", | 61 | .name = "security", |
62 | .valid_hooks = SECURITY_VALID_HOOKS, | 62 | .valid_hooks = SECURITY_VALID_HOOKS, |
63 | .me = THIS_MODULE, | 63 | .me = THIS_MODULE, |
64 | .af = AF_INET, | 64 | .af = NFPROTO_IPV4, |
65 | }; | 65 | }; |
66 | 66 | ||
67 | static unsigned int | 67 | static unsigned int |
@@ -105,21 +105,21 @@ static struct nf_hook_ops ipt_ops[] __read_mostly = { | |||
105 | { | 105 | { |
106 | .hook = ipt_local_in_hook, | 106 | .hook = ipt_local_in_hook, |
107 | .owner = THIS_MODULE, | 107 | .owner = THIS_MODULE, |
108 | .pf = PF_INET, | 108 | .pf = NFPROTO_IPV4, |
109 | .hooknum = NF_INET_LOCAL_IN, | 109 | .hooknum = NF_INET_LOCAL_IN, |
110 | .priority = NF_IP_PRI_SECURITY, | 110 | .priority = NF_IP_PRI_SECURITY, |
111 | }, | 111 | }, |
112 | { | 112 | { |
113 | .hook = ipt_forward_hook, | 113 | .hook = ipt_forward_hook, |
114 | .owner = THIS_MODULE, | 114 | .owner = THIS_MODULE, |
115 | .pf = PF_INET, | 115 | .pf = NFPROTO_IPV4, |
116 | .hooknum = NF_INET_FORWARD, | 116 | .hooknum = NF_INET_FORWARD, |
117 | .priority = NF_IP_PRI_SECURITY, | 117 | .priority = NF_IP_PRI_SECURITY, |
118 | }, | 118 | }, |
119 | { | 119 | { |
120 | .hook = ipt_local_out_hook, | 120 | .hook = ipt_local_out_hook, |
121 | .owner = THIS_MODULE, | 121 | .owner = THIS_MODULE, |
122 | .pf = PF_INET, | 122 | .pf = NFPROTO_IPV4, |
123 | .hooknum = NF_INET_LOCAL_OUT, | 123 | .hooknum = NF_INET_LOCAL_OUT, |
124 | .priority = NF_IP_PRI_SECURITY, | 124 | .priority = NF_IP_PRI_SECURITY, |
125 | }, | 125 | }, |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 7d2ead7228ac..aa95bb82ee6c 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> | 26 | #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> |
27 | #include <net/netfilter/nf_nat_helper.h> | 27 | #include <net/netfilter/nf_nat_helper.h> |
28 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> | 28 | #include <net/netfilter/ipv4/nf_defrag_ipv4.h> |
29 | #include <net/netfilter/nf_log.h> | ||
29 | 30 | ||
30 | int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, | 31 | int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, |
31 | struct nf_conn *ct, | 32 | struct nf_conn *ct, |
@@ -113,8 +114,11 @@ static unsigned int ipv4_confirm(unsigned int hooknum, | |||
113 | 114 | ||
114 | ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), | 115 | ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), |
115 | ct, ctinfo); | 116 | ct, ctinfo); |
116 | if (ret != NF_ACCEPT) | 117 | if (ret != NF_ACCEPT) { |
118 | nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, | ||
119 | "nf_ct_%s: dropping packet", helper->name); | ||
117 | return ret; | 120 | return ret; |
121 | } | ||
118 | 122 | ||
119 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { | 123 | if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { |
120 | typeof(nf_nat_seq_adjust_hook) seq_adjust; | 124 | typeof(nf_nat_seq_adjust_hook) seq_adjust; |
@@ -158,28 +162,28 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { | |||
158 | { | 162 | { |
159 | .hook = ipv4_conntrack_in, | 163 | .hook = ipv4_conntrack_in, |
160 | .owner = THIS_MODULE, | 164 | .owner = THIS_MODULE, |
161 | .pf = PF_INET, | 165 | .pf = NFPROTO_IPV4, |
162 | .hooknum = NF_INET_PRE_ROUTING, | 166 | .hooknum = NF_INET_PRE_ROUTING, |
163 | .priority = NF_IP_PRI_CONNTRACK, | 167 | .priority = NF_IP_PRI_CONNTRACK, |
164 | }, | 168 | }, |
165 | { | 169 | { |
166 | .hook = ipv4_conntrack_local, | 170 | .hook = ipv4_conntrack_local, |
167 | .owner = THIS_MODULE, | 171 | .owner = THIS_MODULE, |
168 | .pf = PF_INET, | 172 | .pf = NFPROTO_IPV4, |
169 | .hooknum = NF_INET_LOCAL_OUT, | 173 | .hooknum = NF_INET_LOCAL_OUT, |
170 | .priority = NF_IP_PRI_CONNTRACK, | 174 | .priority = NF_IP_PRI_CONNTRACK, |
171 | }, | 175 | }, |
172 | { | 176 | { |
173 | .hook = ipv4_confirm, | 177 | .hook = ipv4_confirm, |
174 | .owner = THIS_MODULE, | 178 | .owner = THIS_MODULE, |
175 | .pf = PF_INET, | 179 | .pf = NFPROTO_IPV4, |
176 | .hooknum = NF_INET_POST_ROUTING, | 180 | .hooknum = NF_INET_POST_ROUTING, |
177 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | 181 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, |
178 | }, | 182 | }, |
179 | { | 183 | { |
180 | .hook = ipv4_confirm, | 184 | .hook = ipv4_confirm, |
181 | .owner = THIS_MODULE, | 185 | .owner = THIS_MODULE, |
182 | .pf = PF_INET, | 186 | .pf = NFPROTO_IPV4, |
183 | .hooknum = NF_INET_LOCAL_IN, | 187 | .hooknum = NF_INET_LOCAL_IN, |
184 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, | 188 | .priority = NF_IP_PRI_CONNTRACK_CONFIRM, |
185 | }, | 189 | }, |
@@ -256,11 +260,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) | |||
256 | tuple.dst.u3.ip = inet->daddr; | 260 | tuple.dst.u3.ip = inet->daddr; |
257 | tuple.dst.u.tcp.port = inet->dport; | 261 | tuple.dst.u.tcp.port = inet->dport; |
258 | tuple.src.l3num = PF_INET; | 262 | tuple.src.l3num = PF_INET; |
259 | tuple.dst.protonum = IPPROTO_TCP; | 263 | tuple.dst.protonum = sk->sk_protocol; |
260 | 264 | ||
261 | /* We only do TCP at the moment: is there a better way? */ | 265 | /* We only do TCP and SCTP at the moment: is there a better way? */ |
262 | if (strcmp(sk->sk_prot->name, "TCP")) { | 266 | if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { |
263 | pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n"); | 267 | pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); |
264 | return -ENOPROTOOPT; | 268 | return -ENOPROTOOPT; |
265 | } | 269 | } |
266 | 270 | ||
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 3229e0a81ba6..68afc6ecd343 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -212,7 +212,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, | |||
212 | maxip = ntohl(range->max_ip); | 212 | maxip = ntohl(range->max_ip); |
213 | j = jhash_2words((__force u32)tuple->src.u3.ip, | 213 | j = jhash_2words((__force u32)tuple->src.u3.ip, |
214 | range->flags & IP_NAT_RANGE_PERSISTENT ? | 214 | range->flags & IP_NAT_RANGE_PERSISTENT ? |
215 | (__force u32)tuple->dst.u3.ip : 0, 0); | 215 | 0 : (__force u32)tuple->dst.u3.ip, 0); |
216 | j = ((u64)j * (maxip - minip + 1)) >> 32; | 216 | j = ((u64)j * (maxip - minip + 1)) >> 32; |
217 | *var_ipp = htonl(minip + j); | 217 | *var_ipp = htonl(minip + j); |
218 | } | 218 | } |
@@ -620,7 +620,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { | |||
620 | }; | 620 | }; |
621 | 621 | ||
622 | static int | 622 | static int |
623 | nfnetlink_parse_nat(struct nlattr *nat, | 623 | nfnetlink_parse_nat(const struct nlattr *nat, |
624 | const struct nf_conn *ct, struct nf_nat_range *range) | 624 | const struct nf_conn *ct, struct nf_nat_range *range) |
625 | { | 625 | { |
626 | struct nlattr *tb[CTA_NAT_MAX+1]; | 626 | struct nlattr *tb[CTA_NAT_MAX+1]; |
@@ -656,7 +656,7 @@ nfnetlink_parse_nat(struct nlattr *nat, | |||
656 | static int | 656 | static int |
657 | nfnetlink_parse_nat_setup(struct nf_conn *ct, | 657 | nfnetlink_parse_nat_setup(struct nf_conn *ct, |
658 | enum nf_nat_manip_type manip, | 658 | enum nf_nat_manip_type manip, |
659 | struct nlattr *attr) | 659 | const struct nlattr *attr) |
660 | { | 660 | { |
661 | struct nf_nat_range range; | 661 | struct nf_nat_range range; |
662 | 662 | ||
@@ -671,7 +671,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, | |||
671 | static int | 671 | static int |
672 | nfnetlink_parse_nat_setup(struct nf_conn *ct, | 672 | nfnetlink_parse_nat_setup(struct nf_conn *ct, |
673 | enum nf_nat_manip_type manip, | 673 | enum nf_nat_manip_type manip, |
674 | struct nlattr *attr) | 674 | const struct nlattr *attr) |
675 | { | 675 | { |
676 | return -EOPNOTSUPP; | 676 | return -EOPNOTSUPP; |
677 | } | 677 | } |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6348a793936e..9e81e0dfb4ec 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -28,7 +28,7 @@ | |||
28 | (1 << NF_INET_POST_ROUTING) | \ | 28 | (1 << NF_INET_POST_ROUTING) | \ |
29 | (1 << NF_INET_LOCAL_OUT)) | 29 | (1 << NF_INET_LOCAL_OUT)) |
30 | 30 | ||
31 | static struct | 31 | static const struct |
32 | { | 32 | { |
33 | struct ipt_replace repl; | 33 | struct ipt_replace repl; |
34 | struct ipt_standard entries[3]; | 34 | struct ipt_standard entries[3]; |
@@ -58,11 +58,11 @@ static struct | |||
58 | .term = IPT_ERROR_INIT, /* ERROR */ | 58 | .term = IPT_ERROR_INIT, /* ERROR */ |
59 | }; | 59 | }; |
60 | 60 | ||
61 | static struct xt_table nat_table = { | 61 | static const struct xt_table nat_table = { |
62 | .name = "nat", | 62 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 63 | .valid_hooks = NAT_VALID_HOOKS, |
64 | .me = THIS_MODULE, | 64 | .me = THIS_MODULE, |
65 | .af = AF_INET, | 65 | .af = NFPROTO_IPV4, |
66 | }; | 66 | }; |
67 | 67 | ||
68 | /* Source NAT */ | 68 | /* Source NAT */ |
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 5567bd0d0750..5f41d017ddd8 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c | |||
@@ -251,7 +251,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
251 | { | 251 | { |
252 | .hook = nf_nat_in, | 252 | .hook = nf_nat_in, |
253 | .owner = THIS_MODULE, | 253 | .owner = THIS_MODULE, |
254 | .pf = PF_INET, | 254 | .pf = NFPROTO_IPV4, |
255 | .hooknum = NF_INET_PRE_ROUTING, | 255 | .hooknum = NF_INET_PRE_ROUTING, |
256 | .priority = NF_IP_PRI_NAT_DST, | 256 | .priority = NF_IP_PRI_NAT_DST, |
257 | }, | 257 | }, |
@@ -259,7 +259,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
259 | { | 259 | { |
260 | .hook = nf_nat_out, | 260 | .hook = nf_nat_out, |
261 | .owner = THIS_MODULE, | 261 | .owner = THIS_MODULE, |
262 | .pf = PF_INET, | 262 | .pf = NFPROTO_IPV4, |
263 | .hooknum = NF_INET_POST_ROUTING, | 263 | .hooknum = NF_INET_POST_ROUTING, |
264 | .priority = NF_IP_PRI_NAT_SRC, | 264 | .priority = NF_IP_PRI_NAT_SRC, |
265 | }, | 265 | }, |
@@ -267,7 +267,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
267 | { | 267 | { |
268 | .hook = nf_nat_local_fn, | 268 | .hook = nf_nat_local_fn, |
269 | .owner = THIS_MODULE, | 269 | .owner = THIS_MODULE, |
270 | .pf = PF_INET, | 270 | .pf = NFPROTO_IPV4, |
271 | .hooknum = NF_INET_LOCAL_OUT, | 271 | .hooknum = NF_INET_LOCAL_OUT, |
272 | .priority = NF_IP_PRI_NAT_DST, | 272 | .priority = NF_IP_PRI_NAT_DST, |
273 | }, | 273 | }, |
@@ -275,7 +275,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = { | |||
275 | { | 275 | { |
276 | .hook = nf_nat_fn, | 276 | .hook = nf_nat_fn, |
277 | .owner = THIS_MODULE, | 277 | .owner = THIS_MODULE, |
278 | .pf = PF_INET, | 278 | .pf = NFPROTO_IPV4, |
279 | .hooknum = NF_INET_LOCAL_IN, | 279 | .hooknum = NF_INET_LOCAL_IN, |
280 | .priority = NF_IP_PRI_NAT_SRC, | 280 | .priority = NF_IP_PRI_NAT_SRC, |
281 | }, | 281 | }, |
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ea50da0649fd..a2e5fc0a15e1 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c | |||
@@ -22,26 +22,11 @@ | |||
22 | * as published by the Free Software Foundation; either version | 22 | * as published by the Free Software Foundation; either version |
23 | * 2 of the License, or (at your option) any later version. | 23 | * 2 of the License, or (at your option) any later version. |
24 | */ | 24 | */ |
25 | 25 | #include <linux/cache.h> | |
26 | #include <asm/uaccess.h> | ||
27 | #include <asm/system.h> | ||
28 | #include <linux/module.h> | 26 | #include <linux/module.h> |
29 | #include <linux/types.h> | ||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/socket.h> | ||
33 | #include <linux/in.h> | ||
34 | #include <linux/inet.h> | ||
35 | #include <linux/netdevice.h> | 27 | #include <linux/netdevice.h> |
36 | #include <linux/timer.h> | 28 | #include <linux/spinlock.h> |
37 | #include <net/ip.h> | ||
38 | #include <net/protocol.h> | 29 | #include <net/protocol.h> |
39 | #include <linux/skbuff.h> | ||
40 | #include <net/sock.h> | ||
41 | #include <net/icmp.h> | ||
42 | #include <net/udp.h> | ||
43 | #include <net/ipip.h> | ||
44 | #include <linux/igmp.h> | ||
45 | 30 | ||
46 | struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; | 31 | struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; |
47 | static DEFINE_SPINLOCK(inet_proto_lock); | 32 | static DEFINE_SPINLOCK(inet_proto_lock); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2979f14bb188..ebb1e5848bc6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, | |||
375 | err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, | 375 | err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, |
376 | dst_output); | 376 | dst_output); |
377 | if (err > 0) | 377 | if (err > 0) |
378 | err = inet->recverr ? net_xmit_errno(err) : 0; | 378 | err = net_xmit_errno(err); |
379 | if (err) | 379 | if (err) |
380 | goto error; | 380 | goto error; |
381 | out: | 381 | out: |
@@ -386,6 +386,8 @@ error_fault: | |||
386 | kfree_skb(skb); | 386 | kfree_skb(skb); |
387 | error: | 387 | error: |
388 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); | 388 | IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS); |
389 | if (err == -ENOBUFS && !inet->recverr) | ||
390 | err = 0; | ||
389 | return err; | 391 | return err; |
390 | } | 392 | } |
391 | 393 | ||
@@ -576,8 +578,11 @@ back_from_confirm: | |||
576 | &ipc, &rt, msg->msg_flags); | 578 | &ipc, &rt, msg->msg_flags); |
577 | if (err) | 579 | if (err) |
578 | ip_flush_pending_frames(sk); | 580 | ip_flush_pending_frames(sk); |
579 | else if (!(msg->msg_flags & MSG_MORE)) | 581 | else if (!(msg->msg_flags & MSG_MORE)) { |
580 | err = ip_push_pending_frames(sk); | 582 | err = ip_push_pending_frames(sk); |
583 | if (err == -ENOBUFS && !inet->recverr) | ||
584 | err = 0; | ||
585 | } | ||
581 | release_sock(sk); | 586 | release_sock(sk); |
582 | } | 587 | } |
583 | done: | 588 | done: |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 278f46f5011b..91867d3e6328 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -1514,13 +1514,17 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) | |||
1514 | void ip_rt_send_redirect(struct sk_buff *skb) | 1514 | void ip_rt_send_redirect(struct sk_buff *skb) |
1515 | { | 1515 | { |
1516 | struct rtable *rt = skb_rtable(skb); | 1516 | struct rtable *rt = skb_rtable(skb); |
1517 | struct in_device *in_dev = in_dev_get(rt->u.dst.dev); | 1517 | struct in_device *in_dev; |
1518 | int log_martians; | ||
1518 | 1519 | ||
1519 | if (!in_dev) | 1520 | rcu_read_lock(); |
1521 | in_dev = __in_dev_get_rcu(rt->u.dst.dev); | ||
1522 | if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { | ||
1523 | rcu_read_unlock(); | ||
1520 | return; | 1524 | return; |
1521 | 1525 | } | |
1522 | if (!IN_DEV_TX_REDIRECTS(in_dev)) | 1526 | log_martians = IN_DEV_LOG_MARTIANS(in_dev); |
1523 | goto out; | 1527 | rcu_read_unlock(); |
1524 | 1528 | ||
1525 | /* No redirected packets during ip_rt_redirect_silence; | 1529 | /* No redirected packets during ip_rt_redirect_silence; |
1526 | * reset the algorithm. | 1530 | * reset the algorithm. |
@@ -1533,7 +1537,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1533 | */ | 1537 | */ |
1534 | if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { | 1538 | if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { |
1535 | rt->u.dst.rate_last = jiffies; | 1539 | rt->u.dst.rate_last = jiffies; |
1536 | goto out; | 1540 | return; |
1537 | } | 1541 | } |
1538 | 1542 | ||
1539 | /* Check for load limit; set rate_last to the latest sent | 1543 | /* Check for load limit; set rate_last to the latest sent |
@@ -1547,7 +1551,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1547 | rt->u.dst.rate_last = jiffies; | 1551 | rt->u.dst.rate_last = jiffies; |
1548 | ++rt->u.dst.rate_tokens; | 1552 | ++rt->u.dst.rate_tokens; |
1549 | #ifdef CONFIG_IP_ROUTE_VERBOSE | 1553 | #ifdef CONFIG_IP_ROUTE_VERBOSE |
1550 | if (IN_DEV_LOG_MARTIANS(in_dev) && | 1554 | if (log_martians && |
1551 | rt->u.dst.rate_tokens == ip_rt_redirect_number && | 1555 | rt->u.dst.rate_tokens == ip_rt_redirect_number && |
1552 | net_ratelimit()) | 1556 | net_ratelimit()) |
1553 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", | 1557 | printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", |
@@ -1555,8 +1559,6 @@ void ip_rt_send_redirect(struct sk_buff *skb) | |||
1555 | &rt->rt_dst, &rt->rt_gateway); | 1559 | &rt->rt_dst, &rt->rt_gateway); |
1556 | #endif | 1560 | #endif |
1557 | } | 1561 | } |
1558 | out: | ||
1559 | in_dev_put(in_dev); | ||
1560 | } | 1562 | } |
1561 | 1563 | ||
1562 | static int ip_error(struct sk_buff *skb) | 1564 | static int ip_error(struct sk_buff *skb) |
@@ -3442,7 +3444,7 @@ int __init ip_rt_init(void) | |||
3442 | printk(KERN_ERR "Unable to create route proc files\n"); | 3444 | printk(KERN_ERR "Unable to create route proc files\n"); |
3443 | #ifdef CONFIG_XFRM | 3445 | #ifdef CONFIG_XFRM |
3444 | xfrm_init(); | 3446 | xfrm_init(); |
3445 | xfrm4_init(); | 3447 | xfrm4_init(ip_rt_max_size); |
3446 | #endif | 3448 | #endif |
3447 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); | 3449 | rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); |
3448 | 3450 | ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 91145244ea63..edeea060db44 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1839,7 +1839,7 @@ void tcp_close(struct sock *sk, long timeout) | |||
1839 | /* Unread data was tossed, zap the connection. */ | 1839 | /* Unread data was tossed, zap the connection. */ |
1840 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); | 1840 | NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); |
1841 | tcp_set_state(sk, TCP_CLOSE); | 1841 | tcp_set_state(sk, TCP_CLOSE); |
1842 | tcp_send_active_reset(sk, GFP_KERNEL); | 1842 | tcp_send_active_reset(sk, sk->sk_allocation); |
1843 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { | 1843 | } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { |
1844 | /* Check zero linger _after_ checking for unread data. */ | 1844 | /* Check zero linger _after_ checking for unread data. */ |
1845 | sk->sk_prot->disconnect(sk, 0); | 1845 | sk->sk_prot->disconnect(sk, 0); |
@@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, | |||
2336 | val = !!(tp->nonagle&TCP_NAGLE_CORK); | 2336 | val = !!(tp->nonagle&TCP_NAGLE_CORK); |
2337 | break; | 2337 | break; |
2338 | case TCP_KEEPIDLE: | 2338 | case TCP_KEEPIDLE: |
2339 | val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; | 2339 | val = keepalive_time_when(tp) / HZ; |
2340 | break; | 2340 | break; |
2341 | case TCP_KEEPINTVL: | 2341 | case TCP_KEEPINTVL: |
2342 | val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ; | 2342 | val = keepalive_intvl_when(tp) / HZ; |
2343 | break; | 2343 | break; |
2344 | case TCP_KEEPCNT: | 2344 | case TCP_KEEPCNT: |
2345 | val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; | 2345 | val = keepalive_probes(tp); |
2346 | break; | 2346 | break; |
2347 | case TCP_SYNCNT: | 2347 | case TCP_SYNCNT: |
2348 | val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 2348 | val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
@@ -2658,7 +2658,7 @@ void tcp_free_md5sig_pool(void) | |||
2658 | 2658 | ||
2659 | EXPORT_SYMBOL(tcp_free_md5sig_pool); | 2659 | EXPORT_SYMBOL(tcp_free_md5sig_pool); |
2660 | 2660 | ||
2661 | static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) | 2661 | static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(struct sock *sk) |
2662 | { | 2662 | { |
2663 | int cpu; | 2663 | int cpu; |
2664 | struct tcp_md5sig_pool **pool; | 2664 | struct tcp_md5sig_pool **pool; |
@@ -2671,7 +2671,7 @@ static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void) | |||
2671 | struct tcp_md5sig_pool *p; | 2671 | struct tcp_md5sig_pool *p; |
2672 | struct crypto_hash *hash; | 2672 | struct crypto_hash *hash; |
2673 | 2673 | ||
2674 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 2674 | p = kzalloc(sizeof(*p), sk->sk_allocation); |
2675 | if (!p) | 2675 | if (!p) |
2676 | goto out_free; | 2676 | goto out_free; |
2677 | *per_cpu_ptr(pool, cpu) = p; | 2677 | *per_cpu_ptr(pool, cpu) = p; |
@@ -2688,7 +2688,7 @@ out_free: | |||
2688 | return NULL; | 2688 | return NULL; |
2689 | } | 2689 | } |
2690 | 2690 | ||
2691 | struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void) | 2691 | struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(struct sock *sk) |
2692 | { | 2692 | { |
2693 | struct tcp_md5sig_pool **pool; | 2693 | struct tcp_md5sig_pool **pool; |
2694 | int alloc = 0; | 2694 | int alloc = 0; |
@@ -2709,7 +2709,7 @@ retry: | |||
2709 | 2709 | ||
2710 | if (alloc) { | 2710 | if (alloc) { |
2711 | /* we cannot hold spinlock here because this may sleep. */ | 2711 | /* we cannot hold spinlock here because this may sleep. */ |
2712 | struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(); | 2712 | struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool(sk); |
2713 | spin_lock_bh(&tcp_md5sig_pool_lock); | 2713 | spin_lock_bh(&tcp_md5sig_pool_lock); |
2714 | if (!p) { | 2714 | if (!p) { |
2715 | tcp_md5sig_users--; | 2715 | tcp_md5sig_users--; |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index e92beb9e55e0..6428b342b164 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -116,7 +116,7 @@ int tcp_set_default_congestion_control(const char *name) | |||
116 | spin_lock(&tcp_cong_list_lock); | 116 | spin_lock(&tcp_cong_list_lock); |
117 | ca = tcp_ca_find(name); | 117 | ca = tcp_ca_find(name); |
118 | #ifdef CONFIG_MODULES | 118 | #ifdef CONFIG_MODULES |
119 | if (!ca && capable(CAP_SYS_MODULE)) { | 119 | if (!ca && capable(CAP_NET_ADMIN)) { |
120 | spin_unlock(&tcp_cong_list_lock); | 120 | spin_unlock(&tcp_cong_list_lock); |
121 | 121 | ||
122 | request_module("tcp_%s", name); | 122 | request_module("tcp_%s", name); |
@@ -246,7 +246,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) | |||
246 | 246 | ||
247 | #ifdef CONFIG_MODULES | 247 | #ifdef CONFIG_MODULES |
248 | /* not found attempt to autoload module */ | 248 | /* not found attempt to autoload module */ |
249 | if (!ca && capable(CAP_SYS_MODULE)) { | 249 | if (!ca && capable(CAP_NET_ADMIN)) { |
250 | rcu_read_unlock(); | 250 | rcu_read_unlock(); |
251 | request_module("tcp_%s", name); | 251 | request_module("tcp_%s", name); |
252 | rcu_read_lock(); | 252 | rcu_read_lock(); |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bdb0da237e6..af6d6fa00db1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk) | |||
685 | * is invisible. Actually, Linux-2.4 also generates erratic | 685 | * is invisible. Actually, Linux-2.4 also generates erratic |
686 | * ACKs in some circumstances. | 686 | * ACKs in some circumstances. |
687 | */ | 687 | */ |
688 | inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; | 688 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); |
689 | 689 | ||
690 | /* 2. Fixups made earlier cannot be right. | 690 | /* 2. Fixups made earlier cannot be right. |
691 | * If we do not estimate RTO correctly without them, | 691 | * If we do not estimate RTO correctly without them, |
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk) | |||
696 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo | 696 | /* NOTE: clamping at TCP_RTO_MIN is not required, current algo |
697 | * guarantees that rto is higher. | 697 | * guarantees that rto is higher. |
698 | */ | 698 | */ |
699 | if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) | 699 | tcp_bound_rto(sk); |
700 | inet_csk(sk)->icsk_rto = TCP_RTO_MAX; | ||
701 | } | 700 | } |
702 | 701 | ||
703 | /* Save metrics learned by this TCP session. | 702 | /* Save metrics learned by this TCP session. |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6d88219c5e22..0543561da999 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -328,26 +328,29 @@ static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) | |||
328 | * | 328 | * |
329 | */ | 329 | */ |
330 | 330 | ||
331 | void tcp_v4_err(struct sk_buff *skb, u32 info) | 331 | void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) |
332 | { | 332 | { |
333 | struct iphdr *iph = (struct iphdr *)skb->data; | 333 | struct iphdr *iph = (struct iphdr *)icmp_skb->data; |
334 | struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); | 334 | struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); |
335 | struct inet_connection_sock *icsk; | ||
335 | struct tcp_sock *tp; | 336 | struct tcp_sock *tp; |
336 | struct inet_sock *inet; | 337 | struct inet_sock *inet; |
337 | const int type = icmp_hdr(skb)->type; | 338 | const int type = icmp_hdr(icmp_skb)->type; |
338 | const int code = icmp_hdr(skb)->code; | 339 | const int code = icmp_hdr(icmp_skb)->code; |
339 | struct sock *sk; | 340 | struct sock *sk; |
341 | struct sk_buff *skb; | ||
340 | __u32 seq; | 342 | __u32 seq; |
343 | __u32 remaining; | ||
341 | int err; | 344 | int err; |
342 | struct net *net = dev_net(skb->dev); | 345 | struct net *net = dev_net(icmp_skb->dev); |
343 | 346 | ||
344 | if (skb->len < (iph->ihl << 2) + 8) { | 347 | if (icmp_skb->len < (iph->ihl << 2) + 8) { |
345 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); | 348 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); |
346 | return; | 349 | return; |
347 | } | 350 | } |
348 | 351 | ||
349 | sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, | 352 | sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest, |
350 | iph->saddr, th->source, inet_iif(skb)); | 353 | iph->saddr, th->source, inet_iif(icmp_skb)); |
351 | if (!sk) { | 354 | if (!sk) { |
352 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); | 355 | ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); |
353 | return; | 356 | return; |
@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
367 | if (sk->sk_state == TCP_CLOSE) | 370 | if (sk->sk_state == TCP_CLOSE) |
368 | goto out; | 371 | goto out; |
369 | 372 | ||
373 | icsk = inet_csk(sk); | ||
370 | tp = tcp_sk(sk); | 374 | tp = tcp_sk(sk); |
371 | seq = ntohl(th->seq); | 375 | seq = ntohl(th->seq); |
372 | if (sk->sk_state != TCP_LISTEN && | 376 | if (sk->sk_state != TCP_LISTEN && |
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) | |||
393 | } | 397 | } |
394 | 398 | ||
395 | err = icmp_err_convert[code].errno; | 399 | err = icmp_err_convert[code].errno; |
400 | /* check if icmp_skb allows revert of backoff | ||
401 | * (see draft-zimmermann-tcp-lcd) */ | ||
402 | if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) | ||
403 | break; | ||
404 | if (seq != tp->snd_una || !icsk->icsk_retransmits || | ||
405 | !icsk->icsk_backoff) | ||
406 | break; | ||
407 | |||
408 | icsk->icsk_backoff--; | ||
409 | inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << | ||
410 | icsk->icsk_backoff; | ||
411 | tcp_bound_rto(sk); | ||
412 | |||
413 | skb = tcp_write_queue_head(sk); | ||
414 | BUG_ON(!skb); | ||
415 | |||
416 | remaining = icsk->icsk_rto - min(icsk->icsk_rto, | ||
417 | tcp_time_stamp - TCP_SKB_CB(skb)->when); | ||
418 | |||
419 | if (remaining) { | ||
420 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
421 | remaining, TCP_RTO_MAX); | ||
422 | } else if (sock_owned_by_user(sk)) { | ||
423 | /* RTO revert clocked out retransmission, | ||
424 | * but socket is locked. Will defer. */ | ||
425 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
426 | HZ/20, TCP_RTO_MAX); | ||
427 | } else { | ||
428 | /* RTO revert clocked out retransmission. | ||
429 | * Will retransmit now */ | ||
430 | tcp_retransmit_timer(sk); | ||
431 | } | ||
432 | |||
396 | break; | 433 | break; |
397 | case ICMP_TIME_EXCEEDED: | 434 | case ICMP_TIME_EXCEEDED: |
398 | err = EHOSTUNREACH; | 435 | err = EHOSTUNREACH; |
@@ -849,7 +886,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, | |||
849 | } | 886 | } |
850 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 887 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
851 | } | 888 | } |
852 | if (tcp_alloc_md5sig_pool() == NULL) { | 889 | if (tcp_alloc_md5sig_pool(sk) == NULL) { |
853 | kfree(newkey); | 890 | kfree(newkey); |
854 | return -ENOMEM; | 891 | return -ENOMEM; |
855 | } | 892 | } |
@@ -970,8 +1007,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, | |||
970 | 1007 | ||
971 | if (!tcp_sk(sk)->md5sig_info) { | 1008 | if (!tcp_sk(sk)->md5sig_info) { |
972 | struct tcp_sock *tp = tcp_sk(sk); | 1009 | struct tcp_sock *tp = tcp_sk(sk); |
973 | struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); | 1010 | struct tcp_md5sig_info *p; |
974 | 1011 | ||
1012 | p = kzalloc(sizeof(*p), sk->sk_allocation); | ||
975 | if (!p) | 1013 | if (!p) |
976 | return -EINVAL; | 1014 | return -EINVAL; |
977 | 1015 | ||
@@ -979,7 +1017,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, | |||
979 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 1017 | sk->sk_route_caps &= ~NETIF_F_GSO_MASK; |
980 | } | 1018 | } |
981 | 1019 | ||
982 | newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); | 1020 | newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); |
983 | if (!newkey) | 1021 | if (!newkey) |
984 | return -ENOMEM; | 1022 | return -ENOMEM; |
985 | return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, | 1023 | return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, |
@@ -1158,7 +1196,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { | |||
1158 | }; | 1196 | }; |
1159 | 1197 | ||
1160 | #ifdef CONFIG_TCP_MD5SIG | 1198 | #ifdef CONFIG_TCP_MD5SIG |
1161 | static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { | 1199 | static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { |
1162 | .md5_lookup = tcp_v4_reqsk_md5_lookup, | 1200 | .md5_lookup = tcp_v4_reqsk_md5_lookup, |
1163 | .calc_md5_hash = tcp_v4_md5_hash_skb, | 1201 | .calc_md5_hash = tcp_v4_md5_hash_skb, |
1164 | }; | 1202 | }; |
@@ -1717,7 +1755,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) | |||
1717 | return 0; | 1755 | return 0; |
1718 | } | 1756 | } |
1719 | 1757 | ||
1720 | struct inet_connection_sock_af_ops ipv4_specific = { | 1758 | const struct inet_connection_sock_af_ops ipv4_specific = { |
1721 | .queue_xmit = ip_queue_xmit, | 1759 | .queue_xmit = ip_queue_xmit, |
1722 | .send_check = tcp_v4_send_check, | 1760 | .send_check = tcp_v4_send_check, |
1723 | .rebuild_header = inet_sk_rebuild_header, | 1761 | .rebuild_header = inet_sk_rebuild_header, |
@@ -1737,7 +1775,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { | |||
1737 | }; | 1775 | }; |
1738 | 1776 | ||
1739 | #ifdef CONFIG_TCP_MD5SIG | 1777 | #ifdef CONFIG_TCP_MD5SIG |
1740 | static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { | 1778 | static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { |
1741 | .md5_lookup = tcp_v4_md5_lookup, | 1779 | .md5_lookup = tcp_v4_md5_lookup, |
1742 | .calc_md5_hash = tcp_v4_md5_hash_skb, | 1780 | .calc_md5_hash = tcp_v4_md5_hash_skb, |
1743 | .md5_add = tcp_v4_md5_add_func, | 1781 | .md5_add = tcp_v4_md5_add_func, |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f8d67ccc64f3..e48c37d74d77 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -322,7 +322,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
322 | if (key != NULL) { | 322 | if (key != NULL) { |
323 | memcpy(&tcptw->tw_md5_key, key->key, key->keylen); | 323 | memcpy(&tcptw->tw_md5_key, key->key, key->keylen); |
324 | tcptw->tw_md5_keylen = key->keylen; | 324 | tcptw->tw_md5_keylen = key->keylen; |
325 | if (tcp_alloc_md5sig_pool() == NULL) | 325 | if (tcp_alloc_md5sig_pool(sk) == NULL) |
326 | BUG(); | 326 | BUG(); |
327 | } | 327 | } |
328 | } while (0); | 328 | } while (0); |
@@ -657,29 +657,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
657 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); | 657 | child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); |
658 | if (child == NULL) | 658 | if (child == NULL) |
659 | goto listen_overflow; | 659 | goto listen_overflow; |
660 | #ifdef CONFIG_TCP_MD5SIG | ||
661 | else { | ||
662 | /* Copy over the MD5 key from the original socket */ | ||
663 | struct tcp_md5sig_key *key; | ||
664 | struct tcp_sock *tp = tcp_sk(sk); | ||
665 | key = tp->af_specific->md5_lookup(sk, child); | ||
666 | if (key != NULL) { | ||
667 | /* | ||
668 | * We're using one, so create a matching key on the | ||
669 | * newsk structure. If we fail to get memory then we | ||
670 | * end up not copying the key across. Shucks. | ||
671 | */ | ||
672 | char *newkey = kmemdup(key->key, key->keylen, | ||
673 | GFP_ATOMIC); | ||
674 | if (newkey) { | ||
675 | if (!tcp_alloc_md5sig_pool()) | ||
676 | BUG(); | ||
677 | tp->af_specific->md5_add(child, child, newkey, | ||
678 | key->keylen); | ||
679 | } | ||
680 | } | ||
681 | } | ||
682 | #endif | ||
683 | 660 | ||
684 | inet_csk_reqsk_queue_unlink(sk, req, prev); | 661 | inet_csk_reqsk_queue_unlink(sk, req, prev); |
685 | inet_csk_reqsk_queue_removed(sk, req); | 662 | inet_csk_reqsk_queue_removed(sk, req); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bd62712848fa..5200aab0ca97 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -59,6 +59,7 @@ int sysctl_tcp_base_mss __read_mostly = 512; | |||
59 | /* By default, RFC2861 behavior. */ | 59 | /* By default, RFC2861 behavior. */ |
60 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; | 60 | int sysctl_tcp_slow_start_after_idle __read_mostly = 1; |
61 | 61 | ||
62 | /* Account for new data that has been sent to the network. */ | ||
62 | static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) | 63 | static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) |
63 | { | 64 | { |
64 | struct tcp_sock *tp = tcp_sk(sk); | 65 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -142,6 +143,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) | |||
142 | tp->snd_cwnd_used = 0; | 143 | tp->snd_cwnd_used = 0; |
143 | } | 144 | } |
144 | 145 | ||
146 | /* Congestion state accounting after a packet has been sent. */ | ||
145 | static void tcp_event_data_sent(struct tcp_sock *tp, | 147 | static void tcp_event_data_sent(struct tcp_sock *tp, |
146 | struct sk_buff *skb, struct sock *sk) | 148 | struct sk_buff *skb, struct sock *sk) |
147 | { | 149 | { |
@@ -161,6 +163,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, | |||
161 | icsk->icsk_ack.pingpong = 1; | 163 | icsk->icsk_ack.pingpong = 1; |
162 | } | 164 | } |
163 | 165 | ||
166 | /* Account for an ACK we sent. */ | ||
164 | static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) | 167 | static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) |
165 | { | 168 | { |
166 | tcp_dec_quickack_mode(sk, pkts); | 169 | tcp_dec_quickack_mode(sk, pkts); |
@@ -276,6 +279,7 @@ static u16 tcp_select_window(struct sock *sk) | |||
276 | return new_win; | 279 | return new_win; |
277 | } | 280 | } |
278 | 281 | ||
282 | /* Packet ECN state for a SYN-ACK */ | ||
279 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) | 283 | static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) |
280 | { | 284 | { |
281 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; | 285 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; |
@@ -283,6 +287,7 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) | |||
283 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; | 287 | TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; |
284 | } | 288 | } |
285 | 289 | ||
290 | /* Packet ECN state for a SYN. */ | ||
286 | static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) | 291 | static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) |
287 | { | 292 | { |
288 | struct tcp_sock *tp = tcp_sk(sk); | 293 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -301,6 +306,9 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) | |||
301 | th->ece = 1; | 306 | th->ece = 1; |
302 | } | 307 | } |
303 | 308 | ||
309 | /* Set up ECN state for a packet on a ESTABLISHED socket that is about to | ||
310 | * be sent. | ||
311 | */ | ||
304 | static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, | 312 | static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, |
305 | int tcp_header_len) | 313 | int tcp_header_len) |
306 | { | 314 | { |
@@ -362,7 +370,9 @@ struct tcp_out_options { | |||
362 | __u32 tsval, tsecr; /* need to include OPTION_TS */ | 370 | __u32 tsval, tsecr; /* need to include OPTION_TS */ |
363 | }; | 371 | }; |
364 | 372 | ||
365 | /* Beware: Something in the Internet is very sensitive to the ordering of | 373 | /* Write previously computed TCP options to the packet. |
374 | * | ||
375 | * Beware: Something in the Internet is very sensitive to the ordering of | ||
366 | * TCP options, we learned this through the hard way, so be careful here. | 376 | * TCP options, we learned this through the hard way, so be careful here. |
367 | * Luckily we can at least blame others for their non-compliance but from | 377 | * Luckily we can at least blame others for their non-compliance but from |
368 | * inter-operatibility perspective it seems that we're somewhat stuck with | 378 | * inter-operatibility perspective it seems that we're somewhat stuck with |
@@ -445,6 +455,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
445 | } | 455 | } |
446 | } | 456 | } |
447 | 457 | ||
458 | /* Compute TCP options for SYN packets. This is not the final | ||
459 | * network wire format yet. | ||
460 | */ | ||
448 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | 461 | static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, |
449 | struct tcp_out_options *opts, | 462 | struct tcp_out_options *opts, |
450 | struct tcp_md5sig_key **md5) { | 463 | struct tcp_md5sig_key **md5) { |
@@ -493,6 +506,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, | |||
493 | return size; | 506 | return size; |
494 | } | 507 | } |
495 | 508 | ||
509 | /* Set up TCP options for SYN-ACKs. */ | ||
496 | static unsigned tcp_synack_options(struct sock *sk, | 510 | static unsigned tcp_synack_options(struct sock *sk, |
497 | struct request_sock *req, | 511 | struct request_sock *req, |
498 | unsigned mss, struct sk_buff *skb, | 512 | unsigned mss, struct sk_buff *skb, |
@@ -541,6 +555,9 @@ static unsigned tcp_synack_options(struct sock *sk, | |||
541 | return size; | 555 | return size; |
542 | } | 556 | } |
543 | 557 | ||
558 | /* Compute TCP options for ESTABLISHED sockets. This is not the | ||
559 | * final wire format yet. | ||
560 | */ | ||
544 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | 561 | static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, |
545 | struct tcp_out_options *opts, | 562 | struct tcp_out_options *opts, |
546 | struct tcp_md5sig_key **md5) { | 563 | struct tcp_md5sig_key **md5) { |
@@ -705,7 +722,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
705 | return net_xmit_eval(err); | 722 | return net_xmit_eval(err); |
706 | } | 723 | } |
707 | 724 | ||
708 | /* This routine just queue's the buffer | 725 | /* This routine just queues the buffer for sending. |
709 | * | 726 | * |
710 | * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, | 727 | * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, |
711 | * otherwise socket can stall. | 728 | * otherwise socket can stall. |
@@ -722,6 +739,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
722 | sk_mem_charge(sk, skb->truesize); | 739 | sk_mem_charge(sk, skb->truesize); |
723 | } | 740 | } |
724 | 741 | ||
742 | /* Initialize TSO segments for a packet. */ | ||
725 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, | 743 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, |
726 | unsigned int mss_now) | 744 | unsigned int mss_now) |
727 | { | 745 | { |
@@ -909,6 +927,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) | |||
909 | skb->len = skb->data_len; | 927 | skb->len = skb->data_len; |
910 | } | 928 | } |
911 | 929 | ||
930 | /* Remove acked data from a packet in the transmit queue. */ | ||
912 | int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | 931 | int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) |
913 | { | 932 | { |
914 | if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) | 933 | if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
@@ -937,7 +956,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
937 | return 0; | 956 | return 0; |
938 | } | 957 | } |
939 | 958 | ||
940 | /* Not accounting for SACKs here. */ | 959 | /* Calculate MSS. Not accounting for SACKs here. */ |
941 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) | 960 | int tcp_mtu_to_mss(struct sock *sk, int pmtu) |
942 | { | 961 | { |
943 | struct tcp_sock *tp = tcp_sk(sk); | 962 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -981,6 +1000,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) | |||
981 | return mtu; | 1000 | return mtu; |
982 | } | 1001 | } |
983 | 1002 | ||
1003 | /* MTU probing init per socket */ | ||
984 | void tcp_mtup_init(struct sock *sk) | 1004 | void tcp_mtup_init(struct sock *sk) |
985 | { | 1005 | { |
986 | struct tcp_sock *tp = tcp_sk(sk); | 1006 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -1143,7 +1163,8 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, | |||
1143 | return 0; | 1163 | return 0; |
1144 | } | 1164 | } |
1145 | 1165 | ||
1146 | /* This must be invoked the first time we consider transmitting | 1166 | /* Initialize TSO state of a skb. |
1167 | * This must be invoked the first time we consider transmitting | ||
1147 | * SKB onto the wire. | 1168 | * SKB onto the wire. |
1148 | */ | 1169 | */ |
1149 | static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, | 1170 | static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, |
@@ -1158,6 +1179,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, | |||
1158 | return tso_segs; | 1179 | return tso_segs; |
1159 | } | 1180 | } |
1160 | 1181 | ||
1182 | /* Minshall's variant of the Nagle send check. */ | ||
1161 | static inline int tcp_minshall_check(const struct tcp_sock *tp) | 1183 | static inline int tcp_minshall_check(const struct tcp_sock *tp) |
1162 | { | 1184 | { |
1163 | return after(tp->snd_sml, tp->snd_una) && | 1185 | return after(tp->snd_sml, tp->snd_una) && |
@@ -1242,6 +1264,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, | |||
1242 | return cwnd_quota; | 1264 | return cwnd_quota; |
1243 | } | 1265 | } |
1244 | 1266 | ||
1267 | /* Test if sending is allowed right now. */ | ||
1245 | int tcp_may_send_now(struct sock *sk) | 1268 | int tcp_may_send_now(struct sock *sk) |
1246 | { | 1269 | { |
1247 | struct tcp_sock *tp = tcp_sk(sk); | 1270 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -1378,6 +1401,10 @@ send_now: | |||
1378 | } | 1401 | } |
1379 | 1402 | ||
1380 | /* Create a new MTU probe if we are ready. | 1403 | /* Create a new MTU probe if we are ready. |
1404 | * MTU probe is regularly attempting to increase the path MTU by | ||
1405 | * deliberately sending larger packets. This discovers routing | ||
1406 | * changes resulting in larger path MTUs. | ||
1407 | * | ||
1381 | * Returns 0 if we should wait to probe (no cwnd available), | 1408 | * Returns 0 if we should wait to probe (no cwnd available), |
1382 | * 1 if a probe was sent, | 1409 | * 1 if a probe was sent, |
1383 | * -1 otherwise | 1410 | * -1 otherwise |
@@ -1790,6 +1817,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
1790 | sk_wmem_free_skb(sk, next_skb); | 1817 | sk_wmem_free_skb(sk, next_skb); |
1791 | } | 1818 | } |
1792 | 1819 | ||
1820 | /* Check if coalescing SKBs is legal. */ | ||
1793 | static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) | 1821 | static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) |
1794 | { | 1822 | { |
1795 | if (tcp_skb_pcount(skb) > 1) | 1823 | if (tcp_skb_pcount(skb) > 1) |
@@ -1808,6 +1836,9 @@ static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) | |||
1808 | return 1; | 1836 | return 1; |
1809 | } | 1837 | } |
1810 | 1838 | ||
1839 | /* Collapse packets in the retransmit queue to create | |
1840 | * fewer packets on the wire. This is only done on retransmission. | |
1841 | */ | ||
1811 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | 1842 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, |
1812 | int space) | 1843 | int space) |
1813 | { | 1844 | { |
@@ -1957,6 +1988,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1957 | return err; | 1988 | return err; |
1958 | } | 1989 | } |
1959 | 1990 | ||
1991 | /* Check if forward retransmits are possible in the current | |
1992 | * window/congestion state. | ||
1993 | */ | ||
1960 | static int tcp_can_forward_retransmit(struct sock *sk) | 1994 | static int tcp_can_forward_retransmit(struct sock *sk) |
1961 | { | 1995 | { |
1962 | const struct inet_connection_sock *icsk = inet_csk(sk); | 1996 | const struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2101,7 +2135,8 @@ void tcp_send_fin(struct sock *sk) | |||
2101 | } else { | 2135 | } else { |
2102 | /* Socket is locked, keep trying until memory is available. */ | 2136 | /* Socket is locked, keep trying until memory is available. */ |
2103 | for (;;) { | 2137 | for (;;) { |
2104 | skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); | 2138 | skb = alloc_skb_fclone(MAX_TCP_HEADER, |
2139 | sk->sk_allocation); | ||
2105 | if (skb) | 2140 | if (skb) |
2106 | break; | 2141 | break; |
2107 | yield(); | 2142 | yield(); |
@@ -2145,7 +2180,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) | |||
2145 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); | 2180 | TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS); |
2146 | } | 2181 | } |
2147 | 2182 | ||
2148 | /* WARNING: This routine must only be called when we have already sent | 2183 | /* Send a crossed SYN-ACK during socket establishment. |
2184 | * WARNING: This routine must only be called when we have already sent | ||
2149 | * a SYN packet that crossed the incoming SYN that caused this routine | 2185 | * a SYN packet that crossed the incoming SYN that caused this routine |
2150 | * to get called. If this assumption fails then the initial rcv_wnd | 2186 | * to get called. If this assumption fails then the initial rcv_wnd |
2151 | * and rcv_wscale values will not be correct. | 2187 | * and rcv_wscale values will not be correct. |
@@ -2180,9 +2216,7 @@ int tcp_send_synack(struct sock *sk) | |||
2180 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); | 2216 | return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); |
2181 | } | 2217 | } |
2182 | 2218 | ||
2183 | /* | 2219 | /* Prepare a SYN-ACK. */ |
2184 | * Prepare a SYN-ACK. | ||
2185 | */ | ||
2186 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | 2220 | struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, |
2187 | struct request_sock *req) | 2221 | struct request_sock *req) |
2188 | { | 2222 | { |
@@ -2269,9 +2303,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, | |||
2269 | return skb; | 2303 | return skb; |
2270 | } | 2304 | } |
2271 | 2305 | ||
2272 | /* | 2306 | /* Do all connect socket setups that can be done AF independent. */ |
2273 | * Do all connect socket setups that can be done AF independent. | ||
2274 | */ | ||
2275 | static void tcp_connect_init(struct sock *sk) | 2307 | static void tcp_connect_init(struct sock *sk) |
2276 | { | 2308 | { |
2277 | struct dst_entry *dst = __sk_dst_get(sk); | 2309 | struct dst_entry *dst = __sk_dst_get(sk); |
@@ -2330,9 +2362,7 @@ static void tcp_connect_init(struct sock *sk) | |||
2330 | tcp_clear_retrans(tp); | 2362 | tcp_clear_retrans(tp); |
2331 | } | 2363 | } |
2332 | 2364 | ||
2333 | /* | 2365 | /* Build a SYN and send it off. */ |
2334 | * Build a SYN and send it off. | ||
2335 | */ | ||
2336 | int tcp_connect(struct sock *sk) | 2366 | int tcp_connect(struct sock *sk) |
2337 | { | 2367 | { |
2338 | struct tcp_sock *tp = tcp_sk(sk); | 2368 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -2359,7 +2389,7 @@ int tcp_connect(struct sock *sk) | |||
2359 | sk->sk_wmem_queued += buff->truesize; | 2389 | sk->sk_wmem_queued += buff->truesize; |
2360 | sk_mem_charge(sk, buff->truesize); | 2390 | sk_mem_charge(sk, buff->truesize); |
2361 | tp->packets_out += tcp_skb_pcount(buff); | 2391 | tp->packets_out += tcp_skb_pcount(buff); |
2362 | tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); | 2392 | tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); |
2363 | 2393 | ||
2364 | /* We change tp->snd_nxt after the tcp_transmit_skb() call | 2394 | /* We change tp->snd_nxt after the tcp_transmit_skb() call |
2365 | * in order to make this packet get counted in tcpOutSegs. | 2395 | * in order to make this packet get counted in tcpOutSegs. |
@@ -2493,6 +2523,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) | |||
2493 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); | 2523 | return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); |
2494 | } | 2524 | } |
2495 | 2525 | ||
2526 | /* Initiate keepalive or window probe from timer. */ | ||
2496 | int tcp_write_wakeup(struct sock *sk) | 2527 | int tcp_write_wakeup(struct sock *sk) |
2497 | { | 2528 | { |
2498 | struct tcp_sock *tp = tcp_sk(sk); | 2529 | struct tcp_sock *tp = tcp_sk(sk); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b144a26359bc..cdb2ca7684d4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk) | |||
137 | { | 137 | { |
138 | struct inet_connection_sock *icsk = inet_csk(sk); | 138 | struct inet_connection_sock *icsk = inet_csk(sk); |
139 | int retry_until; | 139 | int retry_until; |
140 | bool do_reset; | ||
140 | 141 | ||
141 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 142 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
142 | if (icsk->icsk_retransmits) | 143 | if (icsk->icsk_retransmits) |
143 | dst_negative_advice(&sk->sk_dst_cache); | 144 | dst_negative_advice(&sk->sk_dst_cache); |
144 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 145 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
145 | } else { | 146 | } else { |
146 | if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { | 147 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { |
147 | /* Black hole detection */ | 148 | /* Black hole detection */ |
148 | tcp_mtu_probing(icsk, sk); | 149 | tcp_mtu_probing(icsk, sk); |
149 | 150 | ||
@@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk) | |||
155 | const int alive = (icsk->icsk_rto < TCP_RTO_MAX); | 156 | const int alive = (icsk->icsk_rto < TCP_RTO_MAX); |
156 | 157 | ||
157 | retry_until = tcp_orphan_retries(sk, alive); | 158 | retry_until = tcp_orphan_retries(sk, alive); |
159 | do_reset = alive || | ||
160 | !retransmits_timed_out(sk, retry_until); | ||
158 | 161 | ||
159 | if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) | 162 | if (tcp_out_of_resources(sk, do_reset)) |
160 | return 1; | 163 | return 1; |
161 | } | 164 | } |
162 | } | 165 | } |
163 | 166 | ||
164 | if (icsk->icsk_retransmits >= retry_until) { | 167 | if (retransmits_timed_out(sk, retry_until)) { |
165 | /* Has it gone just too far? */ | 168 | /* Has it gone just too far? */ |
166 | tcp_write_err(sk); | 169 | tcp_write_err(sk); |
167 | return 1; | 170 | return 1; |
@@ -279,7 +282,7 @@ static void tcp_probe_timer(struct sock *sk) | |||
279 | * The TCP retransmit timer. | 282 | * The TCP retransmit timer. |
280 | */ | 283 | */ |
281 | 284 | ||
282 | static void tcp_retransmit_timer(struct sock *sk) | 285 | void tcp_retransmit_timer(struct sock *sk) |
283 | { | 286 | { |
284 | struct tcp_sock *tp = tcp_sk(sk); | 287 | struct tcp_sock *tp = tcp_sk(sk); |
285 | struct inet_connection_sock *icsk = inet_csk(sk); | 288 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -385,7 +388,7 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
385 | out_reset_timer: | 388 | out_reset_timer: |
386 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 389 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); |
387 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 390 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
388 | if (icsk->icsk_retransmits > sysctl_tcp_retries1) | 391 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) |
389 | __sk_dst_reset(sk); | 392 | __sk_dst_reset(sk); |
390 | 393 | ||
391 | out:; | 394 | out:; |
@@ -499,8 +502,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
499 | elapsed = tcp_time_stamp - tp->rcv_tstamp; | 502 | elapsed = tcp_time_stamp - tp->rcv_tstamp; |
500 | 503 | ||
501 | if (elapsed >= keepalive_time_when(tp)) { | 504 | if (elapsed >= keepalive_time_when(tp)) { |
502 | if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || | 505 | if (icsk->icsk_probes_out >= keepalive_probes(tp)) { |
503 | (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { | ||
504 | tcp_send_active_reset(sk, GFP_ATOMIC); | 506 | tcp_send_active_reset(sk, GFP_ATOMIC); |
505 | tcp_write_err(sk); | 507 | tcp_write_err(sk); |
506 | goto out; | 508 | goto out; |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 80e3812837ad..ebaaa7f973d7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -110,11 +110,12 @@ struct udp_table udp_table; | |||
110 | EXPORT_SYMBOL(udp_table); | 110 | EXPORT_SYMBOL(udp_table); |
111 | 111 | ||
112 | int sysctl_udp_mem[3] __read_mostly; | 112 | int sysctl_udp_mem[3] __read_mostly; |
113 | int sysctl_udp_rmem_min __read_mostly; | ||
114 | int sysctl_udp_wmem_min __read_mostly; | ||
115 | |||
116 | EXPORT_SYMBOL(sysctl_udp_mem); | 113 | EXPORT_SYMBOL(sysctl_udp_mem); |
114 | |||
115 | int sysctl_udp_rmem_min __read_mostly; | ||
117 | EXPORT_SYMBOL(sysctl_udp_rmem_min); | 116 | EXPORT_SYMBOL(sysctl_udp_rmem_min); |
117 | |||
118 | int sysctl_udp_wmem_min __read_mostly; | ||
118 | EXPORT_SYMBOL(sysctl_udp_wmem_min); | 119 | EXPORT_SYMBOL(sysctl_udp_wmem_min); |
119 | 120 | ||
120 | atomic_t udp_memory_allocated; | 121 | atomic_t udp_memory_allocated; |
@@ -158,7 +159,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, | |||
158 | */ | 159 | */ |
159 | int udp_lib_get_port(struct sock *sk, unsigned short snum, | 160 | int udp_lib_get_port(struct sock *sk, unsigned short snum, |
160 | int (*saddr_comp)(const struct sock *sk1, | 161 | int (*saddr_comp)(const struct sock *sk1, |
161 | const struct sock *sk2 ) ) | 162 | const struct sock *sk2)) |
162 | { | 163 | { |
163 | struct udp_hslot *hslot; | 164 | struct udp_hslot *hslot; |
164 | struct udp_table *udptable = sk->sk_prot->h.udp_table; | 165 | struct udp_table *udptable = sk->sk_prot->h.udp_table; |
@@ -221,14 +222,15 @@ fail_unlock: | |||
221 | fail: | 222 | fail: |
222 | return error; | 223 | return error; |
223 | } | 224 | } |
225 | EXPORT_SYMBOL(udp_lib_get_port); | ||
224 | 226 | ||
225 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 227 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) |
226 | { | 228 | { |
227 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 229 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
228 | 230 | ||
229 | return ( !ipv6_only_sock(sk2) && | 231 | return (!ipv6_only_sock(sk2) && |
230 | (!inet1->rcv_saddr || !inet2->rcv_saddr || | 232 | (!inet1->rcv_saddr || !inet2->rcv_saddr || |
231 | inet1->rcv_saddr == inet2->rcv_saddr )); | 233 | inet1->rcv_saddr == inet2->rcv_saddr)); |
232 | } | 234 | } |
233 | 235 | ||
234 | int udp_v4_get_port(struct sock *sk, unsigned short snum) | 236 | int udp_v4_get_port(struct sock *sk, unsigned short snum) |
@@ -383,8 +385,8 @@ found: | |||
383 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | 385 | void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) |
384 | { | 386 | { |
385 | struct inet_sock *inet; | 387 | struct inet_sock *inet; |
386 | struct iphdr *iph = (struct iphdr*)skb->data; | 388 | struct iphdr *iph = (struct iphdr *)skb->data; |
387 | struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); | 389 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); |
388 | const int type = icmp_hdr(skb)->type; | 390 | const int type = icmp_hdr(skb)->type; |
389 | const int code = icmp_hdr(skb)->code; | 391 | const int code = icmp_hdr(skb)->code; |
390 | struct sock *sk; | 392 | struct sock *sk; |
@@ -439,7 +441,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
439 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | 441 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
440 | goto out; | 442 | goto out; |
441 | } else { | 443 | } else { |
442 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); | 444 | ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); |
443 | } | 445 | } |
444 | sk->sk_err = err; | 446 | sk->sk_err = err; |
445 | sk->sk_error_report(sk); | 447 | sk->sk_error_report(sk); |
@@ -474,7 +476,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames); | |||
474 | * (checksum field must be zeroed out) | 476 | * (checksum field must be zeroed out) |
475 | */ | 477 | */ |
476 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, | 478 | static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, |
477 | __be32 src, __be32 dst, int len ) | 479 | __be32 src, __be32 dst, int len) |
478 | { | 480 | { |
479 | unsigned int offset; | 481 | unsigned int offset; |
480 | struct udphdr *uh = udp_hdr(skb); | 482 | struct udphdr *uh = udp_hdr(skb); |
@@ -545,7 +547,7 @@ static int udp_push_pending_frames(struct sock *sk) | |||
545 | 547 | ||
546 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ | 548 | } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ |
547 | 549 | ||
548 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); | 550 | udp4_hwcsum_outgoing(sk, skb, fl->fl4_src, fl->fl4_dst, up->len); |
549 | goto send; | 551 | goto send; |
550 | 552 | ||
551 | } else /* `normal' UDP */ | 553 | } else /* `normal' UDP */ |
@@ -553,18 +555,24 @@ static int udp_push_pending_frames(struct sock *sk) | |||
553 | 555 | ||
554 | /* add protocol-dependent pseudo-header */ | 556 | /* add protocol-dependent pseudo-header */ |
555 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, | 557 | uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, |
556 | sk->sk_protocol, csum ); | 558 | sk->sk_protocol, csum); |
557 | if (uh->check == 0) | 559 | if (uh->check == 0) |
558 | uh->check = CSUM_MANGLED_0; | 560 | uh->check = CSUM_MANGLED_0; |
559 | 561 | ||
560 | send: | 562 | send: |
561 | err = ip_push_pending_frames(sk); | 563 | err = ip_push_pending_frames(sk); |
564 | if (err) { | ||
565 | if (err == -ENOBUFS && !inet->recverr) { | ||
566 | UDP_INC_STATS_USER(sock_net(sk), | ||
567 | UDP_MIB_SNDBUFERRORS, is_udplite); | ||
568 | err = 0; | ||
569 | } | ||
570 | } else | ||
571 | UDP_INC_STATS_USER(sock_net(sk), | ||
572 | UDP_MIB_OUTDATAGRAMS, is_udplite); | ||
562 | out: | 573 | out: |
563 | up->len = 0; | 574 | up->len = 0; |
564 | up->pending = 0; | 575 | up->pending = 0; |
565 | if (!err) | ||
566 | UDP_INC_STATS_USER(sock_net(sk), | ||
567 | UDP_MIB_OUTDATAGRAMS, is_udplite); | ||
568 | return err; | 576 | return err; |
569 | } | 577 | } |
570 | 578 | ||
@@ -592,7 +600,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
592 | * Check the flags. | 600 | * Check the flags. |
593 | */ | 601 | */ |
594 | 602 | ||
595 | if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ | 603 | if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ |
596 | return -EOPNOTSUPP; | 604 | return -EOPNOTSUPP; |
597 | 605 | ||
598 | ipc.opt = NULL; | 606 | ipc.opt = NULL; |
@@ -619,7 +627,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
619 | * Get and verify the address. | 627 | * Get and verify the address. |
620 | */ | 628 | */ |
621 | if (msg->msg_name) { | 629 | if (msg->msg_name) { |
622 | struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; | 630 | struct sockaddr_in * usin = (struct sockaddr_in *)msg->msg_name; |
623 | if (msg->msg_namelen < sizeof(*usin)) | 631 | if (msg->msg_namelen < sizeof(*usin)) |
624 | return -EINVAL; | 632 | return -EINVAL; |
625 | if (usin->sin_family != AF_INET) { | 633 | if (usin->sin_family != AF_INET) { |
@@ -684,7 +692,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
684 | } | 692 | } |
685 | 693 | ||
686 | if (connected) | 694 | if (connected) |
687 | rt = (struct rtable*)sk_dst_check(sk, 0); | 695 | rt = (struct rtable *)sk_dst_check(sk, 0); |
688 | 696 | ||
689 | if (rt == NULL) { | 697 | if (rt == NULL) { |
690 | struct flowi fl = { .oif = ipc.oif, | 698 | struct flowi fl = { .oif = ipc.oif, |
@@ -782,6 +790,7 @@ do_confirm: | |||
782 | err = 0; | 790 | err = 0; |
783 | goto out; | 791 | goto out; |
784 | } | 792 | } |
793 | EXPORT_SYMBOL(udp_sendmsg); | ||
785 | 794 | ||
786 | int udp_sendpage(struct sock *sk, struct page *page, int offset, | 795 | int udp_sendpage(struct sock *sk, struct page *page, int offset, |
787 | size_t size, int flags) | 796 | size_t size, int flags) |
@@ -871,6 +880,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) | |||
871 | 880 | ||
872 | return 0; | 881 | return 0; |
873 | } | 882 | } |
883 | EXPORT_SYMBOL(udp_ioctl); | ||
874 | 884 | ||
875 | /* | 885 | /* |
876 | * This should be easy, if there is something there we | 886 | * This should be easy, if there is something there we |
@@ -892,7 +902,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
892 | * Check any passed addresses | 902 | * Check any passed addresses |
893 | */ | 903 | */ |
894 | if (addr_len) | 904 | if (addr_len) |
895 | *addr_len=sizeof(*sin); | 905 | *addr_len = sizeof(*sin); |
896 | 906 | ||
897 | if (flags & MSG_ERRQUEUE) | 907 | if (flags & MSG_ERRQUEUE) |
898 | return ip_recv_error(sk, msg, len); | 908 | return ip_recv_error(sk, msg, len); |
@@ -923,9 +933,11 @@ try_again: | |||
923 | 933 | ||
924 | if (skb_csum_unnecessary(skb)) | 934 | if (skb_csum_unnecessary(skb)) |
925 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), | 935 | err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), |
926 | msg->msg_iov, copied ); | 936 | msg->msg_iov, copied); |
927 | else { | 937 | else { |
928 | err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); | 938 | err = skb_copy_and_csum_datagram_iovec(skb, |
939 | sizeof(struct udphdr), | ||
940 | msg->msg_iov); | ||
929 | 941 | ||
930 | if (err == -EINVAL) | 942 | if (err == -EINVAL) |
931 | goto csum_copy_err; | 943 | goto csum_copy_err; |
@@ -941,8 +953,7 @@ try_again: | |||
941 | sock_recv_timestamp(msg, sk, skb); | 953 | sock_recv_timestamp(msg, sk, skb); |
942 | 954 | ||
943 | /* Copy the address. */ | 955 | /* Copy the address. */ |
944 | if (sin) | 956 | if (sin) { |
945 | { | ||
946 | sin->sin_family = AF_INET; | 957 | sin->sin_family = AF_INET; |
947 | sin->sin_port = udp_hdr(skb)->source; | 958 | sin->sin_port = udp_hdr(skb)->source; |
948 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; | 959 | sin->sin_addr.s_addr = ip_hdr(skb)->saddr; |
@@ -995,6 +1006,7 @@ int udp_disconnect(struct sock *sk, int flags) | |||
995 | sk_dst_reset(sk); | 1006 | sk_dst_reset(sk); |
996 | return 0; | 1007 | return 0; |
997 | } | 1008 | } |
1009 | EXPORT_SYMBOL(udp_disconnect); | ||
998 | 1010 | ||
999 | void udp_lib_unhash(struct sock *sk) | 1011 | void udp_lib_unhash(struct sock *sk) |
1000 | { | 1012 | { |
@@ -1044,7 +1056,7 @@ drop: | |||
1044 | * Note that in the success and error cases, the skb is assumed to | 1056 | * Note that in the success and error cases, the skb is assumed to |
1045 | * have either been requeued or freed. | 1057 | * have either been requeued or freed. |
1046 | */ | 1058 | */ |
1047 | int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) | 1059 | int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) |
1048 | { | 1060 | { |
1049 | struct udp_sock *up = udp_sk(sk); | 1061 | struct udp_sock *up = udp_sk(sk); |
1050 | int rc; | 1062 | int rc; |
@@ -1214,7 +1226,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, | |||
1214 | if (uh->check == 0) { | 1226 | if (uh->check == 0) { |
1215 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1227 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1216 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { | 1228 | } else if (skb->ip_summed == CHECKSUM_COMPLETE) { |
1217 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | 1229 | if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, |
1218 | proto, skb->csum)) | 1230 | proto, skb->csum)) |
1219 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 1231 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
1220 | } | 1232 | } |
@@ -1355,7 +1367,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1355 | int err = 0; | 1367 | int err = 0; |
1356 | int is_udplite = IS_UDPLITE(sk); | 1368 | int is_udplite = IS_UDPLITE(sk); |
1357 | 1369 | ||
1358 | if (optlen<sizeof(int)) | 1370 | if (optlen < sizeof(int)) |
1359 | return -EINVAL; | 1371 | return -EINVAL; |
1360 | 1372 | ||
1361 | if (get_user(val, (int __user *)optval)) | 1373 | if (get_user(val, (int __user *)optval)) |
@@ -1426,6 +1438,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, | |||
1426 | 1438 | ||
1427 | return err; | 1439 | return err; |
1428 | } | 1440 | } |
1441 | EXPORT_SYMBOL(udp_lib_setsockopt); | ||
1429 | 1442 | ||
1430 | int udp_setsockopt(struct sock *sk, int level, int optname, | 1443 | int udp_setsockopt(struct sock *sk, int level, int optname, |
1431 | char __user *optval, int optlen) | 1444 | char __user *optval, int optlen) |
@@ -1453,7 +1466,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1453 | struct udp_sock *up = udp_sk(sk); | 1466 | struct udp_sock *up = udp_sk(sk); |
1454 | int val, len; | 1467 | int val, len; |
1455 | 1468 | ||
1456 | if (get_user(len,optlen)) | 1469 | if (get_user(len, optlen)) |
1457 | return -EFAULT; | 1470 | return -EFAULT; |
1458 | 1471 | ||
1459 | len = min_t(unsigned int, len, sizeof(int)); | 1472 | len = min_t(unsigned int, len, sizeof(int)); |
@@ -1486,10 +1499,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, | |||
1486 | 1499 | ||
1487 | if (put_user(len, optlen)) | 1500 | if (put_user(len, optlen)) |
1488 | return -EFAULT; | 1501 | return -EFAULT; |
1489 | if (copy_to_user(optval, &val,len)) | 1502 | if (copy_to_user(optval, &val, len)) |
1490 | return -EFAULT; | 1503 | return -EFAULT; |
1491 | return 0; | 1504 | return 0; |
1492 | } | 1505 | } |
1506 | EXPORT_SYMBOL(udp_lib_getsockopt); | ||
1493 | 1507 | ||
1494 | int udp_getsockopt(struct sock *sk, int level, int optname, | 1508 | int udp_getsockopt(struct sock *sk, int level, int optname, |
1495 | char __user *optval, int __user *optlen) | 1509 | char __user *optval, int __user *optlen) |
@@ -1528,9 +1542,9 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1528 | int is_lite = IS_UDPLITE(sk); | 1542 | int is_lite = IS_UDPLITE(sk); |
1529 | 1543 | ||
1530 | /* Check for false positives due to checksum errors */ | 1544 | /* Check for false positives due to checksum errors */ |
1531 | if ( (mask & POLLRDNORM) && | 1545 | if ((mask & POLLRDNORM) && |
1532 | !(file->f_flags & O_NONBLOCK) && | 1546 | !(file->f_flags & O_NONBLOCK) && |
1533 | !(sk->sk_shutdown & RCV_SHUTDOWN)){ | 1547 | !(sk->sk_shutdown & RCV_SHUTDOWN)) { |
1534 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; | 1548 | struct sk_buff_head *rcvq = &sk->sk_receive_queue; |
1535 | struct sk_buff *skb; | 1549 | struct sk_buff *skb; |
1536 | 1550 | ||
@@ -1552,6 +1566,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) | |||
1552 | return mask; | 1566 | return mask; |
1553 | 1567 | ||
1554 | } | 1568 | } |
1569 | EXPORT_SYMBOL(udp_poll); | ||
1555 | 1570 | ||
1556 | struct proto udp_prot = { | 1571 | struct proto udp_prot = { |
1557 | .name = "UDP", | 1572 | .name = "UDP", |
@@ -1582,6 +1597,7 @@ struct proto udp_prot = { | |||
1582 | .compat_getsockopt = compat_udp_getsockopt, | 1597 | .compat_getsockopt = compat_udp_getsockopt, |
1583 | #endif | 1598 | #endif |
1584 | }; | 1599 | }; |
1600 | EXPORT_SYMBOL(udp_prot); | ||
1585 | 1601 | ||
1586 | /* ------------------------------------------------------------------------ */ | 1602 | /* ------------------------------------------------------------------------ */ |
1587 | #ifdef CONFIG_PROC_FS | 1603 | #ifdef CONFIG_PROC_FS |
@@ -1703,11 +1719,13 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) | |||
1703 | rc = -ENOMEM; | 1719 | rc = -ENOMEM; |
1704 | return rc; | 1720 | return rc; |
1705 | } | 1721 | } |
1722 | EXPORT_SYMBOL(udp_proc_register); | ||
1706 | 1723 | ||
1707 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) | 1724 | void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) |
1708 | { | 1725 | { |
1709 | proc_net_remove(net, afinfo->name); | 1726 | proc_net_remove(net, afinfo->name); |
1710 | } | 1727 | } |
1728 | EXPORT_SYMBOL(udp_proc_unregister); | ||
1711 | 1729 | ||
1712 | /* ------------------------------------------------------------------------ */ | 1730 | /* ------------------------------------------------------------------------ */ |
1713 | static void udp4_format_sock(struct sock *sp, struct seq_file *f, | 1731 | static void udp4_format_sock(struct sock *sp, struct seq_file *f, |
@@ -1741,7 +1759,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) | |||
1741 | int len; | 1759 | int len; |
1742 | 1760 | ||
1743 | udp4_format_sock(v, seq, state->bucket, &len); | 1761 | udp4_format_sock(v, seq, state->bucket, &len); |
1744 | seq_printf(seq, "%*s\n", 127 - len ,""); | 1762 | seq_printf(seq, "%*s\n", 127 - len, ""); |
1745 | } | 1763 | } |
1746 | return 0; | 1764 | return 0; |
1747 | } | 1765 | } |
@@ -1816,16 +1834,64 @@ void __init udp_init(void) | |||
1816 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; | 1834 | sysctl_udp_wmem_min = SK_MEM_QUANTUM; |
1817 | } | 1835 | } |
1818 | 1836 | ||
1819 | EXPORT_SYMBOL(udp_disconnect); | 1837 | int udp4_ufo_send_check(struct sk_buff *skb) |
1820 | EXPORT_SYMBOL(udp_ioctl); | 1838 | { |
1821 | EXPORT_SYMBOL(udp_prot); | 1839 | const struct iphdr *iph; |
1822 | EXPORT_SYMBOL(udp_sendmsg); | 1840 | struct udphdr *uh; |
1823 | EXPORT_SYMBOL(udp_lib_getsockopt); | 1841 | |
1824 | EXPORT_SYMBOL(udp_lib_setsockopt); | 1842 | if (!pskb_may_pull(skb, sizeof(*uh))) |
1825 | EXPORT_SYMBOL(udp_poll); | 1843 | return -EINVAL; |
1826 | EXPORT_SYMBOL(udp_lib_get_port); | 1844 | |
1845 | iph = ip_hdr(skb); | ||
1846 | uh = udp_hdr(skb); | ||
1847 | |||
1848 | uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, | ||
1849 | IPPROTO_UDP, 0); | ||
1850 | skb->csum_start = skb_transport_header(skb) - skb->head; | ||
1851 | skb->csum_offset = offsetof(struct udphdr, check); | ||
1852 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
1853 | return 0; | ||
1854 | } | ||
1855 | |||
1856 | struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) | ||
1857 | { | ||
1858 | struct sk_buff *segs = ERR_PTR(-EINVAL); | ||
1859 | unsigned int mss; | ||
1860 | int offset; | ||
1861 | __wsum csum; | ||
1862 | |||
1863 | mss = skb_shinfo(skb)->gso_size; | ||
1864 | if (unlikely(skb->len <= mss)) | ||
1865 | goto out; | ||
1866 | |||
1867 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | ||
1868 | /* Packet is from an untrusted source, reset gso_segs. */ | ||
1869 | int type = skb_shinfo(skb)->gso_type; | ||
1870 | |||
1871 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || | ||
1872 | !(type & (SKB_GSO_UDP)))) | ||
1873 | goto out; | ||
1874 | |||
1875 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | ||
1876 | |||
1877 | segs = NULL; | ||
1878 | goto out; | ||
1879 | } | ||
1880 | |||
1881 | /* Do software UFO. Complete and fill in the UDP checksum as HW cannot | ||
1882 | * do checksum of UDP packets sent as multiple IP fragments. | ||
1883 | */ | ||
1884 | offset = skb->csum_start - skb_headroom(skb); | ||
1885 | csum = skb_checksum(skb, offset, skb->len - offset, 0); | ||
1886 | offset += skb->csum_offset; | ||
1887 | *(__sum16 *)(skb->data + offset) = csum_fold(csum); | ||
1888 | skb->ip_summed = CHECKSUM_NONE; | ||
1889 | |||
1890 | /* Fragment the skb. IP headers of the fragments are updated in | ||
1891 | * inet_gso_segment() | ||
1892 | */ | ||
1893 | segs = skb_segment(skb, features); | ||
1894 | out: | ||
1895 | return segs; | ||
1896 | } | ||
1827 | 1897 | ||
1828 | #ifdef CONFIG_PROC_FS | ||
1829 | EXPORT_SYMBOL(udp_proc_register); | ||
1830 | EXPORT_SYMBOL(udp_proc_unregister); | ||
1831 | #endif | ||
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0071ee6f441f..74fb2eb833ec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -264,6 +264,22 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { | |||
264 | .fill_dst = xfrm4_fill_dst, | 264 | .fill_dst = xfrm4_fill_dst, |
265 | }; | 265 | }; |
266 | 266 | ||
267 | #ifdef CONFIG_SYSCTL | ||
268 | static struct ctl_table xfrm4_policy_table[] = { | ||
269 | { | ||
270 | .ctl_name = CTL_UNNUMBERED, | ||
271 | .procname = "xfrm4_gc_thresh", | ||
272 | .data = &xfrm4_dst_ops.gc_thresh, | ||
273 | .maxlen = sizeof(int), | ||
274 | .mode = 0644, | ||
275 | .proc_handler = proc_dointvec, | ||
276 | }, | ||
277 | { } | ||
278 | }; | ||
279 | |||
280 | static struct ctl_table_header *sysctl_hdr; | ||
281 | #endif | ||
282 | |||
267 | static void __init xfrm4_policy_init(void) | 283 | static void __init xfrm4_policy_init(void) |
268 | { | 284 | { |
269 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); | 285 | xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); |
@@ -271,12 +287,31 @@ static void __init xfrm4_policy_init(void) | |||
271 | 287 | ||
272 | static void __exit xfrm4_policy_fini(void) | 288 | static void __exit xfrm4_policy_fini(void) |
273 | { | 289 | { |
290 | #ifdef CONFIG_SYSCTL | ||
291 | if (sysctl_hdr) | ||
292 | unregister_net_sysctl_table(sysctl_hdr); | ||
293 | #endif | ||
274 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); | 294 | xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); |
275 | } | 295 | } |
276 | 296 | ||
277 | void __init xfrm4_init(void) | 297 | void __init xfrm4_init(int rt_max_size) |
278 | { | 298 | { |
279 | xfrm4_state_init(); | 299 | xfrm4_state_init(); |
280 | xfrm4_policy_init(); | 300 | xfrm4_policy_init(); |
301 | /* | ||
302 | * Select a default value for the gc_thresh based on the main route | ||
303 | * table hash size. It seems to me the worst case scenario is when | ||
304 | * we have ipsec operating in transport mode, in which we create a | ||
305 | * dst_entry per socket. The xfrm gc algorithm starts trying to remove | ||
306 | * entries at gc_thresh, and prevents new allocations at 2*gc_thresh, | ||
307 | * so let's set an initial xfrm gc_thresh value at the rt_max_size/2. | ||
308 | * That will let us store an ipsec connection per route table entry, | ||
309 | * and start cleaning when we're 1/2 full. | ||
310 | */ | ||
311 | xfrm4_dst_ops.gc_thresh = rt_max_size/2; | ||
312 | #ifdef CONFIG_SYSCTL | ||
313 | sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, | ||
314 | xfrm4_policy_table); | ||
315 | #endif | ||
281 | } | 316 | } |
282 | 317 | ||