| author | James Morris <jmorris@namei.org> | 2009-03-27 23:57:13 -0400 |
|---|---|---|
| committer | James Morris <jmorris@namei.org> | 2009-03-27 23:57:13 -0400 |
| commit | bb798169d1bb860b07192cf9c75937fadc8610b4 (patch) | |
| tree | fa67f14406a1e79897e6f29e59fed7c02ec31c30 /net/ipv4 | |
| parent | a106cbfd1f3703402fc2d95d97e7a054102250f0 (diff) | |
| parent | 5d80f8e5a9dc9c9a94d4aeaa567e219a808b8a4a (diff) | |
Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into next
Diffstat (limited to 'net/ipv4')
54 files changed, 1047 insertions, 864 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 691268f3a359..b2cf91e4ccaa 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -35,7 +35,7 @@ config IP_ADVANCED_ROUTER | |||
35 | 35 | ||
36 | at boot time after the /proc file system has been mounted. | 36 | at boot time after the /proc file system has been mounted. |
37 | 37 | ||
38 | If you turn on IP forwarding, you will also get the rp_filter, which | 38 | If you turn on IP forwarding, you should consider the rp_filter, which |
39 | automatically rejects incoming packets if the routing table entry | 39 | automatically rejects incoming packets if the routing table entry |
40 | for their source address doesn't match the network interface they're | 40 | for their source address doesn't match the network interface they're |
41 | arriving on. This has security advantages because it prevents the | 41 | arriving on. This has security advantages because it prevents the |
@@ -46,12 +46,16 @@ config IP_ADVANCED_ROUTER | |||
46 | rp_filter on use: | 46 | rp_filter on use: |
47 | 47 | ||
48 | echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter | 48 | echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter |
49 | or | 49 | and |
50 | echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter | 50 | echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter |
51 | 51 | ||
52 | Note that some distributions enable it in startup scripts. | ||
53 | For details about rp_filter strict and loose mode read | ||
54 | <file:Documentation/networking/ip-sysctl.txt>. | ||
55 | |||
52 | If unsure, say N here. | 56 | If unsure, say N here. |
53 | 57 | ||
54 | choice | 58 | choice |
55 | prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" | 59 | prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" |
56 | depends on IP_ADVANCED_ROUTER | 60 | depends on IP_ADVANCED_ROUTER |
57 | default ASK_IP_FIB_HASH | 61 | default ASK_IP_FIB_HASH |
@@ -59,27 +63,29 @@ choice | |||
59 | config ASK_IP_FIB_HASH | 63 | config ASK_IP_FIB_HASH |
60 | bool "FIB_HASH" | 64 | bool "FIB_HASH" |
61 | ---help--- | 65 | ---help--- |
62 | Current FIB is very proven and good enough for most users. | 66 | Current FIB is very proven and good enough for most users. |
63 | 67 | ||
64 | config IP_FIB_TRIE | 68 | config IP_FIB_TRIE |
65 | bool "FIB_TRIE" | 69 | bool "FIB_TRIE" |
66 | ---help--- | 70 | ---help--- |
67 | Use new experimental LC-trie as FIB lookup algorithm. | 71 | Use new experimental LC-trie as FIB lookup algorithm. |
68 | This improves lookup performance if you have a large | 72 | This improves lookup performance if you have a large |
69 | number of routes. | 73 | number of routes. |
70 | 74 | ||
71 | LC-trie is a longest matching prefix lookup algorithm which | 75 | LC-trie is a longest matching prefix lookup algorithm which |
72 | performs better than FIB_HASH for large routing tables. | 76 | performs better than FIB_HASH for large routing tables. |
73 | But, it consumes more memory and is more complex. | 77 | But, it consumes more memory and is more complex. |
74 | 78 | ||
75 | LC-trie is described in: | 79 | LC-trie is described in: |
76 | 80 | ||
77 | IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson | 81 | IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson |
78 | IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 | 82 | IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, |
79 | An experimental study of compression methods for dynamic tries | 83 | June 1999 |
80 | Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. | 84 | |
81 | http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ | 85 | An experimental study of compression methods for dynamic tries |
82 | 86 | Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. | |
87 | http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ | ||
88 | |||
83 | endchoice | 89 | endchoice |
84 | 90 | ||
85 | config IP_FIB_HASH | 91 | config IP_FIB_HASH |
@@ -191,7 +197,7 @@ config IP_PNP_RARP | |||
191 | <file:Documentation/filesystems/nfsroot.txt> for details. | 197 | <file:Documentation/filesystems/nfsroot.txt> for details. |
192 | 198 | ||
193 | # not yet ready.. | 199 | # not yet ready.. |
194 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP | 200 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP |
195 | config NET_IPIP | 201 | config NET_IPIP |
196 | tristate "IP: tunneling" | 202 | tristate "IP: tunneling" |
197 | select INET_TUNNEL | 203 | select INET_TUNNEL |
@@ -361,7 +367,7 @@ config INET_IPCOMP | |||
361 | ---help--- | 367 | ---help--- |
362 | Support for IP Payload Compression Protocol (IPComp) (RFC3173), | 368 | Support for IP Payload Compression Protocol (IPComp) (RFC3173), |
363 | typically needed for IPsec. | 369 | typically needed for IPsec. |
364 | 370 | ||
365 | If unsure, say Y. | 371 | If unsure, say Y. |
366 | 372 | ||
367 | config INET_XFRM_TUNNEL | 373 | config INET_XFRM_TUNNEL |
@@ -415,7 +421,7 @@ config INET_DIAG | |||
415 | Support for INET (TCP, DCCP, etc) socket monitoring interface used by | 421 | Support for INET (TCP, DCCP, etc) socket monitoring interface used by |
416 | native Linux tools such as ss. ss is included in iproute2, currently | 422 | native Linux tools such as ss. ss is included in iproute2, currently |
417 | downloadable at <http://linux-net.osdl.org/index.php/Iproute2>. | 423 | downloadable at <http://linux-net.osdl.org/index.php/Iproute2>. |
418 | 424 | ||
419 | If unsure, say Y. | 425 | If unsure, say Y. |
420 | 426 | ||
421 | config INET_TCP_DIAG | 427 | config INET_TCP_DIAG |
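The FIB_TRIE help text above describes longest-matching-prefix lookup. As a rough illustration of the idea only, here is a plain binary trie sketched in userspace C; it is not the path-compressed LC-trie the kernel implements, and all names and route IDs are made up for the example.

```c
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* One node per prefix bit; a real LC-trie compresses paths and levels. */
struct trie_node {
    struct trie_node *child[2];
    int has_route;          /* a prefix terminates here */
    int route_id;           /* illustrative payload */
};

static struct trie_node *node_new(void)
{
    return calloc(1, sizeof(struct trie_node));
}

static void trie_insert(struct trie_node *root, uint32_t prefix, int plen, int route_id)
{
    struct trie_node *n = root;
    for (int i = 0; i < plen; i++) {
        int bit = (prefix >> (31 - i)) & 1;
        if (!n->child[bit])
            n->child[bit] = node_new();
        n = n->child[bit];
    }
    n->has_route = 1;
    n->route_id = route_id;
}

/* Walk the trie, remembering the deepest node that carried a route:
 * that is the longest matching prefix for the address. */
static int trie_lookup(struct trie_node *root, uint32_t addr)
{
    struct trie_node *n = root;
    int best = -1;
    for (int i = 0; i < 32 && n; i++) {
        if (n->has_route)
            best = n->route_id;
        n = n->child[(addr >> (31 - i)) & 1];
    }
    if (n && n->has_route)
        best = n->route_id;
    return best;
}

int main(void)
{
    struct trie_node *root = node_new();
    trie_insert(root, 0x0A000000, 8, 1);   /* 10.0.0.0/8  -> route 1 */
    trie_insert(root, 0x0A010000, 16, 2);  /* 10.1.0.0/16 -> route 2 */
    printf("10.1.2.3 -> route %d\n", trie_lookup(root, 0x0A010203)); /* 2 */
    printf("10.9.0.1 -> route %d\n", trie_lookup(root, 0x0A090001)); /* 1 */
    return 0;
}
```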
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 743f5542d65a..d5aaabbb7cb3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -369,7 +369,6 @@ lookup_protocol: | |||
369 | sock_init_data(sock, sk); | 369 | sock_init_data(sock, sk); |
370 | 370 | ||
371 | sk->sk_destruct = inet_sock_destruct; | 371 | sk->sk_destruct = inet_sock_destruct; |
372 | sk->sk_family = PF_INET; | ||
373 | sk->sk_protocol = protocol; | 372 | sk->sk_protocol = protocol; |
374 | sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; | 373 | sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; |
375 | 374 | ||
@@ -1253,10 +1252,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1253 | int proto; | 1252 | int proto; |
1254 | int id; | 1253 | int id; |
1255 | 1254 | ||
1256 | if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) | 1255 | iph = skb_gro_header(skb, sizeof(*iph)); |
1256 | if (unlikely(!iph)) | ||
1257 | goto out; | 1257 | goto out; |
1258 | 1258 | ||
1259 | iph = ip_hdr(skb); | ||
1260 | proto = iph->protocol & (MAX_INET_PROTOS - 1); | 1259 | proto = iph->protocol & (MAX_INET_PROTOS - 1); |
1261 | 1260 | ||
1262 | rcu_read_lock(); | 1261 | rcu_read_lock(); |
@@ -1264,13 +1263,13 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1264 | if (!ops || !ops->gro_receive) | 1263 | if (!ops || !ops->gro_receive) |
1265 | goto out_unlock; | 1264 | goto out_unlock; |
1266 | 1265 | ||
1267 | if (iph->version != 4 || iph->ihl != 5) | 1266 | if (*(u8 *)iph != 0x45) |
1268 | goto out_unlock; | 1267 | goto out_unlock; |
1269 | 1268 | ||
1270 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) | 1269 | if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) |
1271 | goto out_unlock; | 1270 | goto out_unlock; |
1272 | 1271 | ||
1273 | flush = ntohs(iph->tot_len) != skb->len || | 1272 | flush = ntohs(iph->tot_len) != skb_gro_len(skb) || |
1274 | iph->frag_off != htons(IP_DF); | 1273 | iph->frag_off != htons(IP_DF); |
1275 | id = ntohs(iph->id); | 1274 | id = ntohs(iph->id); |
1276 | 1275 | ||
@@ -1282,24 +1281,25 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, | |||
1282 | 1281 | ||
1283 | iph2 = ip_hdr(p); | 1282 | iph2 = ip_hdr(p); |
1284 | 1283 | ||
1285 | if (iph->protocol != iph2->protocol || | 1284 | if ((iph->protocol ^ iph2->protocol) | |
1286 | iph->tos != iph2->tos || | 1285 | (iph->tos ^ iph2->tos) | |
1287 | memcmp(&iph->saddr, &iph2->saddr, 8)) { | 1286 | (iph->saddr ^ iph2->saddr) | |
1287 | (iph->daddr ^ iph2->daddr)) { | ||
1288 | NAPI_GRO_CB(p)->same_flow = 0; | 1288 | NAPI_GRO_CB(p)->same_flow = 0; |
1289 | continue; | 1289 | continue; |
1290 | } | 1290 | } |
1291 | 1291 | ||
1292 | /* All fields must match except length and checksum. */ | 1292 | /* All fields must match except length and checksum. */ |
1293 | NAPI_GRO_CB(p)->flush |= | 1293 | NAPI_GRO_CB(p)->flush |= |
1294 | memcmp(&iph->frag_off, &iph2->frag_off, 4) || | 1294 | (iph->ttl ^ iph2->ttl) | |
1295 | (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; | 1295 | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); |
1296 | 1296 | ||
1297 | NAPI_GRO_CB(p)->flush |= flush; | 1297 | NAPI_GRO_CB(p)->flush |= flush; |
1298 | } | 1298 | } |
1299 | 1299 | ||
1300 | NAPI_GRO_CB(skb)->flush |= flush; | 1300 | NAPI_GRO_CB(skb)->flush |= flush; |
1301 | __skb_pull(skb, sizeof(*iph)); | 1301 | skb_gro_pull(skb, sizeof(*iph)); |
1302 | skb_reset_transport_header(skb); | 1302 | skb_set_transport_header(skb, skb_gro_offset(skb)); |
1303 | 1303 | ||
1304 | pp = ops->gro_receive(head, skb); | 1304 | pp = ops->gro_receive(head, skb); |
1305 | 1305 | ||
@@ -1500,8 +1500,8 @@ static int ipv4_proc_init(void); | |||
1500 | * IP protocol layer initialiser | 1500 | * IP protocol layer initialiser |
1501 | */ | 1501 | */ |
1502 | 1502 | ||
1503 | static struct packet_type ip_packet_type = { | 1503 | static struct packet_type ip_packet_type __read_mostly = { |
1504 | .type = __constant_htons(ETH_P_IP), | 1504 | .type = cpu_to_be16(ETH_P_IP), |
1505 | .func = ip_rcv, | 1505 | .func = ip_rcv, |
1506 | .gso_send_check = inet_gso_send_check, | 1506 | .gso_send_check = inet_gso_send_check, |
1507 | .gso_segment = inet_gso_segment, | 1507 | .gso_segment = inet_gso_segment, |
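The inet_gro_receive() hunks above replace per-field conditionals and memcmp() with OR-ed XOR differences, so the "same flow" decision reduces to a single zero test. A minimal userspace sketch of that pattern, using an illustrative flow key rather than the real struct iphdr fields:

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative flow key; the kernel compares fields of struct iphdr. */
struct flow_key {
    uint32_t saddr;
    uint32_t daddr;
    uint8_t  protocol;
    uint8_t  tos;
};

/* OR together the XOR of every field: the result is zero only if all
 * fields match, so one test replaces a chain of conditional branches. */
static int same_flow(const struct flow_key *a, const struct flow_key *b)
{
    uint32_t diff = (a->saddr ^ b->saddr) |
                    (a->daddr ^ b->daddr) |
                    (uint32_t)(a->protocol ^ b->protocol) |
                    (uint32_t)(a->tos ^ b->tos);
    return diff == 0;
}

int main(void)
{
    struct flow_key a = { 0x0a000001, 0x0a000002, 6, 0 };
    struct flow_key b = a;
    struct flow_key c = a;
    c.tos = 0x10;

    printf("a vs b: %s\n", same_flow(&a, &b) ? "same" : "different");
    printf("a vs c: %s\n", same_flow(&a, &c) ? "same" : "different");
    return 0;
}
```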
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 29a74c01d8de..f11931c18381 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -801,8 +801,11 @@ static int arp_process(struct sk_buff *skb) | |||
801 | * cache. | 801 | * cache. |
802 | */ | 802 | */ |
803 | 803 | ||
804 | /* Special case: IPv4 duplicate address detection packet (RFC2131) */ | 804 | /* |
805 | if (sip == 0) { | 805 | * Special case: IPv4 duplicate address detection packet (RFC2131) |
806 | * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) | ||
807 | */ | ||
808 | if (sip == 0 || tip == sip) { | ||
806 | if (arp->ar_op == htons(ARPOP_REQUEST) && | 809 | if (arp->ar_op == htons(ARPOP_REQUEST) && |
807 | inet_addr_type(net, tip) == RTN_LOCAL && | 810 | inet_addr_type(net, tip) == RTN_LOCAL && |
808 | !arp_ignore(in_dev, sip, tip)) | 811 | !arp_ignore(in_dev, sip, tip)) |
@@ -892,7 +895,7 @@ static int arp_process(struct sk_buff *skb) | |||
892 | out: | 895 | out: |
893 | if (in_dev) | 896 | if (in_dev) |
894 | in_dev_put(in_dev); | 897 | in_dev_put(in_dev); |
895 | kfree_skb(skb); | 898 | consume_skb(skb); |
896 | return 0; | 899 | return 0; |
897 | } | 900 | } |
898 | 901 | ||
@@ -1225,8 +1228,8 @@ void arp_ifdown(struct net_device *dev) | |||
1225 | * Called once on startup. | 1228 | * Called once on startup. |
1226 | */ | 1229 | */ |
1227 | 1230 | ||
1228 | static struct packet_type arp_packet_type = { | 1231 | static struct packet_type arp_packet_type __read_mostly = { |
1229 | .type = __constant_htons(ETH_P_ARP), | 1232 | .type = cpu_to_be16(ETH_P_ARP), |
1230 | .func = arp_rcv, | 1233 | .func = arp_rcv, |
1231 | }; | 1234 | }; |
1232 | 1235 | ||
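The arp_process() change widens the special case from duplicate-address-detection probes (sender IP of 0) to also cover gratuitous ARP, where sender and target IP are equal. A tiny standalone sketch of just that classification, with addresses as plain integers:

```c
#include <stdint.h>
#include <stdio.h>

/* Classify an ARP request the way the widened special case does:
 * sip == 0   -> duplicate-address-detection probe (RFC 2131)
 * sip == tip -> gratuitous ARP / ARP announce (RFC 3927, section 2.4)
 */
static const char *classify_arp(uint32_t sip, uint32_t tip)
{
    if (sip == 0)
        return "DAD probe";
    if (sip == tip)
        return "gratuitous ARP";
    return "ordinary request";
}

int main(void)
{
    printf("%s\n", classify_arp(0, 0x0a000001));          /* DAD probe */
    printf("%s\n", classify_arp(0x0a000001, 0x0a000001)); /* gratuitous ARP */
    printf("%s\n", classify_arp(0x0a000001, 0x0a000002)); /* ordinary request */
    return 0;
}
```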
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 309997edc8a5..126bb911880f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1075,6 +1075,14 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, | |||
1075 | } | 1075 | } |
1076 | } | 1076 | } |
1077 | ip_mc_up(in_dev); | 1077 | ip_mc_up(in_dev); |
1078 | /* fall through */ | ||
1079 | case NETDEV_CHANGEADDR: | ||
1080 | if (IN_DEV_ARP_NOTIFY(in_dev)) | ||
1081 | arp_send(ARPOP_REQUEST, ETH_P_ARP, | ||
1082 | in_dev->ifa_list->ifa_address, | ||
1083 | dev, | ||
1084 | in_dev->ifa_list->ifa_address, | ||
1085 | NULL, dev->dev_addr, NULL); | ||
1078 | break; | 1086 | break; |
1079 | case NETDEV_DOWN: | 1087 | case NETDEV_DOWN: |
1080 | ip_mc_down(in_dev); | 1088 | ip_mc_down(in_dev); |
@@ -1208,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, | |||
1208 | kfree_skb(skb); | 1216 | kfree_skb(skb); |
1209 | goto errout; | 1217 | goto errout; |
1210 | } | 1218 | } |
1211 | err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); | 1219 | rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); |
1220 | return; | ||
1212 | errout: | 1221 | errout: |
1213 | if (err < 0) | 1222 | if (err < 0) |
1214 | rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); | 1223 | rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); |
@@ -1439,6 +1448,7 @@ static struct devinet_sysctl_table { | |||
1439 | DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), | 1448 | DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), |
1440 | DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), | 1449 | DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), |
1441 | DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), | 1450 | DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), |
1451 | DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), | ||
1442 | 1452 | ||
1443 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), | 1453 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), |
1444 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), | 1454 | DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), |
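The devinet.c hunks add the arp_notify sysctl and send a gratuitous ARP on NETDEV_UP and NETDEV_CHANGEADDR when it is enabled. A small userspace sketch of turning the new knob on for one interface; "eth0" is only an example device name and the program needs the usual privileges to write under /proc:

```c
#include <stdio.h>

/* Enable the new arp_notify sysctl for one interface from userspace. */
int main(void)
{
    const char *path = "/proc/sys/net/ipv4/conf/eth0/arp_notify";
    FILE *f = fopen(path, "w");

    if (!f) {
        perror(path);
        return 1;
    }
    fputs("1\n", f);   /* announce address changes with a gratuitous ARP */
    fclose(f);
    return 0;
}
```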
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 741e4fa3e474..cafcc49d0993 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -275,7 +275,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
275 | fib_res_put(&res); | 275 | fib_res_put(&res); |
276 | if (no_addr) | 276 | if (no_addr) |
277 | goto last_resort; | 277 | goto last_resort; |
278 | if (rpf) | 278 | if (rpf == 1) |
279 | goto e_inval; | 279 | goto e_inval; |
280 | fl.oif = dev->ifindex; | 280 | fl.oif = dev->ifindex; |
281 | 281 | ||
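The fib_validate_source() change makes only strict reverse-path filtering (rp_filter = 1) reject at this point, while loose mode (2) falls through to a further lookup. A simplified decision sketch, with the FIB result reduced to a boolean; the real code performs a second route lookup rather than a flag check:

```c
#include <stdio.h>

enum rpf_mode { RPF_OFF = 0, RPF_STRICT = 1, RPF_LOOSE = 2 };

/* Decide whether to drop a packet under reverse path filtering.
 * reverse_route_ok models "the route back to the source leaves via the
 * interface the packet arrived on". Only strict mode drops when that
 * fails; loose mode accepts as long as some route exists (checked
 * separately in the kernel). */
static int rpf_drop(enum rpf_mode mode, int reverse_route_ok)
{
    if (mode == RPF_OFF || reverse_route_ok)
        return 0;
    return mode == RPF_STRICT;
}

int main(void)
{
    printf("strict, asymmetric path: drop=%d\n", rpf_drop(RPF_STRICT, 0));
    printf("loose,  asymmetric path: drop=%d\n", rpf_drop(RPF_LOOSE, 0));
    printf("strict, symmetric path:  drop=%d\n", rpf_drop(RPF_STRICT, 1));
    return 0;
}
```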
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4817dea3bc73..f831df500907 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, | |||
322 | kfree_skb(skb); | 322 | kfree_skb(skb); |
323 | goto errout; | 323 | goto errout; |
324 | } | 324 | } |
325 | err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, | 325 | rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, |
326 | info->nlh, GFP_KERNEL); | 326 | info->nlh, GFP_KERNEL); |
327 | return; | ||
327 | errout: | 328 | errout: |
328 | if (err < 0) | 329 | if (err < 0) |
329 | rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); | 330 | rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc562d29cc46..3f50807237e0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -375,6 +375,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
375 | inet->tos = ip_hdr(skb)->tos; | 375 | inet->tos = ip_hdr(skb)->tos; |
376 | daddr = ipc.addr = rt->rt_src; | 376 | daddr = ipc.addr = rt->rt_src; |
377 | ipc.opt = NULL; | 377 | ipc.opt = NULL; |
378 | ipc.shtx.flags = 0; | ||
378 | if (icmp_param->replyopts.optlen) { | 379 | if (icmp_param->replyopts.optlen) { |
379 | ipc.opt = &icmp_param->replyopts; | 380 | ipc.opt = &icmp_param->replyopts; |
380 | if (ipc.opt->srr) | 381 | if (ipc.opt->srr) |
@@ -532,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
532 | inet_sk(sk)->tos = tos; | 533 | inet_sk(sk)->tos = tos; |
533 | ipc.addr = iph->saddr; | 534 | ipc.addr = iph->saddr; |
534 | ipc.opt = &icmp_param.replyopts; | 535 | ipc.opt = &icmp_param.replyopts; |
536 | ipc.shtx.flags = 0; | ||
535 | 537 | ||
536 | { | 538 | { |
537 | struct flowi fl = { | 539 | struct flowi fl = { |
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f26ab38680de..22cd19ee44e5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,24 +93,40 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) | |||
93 | struct inet_bind_hashbucket *head; | 93 | struct inet_bind_hashbucket *head; |
94 | struct hlist_node *node; | 94 | struct hlist_node *node; |
95 | struct inet_bind_bucket *tb; | 95 | struct inet_bind_bucket *tb; |
96 | int ret; | 96 | int ret, attempts = 5; |
97 | struct net *net = sock_net(sk); | 97 | struct net *net = sock_net(sk); |
98 | int smallest_size = -1, smallest_rover; | ||
98 | 99 | ||
99 | local_bh_disable(); | 100 | local_bh_disable(); |
100 | if (!snum) { | 101 | if (!snum) { |
101 | int remaining, rover, low, high; | 102 | int remaining, rover, low, high; |
102 | 103 | ||
104 | again: | ||
103 | inet_get_local_port_range(&low, &high); | 105 | inet_get_local_port_range(&low, &high); |
104 | remaining = (high - low) + 1; | 106 | remaining = (high - low) + 1; |
105 | rover = net_random() % remaining + low; | 107 | smallest_rover = rover = net_random() % remaining + low; |
106 | 108 | ||
109 | smallest_size = -1; | ||
107 | do { | 110 | do { |
108 | head = &hashinfo->bhash[inet_bhashfn(net, rover, | 111 | head = &hashinfo->bhash[inet_bhashfn(net, rover, |
109 | hashinfo->bhash_size)]; | 112 | hashinfo->bhash_size)]; |
110 | spin_lock(&head->lock); | 113 | spin_lock(&head->lock); |
111 | inet_bind_bucket_for_each(tb, node, &head->chain) | 114 | inet_bind_bucket_for_each(tb, node, &head->chain) |
112 | if (ib_net(tb) == net && tb->port == rover) | 115 | if (ib_net(tb) == net && tb->port == rover) { |
116 | if (tb->fastreuse > 0 && | ||
117 | sk->sk_reuse && | ||
118 | sk->sk_state != TCP_LISTEN && | ||
119 | (tb->num_owners < smallest_size || smallest_size == -1)) { | ||
120 | smallest_size = tb->num_owners; | ||
121 | smallest_rover = rover; | ||
122 | if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { | ||
123 | spin_unlock(&head->lock); | ||
124 | snum = smallest_rover; | ||
125 | goto have_snum; | ||
126 | } | ||
127 | } | ||
113 | goto next; | 128 | goto next; |
129 | } | ||
114 | break; | 130 | break; |
115 | next: | 131 | next: |
116 | spin_unlock(&head->lock); | 132 | spin_unlock(&head->lock); |
@@ -125,14 +141,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) | |||
125 | * the top level, not from the 'break;' statement. | 141 | * the top level, not from the 'break;' statement. |
126 | */ | 142 | */ |
127 | ret = 1; | 143 | ret = 1; |
128 | if (remaining <= 0) | 144 | if (remaining <= 0) { |
145 | if (smallest_size != -1) { | ||
146 | snum = smallest_rover; | ||
147 | goto have_snum; | ||
148 | } | ||
129 | goto fail; | 149 | goto fail; |
130 | 150 | } | |
131 | /* OK, here is the one we will use. HEAD is | 151 | /* OK, here is the one we will use. HEAD is |
132 | * non-NULL and we hold it's mutex. | 152 | * non-NULL and we hold it's mutex. |
133 | */ | 153 | */ |
134 | snum = rover; | 154 | snum = rover; |
135 | } else { | 155 | } else { |
156 | have_snum: | ||
136 | head = &hashinfo->bhash[inet_bhashfn(net, snum, | 157 | head = &hashinfo->bhash[inet_bhashfn(net, snum, |
137 | hashinfo->bhash_size)]; | 158 | hashinfo->bhash_size)]; |
138 | spin_lock(&head->lock); | 159 | spin_lock(&head->lock); |
@@ -145,12 +166,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) | |||
145 | tb_found: | 166 | tb_found: |
146 | if (!hlist_empty(&tb->owners)) { | 167 | if (!hlist_empty(&tb->owners)) { |
147 | if (tb->fastreuse > 0 && | 168 | if (tb->fastreuse > 0 && |
148 | sk->sk_reuse && sk->sk_state != TCP_LISTEN) { | 169 | sk->sk_reuse && sk->sk_state != TCP_LISTEN && |
170 | smallest_size == -1) { | ||
149 | goto success; | 171 | goto success; |
150 | } else { | 172 | } else { |
151 | ret = 1; | 173 | ret = 1; |
152 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) | 174 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { |
175 | if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && | ||
176 | smallest_size != -1 && --attempts >= 0) { | ||
177 | spin_unlock(&head->lock); | ||
178 | goto again; | ||
179 | } | ||
153 | goto fail_unlock; | 180 | goto fail_unlock; |
181 | } | ||
154 | } | 182 | } |
155 | } | 183 | } |
156 | tb_not_found: | 184 | tb_not_found: |
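The inet_csk_get_port() changes let an exhausted ephemeral-port scan fall back to the reusable in-use port with the fewest owners instead of failing outright. A toy userspace model of that heuristic only; there is no hashing, locking, or bind-conflict check, and all names are illustrative:

```c
#include <stdio.h>
#include <stdlib.h>

/* Toy bind bucket: how many sockets own the port, and whether they all
 * allow sharing (the kernel's tb->fastreuse). */
struct bucket {
    int num_owners;
    int fastreuse;
};

/* Pick a port in [low, high]: prefer a completely free port, but remember
 * the reusable in-use port with the fewest owners and fall back to it if
 * the whole range is occupied. */
static int pick_port(struct bucket *table, int low, int high, int want_reuse)
{
    int remaining = high - low + 1;
    int rover = low + rand() % remaining;
    int smallest_rover = -1, smallest_size = -1;

    for (; remaining > 0; remaining--) {
        struct bucket *tb = &table[rover];

        if (tb->num_owners == 0)
            return rover;                   /* free port: use it */

        if (tb->fastreuse && want_reuse &&
            (smallest_size == -1 || tb->num_owners < smallest_size)) {
            smallest_size = tb->num_owners; /* best shared candidate so far */
            smallest_rover = rover;
        }
        if (++rover > high)
            rover = low;
    }
    return smallest_rover;                  /* -1 if nothing reusable either */
}

int main(void)
{
    static struct bucket table[65536];
    int low = 32768, high = 32770;

    /* All three ports taken, but 32769 is reusable with the fewest owners. */
    table[32768] = (struct bucket){ .num_owners = 3, .fastreuse = 1 };
    table[32769] = (struct bucket){ .num_owners = 1, .fastreuse = 1 };
    table[32770] = (struct bucket){ .num_owners = 2, .fastreuse = 0 };

    printf("chosen port: %d\n", pick_port(table, low, high, 1));
    return 0;
}
```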
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 6c52e08f786e..eaf3e2c8646a 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -267,6 +267,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, | |||
267 | 267 | ||
268 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, | 268 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, |
269 | struct inet_frags *f, void *key, unsigned int hash) | 269 | struct inet_frags *f, void *key, unsigned int hash) |
270 | __releases(&f->lock) | ||
270 | { | 271 | { |
271 | struct inet_frag_queue *q; | 272 | struct inet_frag_queue *q; |
272 | struct hlist_node *n; | 273 | struct hlist_node *n; |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 6a1045da48d2..625cc5f64c94 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -38,6 +38,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, | |||
38 | write_pnet(&tb->ib_net, hold_net(net)); | 38 | write_pnet(&tb->ib_net, hold_net(net)); |
39 | tb->port = snum; | 39 | tb->port = snum; |
40 | tb->fastreuse = 0; | 40 | tb->fastreuse = 0; |
41 | tb->num_owners = 0; | ||
41 | INIT_HLIST_HEAD(&tb->owners); | 42 | INIT_HLIST_HEAD(&tb->owners); |
42 | hlist_add_head(&tb->node, &head->chain); | 43 | hlist_add_head(&tb->node, &head->chain); |
43 | } | 44 | } |
@@ -59,8 +60,13 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket | |||
59 | void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | 60 | void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, |
60 | const unsigned short snum) | 61 | const unsigned short snum) |
61 | { | 62 | { |
63 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | ||
64 | |||
65 | atomic_inc(&hashinfo->bsockets); | ||
66 | |||
62 | inet_sk(sk)->num = snum; | 67 | inet_sk(sk)->num = snum; |
63 | sk_add_bind_node(sk, &tb->owners); | 68 | sk_add_bind_node(sk, &tb->owners); |
69 | tb->num_owners++; | ||
64 | inet_csk(sk)->icsk_bind_hash = tb; | 70 | inet_csk(sk)->icsk_bind_hash = tb; |
65 | } | 71 | } |
66 | 72 | ||
@@ -75,9 +81,12 @@ static void __inet_put_port(struct sock *sk) | |||
75 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; | 81 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; |
76 | struct inet_bind_bucket *tb; | 82 | struct inet_bind_bucket *tb; |
77 | 83 | ||
84 | atomic_dec(&hashinfo->bsockets); | ||
85 | |||
78 | spin_lock(&head->lock); | 86 | spin_lock(&head->lock); |
79 | tb = inet_csk(sk)->icsk_bind_hash; | 87 | tb = inet_csk(sk)->icsk_bind_hash; |
80 | __sk_del_bind_node(sk); | 88 | __sk_del_bind_node(sk); |
89 | tb->num_owners--; | ||
81 | inet_csk(sk)->icsk_bind_hash = NULL; | 90 | inet_csk(sk)->icsk_bind_hash = NULL; |
82 | inet_sk(sk)->num = 0; | 91 | inet_sk(sk)->num = 0; |
83 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); | 92 | inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); |
@@ -444,9 +453,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
444 | */ | 453 | */ |
445 | inet_bind_bucket_for_each(tb, node, &head->chain) { | 454 | inet_bind_bucket_for_each(tb, node, &head->chain) { |
446 | if (ib_net(tb) == net && tb->port == port) { | 455 | if (ib_net(tb) == net && tb->port == port) { |
447 | WARN_ON(hlist_empty(&tb->owners)); | ||
448 | if (tb->fastreuse >= 0) | 456 | if (tb->fastreuse >= 0) |
449 | goto next_port; | 457 | goto next_port; |
458 | WARN_ON(hlist_empty(&tb->owners)); | ||
450 | if (!check_established(death_row, sk, | 459 | if (!check_established(death_row, sk, |
451 | port, &tw)) | 460 | port, &tw)) |
452 | goto ok; | 461 | goto ok; |
@@ -523,6 +532,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h) | |||
523 | { | 532 | { |
524 | int i; | 533 | int i; |
525 | 534 | ||
535 | atomic_set(&h->bsockets, 0); | ||
526 | for (i = 0; i < INET_LHTABLE_SIZE; i++) { | 536 | for (i = 0; i < INET_LHTABLE_SIZE; i++) { |
527 | spin_lock_init(&h->listening_hash[i].lock); | 537 | spin_lock_init(&h->listening_hash[i].lock); |
528 | INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, | 538 | INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, |
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0101521f366b..e62510d5ea5a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,67 +164,124 @@ static DEFINE_RWLOCK(ipgre_lock); | |||
164 | 164 | ||
165 | /* Given src, dst and key, find appropriate for input tunnel. */ | 165 | /* Given src, dst and key, find appropriate for input tunnel. */ |
166 | 166 | ||
167 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, | 167 | static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, |
168 | __be32 remote, __be32 local, | 168 | __be32 remote, __be32 local, |
169 | __be32 key, __be16 gre_proto) | 169 | __be32 key, __be16 gre_proto) |
170 | { | 170 | { |
171 | struct net *net = dev_net(dev); | ||
172 | int link = dev->ifindex; | ||
171 | unsigned h0 = HASH(remote); | 173 | unsigned h0 = HASH(remote); |
172 | unsigned h1 = HASH(key); | 174 | unsigned h1 = HASH(key); |
173 | struct ip_tunnel *t; | 175 | struct ip_tunnel *t, *cand = NULL; |
174 | struct ip_tunnel *t2 = NULL; | ||
175 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 176 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
176 | int dev_type = (gre_proto == htons(ETH_P_TEB)) ? | 177 | int dev_type = (gre_proto == htons(ETH_P_TEB)) ? |
177 | ARPHRD_ETHER : ARPHRD_IPGRE; | 178 | ARPHRD_ETHER : ARPHRD_IPGRE; |
179 | int score, cand_score = 4; | ||
178 | 180 | ||
179 | for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { | 181 | for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { |
180 | if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { | 182 | if (local != t->parms.iph.saddr || |
181 | if (t->parms.i_key == key && t->dev->flags & IFF_UP) { | 183 | remote != t->parms.iph.daddr || |
182 | if (t->dev->type == dev_type) | 184 | key != t->parms.i_key || |
183 | return t; | 185 | !(t->dev->flags & IFF_UP)) |
184 | if (t->dev->type == ARPHRD_IPGRE && !t2) | 186 | continue; |
185 | t2 = t; | 187 | |
186 | } | 188 | if (t->dev->type != ARPHRD_IPGRE && |
189 | t->dev->type != dev_type) | ||
190 | continue; | ||
191 | |||
192 | score = 0; | ||
193 | if (t->parms.link != link) | ||
194 | score |= 1; | ||
195 | if (t->dev->type != dev_type) | ||
196 | score |= 2; | ||
197 | if (score == 0) | ||
198 | return t; | ||
199 | |||
200 | if (score < cand_score) { | ||
201 | cand = t; | ||
202 | cand_score = score; | ||
187 | } | 203 | } |
188 | } | 204 | } |
189 | 205 | ||
190 | for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { | 206 | for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { |
191 | if (remote == t->parms.iph.daddr) { | 207 | if (remote != t->parms.iph.daddr || |
192 | if (t->parms.i_key == key && t->dev->flags & IFF_UP) { | 208 | key != t->parms.i_key || |
193 | if (t->dev->type == dev_type) | 209 | !(t->dev->flags & IFF_UP)) |
194 | return t; | 210 | continue; |
195 | if (t->dev->type == ARPHRD_IPGRE && !t2) | 211 | |
196 | t2 = t; | 212 | if (t->dev->type != ARPHRD_IPGRE && |
197 | } | 213 | t->dev->type != dev_type) |
214 | continue; | ||
215 | |||
216 | score = 0; | ||
217 | if (t->parms.link != link) | ||
218 | score |= 1; | ||
219 | if (t->dev->type != dev_type) | ||
220 | score |= 2; | ||
221 | if (score == 0) | ||
222 | return t; | ||
223 | |||
224 | if (score < cand_score) { | ||
225 | cand = t; | ||
226 | cand_score = score; | ||
198 | } | 227 | } |
199 | } | 228 | } |
200 | 229 | ||
201 | for (t = ign->tunnels_l[h1]; t; t = t->next) { | 230 | for (t = ign->tunnels_l[h1]; t; t = t->next) { |
202 | if (local == t->parms.iph.saddr || | 231 | if ((local != t->parms.iph.saddr && |
203 | (local == t->parms.iph.daddr && | 232 | (local != t->parms.iph.daddr || |
204 | ipv4_is_multicast(local))) { | 233 | !ipv4_is_multicast(local))) || |
205 | if (t->parms.i_key == key && t->dev->flags & IFF_UP) { | 234 | key != t->parms.i_key || |
206 | if (t->dev->type == dev_type) | 235 | !(t->dev->flags & IFF_UP)) |
207 | return t; | 236 | continue; |
208 | if (t->dev->type == ARPHRD_IPGRE && !t2) | 237 | |
209 | t2 = t; | 238 | if (t->dev->type != ARPHRD_IPGRE && |
210 | } | 239 | t->dev->type != dev_type) |
240 | continue; | ||
241 | |||
242 | score = 0; | ||
243 | if (t->parms.link != link) | ||
244 | score |= 1; | ||
245 | if (t->dev->type != dev_type) | ||
246 | score |= 2; | ||
247 | if (score == 0) | ||
248 | return t; | ||
249 | |||
250 | if (score < cand_score) { | ||
251 | cand = t; | ||
252 | cand_score = score; | ||
211 | } | 253 | } |
212 | } | 254 | } |
213 | 255 | ||
214 | for (t = ign->tunnels_wc[h1]; t; t = t->next) { | 256 | for (t = ign->tunnels_wc[h1]; t; t = t->next) { |
215 | if (t->parms.i_key == key && t->dev->flags & IFF_UP) { | 257 | if (t->parms.i_key != key || |
216 | if (t->dev->type == dev_type) | 258 | !(t->dev->flags & IFF_UP)) |
217 | return t; | 259 | continue; |
218 | if (t->dev->type == ARPHRD_IPGRE && !t2) | 260 | |
219 | t2 = t; | 261 | if (t->dev->type != ARPHRD_IPGRE && |
262 | t->dev->type != dev_type) | ||
263 | continue; | ||
264 | |||
265 | score = 0; | ||
266 | if (t->parms.link != link) | ||
267 | score |= 1; | ||
268 | if (t->dev->type != dev_type) | ||
269 | score |= 2; | ||
270 | if (score == 0) | ||
271 | return t; | ||
272 | |||
273 | if (score < cand_score) { | ||
274 | cand = t; | ||
275 | cand_score = score; | ||
220 | } | 276 | } |
221 | } | 277 | } |
222 | 278 | ||
223 | if (t2) | 279 | if (cand != NULL) |
224 | return t2; | 280 | return cand; |
225 | 281 | ||
226 | if (ign->fb_tunnel_dev->flags&IFF_UP) | 282 | if (ign->fb_tunnel_dev->flags & IFF_UP) |
227 | return netdev_priv(ign->fb_tunnel_dev); | 283 | return netdev_priv(ign->fb_tunnel_dev); |
284 | |||
228 | return NULL; | 285 | return NULL; |
229 | } | 286 | } |
230 | 287 | ||
@@ -284,6 +341,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net, | |||
284 | __be32 remote = parms->iph.daddr; | 341 | __be32 remote = parms->iph.daddr; |
285 | __be32 local = parms->iph.saddr; | 342 | __be32 local = parms->iph.saddr; |
286 | __be32 key = parms->i_key; | 343 | __be32 key = parms->i_key; |
344 | int link = parms->link; | ||
287 | struct ip_tunnel *t, **tp; | 345 | struct ip_tunnel *t, **tp; |
288 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); | 346 | struct ipgre_net *ign = net_generic(net, ipgre_net_id); |
289 | 347 | ||
@@ -291,6 +349,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net, | |||
291 | if (local == t->parms.iph.saddr && | 349 | if (local == t->parms.iph.saddr && |
292 | remote == t->parms.iph.daddr && | 350 | remote == t->parms.iph.daddr && |
293 | key == t->parms.i_key && | 351 | key == t->parms.i_key && |
352 | link == t->parms.link && | ||
294 | type == t->dev->type) | 353 | type == t->dev->type) |
295 | break; | 354 | break; |
296 | 355 | ||
@@ -421,7 +480,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
421 | } | 480 | } |
422 | 481 | ||
423 | read_lock(&ipgre_lock); | 482 | read_lock(&ipgre_lock); |
424 | t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, | 483 | t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, |
425 | flags & GRE_KEY ? | 484 | flags & GRE_KEY ? |
426 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, | 485 | *(((__be32 *)p) + (grehlen / 4) - 1) : 0, |
427 | p[1]); | 486 | p[1]); |
@@ -432,7 +491,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
432 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 491 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
433 | goto out; | 492 | goto out; |
434 | 493 | ||
435 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 494 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
436 | t->err_count++; | 495 | t->err_count++; |
437 | else | 496 | else |
438 | t->err_count = 1; | 497 | t->err_count = 1; |
@@ -518,7 +577,7 @@ static int ipgre_rcv(struct sk_buff *skb) | |||
518 | gre_proto = *(__be16 *)(h + 2); | 577 | gre_proto = *(__be16 *)(h + 2); |
519 | 578 | ||
520 | read_lock(&ipgre_lock); | 579 | read_lock(&ipgre_lock); |
521 | if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), | 580 | if ((tunnel = ipgre_tunnel_lookup(skb->dev, |
522 | iph->saddr, iph->daddr, key, | 581 | iph->saddr, iph->daddr, key, |
523 | gre_proto))) { | 582 | gre_proto))) { |
524 | struct net_device_stats *stats = &tunnel->dev->stats; | 583 | struct net_device_stats *stats = &tunnel->dev->stats; |
@@ -744,7 +803,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
744 | #endif | 803 | #endif |
745 | 804 | ||
746 | if (tunnel->err_count > 0) { | 805 | if (tunnel->err_count > 0) { |
747 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 806 | if (time_before(jiffies, |
807 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
748 | tunnel->err_count--; | 808 | tunnel->err_count--; |
749 | 809 | ||
750 | dst_link_failure(skb); | 810 | dst_link_failure(skb); |
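The rewritten ipgre_tunnel_lookup() scores each matching tunnel (a link mismatch costs 1, a device-type mismatch costs 2), returns immediately on an exact match, and otherwise keeps the lowest-scoring candidate. A stripped-down sketch of that selection logic; the address, key, and IFF_UP matching the real function also performs is left out:

```c
#include <stdio.h>
#include <stddef.h>

/* Toy tunnel entry: only the attributes the rewritten lookup scores on. */
struct tunnel {
    const char *name;
    int link;        /* ifindex the tunnel is bound to */
    int dev_type;    /* ARPHRD_IPGRE vs ARPHRD_ETHER, modelled as an int */
};

/* An exact match (score 0) wins immediately; otherwise keep the
 * candidate with the lowest score. */
static const struct tunnel *lookup(const struct tunnel *tbl, size_t n,
                                   int link, int dev_type)
{
    const struct tunnel *cand = NULL;
    int cand_score = 4;

    for (size_t i = 0; i < n; i++) {
        int score = 0;

        if (tbl[i].link != link)
            score |= 1;
        if (tbl[i].dev_type != dev_type)
            score |= 2;
        if (score == 0)
            return &tbl[i];
        if (score < cand_score) {
            cand = &tbl[i];
            cand_score = score;
        }
    }
    return cand;
}

int main(void)
{
    struct tunnel tbl[] = {
        { "gre1", 2, 1 },   /* wrong link, right type -> score 1 */
        { "gre2", 3, 0 },   /* right link, wrong type -> score 2 */
    };

    const struct tunnel *t = lookup(tbl, 2, /*link=*/3, /*dev_type=*/1);
    printf("best candidate: %s\n", t ? t->name : "(none)");
    return 0;
}
```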
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8ebe86dd72af..3e7e910c7c0f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -935,6 +935,10 @@ alloc_new_skb: | |||
935 | sk->sk_allocation); | 935 | sk->sk_allocation); |
936 | if (unlikely(skb == NULL)) | 936 | if (unlikely(skb == NULL)) |
937 | err = -ENOBUFS; | 937 | err = -ENOBUFS; |
938 | else | ||
939 | /* only the initial fragment is | ||
940 | time stamped */ | ||
941 | ipc->shtx.flags = 0; | ||
938 | } | 942 | } |
939 | if (skb == NULL) | 943 | if (skb == NULL) |
940 | goto error; | 944 | goto error; |
@@ -945,6 +949,7 @@ alloc_new_skb: | |||
945 | skb->ip_summed = csummode; | 949 | skb->ip_summed = csummode; |
946 | skb->csum = 0; | 950 | skb->csum = 0; |
947 | skb_reserve(skb, hh_len); | 951 | skb_reserve(skb, hh_len); |
952 | *skb_tx(skb) = ipc->shtx; | ||
948 | 953 | ||
949 | /* | 954 | /* |
950 | * Find where to start putting bytes. | 955 | * Find where to start putting bytes. |
@@ -1364,6 +1369,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar | |||
1364 | 1369 | ||
1365 | daddr = ipc.addr = rt->rt_src; | 1370 | daddr = ipc.addr = rt->rt_src; |
1366 | ipc.opt = NULL; | 1371 | ipc.opt = NULL; |
1372 | ipc.shtx.flags = 0; | ||
1367 | 1373 | ||
1368 | if (replyopts.opt.optlen) { | 1374 | if (replyopts.opt.optlen) { |
1369 | ipc.opt = &replyopts.opt; | 1375 | ipc.opt = &replyopts.opt; |
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index d722013c1cae..90d22ae0a419 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -100,8 +100,8 @@ | |||
100 | #define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers | 100 | #define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers |
101 | - '3' from resolv.h */ | 101 | - '3' from resolv.h */ |
102 | 102 | ||
103 | #define NONE __constant_htonl(INADDR_NONE) | 103 | #define NONE cpu_to_be32(INADDR_NONE) |
104 | #define ANY __constant_htonl(INADDR_ANY) | 104 | #define ANY cpu_to_be32(INADDR_ANY) |
105 | 105 | ||
106 | /* | 106 | /* |
107 | * Public IP configuration | 107 | * Public IP configuration |
@@ -406,7 +406,7 @@ static int __init ic_defaults(void) | |||
406 | static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); | 406 | static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); |
407 | 407 | ||
408 | static struct packet_type rarp_packet_type __initdata = { | 408 | static struct packet_type rarp_packet_type __initdata = { |
409 | .type = __constant_htons(ETH_P_RARP), | 409 | .type = cpu_to_be16(ETH_P_RARP), |
410 | .func = ic_rarp_recv, | 410 | .func = ic_rarp_recv, |
411 | }; | 411 | }; |
412 | 412 | ||
@@ -568,7 +568,7 @@ struct bootp_pkt { /* BOOTP packet format */ | |||
568 | static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); | 568 | static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); |
569 | 569 | ||
570 | static struct packet_type bootp_packet_type __initdata = { | 570 | static struct packet_type bootp_packet_type __initdata = { |
571 | .type = __constant_htons(ETH_P_IP), | 571 | .type = cpu_to_be16(ETH_P_IP), |
572 | .func = ic_bootp_recv, | 572 | .func = ic_bootp_recv, |
573 | }; | 573 | }; |
574 | 574 | ||
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 5079dfbc6f38..9054139795af 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -327,7 +327,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
327 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 327 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
328 | goto out; | 328 | goto out; |
329 | 329 | ||
330 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 330 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
331 | t->err_count++; | 331 | t->err_count++; |
332 | else | 332 | else |
333 | t->err_count = 1; | 333 | t->err_count = 1; |
@@ -466,7 +466,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
466 | } | 466 | } |
467 | 467 | ||
468 | if (tunnel->err_count > 0) { | 468 | if (tunnel->err_count > 0) { |
469 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 469 | if (time_before(jiffies, |
470 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
470 | tunnel->err_count--; | 471 | tunnel->err_count--; |
471 | dst_link_failure(skb); | 472 | dst_link_failure(skb); |
472 | } else | 473 | } else |
@@ -750,7 +751,7 @@ static struct xfrm_tunnel ipip_handler = { | |||
750 | .priority = 1, | 751 | .priority = 1, |
751 | }; | 752 | }; |
752 | 753 | ||
753 | static char banner[] __initdata = | 754 | static const char banner[] __initconst = |
754 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | 755 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; |
755 | 756 | ||
756 | static void ipip_destroy_tunnels(struct ipip_net *ipn) | 757 | static void ipip_destroy_tunnels(struct ipip_net *ipn) |
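Both the ip_gre.c and ipip.c hunks convert open-coded jiffies arithmetic to time_before(), the kernel's wraparound-safe comparison idiom. A small userspace sketch of why the signed-difference trick matters when the counter wraps; it relies on the usual two's-complement conversion behaviour, as the kernel macro effectively does:

```c
#include <stdio.h>
#include <limits.h>

/* Wraparound-safe "a is before b" for a free-running counter, the same
 * signed-subtraction trick time_before() uses for jiffies. */
static int time_before_u32(unsigned int a, unsigned int b)
{
    return (int)(a - b) < 0;
}

int main(void)
{
    unsigned int err_time = UINT_MAX - 5;   /* last error, counter about to wrap */
    unsigned int deadline = err_time + 30;  /* wraps around to a small value */
    unsigned int now      = UINT_MAX - 2;   /* only 3 ticks after err_time */

    /* A plain comparison thinks the wrapped deadline already passed... */
    printf("naive now < deadline: %d\n", now < deadline);
    /* ...while the signed difference correctly says we are still before it. */
    printf("time_before(now, deadline): %d\n", time_before_u32(now, deadline));
    return 0;
}
```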
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 14666449dc1c..13e9dd3012b3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,9 +67,6 @@ | |||
67 | #define CONFIG_IP_PIMSM 1 | 67 | #define CONFIG_IP_PIMSM 1 |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | static struct sock *mroute_socket; | ||
71 | |||
72 | |||
73 | /* Big lock, protecting vif table, mrt cache and mroute socket state. | 70 | /* Big lock, protecting vif table, mrt cache and mroute socket state. |
74 | Note that the changes are semaphored via rtnl_lock. | 71 | Note that the changes are semaphored via rtnl_lock. |
75 | */ | 72 | */ |
@@ -80,18 +77,9 @@ static DEFINE_RWLOCK(mrt_lock); | |||
80 | * Multicast router control variables | 77 | * Multicast router control variables |
81 | */ | 78 | */ |
82 | 79 | ||
83 | static struct vif_device vif_table[MAXVIFS]; /* Devices */ | 80 | #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) |
84 | static int maxvif; | ||
85 | |||
86 | #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL) | ||
87 | |||
88 | static int mroute_do_assert; /* Set in PIM assert */ | ||
89 | static int mroute_do_pim; | ||
90 | |||
91 | static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ | ||
92 | 81 | ||
93 | static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ | 82 | static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ |
94 | static atomic_t cache_resolve_queue_len; /* Size of unresolved */ | ||
95 | 83 | ||
96 | /* Special spinlock for queue of unresolved entries */ | 84 | /* Special spinlock for queue of unresolved entries */ |
97 | static DEFINE_SPINLOCK(mfc_unres_lock); | 85 | static DEFINE_SPINLOCK(mfc_unres_lock); |
@@ -107,7 +95,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); | |||
107 | static struct kmem_cache *mrt_cachep __read_mostly; | 95 | static struct kmem_cache *mrt_cachep __read_mostly; |
108 | 96 | ||
109 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); | 97 | static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); |
110 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); | 98 | static int ipmr_cache_report(struct net *net, |
99 | struct sk_buff *pkt, vifi_t vifi, int assert); | ||
111 | static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); | 100 | static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); |
112 | 101 | ||
113 | #ifdef CONFIG_IP_PIMSM_V2 | 102 | #ifdef CONFIG_IP_PIMSM_V2 |
@@ -120,9 +109,11 @@ static struct timer_list ipmr_expire_timer; | |||
120 | 109 | ||
121 | static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) | 110 | static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) |
122 | { | 111 | { |
112 | struct net *net = dev_net(dev); | ||
113 | |||
123 | dev_close(dev); | 114 | dev_close(dev); |
124 | 115 | ||
125 | dev = __dev_get_by_name(&init_net, "tunl0"); | 116 | dev = __dev_get_by_name(net, "tunl0"); |
126 | if (dev) { | 117 | if (dev) { |
127 | const struct net_device_ops *ops = dev->netdev_ops; | 118 | const struct net_device_ops *ops = dev->netdev_ops; |
128 | struct ifreq ifr; | 119 | struct ifreq ifr; |
@@ -148,11 +139,11 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) | |||
148 | } | 139 | } |
149 | 140 | ||
150 | static | 141 | static |
151 | struct net_device *ipmr_new_tunnel(struct vifctl *v) | 142 | struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) |
152 | { | 143 | { |
153 | struct net_device *dev; | 144 | struct net_device *dev; |
154 | 145 | ||
155 | dev = __dev_get_by_name(&init_net, "tunl0"); | 146 | dev = __dev_get_by_name(net, "tunl0"); |
156 | 147 | ||
157 | if (dev) { | 148 | if (dev) { |
158 | const struct net_device_ops *ops = dev->netdev_ops; | 149 | const struct net_device_ops *ops = dev->netdev_ops; |
@@ -181,7 +172,8 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) | |||
181 | 172 | ||
182 | dev = NULL; | 173 | dev = NULL; |
183 | 174 | ||
184 | if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { | 175 | if (err == 0 && |
176 | (dev = __dev_get_by_name(net, p.name)) != NULL) { | ||
185 | dev->flags |= IFF_MULTICAST; | 177 | dev->flags |= IFF_MULTICAST; |
186 | 178 | ||
187 | in_dev = __in_dev_get_rtnl(dev); | 179 | in_dev = __in_dev_get_rtnl(dev); |
@@ -209,14 +201,15 @@ failure: | |||
209 | 201 | ||
210 | #ifdef CONFIG_IP_PIMSM | 202 | #ifdef CONFIG_IP_PIMSM |
211 | 203 | ||
212 | static int reg_vif_num = -1; | ||
213 | |||
214 | static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) | 204 | static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) |
215 | { | 205 | { |
206 | struct net *net = dev_net(dev); | ||
207 | |||
216 | read_lock(&mrt_lock); | 208 | read_lock(&mrt_lock); |
217 | dev->stats.tx_bytes += skb->len; | 209 | dev->stats.tx_bytes += skb->len; |
218 | dev->stats.tx_packets++; | 210 | dev->stats.tx_packets++; |
219 | ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); | 211 | ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, |
212 | IGMPMSG_WHOLEPKT); | ||
220 | read_unlock(&mrt_lock); | 213 | read_unlock(&mrt_lock); |
221 | kfree_skb(skb); | 214 | kfree_skb(skb); |
222 | return 0; | 215 | return 0; |
@@ -283,16 +276,16 @@ failure: | |||
283 | * @notify: Set to 1, if the caller is a notifier_call | 276 | * @notify: Set to 1, if the caller is a notifier_call |
284 | */ | 277 | */ |
285 | 278 | ||
286 | static int vif_delete(int vifi, int notify) | 279 | static int vif_delete(struct net *net, int vifi, int notify) |
287 | { | 280 | { |
288 | struct vif_device *v; | 281 | struct vif_device *v; |
289 | struct net_device *dev; | 282 | struct net_device *dev; |
290 | struct in_device *in_dev; | 283 | struct in_device *in_dev; |
291 | 284 | ||
292 | if (vifi < 0 || vifi >= maxvif) | 285 | if (vifi < 0 || vifi >= net->ipv4.maxvif) |
293 | return -EADDRNOTAVAIL; | 286 | return -EADDRNOTAVAIL; |
294 | 287 | ||
295 | v = &vif_table[vifi]; | 288 | v = &net->ipv4.vif_table[vifi]; |
296 | 289 | ||
297 | write_lock_bh(&mrt_lock); | 290 | write_lock_bh(&mrt_lock); |
298 | dev = v->dev; | 291 | dev = v->dev; |
@@ -304,17 +297,17 @@ static int vif_delete(int vifi, int notify) | |||
304 | } | 297 | } |
305 | 298 | ||
306 | #ifdef CONFIG_IP_PIMSM | 299 | #ifdef CONFIG_IP_PIMSM |
307 | if (vifi == reg_vif_num) | 300 | if (vifi == net->ipv4.mroute_reg_vif_num) |
308 | reg_vif_num = -1; | 301 | net->ipv4.mroute_reg_vif_num = -1; |
309 | #endif | 302 | #endif |
310 | 303 | ||
311 | if (vifi+1 == maxvif) { | 304 | if (vifi+1 == net->ipv4.maxvif) { |
312 | int tmp; | 305 | int tmp; |
313 | for (tmp=vifi-1; tmp>=0; tmp--) { | 306 | for (tmp=vifi-1; tmp>=0; tmp--) { |
314 | if (VIF_EXISTS(tmp)) | 307 | if (VIF_EXISTS(net, tmp)) |
315 | break; | 308 | break; |
316 | } | 309 | } |
317 | maxvif = tmp+1; | 310 | net->ipv4.maxvif = tmp+1; |
318 | } | 311 | } |
319 | 312 | ||
320 | write_unlock_bh(&mrt_lock); | 313 | write_unlock_bh(&mrt_lock); |
@@ -333,6 +326,12 @@ static int vif_delete(int vifi, int notify) | |||
333 | return 0; | 326 | return 0; |
334 | } | 327 | } |
335 | 328 | ||
329 | static inline void ipmr_cache_free(struct mfc_cache *c) | ||
330 | { | ||
331 | release_net(mfc_net(c)); | ||
332 | kmem_cache_free(mrt_cachep, c); | ||
333 | } | ||
334 | |||
336 | /* Destroy an unresolved cache entry, killing queued skbs | 335 | /* Destroy an unresolved cache entry, killing queued skbs |
337 | and reporting error to netlink readers. | 336 | and reporting error to netlink readers. |
338 | */ | 337 | */ |
@@ -341,8 +340,9 @@ static void ipmr_destroy_unres(struct mfc_cache *c) | |||
341 | { | 340 | { |
342 | struct sk_buff *skb; | 341 | struct sk_buff *skb; |
343 | struct nlmsgerr *e; | 342 | struct nlmsgerr *e; |
343 | struct net *net = mfc_net(c); | ||
344 | 344 | ||
345 | atomic_dec(&cache_resolve_queue_len); | 345 | atomic_dec(&net->ipv4.cache_resolve_queue_len); |
346 | 346 | ||
347 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { | 347 | while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { |
348 | if (ip_hdr(skb)->version == 0) { | 348 | if (ip_hdr(skb)->version == 0) { |
@@ -354,12 +354,12 @@ static void ipmr_destroy_unres(struct mfc_cache *c) | |||
354 | e->error = -ETIMEDOUT; | 354 | e->error = -ETIMEDOUT; |
355 | memset(&e->msg, 0, sizeof(e->msg)); | 355 | memset(&e->msg, 0, sizeof(e->msg)); |
356 | 356 | ||
357 | rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); | 357 | rtnl_unicast(skb, net, NETLINK_CB(skb).pid); |
358 | } else | 358 | } else |
359 | kfree_skb(skb); | 359 | kfree_skb(skb); |
360 | } | 360 | } |
361 | 361 | ||
362 | kmem_cache_free(mrt_cachep, c); | 362 | ipmr_cache_free(c); |
363 | } | 363 | } |
364 | 364 | ||
365 | 365 | ||
@@ -376,7 +376,7 @@ static void ipmr_expire_process(unsigned long dummy) | |||
376 | return; | 376 | return; |
377 | } | 377 | } |
378 | 378 | ||
379 | if (atomic_read(&cache_resolve_queue_len) == 0) | 379 | if (mfc_unres_queue == NULL) |
380 | goto out; | 380 | goto out; |
381 | 381 | ||
382 | now = jiffies; | 382 | now = jiffies; |
@@ -397,7 +397,7 @@ static void ipmr_expire_process(unsigned long dummy) | |||
397 | ipmr_destroy_unres(c); | 397 | ipmr_destroy_unres(c); |
398 | } | 398 | } |
399 | 399 | ||
400 | if (atomic_read(&cache_resolve_queue_len)) | 400 | if (mfc_unres_queue != NULL) |
401 | mod_timer(&ipmr_expire_timer, jiffies + expires); | 401 | mod_timer(&ipmr_expire_timer, jiffies + expires); |
402 | 402 | ||
403 | out: | 403 | out: |
@@ -409,13 +409,15 @@ out: | |||
409 | static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) | 409 | static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) |
410 | { | 410 | { |
411 | int vifi; | 411 | int vifi; |
412 | struct net *net = mfc_net(cache); | ||
412 | 413 | ||
413 | cache->mfc_un.res.minvif = MAXVIFS; | 414 | cache->mfc_un.res.minvif = MAXVIFS; |
414 | cache->mfc_un.res.maxvif = 0; | 415 | cache->mfc_un.res.maxvif = 0; |
415 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); | 416 | memset(cache->mfc_un.res.ttls, 255, MAXVIFS); |
416 | 417 | ||
417 | for (vifi=0; vifi<maxvif; vifi++) { | 418 | for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { |
418 | if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { | 419 | if (VIF_EXISTS(net, vifi) && |
420 | ttls[vifi] && ttls[vifi] < 255) { | ||
419 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; | 421 | cache->mfc_un.res.ttls[vifi] = ttls[vifi]; |
420 | if (cache->mfc_un.res.minvif > vifi) | 422 | if (cache->mfc_un.res.minvif > vifi) |
421 | cache->mfc_un.res.minvif = vifi; | 423 | cache->mfc_un.res.minvif = vifi; |
@@ -425,16 +427,16 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) | |||
425 | } | 427 | } |
426 | } | 428 | } |
427 | 429 | ||
428 | static int vif_add(struct vifctl *vifc, int mrtsock) | 430 | static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) |
429 | { | 431 | { |
430 | int vifi = vifc->vifc_vifi; | 432 | int vifi = vifc->vifc_vifi; |
431 | struct vif_device *v = &vif_table[vifi]; | 433 | struct vif_device *v = &net->ipv4.vif_table[vifi]; |
432 | struct net_device *dev; | 434 | struct net_device *dev; |
433 | struct in_device *in_dev; | 435 | struct in_device *in_dev; |
434 | int err; | 436 | int err; |
435 | 437 | ||
436 | /* Is vif busy ? */ | 438 | /* Is vif busy ? */ |
437 | if (VIF_EXISTS(vifi)) | 439 | if (VIF_EXISTS(net, vifi)) |
438 | return -EADDRINUSE; | 440 | return -EADDRINUSE; |
439 | 441 | ||
440 | switch (vifc->vifc_flags) { | 442 | switch (vifc->vifc_flags) { |
@@ -444,7 +446,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
444 | * Special Purpose VIF in PIM | 446 | * Special Purpose VIF in PIM |
445 | * All the packets will be sent to the daemon | 447 | * All the packets will be sent to the daemon |
446 | */ | 448 | */ |
447 | if (reg_vif_num >= 0) | 449 | if (net->ipv4.mroute_reg_vif_num >= 0) |
448 | return -EADDRINUSE; | 450 | return -EADDRINUSE; |
449 | dev = ipmr_reg_vif(); | 451 | dev = ipmr_reg_vif(); |
450 | if (!dev) | 452 | if (!dev) |
@@ -458,7 +460,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
458 | break; | 460 | break; |
459 | #endif | 461 | #endif |
460 | case VIFF_TUNNEL: | 462 | case VIFF_TUNNEL: |
461 | dev = ipmr_new_tunnel(vifc); | 463 | dev = ipmr_new_tunnel(net, vifc); |
462 | if (!dev) | 464 | if (!dev) |
463 | return -ENOBUFS; | 465 | return -ENOBUFS; |
464 | err = dev_set_allmulti(dev, 1); | 466 | err = dev_set_allmulti(dev, 1); |
@@ -469,7 +471,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
469 | } | 471 | } |
470 | break; | 472 | break; |
471 | case 0: | 473 | case 0: |
472 | dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); | 474 | dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); |
473 | if (!dev) | 475 | if (!dev) |
474 | return -EADDRNOTAVAIL; | 476 | return -EADDRNOTAVAIL; |
475 | err = dev_set_allmulti(dev, 1); | 477 | err = dev_set_allmulti(dev, 1); |
@@ -510,20 +512,22 @@ static int vif_add(struct vifctl *vifc, int mrtsock) | |||
510 | v->dev = dev; | 512 | v->dev = dev; |
511 | #ifdef CONFIG_IP_PIMSM | 513 | #ifdef CONFIG_IP_PIMSM |
512 | if (v->flags&VIFF_REGISTER) | 514 | if (v->flags&VIFF_REGISTER) |
513 | reg_vif_num = vifi; | 515 | net->ipv4.mroute_reg_vif_num = vifi; |
514 | #endif | 516 | #endif |
515 | if (vifi+1 > maxvif) | 517 | if (vifi+1 > net->ipv4.maxvif) |
516 | maxvif = vifi+1; | 518 | net->ipv4.maxvif = vifi+1; |
517 | write_unlock_bh(&mrt_lock); | 519 | write_unlock_bh(&mrt_lock); |
518 | return 0; | 520 | return 0; |
519 | } | 521 | } |
520 | 522 | ||
521 | static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) | 523 | static struct mfc_cache *ipmr_cache_find(struct net *net, |
524 | __be32 origin, | ||
525 | __be32 mcastgrp) | ||
522 | { | 526 | { |
523 | int line = MFC_HASH(mcastgrp, origin); | 527 | int line = MFC_HASH(mcastgrp, origin); |
524 | struct mfc_cache *c; | 528 | struct mfc_cache *c; |
525 | 529 | ||
526 | for (c=mfc_cache_array[line]; c; c = c->next) { | 530 | for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { |
527 | if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) | 531 | if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) |
528 | break; | 532 | break; |
529 | } | 533 | } |
@@ -533,22 +537,24 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) | |||
533 | /* | 537 | /* |
534 | * Allocate a multicast cache entry | 538 | * Allocate a multicast cache entry |
535 | */ | 539 | */ |
536 | static struct mfc_cache *ipmr_cache_alloc(void) | 540 | static struct mfc_cache *ipmr_cache_alloc(struct net *net) |
537 | { | 541 | { |
538 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); | 542 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); |
539 | if (c == NULL) | 543 | if (c == NULL) |
540 | return NULL; | 544 | return NULL; |
541 | c->mfc_un.res.minvif = MAXVIFS; | 545 | c->mfc_un.res.minvif = MAXVIFS; |
546 | mfc_net_set(c, net); | ||
542 | return c; | 547 | return c; |
543 | } | 548 | } |
544 | 549 | ||
545 | static struct mfc_cache *ipmr_cache_alloc_unres(void) | 550 | static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) |
546 | { | 551 | { |
547 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); | 552 | struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); |
548 | if (c == NULL) | 553 | if (c == NULL) |
549 | return NULL; | 554 | return NULL; |
550 | skb_queue_head_init(&c->mfc_un.unres.unresolved); | 555 | skb_queue_head_init(&c->mfc_un.unres.unresolved); |
551 | c->mfc_un.unres.expires = jiffies + 10*HZ; | 556 | c->mfc_un.unres.expires = jiffies + 10*HZ; |
557 | mfc_net_set(c, net); | ||
552 | return c; | 558 | return c; |
553 | } | 559 | } |
554 | 560 | ||
@@ -581,7 +587,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
581 | memset(&e->msg, 0, sizeof(e->msg)); | 587 | memset(&e->msg, 0, sizeof(e->msg)); |
582 | } | 588 | } |
583 | 589 | ||
584 | rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); | 590 | rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); |
585 | } else | 591 | } else |
586 | ip_mr_forward(skb, c, 0); | 592 | ip_mr_forward(skb, c, 0); |
587 | } | 593 | } |
@@ -594,7 +600,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) | |||
594 | * Called under mrt_lock. | 600 | * Called under mrt_lock. |
595 | */ | 601 | */ |
596 | 602 | ||
597 | static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | 603 | static int ipmr_cache_report(struct net *net, |
604 | struct sk_buff *pkt, vifi_t vifi, int assert) | ||
598 | { | 605 | { |
599 | struct sk_buff *skb; | 606 | struct sk_buff *skb; |
600 | const int ihl = ip_hdrlen(pkt); | 607 | const int ihl = ip_hdrlen(pkt); |
@@ -626,7 +633,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
626 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); | 633 | memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); |
627 | msg->im_msgtype = IGMPMSG_WHOLEPKT; | 634 | msg->im_msgtype = IGMPMSG_WHOLEPKT; |
628 | msg->im_mbz = 0; | 635 | msg->im_mbz = 0; |
629 | msg->im_vif = reg_vif_num; | 636 | msg->im_vif = net->ipv4.mroute_reg_vif_num; |
630 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; | 637 | ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; |
631 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + | 638 | ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + |
632 | sizeof(struct iphdr)); | 639 | sizeof(struct iphdr)); |
@@ -658,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
658 | skb->transport_header = skb->network_header; | 665 | skb->transport_header = skb->network_header; |
659 | } | 666 | } |
660 | 667 | ||
661 | if (mroute_socket == NULL) { | 668 | if (net->ipv4.mroute_sk == NULL) { |
662 | kfree_skb(skb); | 669 | kfree_skb(skb); |
663 | return -EINVAL; | 670 | return -EINVAL; |
664 | } | 671 | } |
@@ -666,7 +673,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
666 | /* | 673 | /* |
667 | * Deliver to mrouted | 674 | * Deliver to mrouted |
668 | */ | 675 | */ |
669 | if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { | 676 | ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); |
677 | if (ret < 0) { | ||
670 | if (net_ratelimit()) | 678 | if (net_ratelimit()) |
671 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); | 679 | printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); |
672 | kfree_skb(skb); | 680 | kfree_skb(skb); |
@@ -680,7 +688,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) | |||
680 | */ | 688 | */ |
681 | 689 | ||
682 | static int | 690 | static int |
683 | ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | 691 | ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) |
684 | { | 692 | { |
685 | int err; | 693 | int err; |
686 | struct mfc_cache *c; | 694 | struct mfc_cache *c; |
@@ -688,7 +696,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
688 | 696 | ||
689 | spin_lock_bh(&mfc_unres_lock); | 697 | spin_lock_bh(&mfc_unres_lock); |
690 | for (c=mfc_unres_queue; c; c=c->next) { | 698 | for (c=mfc_unres_queue; c; c=c->next) { |
691 | if (c->mfc_mcastgrp == iph->daddr && | 699 | if (net_eq(mfc_net(c), net) && |
700 | c->mfc_mcastgrp == iph->daddr && | ||
692 | c->mfc_origin == iph->saddr) | 701 | c->mfc_origin == iph->saddr) |
693 | break; | 702 | break; |
694 | } | 703 | } |
@@ -698,8 +707,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
698 | * Create a new entry if allowable | 707 | * Create a new entry if allowable |
699 | */ | 708 | */ |
700 | 709 | ||
701 | if (atomic_read(&cache_resolve_queue_len) >= 10 || | 710 | if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || |
702 | (c=ipmr_cache_alloc_unres())==NULL) { | 711 | (c = ipmr_cache_alloc_unres(net)) == NULL) { |
703 | spin_unlock_bh(&mfc_unres_lock); | 712 | spin_unlock_bh(&mfc_unres_lock); |
704 | 713 | ||
705 | kfree_skb(skb); | 714 | kfree_skb(skb); |
@@ -716,18 +725,19 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
716 | /* | 725 | /* |
717 | * Reflect first query at mrouted. | 726 | * Reflect first query at mrouted. |
718 | */ | 727 | */ |
719 | if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) { | 728 | err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); |
729 | if (err < 0) { | ||
720 | /* If the report failed throw the cache entry | 730 | /* If the report failed throw the cache entry |
721 | out - Brad Parker | 731 | out - Brad Parker |
722 | */ | 732 | */ |
723 | spin_unlock_bh(&mfc_unres_lock); | 733 | spin_unlock_bh(&mfc_unres_lock); |
724 | 734 | ||
725 | kmem_cache_free(mrt_cachep, c); | 735 | ipmr_cache_free(c); |
726 | kfree_skb(skb); | 736 | kfree_skb(skb); |
727 | return err; | 737 | return err; |
728 | } | 738 | } |
729 | 739 | ||
730 | atomic_inc(&cache_resolve_queue_len); | 740 | atomic_inc(&net->ipv4.cache_resolve_queue_len); |
731 | c->next = mfc_unres_queue; | 741 | c->next = mfc_unres_queue; |
732 | mfc_unres_queue = c; | 742 | mfc_unres_queue = c; |
733 | 743 | ||
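From this point on the diff frees entries through ipmr_cache_free() instead of calling kmem_cache_free() directly. Presumably it is the counterpart of the namespace tagging above, i.e. roughly (a sketch, assuming the hold_net()/release_net() pairing):

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        release_net(mfc_net(c));        /* drop the reference taken in mfc_net_set() */
        kmem_cache_free(mrt_cachep, c);
}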
@@ -753,35 +763,37 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) | |||
753 | * MFC cache manipulation by user space mroute daemon | 763 | * MFC cache manipulation by user space mroute daemon |
754 | */ | 764 | */ |
755 | 765 | ||
756 | static int ipmr_mfc_delete(struct mfcctl *mfc) | 766 | static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) |
757 | { | 767 | { |
758 | int line; | 768 | int line; |
759 | struct mfc_cache *c, **cp; | 769 | struct mfc_cache *c, **cp; |
760 | 770 | ||
761 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 771 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
762 | 772 | ||
763 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { | 773 | for (cp = &net->ipv4.mfc_cache_array[line]; |
774 | (c = *cp) != NULL; cp = &c->next) { | ||
764 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 775 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
765 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { | 776 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { |
766 | write_lock_bh(&mrt_lock); | 777 | write_lock_bh(&mrt_lock); |
767 | *cp = c->next; | 778 | *cp = c->next; |
768 | write_unlock_bh(&mrt_lock); | 779 | write_unlock_bh(&mrt_lock); |
769 | 780 | ||
770 | kmem_cache_free(mrt_cachep, c); | 781 | ipmr_cache_free(c); |
771 | return 0; | 782 | return 0; |
772 | } | 783 | } |
773 | } | 784 | } |
774 | return -ENOENT; | 785 | return -ENOENT; |
775 | } | 786 | } |
776 | 787 | ||
777 | static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | 788 | static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) |
778 | { | 789 | { |
779 | int line; | 790 | int line; |
780 | struct mfc_cache *uc, *c, **cp; | 791 | struct mfc_cache *uc, *c, **cp; |
781 | 792 | ||
782 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); | 793 | line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); |
783 | 794 | ||
784 | for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { | 795 | for (cp = &net->ipv4.mfc_cache_array[line]; |
796 | (c = *cp) != NULL; cp = &c->next) { | ||
785 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && | 797 | if (c->mfc_origin == mfc->mfcc_origin.s_addr && |
786 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) | 798 | c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) |
787 | break; | 799 | break; |
@@ -800,7 +812,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
800 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) | 812 | if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) |
801 | return -EINVAL; | 813 | return -EINVAL; |
802 | 814 | ||
803 | c = ipmr_cache_alloc(); | 815 | c = ipmr_cache_alloc(net); |
804 | if (c == NULL) | 816 | if (c == NULL) |
805 | return -ENOMEM; | 817 | return -ENOMEM; |
806 | 818 | ||
@@ -812,8 +824,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
812 | c->mfc_flags |= MFC_STATIC; | 824 | c->mfc_flags |= MFC_STATIC; |
813 | 825 | ||
814 | write_lock_bh(&mrt_lock); | 826 | write_lock_bh(&mrt_lock); |
815 | c->next = mfc_cache_array[line]; | 827 | c->next = net->ipv4.mfc_cache_array[line]; |
816 | mfc_cache_array[line] = c; | 828 | net->ipv4.mfc_cache_array[line] = c; |
817 | write_unlock_bh(&mrt_lock); | 829 | write_unlock_bh(&mrt_lock); |
818 | 830 | ||
819 | /* | 831 | /* |
@@ -823,19 +835,21 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
823 | spin_lock_bh(&mfc_unres_lock); | 835 | spin_lock_bh(&mfc_unres_lock); |
824 | for (cp = &mfc_unres_queue; (uc=*cp) != NULL; | 836 | for (cp = &mfc_unres_queue; (uc=*cp) != NULL; |
825 | cp = &uc->next) { | 837 | cp = &uc->next) { |
826 | if (uc->mfc_origin == c->mfc_origin && | 838 | if (net_eq(mfc_net(uc), net) && |
839 | uc->mfc_origin == c->mfc_origin && | ||
827 | uc->mfc_mcastgrp == c->mfc_mcastgrp) { | 840 | uc->mfc_mcastgrp == c->mfc_mcastgrp) { |
828 | *cp = uc->next; | 841 | *cp = uc->next; |
829 | if (atomic_dec_and_test(&cache_resolve_queue_len)) | 842 | atomic_dec(&net->ipv4.cache_resolve_queue_len); |
830 | del_timer(&ipmr_expire_timer); | ||
831 | break; | 843 | break; |
832 | } | 844 | } |
833 | } | 845 | } |
846 | if (mfc_unres_queue == NULL) | ||
847 | del_timer(&ipmr_expire_timer); | ||
834 | spin_unlock_bh(&mfc_unres_lock); | 848 | spin_unlock_bh(&mfc_unres_lock); |
835 | 849 | ||
836 | if (uc) { | 850 | if (uc) { |
837 | ipmr_cache_resolve(uc, c); | 851 | ipmr_cache_resolve(uc, c); |
838 | kmem_cache_free(mrt_cachep, uc); | 852 | ipmr_cache_free(uc); |
839 | } | 853 | } |
840 | return 0; | 854 | return 0; |
841 | } | 855 | } |
@@ -844,16 +858,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) | |||
844 | * Close the multicast socket, and clear the vif tables etc | 858 | * Close the multicast socket, and clear the vif tables etc |
845 | */ | 859 | */ |
846 | 860 | ||
847 | static void mroute_clean_tables(struct sock *sk) | 861 | static void mroute_clean_tables(struct net *net) |
848 | { | 862 | { |
849 | int i; | 863 | int i; |
850 | 864 | ||
851 | /* | 865 | /* |
852 | * Shut down all active vif entries | 866 | * Shut down all active vif entries |
853 | */ | 867 | */ |
854 | for (i=0; i<maxvif; i++) { | 868 | for (i = 0; i < net->ipv4.maxvif; i++) { |
855 | if (!(vif_table[i].flags&VIFF_STATIC)) | 869 | if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) |
856 | vif_delete(i, 0); | 870 | vif_delete(net, i, 0); |
857 | } | 871 | } |
858 | 872 | ||
859 | /* | 873 | /* |
@@ -862,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk) | |||
862 | for (i=0; i<MFC_LINES; i++) { | 876 | for (i=0; i<MFC_LINES; i++) { |
863 | struct mfc_cache *c, **cp; | 877 | struct mfc_cache *c, **cp; |
864 | 878 | ||
865 | cp = &mfc_cache_array[i]; | 879 | cp = &net->ipv4.mfc_cache_array[i]; |
866 | while ((c = *cp) != NULL) { | 880 | while ((c = *cp) != NULL) { |
867 | if (c->mfc_flags&MFC_STATIC) { | 881 | if (c->mfc_flags&MFC_STATIC) { |
868 | cp = &c->next; | 882 | cp = &c->next; |
@@ -872,22 +886,23 @@ static void mroute_clean_tables(struct sock *sk) | |||
872 | *cp = c->next; | 886 | *cp = c->next; |
873 | write_unlock_bh(&mrt_lock); | 887 | write_unlock_bh(&mrt_lock); |
874 | 888 | ||
875 | kmem_cache_free(mrt_cachep, c); | 889 | ipmr_cache_free(c); |
876 | } | 890 | } |
877 | } | 891 | } |
878 | 892 | ||
879 | if (atomic_read(&cache_resolve_queue_len) != 0) { | 893 | if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { |
880 | struct mfc_cache *c; | 894 | struct mfc_cache *c, **cp; |
881 | 895 | ||
882 | spin_lock_bh(&mfc_unres_lock); | 896 | spin_lock_bh(&mfc_unres_lock); |
883 | while (mfc_unres_queue != NULL) { | 897 | cp = &mfc_unres_queue; |
884 | c = mfc_unres_queue; | 898 | while ((c = *cp) != NULL) { |
885 | mfc_unres_queue = c->next; | 899 | if (!net_eq(mfc_net(c), net)) { |
886 | spin_unlock_bh(&mfc_unres_lock); | 900 | cp = &c->next; |
901 | continue; | ||
902 | } | ||
903 | *cp = c->next; | ||
887 | 904 | ||
888 | ipmr_destroy_unres(c); | 905 | ipmr_destroy_unres(c); |
889 | |||
890 | spin_lock_bh(&mfc_unres_lock); | ||
891 | } | 906 | } |
892 | spin_unlock_bh(&mfc_unres_lock); | 907 | spin_unlock_bh(&mfc_unres_lock); |
893 | } | 908 | } |
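The rewritten loop above keeps the single, global mfc_unres_queue but skips entries that belong to other namespaces. Spelled out on its own, the pointer-to-pointer unlink idiom it uses looks like this (an illustrative restatement, not new mechanism; the caller holds mfc_unres_lock and the names are the ones used in this file):

static void drop_unres_entries_of(struct net *net)
{
        struct mfc_cache *c, **cp = &mfc_unres_queue;

        while ((c = *cp) != NULL) {
                if (!net_eq(mfc_net(c), net)) {
                        cp = &c->next;          /* keep entries owned by other namespaces */
                        continue;
                }
                *cp = c->next;                  /* splice this entry out of the shared queue */
                ipmr_destroy_unres(c);          /* frees the queued skbs and the entry */
        }
}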
@@ -895,15 +910,17 @@ static void mroute_clean_tables(struct sock *sk) | |||
895 | 910 | ||
896 | static void mrtsock_destruct(struct sock *sk) | 911 | static void mrtsock_destruct(struct sock *sk) |
897 | { | 912 | { |
913 | struct net *net = sock_net(sk); | ||
914 | |||
898 | rtnl_lock(); | 915 | rtnl_lock(); |
899 | if (sk == mroute_socket) { | 916 | if (sk == net->ipv4.mroute_sk) { |
900 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; | 917 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; |
901 | 918 | ||
902 | write_lock_bh(&mrt_lock); | 919 | write_lock_bh(&mrt_lock); |
903 | mroute_socket = NULL; | 920 | net->ipv4.mroute_sk = NULL; |
904 | write_unlock_bh(&mrt_lock); | 921 | write_unlock_bh(&mrt_lock); |
905 | 922 | ||
906 | mroute_clean_tables(sk); | 923 | mroute_clean_tables(net); |
907 | } | 924 | } |
908 | rtnl_unlock(); | 925 | rtnl_unlock(); |
909 | } | 926 | } |
@@ -920,9 +937,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
920 | int ret; | 937 | int ret; |
921 | struct vifctl vif; | 938 | struct vifctl vif; |
922 | struct mfcctl mfc; | 939 | struct mfcctl mfc; |
940 | struct net *net = sock_net(sk); | ||
923 | 941 | ||
924 | if (optname != MRT_INIT) { | 942 | if (optname != MRT_INIT) { |
925 | if (sk != mroute_socket && !capable(CAP_NET_ADMIN)) | 943 | if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) |
926 | return -EACCES; | 944 | return -EACCES; |
927 | } | 945 | } |
928 | 946 | ||
@@ -935,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
935 | return -ENOPROTOOPT; | 953 | return -ENOPROTOOPT; |
936 | 954 | ||
937 | rtnl_lock(); | 955 | rtnl_lock(); |
938 | if (mroute_socket) { | 956 | if (net->ipv4.mroute_sk) { |
939 | rtnl_unlock(); | 957 | rtnl_unlock(); |
940 | return -EADDRINUSE; | 958 | return -EADDRINUSE; |
941 | } | 959 | } |
@@ -943,15 +961,15 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
943 | ret = ip_ra_control(sk, 1, mrtsock_destruct); | 961 | ret = ip_ra_control(sk, 1, mrtsock_destruct); |
944 | if (ret == 0) { | 962 | if (ret == 0) { |
945 | write_lock_bh(&mrt_lock); | 963 | write_lock_bh(&mrt_lock); |
946 | mroute_socket = sk; | 964 | net->ipv4.mroute_sk = sk; |
947 | write_unlock_bh(&mrt_lock); | 965 | write_unlock_bh(&mrt_lock); |
948 | 966 | ||
949 | IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; | 967 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; |
950 | } | 968 | } |
951 | rtnl_unlock(); | 969 | rtnl_unlock(); |
952 | return ret; | 970 | return ret; |
953 | case MRT_DONE: | 971 | case MRT_DONE: |
954 | if (sk != mroute_socket) | 972 | if (sk != net->ipv4.mroute_sk) |
955 | return -EACCES; | 973 | return -EACCES; |
956 | return ip_ra_control(sk, 0, NULL); | 974 | return ip_ra_control(sk, 0, NULL); |
957 | case MRT_ADD_VIF: | 975 | case MRT_ADD_VIF: |
@@ -964,9 +982,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
964 | return -ENFILE; | 982 | return -ENFILE; |
965 | rtnl_lock(); | 983 | rtnl_lock(); |
966 | if (optname == MRT_ADD_VIF) { | 984 | if (optname == MRT_ADD_VIF) { |
967 | ret = vif_add(&vif, sk==mroute_socket); | 985 | ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); |
968 | } else { | 986 | } else { |
969 | ret = vif_delete(vif.vifc_vifi, 0); | 987 | ret = vif_delete(net, vif.vifc_vifi, 0); |
970 | } | 988 | } |
971 | rtnl_unlock(); | 989 | rtnl_unlock(); |
972 | return ret; | 990 | return ret; |
@@ -983,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
983 | return -EFAULT; | 1001 | return -EFAULT; |
984 | rtnl_lock(); | 1002 | rtnl_lock(); |
985 | if (optname == MRT_DEL_MFC) | 1003 | if (optname == MRT_DEL_MFC) |
986 | ret = ipmr_mfc_delete(&mfc); | 1004 | ret = ipmr_mfc_delete(net, &mfc); |
987 | else | 1005 | else |
988 | ret = ipmr_mfc_add(&mfc, sk==mroute_socket); | 1006 | ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); |
989 | rtnl_unlock(); | 1007 | rtnl_unlock(); |
990 | return ret; | 1008 | return ret; |
991 | /* | 1009 | /* |
@@ -996,7 +1014,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
996 | int v; | 1014 | int v; |
997 | if (get_user(v,(int __user *)optval)) | 1015 | if (get_user(v,(int __user *)optval)) |
998 | return -EFAULT; | 1016 | return -EFAULT; |
999 | mroute_do_assert=(v)?1:0; | 1017 | net->ipv4.mroute_do_assert = (v) ? 1 : 0; |
1000 | return 0; | 1018 | return 0; |
1001 | } | 1019 | } |
1002 | #ifdef CONFIG_IP_PIMSM | 1020 | #ifdef CONFIG_IP_PIMSM |
@@ -1010,11 +1028,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int | |||
1010 | 1028 | ||
1011 | rtnl_lock(); | 1029 | rtnl_lock(); |
1012 | ret = 0; | 1030 | ret = 0; |
1013 | if (v != mroute_do_pim) { | 1031 | if (v != net->ipv4.mroute_do_pim) { |
1014 | mroute_do_pim = v; | 1032 | net->ipv4.mroute_do_pim = v; |
1015 | mroute_do_assert = v; | 1033 | net->ipv4.mroute_do_assert = v; |
1016 | #ifdef CONFIG_IP_PIMSM_V2 | 1034 | #ifdef CONFIG_IP_PIMSM_V2 |
1017 | if (mroute_do_pim) | 1035 | if (net->ipv4.mroute_do_pim) |
1018 | ret = inet_add_protocol(&pim_protocol, | 1036 | ret = inet_add_protocol(&pim_protocol, |
1019 | IPPROTO_PIM); | 1037 | IPPROTO_PIM); |
1020 | else | 1038 | else |
@@ -1045,6 +1063,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int | |||
1045 | { | 1063 | { |
1046 | int olr; | 1064 | int olr; |
1047 | int val; | 1065 | int val; |
1066 | struct net *net = sock_net(sk); | ||
1048 | 1067 | ||
1049 | if (optname != MRT_VERSION && | 1068 | if (optname != MRT_VERSION && |
1050 | #ifdef CONFIG_IP_PIMSM | 1069 | #ifdef CONFIG_IP_PIMSM |
@@ -1066,10 +1085,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int | |||
1066 | val = 0x0305; | 1085 | val = 0x0305; |
1067 | #ifdef CONFIG_IP_PIMSM | 1086 | #ifdef CONFIG_IP_PIMSM |
1068 | else if (optname == MRT_PIM) | 1087 | else if (optname == MRT_PIM) |
1069 | val = mroute_do_pim; | 1088 | val = net->ipv4.mroute_do_pim; |
1070 | #endif | 1089 | #endif |
1071 | else | 1090 | else |
1072 | val = mroute_do_assert; | 1091 | val = net->ipv4.mroute_do_assert; |
1073 | if (copy_to_user(optval, &val, olr)) | 1092 | if (copy_to_user(optval, &val, olr)) |
1074 | return -EFAULT; | 1093 | return -EFAULT; |
1075 | return 0; | 1094 | return 0; |
@@ -1085,16 +1104,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
1085 | struct sioc_vif_req vr; | 1104 | struct sioc_vif_req vr; |
1086 | struct vif_device *vif; | 1105 | struct vif_device *vif; |
1087 | struct mfc_cache *c; | 1106 | struct mfc_cache *c; |
1107 | struct net *net = sock_net(sk); | ||
1088 | 1108 | ||
1089 | switch (cmd) { | 1109 | switch (cmd) { |
1090 | case SIOCGETVIFCNT: | 1110 | case SIOCGETVIFCNT: |
1091 | if (copy_from_user(&vr, arg, sizeof(vr))) | 1111 | if (copy_from_user(&vr, arg, sizeof(vr))) |
1092 | return -EFAULT; | 1112 | return -EFAULT; |
1093 | if (vr.vifi >= maxvif) | 1113 | if (vr.vifi >= net->ipv4.maxvif) |
1094 | return -EINVAL; | 1114 | return -EINVAL; |
1095 | read_lock(&mrt_lock); | 1115 | read_lock(&mrt_lock); |
1096 | vif=&vif_table[vr.vifi]; | 1116 | vif = &net->ipv4.vif_table[vr.vifi]; |
1097 | if (VIF_EXISTS(vr.vifi)) { | 1117 | if (VIF_EXISTS(net, vr.vifi)) { |
1098 | vr.icount = vif->pkt_in; | 1118 | vr.icount = vif->pkt_in; |
1099 | vr.ocount = vif->pkt_out; | 1119 | vr.ocount = vif->pkt_out; |
1100 | vr.ibytes = vif->bytes_in; | 1120 | vr.ibytes = vif->bytes_in; |
@@ -1112,7 +1132,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
1112 | return -EFAULT; | 1132 | return -EFAULT; |
1113 | 1133 | ||
1114 | read_lock(&mrt_lock); | 1134 | read_lock(&mrt_lock); |
1115 | c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); | 1135 | c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); |
1116 | if (c) { | 1136 | if (c) { |
1117 | sr.pktcnt = c->mfc_un.res.pkt; | 1137 | sr.pktcnt = c->mfc_un.res.pkt; |
1118 | sr.bytecnt = c->mfc_un.res.bytes; | 1138 | sr.bytecnt = c->mfc_un.res.bytes; |
@@ -1134,18 +1154,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) | |||
1134 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) | 1154 | static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) |
1135 | { | 1155 | { |
1136 | struct net_device *dev = ptr; | 1156 | struct net_device *dev = ptr; |
1157 | struct net *net = dev_net(dev); | ||
1137 | struct vif_device *v; | 1158 | struct vif_device *v; |
1138 | int ct; | 1159 | int ct; |
1139 | 1160 | ||
1140 | if (!net_eq(dev_net(dev), &init_net)) | 1161 | if (!net_eq(dev_net(dev), net)) |
1141 | return NOTIFY_DONE; | 1162 | return NOTIFY_DONE; |
1142 | 1163 | ||
1143 | if (event != NETDEV_UNREGISTER) | 1164 | if (event != NETDEV_UNREGISTER) |
1144 | return NOTIFY_DONE; | 1165 | return NOTIFY_DONE; |
1145 | v=&vif_table[0]; | 1166 | v = &net->ipv4.vif_table[0]; |
1146 | for (ct=0; ct<maxvif; ct++,v++) { | 1167 | for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { |
1147 | if (v->dev == dev) | 1168 | if (v->dev == dev) |
1148 | vif_delete(ct, 1); | 1169 | vif_delete(net, ct, 1); |
1149 | } | 1170 | } |
1150 | return NOTIFY_DONE; | 1171 | return NOTIFY_DONE; |
1151 | } | 1172 | } |
@@ -1205,8 +1226,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) | |||
1205 | 1226 | ||
1206 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | 1227 | static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) |
1207 | { | 1228 | { |
1229 | struct net *net = mfc_net(c); | ||
1208 | const struct iphdr *iph = ip_hdr(skb); | 1230 | const struct iphdr *iph = ip_hdr(skb); |
1209 | struct vif_device *vif = &vif_table[vifi]; | 1231 | struct vif_device *vif = &net->ipv4.vif_table[vifi]; |
1210 | struct net_device *dev; | 1232 | struct net_device *dev; |
1211 | struct rtable *rt; | 1233 | struct rtable *rt; |
1212 | int encap = 0; | 1234 | int encap = 0; |
@@ -1220,9 +1242,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
1220 | vif->bytes_out += skb->len; | 1242 | vif->bytes_out += skb->len; |
1221 | vif->dev->stats.tx_bytes += skb->len; | 1243 | vif->dev->stats.tx_bytes += skb->len; |
1222 | vif->dev->stats.tx_packets++; | 1244 | vif->dev->stats.tx_packets++; |
1223 | ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); | 1245 | ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); |
1224 | kfree_skb(skb); | 1246 | goto out_free; |
1225 | return; | ||
1226 | } | 1247 | } |
1227 | #endif | 1248 | #endif |
1228 | 1249 | ||
@@ -1233,7 +1254,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
1233 | .saddr = vif->local, | 1254 | .saddr = vif->local, |
1234 | .tos = RT_TOS(iph->tos) } }, | 1255 | .tos = RT_TOS(iph->tos) } }, |
1235 | .proto = IPPROTO_IPIP }; | 1256 | .proto = IPPROTO_IPIP }; |
1236 | if (ip_route_output_key(&init_net, &rt, &fl)) | 1257 | if (ip_route_output_key(net, &rt, &fl)) |
1237 | goto out_free; | 1258 | goto out_free; |
1238 | encap = sizeof(struct iphdr); | 1259 | encap = sizeof(struct iphdr); |
1239 | } else { | 1260 | } else { |
@@ -1242,7 +1263,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) | |||
1242 | { .daddr = iph->daddr, | 1263 | { .daddr = iph->daddr, |
1243 | .tos = RT_TOS(iph->tos) } }, | 1264 | .tos = RT_TOS(iph->tos) } }, |
1244 | .proto = IPPROTO_IPIP }; | 1265 | .proto = IPPROTO_IPIP }; |
1245 | if (ip_route_output_key(&init_net, &rt, &fl)) | 1266 | if (ip_route_output_key(net, &rt, &fl)) |
1246 | goto out_free; | 1267 | goto out_free; |
1247 | } | 1268 | } |
1248 | 1269 | ||
@@ -1306,9 +1327,10 @@ out_free: | |||
1306 | 1327 | ||
1307 | static int ipmr_find_vif(struct net_device *dev) | 1328 | static int ipmr_find_vif(struct net_device *dev) |
1308 | { | 1329 | { |
1330 | struct net *net = dev_net(dev); | ||
1309 | int ct; | 1331 | int ct; |
1310 | for (ct=maxvif-1; ct>=0; ct--) { | 1332 | for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { |
1311 | if (vif_table[ct].dev == dev) | 1333 | if (net->ipv4.vif_table[ct].dev == dev) |
1312 | break; | 1334 | break; |
1313 | } | 1335 | } |
1314 | return ct; | 1336 | return ct; |
@@ -1320,6 +1342,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1320 | { | 1342 | { |
1321 | int psend = -1; | 1343 | int psend = -1; |
1322 | int vif, ct; | 1344 | int vif, ct; |
1345 | struct net *net = mfc_net(cache); | ||
1323 | 1346 | ||
1324 | vif = cache->mfc_parent; | 1347 | vif = cache->mfc_parent; |
1325 | cache->mfc_un.res.pkt++; | 1348 | cache->mfc_un.res.pkt++; |
@@ -1328,7 +1351,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1328 | /* | 1351 | /* |
1329 | * Wrong interface: drop packet and (maybe) send PIM assert. | 1352 | * Wrong interface: drop packet and (maybe) send PIM assert. |
1330 | */ | 1353 | */ |
1331 | if (vif_table[vif].dev != skb->dev) { | 1354 | if (net->ipv4.vif_table[vif].dev != skb->dev) { |
1332 | int true_vifi; | 1355 | int true_vifi; |
1333 | 1356 | ||
1334 | if (skb->rtable->fl.iif == 0) { | 1357 | if (skb->rtable->fl.iif == 0) { |
@@ -1349,23 +1372,24 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local | |||
1349 | cache->mfc_un.res.wrong_if++; | 1372 | cache->mfc_un.res.wrong_if++; |
1350 | true_vifi = ipmr_find_vif(skb->dev); | 1373 | true_vifi = ipmr_find_vif(skb->dev); |
1351 | 1374 | ||
1352 | if (true_vifi >= 0 && mroute_do_assert && | 1375 | if (true_vifi >= 0 && net->ipv4.mroute_do_assert && |
1353 | /* pimsm uses asserts, when switching from RPT to SPT, | 1376 | /* pimsm uses asserts, when switching from RPT to SPT, |
1354 | so that we cannot check that packet arrived on an oif. | 1377 | so that we cannot check that packet arrived on an oif. |
1355 | It is bad, but otherwise we would need to move pretty | 1378 | It is bad, but otherwise we would need to move pretty |
1356 | large chunk of pimd to kernel. Ough... --ANK | 1379 | large chunk of pimd to kernel. Ough... --ANK |
1357 | */ | 1380 | */ |
1358 | (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && | 1381 | (net->ipv4.mroute_do_pim || |
1382 | cache->mfc_un.res.ttls[true_vifi] < 255) && | ||
1359 | time_after(jiffies, | 1383 | time_after(jiffies, |
1360 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { | 1384 | cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { |
1361 | cache->mfc_un.res.last_assert = jiffies; | 1385 | cache->mfc_un.res.last_assert = jiffies; |
1362 | ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); | 1386 | ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); |
1363 | } | 1387 | } |
1364 | goto dont_forward; | 1388 | goto dont_forward; |
1365 | } | 1389 | } |
1366 | 1390 | ||
1367 | vif_table[vif].pkt_in++; | 1391 | net->ipv4.vif_table[vif].pkt_in++; |
1368 | vif_table[vif].bytes_in += skb->len; | 1392 | net->ipv4.vif_table[vif].bytes_in += skb->len; |
1369 | 1393 | ||
1370 | /* | 1394 | /* |
1371 | * Forward the frame | 1395 | * Forward the frame |
@@ -1405,6 +1429,7 @@ dont_forward: | |||
1405 | int ip_mr_input(struct sk_buff *skb) | 1429 | int ip_mr_input(struct sk_buff *skb) |
1406 | { | 1430 | { |
1407 | struct mfc_cache *cache; | 1431 | struct mfc_cache *cache; |
1432 | struct net *net = dev_net(skb->dev); | ||
1408 | int local = skb->rtable->rt_flags&RTCF_LOCAL; | 1433 | int local = skb->rtable->rt_flags&RTCF_LOCAL; |
1409 | 1434 | ||
1410 | /* Packet is looped back after forward, it should not be | 1435 | /* Packet is looped back after forward, it should not be |
@@ -1425,9 +1450,9 @@ int ip_mr_input(struct sk_buff *skb) | |||
1425 | that we can forward NO IGMP messages. | 1450 | that we can forward NO IGMP messages. |
1426 | */ | 1451 | */ |
1427 | read_lock(&mrt_lock); | 1452 | read_lock(&mrt_lock); |
1428 | if (mroute_socket) { | 1453 | if (net->ipv4.mroute_sk) { |
1429 | nf_reset(skb); | 1454 | nf_reset(skb); |
1430 | raw_rcv(mroute_socket, skb); | 1455 | raw_rcv(net->ipv4.mroute_sk, skb); |
1431 | read_unlock(&mrt_lock); | 1456 | read_unlock(&mrt_lock); |
1432 | return 0; | 1457 | return 0; |
1433 | } | 1458 | } |
@@ -1436,7 +1461,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
1436 | } | 1461 | } |
1437 | 1462 | ||
1438 | read_lock(&mrt_lock); | 1463 | read_lock(&mrt_lock); |
1439 | cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); | 1464 | cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); |
1440 | 1465 | ||
1441 | /* | 1466 | /* |
1442 | * No usable cache entry | 1467 | * No usable cache entry |
@@ -1456,7 +1481,7 @@ int ip_mr_input(struct sk_buff *skb) | |||
1456 | 1481 | ||
1457 | vif = ipmr_find_vif(skb->dev); | 1482 | vif = ipmr_find_vif(skb->dev); |
1458 | if (vif >= 0) { | 1483 | if (vif >= 0) { |
1459 | int err = ipmr_cache_unresolved(vif, skb); | 1484 | int err = ipmr_cache_unresolved(net, vif, skb); |
1460 | read_unlock(&mrt_lock); | 1485 | read_unlock(&mrt_lock); |
1461 | 1486 | ||
1462 | return err; | 1487 | return err; |
@@ -1487,6 +1512,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) | |||
1487 | { | 1512 | { |
1488 | struct net_device *reg_dev = NULL; | 1513 | struct net_device *reg_dev = NULL; |
1489 | struct iphdr *encap; | 1514 | struct iphdr *encap; |
1515 | struct net *net = dev_net(skb->dev); | ||
1490 | 1516 | ||
1491 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); | 1517 | encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); |
1492 | /* | 1518 | /* |
@@ -1501,8 +1527,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) | |||
1501 | return 1; | 1527 | return 1; |
1502 | 1528 | ||
1503 | read_lock(&mrt_lock); | 1529 | read_lock(&mrt_lock); |
1504 | if (reg_vif_num >= 0) | 1530 | if (net->ipv4.mroute_reg_vif_num >= 0) |
1505 | reg_dev = vif_table[reg_vif_num].dev; | 1531 | reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; |
1506 | if (reg_dev) | 1532 | if (reg_dev) |
1507 | dev_hold(reg_dev); | 1533 | dev_hold(reg_dev); |
1508 | read_unlock(&mrt_lock); | 1534 | read_unlock(&mrt_lock); |
@@ -1537,13 +1563,14 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) | |||
1537 | int pim_rcv_v1(struct sk_buff * skb) | 1563 | int pim_rcv_v1(struct sk_buff * skb) |
1538 | { | 1564 | { |
1539 | struct igmphdr *pim; | 1565 | struct igmphdr *pim; |
1566 | struct net *net = dev_net(skb->dev); | ||
1540 | 1567 | ||
1541 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) | 1568 | if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) |
1542 | goto drop; | 1569 | goto drop; |
1543 | 1570 | ||
1544 | pim = igmp_hdr(skb); | 1571 | pim = igmp_hdr(skb); |
1545 | 1572 | ||
1546 | if (!mroute_do_pim || | 1573 | if (!net->ipv4.mroute_do_pim || |
1547 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) | 1574 | pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) |
1548 | goto drop; | 1575 | goto drop; |
1549 | 1576 | ||
@@ -1583,7 +1610,8 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | |||
1583 | { | 1610 | { |
1584 | int ct; | 1611 | int ct; |
1585 | struct rtnexthop *nhp; | 1612 | struct rtnexthop *nhp; |
1586 | struct net_device *dev = vif_table[c->mfc_parent].dev; | 1613 | struct net *net = mfc_net(c); |
1614 | struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev; | ||
1587 | u8 *b = skb_tail_pointer(skb); | 1615 | u8 *b = skb_tail_pointer(skb); |
1588 | struct rtattr *mp_head; | 1616 | struct rtattr *mp_head; |
1589 | 1617 | ||
@@ -1599,7 +1627,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) | |||
1599 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); | 1627 | nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); |
1600 | nhp->rtnh_flags = 0; | 1628 | nhp->rtnh_flags = 0; |
1601 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; | 1629 | nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; |
1602 | nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; | 1630 | nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; |
1603 | nhp->rtnh_len = sizeof(*nhp); | 1631 | nhp->rtnh_len = sizeof(*nhp); |
1604 | } | 1632 | } |
1605 | } | 1633 | } |
@@ -1613,14 +1641,15 @@ rtattr_failure: | |||
1613 | return -EMSGSIZE; | 1641 | return -EMSGSIZE; |
1614 | } | 1642 | } |
1615 | 1643 | ||
1616 | int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | 1644 | int ipmr_get_route(struct net *net, |
1645 | struct sk_buff *skb, struct rtmsg *rtm, int nowait) | ||
1617 | { | 1646 | { |
1618 | int err; | 1647 | int err; |
1619 | struct mfc_cache *cache; | 1648 | struct mfc_cache *cache; |
1620 | struct rtable *rt = skb->rtable; | 1649 | struct rtable *rt = skb->rtable; |
1621 | 1650 | ||
1622 | read_lock(&mrt_lock); | 1651 | read_lock(&mrt_lock); |
1623 | cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); | 1652 | cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); |
1624 | 1653 | ||
1625 | if (cache == NULL) { | 1654 | if (cache == NULL) { |
1626 | struct sk_buff *skb2; | 1655 | struct sk_buff *skb2; |
@@ -1651,7 +1680,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | |||
1651 | iph->saddr = rt->rt_src; | 1680 | iph->saddr = rt->rt_src; |
1652 | iph->daddr = rt->rt_dst; | 1681 | iph->daddr = rt->rt_dst; |
1653 | iph->version = 0; | 1682 | iph->version = 0; |
1654 | err = ipmr_cache_unresolved(vif, skb2); | 1683 | err = ipmr_cache_unresolved(net, vif, skb2); |
1655 | read_unlock(&mrt_lock); | 1684 | read_unlock(&mrt_lock); |
1656 | return err; | 1685 | return err; |
1657 | } | 1686 | } |
@@ -1668,17 +1697,19 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) | |||
1668 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif | 1697 | * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif |
1669 | */ | 1698 | */ |
1670 | struct ipmr_vif_iter { | 1699 | struct ipmr_vif_iter { |
1700 | struct seq_net_private p; | ||
1671 | int ct; | 1701 | int ct; |
1672 | }; | 1702 | }; |
1673 | 1703 | ||
1674 | static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, | 1704 | static struct vif_device *ipmr_vif_seq_idx(struct net *net, |
1705 | struct ipmr_vif_iter *iter, | ||
1675 | loff_t pos) | 1706 | loff_t pos) |
1676 | { | 1707 | { |
1677 | for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { | 1708 | for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { |
1678 | if (!VIF_EXISTS(iter->ct)) | 1709 | if (!VIF_EXISTS(net, iter->ct)) |
1679 | continue; | 1710 | continue; |
1680 | if (pos-- == 0) | 1711 | if (pos-- == 0) |
1681 | return &vif_table[iter->ct]; | 1712 | return &net->ipv4.vif_table[iter->ct]; |
1682 | } | 1713 | } |
1683 | return NULL; | 1714 | return NULL; |
1684 | } | 1715 | } |
@@ -1686,23 +1717,26 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, | |||
1686 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) | 1717 | static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) |
1687 | __acquires(mrt_lock) | 1718 | __acquires(mrt_lock) |
1688 | { | 1719 | { |
1720 | struct net *net = seq_file_net(seq); | ||
1721 | |||
1689 | read_lock(&mrt_lock); | 1722 | read_lock(&mrt_lock); |
1690 | return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) | 1723 | return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) |
1691 | : SEQ_START_TOKEN; | 1724 | : SEQ_START_TOKEN; |
1692 | } | 1725 | } |
1693 | 1726 | ||
1694 | static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 1727 | static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
1695 | { | 1728 | { |
1696 | struct ipmr_vif_iter *iter = seq->private; | 1729 | struct ipmr_vif_iter *iter = seq->private; |
1730 | struct net *net = seq_file_net(seq); | ||
1697 | 1731 | ||
1698 | ++*pos; | 1732 | ++*pos; |
1699 | if (v == SEQ_START_TOKEN) | 1733 | if (v == SEQ_START_TOKEN) |
1700 | return ipmr_vif_seq_idx(iter, 0); | 1734 | return ipmr_vif_seq_idx(net, iter, 0); |
1701 | 1735 | ||
1702 | while (++iter->ct < maxvif) { | 1736 | while (++iter->ct < net->ipv4.maxvif) { |
1703 | if (!VIF_EXISTS(iter->ct)) | 1737 | if (!VIF_EXISTS(net, iter->ct)) |
1704 | continue; | 1738 | continue; |
1705 | return &vif_table[iter->ct]; | 1739 | return &net->ipv4.vif_table[iter->ct]; |
1706 | } | 1740 | } |
1707 | return NULL; | 1741 | return NULL; |
1708 | } | 1742 | } |
@@ -1715,6 +1749,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) | |||
1715 | 1749 | ||
1716 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | 1750 | static int ipmr_vif_seq_show(struct seq_file *seq, void *v) |
1717 | { | 1751 | { |
1752 | struct net *net = seq_file_net(seq); | ||
1753 | |||
1718 | if (v == SEQ_START_TOKEN) { | 1754 | if (v == SEQ_START_TOKEN) { |
1719 | seq_puts(seq, | 1755 | seq_puts(seq, |
1720 | "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); | 1756 | "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); |
@@ -1724,7 +1760,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) | |||
1724 | 1760 | ||
1725 | seq_printf(seq, | 1761 | seq_printf(seq, |
1726 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", | 1762 | "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", |
1727 | vif - vif_table, | 1763 | vif - net->ipv4.vif_table, |
1728 | name, vif->bytes_in, vif->pkt_in, | 1764 | name, vif->bytes_in, vif->pkt_in, |
1729 | vif->bytes_out, vif->pkt_out, | 1765 | vif->bytes_out, vif->pkt_out, |
1730 | vif->flags, vif->local, vif->remote); | 1766 | vif->flags, vif->local, vif->remote); |
@@ -1741,8 +1777,8 @@ static const struct seq_operations ipmr_vif_seq_ops = { | |||
1741 | 1777 | ||
1742 | static int ipmr_vif_open(struct inode *inode, struct file *file) | 1778 | static int ipmr_vif_open(struct inode *inode, struct file *file) |
1743 | { | 1779 | { |
1744 | return seq_open_private(file, &ipmr_vif_seq_ops, | 1780 | return seq_open_net(inode, file, &ipmr_vif_seq_ops, |
1745 | sizeof(struct ipmr_vif_iter)); | 1781 | sizeof(struct ipmr_vif_iter)); |
1746 | } | 1782 | } |
1747 | 1783 | ||
1748 | static const struct file_operations ipmr_vif_fops = { | 1784 | static const struct file_operations ipmr_vif_fops = { |
@@ -1750,23 +1786,26 @@ static const struct file_operations ipmr_vif_fops = { | |||
1750 | .open = ipmr_vif_open, | 1786 | .open = ipmr_vif_open, |
1751 | .read = seq_read, | 1787 | .read = seq_read, |
1752 | .llseek = seq_lseek, | 1788 | .llseek = seq_lseek, |
1753 | .release = seq_release_private, | 1789 | .release = seq_release_net, |
1754 | }; | 1790 | }; |
1755 | 1791 | ||
1756 | struct ipmr_mfc_iter { | 1792 | struct ipmr_mfc_iter { |
1793 | struct seq_net_private p; | ||
1757 | struct mfc_cache **cache; | 1794 | struct mfc_cache **cache; |
1758 | int ct; | 1795 | int ct; |
1759 | }; | 1796 | }; |
1760 | 1797 | ||
1761 | 1798 | ||
1762 | static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) | 1799 | static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, |
1800 | struct ipmr_mfc_iter *it, loff_t pos) | ||
1763 | { | 1801 | { |
1764 | struct mfc_cache *mfc; | 1802 | struct mfc_cache *mfc; |
1765 | 1803 | ||
1766 | it->cache = mfc_cache_array; | 1804 | it->cache = net->ipv4.mfc_cache_array; |
1767 | read_lock(&mrt_lock); | 1805 | read_lock(&mrt_lock); |
1768 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) | 1806 | for (it->ct = 0; it->ct < MFC_LINES; it->ct++) |
1769 | for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) | 1807 | for (mfc = net->ipv4.mfc_cache_array[it->ct]; |
1808 | mfc; mfc = mfc->next) | ||
1770 | if (pos-- == 0) | 1809 | if (pos-- == 0) |
1771 | return mfc; | 1810 | return mfc; |
1772 | read_unlock(&mrt_lock); | 1811 | read_unlock(&mrt_lock); |
@@ -1774,7 +1813,8 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) | |||
1774 | it->cache = &mfc_unres_queue; | 1813 | it->cache = &mfc_unres_queue; |
1775 | spin_lock_bh(&mfc_unres_lock); | 1814 | spin_lock_bh(&mfc_unres_lock); |
1776 | for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) | 1815 | for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) |
1777 | if (pos-- == 0) | 1816 | if (net_eq(mfc_net(mfc), net) && |
1817 | pos-- == 0) | ||
1778 | return mfc; | 1818 | return mfc; |
1779 | spin_unlock_bh(&mfc_unres_lock); | 1819 | spin_unlock_bh(&mfc_unres_lock); |
1780 | 1820 | ||
@@ -1786,9 +1826,11 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) | |||
1786 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) | 1826 | static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) |
1787 | { | 1827 | { |
1788 | struct ipmr_mfc_iter *it = seq->private; | 1828 | struct ipmr_mfc_iter *it = seq->private; |
1829 | struct net *net = seq_file_net(seq); | ||
1830 | |||
1789 | it->cache = NULL; | 1831 | it->cache = NULL; |
1790 | it->ct = 0; | 1832 | it->ct = 0; |
1791 | return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) | 1833 | return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) |
1792 | : SEQ_START_TOKEN; | 1834 | : SEQ_START_TOKEN; |
1793 | } | 1835 | } |
1794 | 1836 | ||
@@ -1796,11 +1838,12 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1796 | { | 1838 | { |
1797 | struct mfc_cache *mfc = v; | 1839 | struct mfc_cache *mfc = v; |
1798 | struct ipmr_mfc_iter *it = seq->private; | 1840 | struct ipmr_mfc_iter *it = seq->private; |
1841 | struct net *net = seq_file_net(seq); | ||
1799 | 1842 | ||
1800 | ++*pos; | 1843 | ++*pos; |
1801 | 1844 | ||
1802 | if (v == SEQ_START_TOKEN) | 1845 | if (v == SEQ_START_TOKEN) |
1803 | return ipmr_mfc_seq_idx(seq->private, 0); | 1846 | return ipmr_mfc_seq_idx(net, seq->private, 0); |
1804 | 1847 | ||
1805 | if (mfc->next) | 1848 | if (mfc->next) |
1806 | return mfc->next; | 1849 | return mfc->next; |
@@ -1808,10 +1851,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1808 | if (it->cache == &mfc_unres_queue) | 1851 | if (it->cache == &mfc_unres_queue) |
1809 | goto end_of_list; | 1852 | goto end_of_list; |
1810 | 1853 | ||
1811 | BUG_ON(it->cache != mfc_cache_array); | 1854 | BUG_ON(it->cache != net->ipv4.mfc_cache_array); |
1812 | 1855 | ||
1813 | while (++it->ct < MFC_LINES) { | 1856 | while (++it->ct < MFC_LINES) { |
1814 | mfc = mfc_cache_array[it->ct]; | 1857 | mfc = net->ipv4.mfc_cache_array[it->ct]; |
1815 | if (mfc) | 1858 | if (mfc) |
1816 | return mfc; | 1859 | return mfc; |
1817 | } | 1860 | } |
@@ -1823,6 +1866,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1823 | 1866 | ||
1824 | spin_lock_bh(&mfc_unres_lock); | 1867 | spin_lock_bh(&mfc_unres_lock); |
1825 | mfc = mfc_unres_queue; | 1868 | mfc = mfc_unres_queue; |
1869 | while (mfc && !net_eq(mfc_net(mfc), net)) | ||
1870 | mfc = mfc->next; | ||
1826 | if (mfc) | 1871 | if (mfc) |
1827 | return mfc; | 1872 | return mfc; |
1828 | 1873 | ||
@@ -1836,16 +1881,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1836 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) | 1881 | static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) |
1837 | { | 1882 | { |
1838 | struct ipmr_mfc_iter *it = seq->private; | 1883 | struct ipmr_mfc_iter *it = seq->private; |
1884 | struct net *net = seq_file_net(seq); | ||
1839 | 1885 | ||
1840 | if (it->cache == &mfc_unres_queue) | 1886 | if (it->cache == &mfc_unres_queue) |
1841 | spin_unlock_bh(&mfc_unres_lock); | 1887 | spin_unlock_bh(&mfc_unres_lock); |
1842 | else if (it->cache == mfc_cache_array) | 1888 | else if (it->cache == net->ipv4.mfc_cache_array) |
1843 | read_unlock(&mrt_lock); | 1889 | read_unlock(&mrt_lock); |
1844 | } | 1890 | } |
1845 | 1891 | ||
1846 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | 1892 | static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) |
1847 | { | 1893 | { |
1848 | int n; | 1894 | int n; |
1895 | struct net *net = seq_file_net(seq); | ||
1849 | 1896 | ||
1850 | if (v == SEQ_START_TOKEN) { | 1897 | if (v == SEQ_START_TOKEN) { |
1851 | seq_puts(seq, | 1898 | seq_puts(seq, |
@@ -1866,9 +1913,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) | |||
1866 | mfc->mfc_un.res.wrong_if); | 1913 | mfc->mfc_un.res.wrong_if); |
1867 | for (n = mfc->mfc_un.res.minvif; | 1914 | for (n = mfc->mfc_un.res.minvif; |
1868 | n < mfc->mfc_un.res.maxvif; n++ ) { | 1915 | n < mfc->mfc_un.res.maxvif; n++ ) { |
1869 | if (VIF_EXISTS(n) | 1916 | if (VIF_EXISTS(net, n) && |
1870 | && mfc->mfc_un.res.ttls[n] < 255) | 1917 | mfc->mfc_un.res.ttls[n] < 255) |
1871 | seq_printf(seq, | 1918 | seq_printf(seq, |
1872 | " %2d:%-3d", | 1919 | " %2d:%-3d", |
1873 | n, mfc->mfc_un.res.ttls[n]); | 1920 | n, mfc->mfc_un.res.ttls[n]); |
1874 | } | 1921 | } |
@@ -1892,8 +1939,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = { | |||
1892 | 1939 | ||
1893 | static int ipmr_mfc_open(struct inode *inode, struct file *file) | 1940 | static int ipmr_mfc_open(struct inode *inode, struct file *file) |
1894 | { | 1941 | { |
1895 | return seq_open_private(file, &ipmr_mfc_seq_ops, | 1942 | return seq_open_net(inode, file, &ipmr_mfc_seq_ops, |
1896 | sizeof(struct ipmr_mfc_iter)); | 1943 | sizeof(struct ipmr_mfc_iter)); |
1897 | } | 1944 | } |
1898 | 1945 | ||
1899 | static const struct file_operations ipmr_mfc_fops = { | 1946 | static const struct file_operations ipmr_mfc_fops = { |
@@ -1901,7 +1948,7 @@ static const struct file_operations ipmr_mfc_fops = { | |||
1901 | .open = ipmr_mfc_open, | 1948 | .open = ipmr_mfc_open, |
1902 | .read = seq_read, | 1949 | .read = seq_read, |
1903 | .llseek = seq_lseek, | 1950 | .llseek = seq_lseek, |
1904 | .release = seq_release_private, | 1951 | .release = seq_release_net, |
1905 | }; | 1952 | }; |
1906 | #endif | 1953 | #endif |
1907 | 1954 | ||
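The /proc conversion above hinges on two small pieces of the seq_file netns plumbing: the iterator structs now begin with struct seq_net_private, seq_open_net() records the namespace in that leading member, and seq_file_net() reads it back wherever the show/next callbacks need it. Roughly (a sketch of the helpers, not their authoritative definitions):

struct seq_net_private {
#ifdef CONFIG_NET_NS
        struct net *net;        /* recorded by seq_open_net() */
#endif
};

static inline struct net *seq_file_net(struct seq_file *seq)
{
#ifdef CONFIG_NET_NS
        /* works because the iterator's first member is struct seq_net_private */
        return ((struct seq_net_private *)seq->private)->net;
#else
        return &init_net;
#endif
}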
@@ -1915,6 +1962,65 @@ static struct net_protocol pim_protocol = { | |||
1915 | /* | 1962 | /* |
1916 | * Setup for IP multicast routing | 1963 | * Setup for IP multicast routing |
1917 | */ | 1964 | */ |
1965 | static int __net_init ipmr_net_init(struct net *net) | ||
1966 | { | ||
1967 | int err = 0; | ||
1968 | |||
1969 | net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), | ||
1970 | GFP_KERNEL); | ||
1971 | if (!net->ipv4.vif_table) { | ||
1972 | err = -ENOMEM; | ||
1973 | goto fail; | ||
1974 | } | ||
1975 | |||
1976 | /* Forwarding cache */ | ||
1977 | net->ipv4.mfc_cache_array = kcalloc(MFC_LINES, | ||
1978 | sizeof(struct mfc_cache *), | ||
1979 | GFP_KERNEL); | ||
1980 | if (!net->ipv4.mfc_cache_array) { | ||
1981 | err = -ENOMEM; | ||
1982 | goto fail_mfc_cache; | ||
1983 | } | ||
1984 | |||
1985 | #ifdef CONFIG_IP_PIMSM | ||
1986 | net->ipv4.mroute_reg_vif_num = -1; | ||
1987 | #endif | ||
1988 | |||
1989 | #ifdef CONFIG_PROC_FS | ||
1990 | err = -ENOMEM; | ||
1991 | if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) | ||
1992 | goto proc_vif_fail; | ||
1993 | if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) | ||
1994 | goto proc_cache_fail; | ||
1995 | #endif | ||
1996 | return 0; | ||
1997 | |||
1998 | #ifdef CONFIG_PROC_FS | ||
1999 | proc_cache_fail: | ||
2000 | proc_net_remove(net, "ip_mr_vif"); | ||
2001 | proc_vif_fail: | ||
2002 | kfree(net->ipv4.mfc_cache_array); | ||
2003 | #endif | ||
2004 | fail_mfc_cache: | ||
2005 | kfree(net->ipv4.vif_table); | ||
2006 | fail: | ||
2007 | return err; | ||
2008 | } | ||
2009 | |||
2010 | static void __net_exit ipmr_net_exit(struct net *net) | ||
2011 | { | ||
2012 | #ifdef CONFIG_PROC_FS | ||
2013 | proc_net_remove(net, "ip_mr_cache"); | ||
2014 | proc_net_remove(net, "ip_mr_vif"); | ||
2015 | #endif | ||
2016 | kfree(net->ipv4.mfc_cache_array); | ||
2017 | kfree(net->ipv4.vif_table); | ||
2018 | } | ||
2019 | |||
2020 | static struct pernet_operations ipmr_net_ops = { | ||
2021 | .init = ipmr_net_init, | ||
2022 | .exit = ipmr_net_exit, | ||
2023 | }; | ||
1918 | 2024 | ||
1919 | int __init ip_mr_init(void) | 2025 | int __init ip_mr_init(void) |
1920 | { | 2026 | { |
@@ -1927,26 +2033,20 @@ int __init ip_mr_init(void) | |||
1927 | if (!mrt_cachep) | 2033 | if (!mrt_cachep) |
1928 | return -ENOMEM; | 2034 | return -ENOMEM; |
1929 | 2035 | ||
2036 | err = register_pernet_subsys(&ipmr_net_ops); | ||
2037 | if (err) | ||
2038 | goto reg_pernet_fail; | ||
2039 | |||
1930 | setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); | 2040 | setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); |
1931 | err = register_netdevice_notifier(&ip_mr_notifier); | 2041 | err = register_netdevice_notifier(&ip_mr_notifier); |
1932 | if (err) | 2042 | if (err) |
1933 | goto reg_notif_fail; | 2043 | goto reg_notif_fail; |
1934 | #ifdef CONFIG_PROC_FS | ||
1935 | err = -ENOMEM; | ||
1936 | if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops)) | ||
1937 | goto proc_vif_fail; | ||
1938 | if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops)) | ||
1939 | goto proc_cache_fail; | ||
1940 | #endif | ||
1941 | return 0; | 2044 | return 0; |
1942 | #ifdef CONFIG_PROC_FS | 2045 | |
1943 | proc_cache_fail: | ||
1944 | proc_net_remove(&init_net, "ip_mr_vif"); | ||
1945 | proc_vif_fail: | ||
1946 | unregister_netdevice_notifier(&ip_mr_notifier); | ||
1947 | #endif | ||
1948 | reg_notif_fail: | 2046 | reg_notif_fail: |
1949 | del_timer(&ipmr_expire_timer); | 2047 | del_timer(&ipmr_expire_timer); |
2048 | unregister_pernet_subsys(&ipmr_net_ops); | ||
2049 | reg_pernet_fail: | ||
1950 | kmem_cache_destroy(mrt_cachep); | 2050 | kmem_cache_destroy(mrt_cachep); |
1951 | return err; | 2051 | return err; |
1952 | } | 2052 | } |
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3816e1dc9295..1833bdbf9805 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig | |||
@@ -31,7 +31,7 @@ config NF_CONNTRACK_PROC_COMPAT | |||
31 | default y | 31 | default y |
32 | help | 32 | help |
33 | This option enables /proc and sysctl compatibility with the old | 33 | This option enables /proc and sysctl compatibility with the old |
34 | layer 3 dependant connection tracking. This is needed to keep | 34 | layer 3 dependent connection tracking. This is needed to keep |
35 | old programs that have not been adapted to the new names working. | 35 | old programs that have not been adapted to the new names working. |
36 | 36 | ||
37 | If unsure, say Y. | 37 | If unsure, say Y. |
@@ -95,11 +95,11 @@ config IP_NF_MATCH_ECN | |||
95 | config IP_NF_MATCH_TTL | 95 | config IP_NF_MATCH_TTL |
96 | tristate '"ttl" match support' | 96 | tristate '"ttl" match support' |
97 | depends on NETFILTER_ADVANCED | 97 | depends on NETFILTER_ADVANCED |
98 | help | 98 | select NETFILTER_XT_MATCH_HL |
99 | This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user | 99 | ---help--- |
100 | to match packets by their TTL value. | 100 | This is a backwards-compat option for the user's convenience |
101 | 101 | (e.g. when running oldconfig). It selects | |
102 | To compile it as a module, choose M here. If unsure, say N. | 102 | CONFIG_NETFILTER_XT_MATCH_HL. |
103 | 103 | ||
104 | # `filter', generic and specific targets | 104 | # `filter', generic and specific targets |
105 | config IP_NF_FILTER | 105 | config IP_NF_FILTER |
@@ -323,19 +323,13 @@ config IP_NF_TARGET_ECN | |||
323 | To compile it as a module, choose M here. If unsure, say N. | 323 | To compile it as a module, choose M here. If unsure, say N. |
324 | 324 | ||
325 | config IP_NF_TARGET_TTL | 325 | config IP_NF_TARGET_TTL |
326 | tristate 'TTL target support' | 326 | tristate '"TTL" target support' |
327 | depends on IP_NF_MANGLE | ||
328 | depends on NETFILTER_ADVANCED | 327 | depends on NETFILTER_ADVANCED |
329 | help | 328 | select NETFILTER_XT_TARGET_HL |
330 | This option adds a `TTL' target, which enables the user to modify | 329 | ---help--- |
331 | the TTL value of the IP header. | 330 | This is a backwards-compat option for the user's convenience |
332 | 331 | (e.g. when running oldconfig). It selects | |
333 | While it is safe to decrement/lower the TTL, this target also enables | 332 | CONFIG_NETFILTER_XT_TARGET_HL. |
334 | functionality to increment and set the TTL value of the IP header to | ||
335 | arbitrary values. This is EXTREMELY DANGEROUS since you can easily | ||
336 | create immortal packets that loop forever on the network. | ||
337 | |||
338 | To compile it as a module, choose M here. If unsure, say N. | ||
339 | 333 | ||
340 | # raw + specific targets | 334 | # raw + specific targets |
341 | config IP_NF_RAW | 335 | config IP_NF_RAW |
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5f9b650d90fc..48111594ee9b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile | |||
@@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o | |||
51 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o | 51 | obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o |
52 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o | 52 | obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o |
53 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o | 53 | obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o |
54 | obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o | ||
55 | 54 | ||
56 | # targets | 55 | # targets |
57 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o | 56 | obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o |
@@ -61,7 +60,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o | |||
61 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o | 60 | obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o |
62 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o | 61 | obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o |
63 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o | 62 | obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o |
64 | obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o | ||
65 | obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o | 63 | obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o |
66 | 64 | ||
67 | # generic ARP tables | 65 | # generic ARP tables |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7ea88b61cb0d..35c5f6a5cb7c 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
@@ -73,6 +73,28 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, | |||
73 | return (ret != 0); | 73 | return (ret != 0); |
74 | } | 74 | } |
75 | 75 | ||
76 | /* | ||
77 | * Unfortunately, _b and _mask are not aligned to an int (or long int). | ||
78 | * Some arches don't care; unrolling the loop is a win on them. | ||
79 | * For other arches, we only have a 16bit alignment. | ||
80 | */ | ||
81 | static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) | ||
82 | { | ||
83 | #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS | ||
84 | unsigned long ret = ifname_compare_aligned(_a, _b, _mask); | ||
85 | #else | ||
86 | unsigned long ret = 0; | ||
87 | const u16 *a = (const u16 *)_a; | ||
88 | const u16 *b = (const u16 *)_b; | ||
89 | const u16 *mask = (const u16 *)_mask; | ||
90 | int i; | ||
91 | |||
92 | for (i = 0; i < IFNAMSIZ/sizeof(u16); i++) | ||
93 | ret |= (a[i] ^ b[i]) & mask[i]; | ||
94 | #endif | ||
95 | return ret; | ||
96 | } | ||
97 | |||
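The CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS branch defers to ifname_compare_aligned(), an x_tables helper that performs the same masked XOR comparison a machine word at a time instead of 16 bits at a time. A sketch of the idea (the real helper is unrolled, but the effect is the same):

static inline unsigned long ifname_compare_aligned(const char *_a,
                                                   const char *_b,
                                                   const char *_mask)
{
        const unsigned long *a = (const unsigned long *)_a;
        const unsigned long *b = (const unsigned long *)_b;
        const unsigned long *mask = (const unsigned long *)_mask;
        unsigned long ret = 0;
        unsigned int i;

        for (i = 0; i < IFNAMSIZ / sizeof(unsigned long); i++)
                ret |= (a[i] ^ b[i]) & mask[i];
        return ret;     /* zero iff all bytes selected by the mask match */
}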
76 | /* Returns whether packet matches rule or not. */ | 98 | /* Returns whether packet matches rule or not. */ |
77 | static inline int arp_packet_match(const struct arphdr *arphdr, | 99 | static inline int arp_packet_match(const struct arphdr *arphdr, |
78 | struct net_device *dev, | 100 | struct net_device *dev, |
@@ -83,7 +105,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, | |||
83 | const char *arpptr = (char *)(arphdr + 1); | 105 | const char *arpptr = (char *)(arphdr + 1); |
84 | const char *src_devaddr, *tgt_devaddr; | 106 | const char *src_devaddr, *tgt_devaddr; |
85 | __be32 src_ipaddr, tgt_ipaddr; | 107 | __be32 src_ipaddr, tgt_ipaddr; |
86 | int i, ret; | 108 | long ret; |
87 | 109 | ||
88 | #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) | 110 | #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) |
89 | 111 | ||
@@ -156,10 +178,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, | |||
156 | } | 178 | } |
157 | 179 | ||
158 | /* Look for ifname matches. */ | 180 | /* Look for ifname matches. */ |
159 | for (i = 0, ret = 0; i < IFNAMSIZ; i++) { | 181 | ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); |
160 | ret |= (indev[i] ^ arpinfo->iniface[i]) | ||
161 | & arpinfo->iniface_mask[i]; | ||
162 | } | ||
163 | 182 | ||
164 | if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { | 183 | if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { |
165 | dprintf("VIA in mismatch (%s vs %s).%s\n", | 184 | dprintf("VIA in mismatch (%s vs %s).%s\n", |
@@ -168,10 +187,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, | |||
168 | return 0; | 187 | return 0; |
169 | } | 188 | } |
170 | 189 | ||
171 | for (i = 0, ret = 0; i < IFNAMSIZ; i++) { | 190 | ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); |
172 | ret |= (outdev[i] ^ arpinfo->outiface[i]) | ||
173 | & arpinfo->outiface_mask[i]; | ||
174 | } | ||
175 | 191 | ||
176 | if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { | 192 | if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { |
177 | dprintf("VIA out mismatch (%s vs %s).%s\n", | 193 | dprintf("VIA out mismatch (%s vs %s).%s\n", |
@@ -221,7 +237,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
221 | const struct net_device *out, | 237 | const struct net_device *out, |
222 | struct xt_table *table) | 238 | struct xt_table *table) |
223 | { | 239 | { |
224 | static const char nulldevname[IFNAMSIZ]; | 240 | static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); |
225 | unsigned int verdict = NF_DROP; | 241 | unsigned int verdict = NF_DROP; |
226 | const struct arphdr *arp; | 242 | const struct arphdr *arp; |
227 | bool hotdrop = false; | 243 | bool hotdrop = false; |
@@ -237,9 +253,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
237 | indev = in ? in->name : nulldevname; | 253 | indev = in ? in->name : nulldevname; |
238 | outdev = out ? out->name : nulldevname; | 254 | outdev = out ? out->name : nulldevname; |
239 | 255 | ||
240 | read_lock_bh(&table->lock); | 256 | rcu_read_lock(); |
241 | private = table->private; | 257 | private = rcu_dereference(table->private); |
242 | table_base = (void *)private->entries[smp_processor_id()]; | 258 | table_base = rcu_dereference(private->entries[smp_processor_id()]); |
259 | |||
243 | e = get_entry(table_base, private->hook_entry[hook]); | 260 | e = get_entry(table_base, private->hook_entry[hook]); |
244 | back = get_entry(table_base, private->underflow[hook]); | 261 | back = get_entry(table_base, private->underflow[hook]); |
245 | 262 | ||
@@ -311,7 +328,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, | |||
311 | e = (void *)e + e->next_offset; | 328 | e = (void *)e + e->next_offset; |
312 | } | 329 | } |
313 | } while (!hotdrop); | 330 | } while (!hotdrop); |
314 | read_unlock_bh(&table->lock); | 331 | |
332 | rcu_read_unlock(); | ||
315 | 333 | ||
316 | if (hotdrop) | 334 | if (hotdrop) |
317 | return NF_DROP; | 335 | return NF_DROP; |
@@ -374,7 +392,9 @@ static int mark_source_chains(struct xt_table_info *newinfo, | |||
374 | && unconditional(&e->arp)) || visited) { | 392 | && unconditional(&e->arp)) || visited) { |
375 | unsigned int oldpos, size; | 393 | unsigned int oldpos, size; |
376 | 394 | ||
377 | if (t->verdict < -NF_MAX_VERDICT - 1) { | 395 | if ((strcmp(t->target.u.user.name, |
396 | ARPT_STANDARD_TARGET) == 0) && | ||
397 | t->verdict < -NF_MAX_VERDICT - 1) { | ||
378 | duprintf("mark_source_chains: bad " | 398 | duprintf("mark_source_chains: bad " |
379 | "negative verdict (%i)\n", | 399 | "negative verdict (%i)\n", |
380 | t->verdict); | 400 | t->verdict); |
@@ -714,11 +734,65 @@ static void get_counters(const struct xt_table_info *t, | |||
714 | } | 734 | } |
715 | } | 735 | } |
716 | 736 | ||
717 | static inline struct xt_counters *alloc_counters(struct xt_table *table) | 737 | |
738 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
739 | * and everything is OK. */ | ||
740 | static int | ||
741 | add_counter_to_entry(struct arpt_entry *e, | ||
742 | const struct xt_counters addme[], | ||
743 | unsigned int *i) | ||
744 | { | ||
745 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
746 | |||
747 | (*i)++; | ||
748 | return 0; | ||
749 | } | ||
750 | |||
751 | /* Take values from counters and add them back onto the current cpu */ | ||
752 | static void put_counters(struct xt_table_info *t, | ||
753 | const struct xt_counters counters[]) | ||
754 | { | ||
755 | unsigned int i, cpu; | ||
756 | |||
757 | local_bh_disable(); | ||
758 | cpu = smp_processor_id(); | ||
759 | i = 0; | ||
760 | ARPT_ENTRY_ITERATE(t->entries[cpu], | ||
761 | t->size, | ||
762 | add_counter_to_entry, | ||
763 | counters, | ||
764 | &i); | ||
765 | local_bh_enable(); | ||
766 | } | ||
767 | |||
768 | static inline int | ||
769 | zero_entry_counter(struct arpt_entry *e, void *arg) | ||
770 | { | ||
771 | e->counters.bcnt = 0; | ||
772 | e->counters.pcnt = 0; | ||
773 | return 0; | ||
774 | } | ||
775 | |||
776 | static void | ||
777 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
778 | { | ||
779 | unsigned int cpu; | ||
780 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
781 | |||
782 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
783 | for_each_possible_cpu(cpu) { | ||
784 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
785 | ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
786 | zero_entry_counter, NULL); | ||
787 | } | ||
788 | } | ||
789 | |||
790 | static struct xt_counters *alloc_counters(struct xt_table *table) | ||
718 | { | 791 | { |
719 | unsigned int countersize; | 792 | unsigned int countersize; |
720 | struct xt_counters *counters; | 793 | struct xt_counters *counters; |
721 | const struct xt_table_info *private = table->private; | 794 | struct xt_table_info *private = table->private; |
795 | struct xt_table_info *info; | ||
722 | 796 | ||
723 | /* We need atomic snapshot of counters: rest doesn't change | 797 | /* We need atomic snapshot of counters: rest doesn't change |
724 | * (other than comefrom, which userspace doesn't care | 798 | * (other than comefrom, which userspace doesn't care |
@@ -728,14 +802,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table) | |||
728 | counters = vmalloc_node(countersize, numa_node_id()); | 802 | counters = vmalloc_node(countersize, numa_node_id()); |
729 | 803 | ||
730 | if (counters == NULL) | 804 | if (counters == NULL) |
731 | return ERR_PTR(-ENOMEM); | 805 | goto nomem; |
806 | |||
807 | info = xt_alloc_table_info(private->size); | ||
808 | if (!info) | ||
809 | goto free_counters; | ||
810 | |||
811 | clone_counters(info, private); | ||
812 | |||
813 | mutex_lock(&table->lock); | ||
814 | xt_table_entry_swap_rcu(private, info); | ||
815 | synchronize_net(); /* Wait until smoke has cleared */ | ||
816 | |||
817 | get_counters(info, counters); | ||
818 | put_counters(private, counters); | ||
819 | mutex_unlock(&table->lock); | ||
732 | 820 | ||
733 | /* First, sum counters... */ | 821 | xt_free_table_info(info); |
734 | write_lock_bh(&table->lock); | ||
735 | get_counters(private, counters); | ||
736 | write_unlock_bh(&table->lock); | ||
737 | 822 | ||
738 | return counters; | 823 | return counters; |
824 | |||
825 | free_counters: | ||
826 | vfree(counters); | ||
827 | nomem: | ||
828 | return ERR_PTR(-ENOMEM); | ||
739 | } | 829 | } |
740 | 830 | ||
741 | static int copy_entries_to_user(unsigned int total_size, | 831 | static int copy_entries_to_user(unsigned int total_size, |
@@ -1075,20 +1165,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) | |||
1075 | return ret; | 1165 | return ret; |
1076 | } | 1166 | } |
1077 | 1167 | ||
1078 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
1079 | * and everything is OK. | ||
1080 | */ | ||
1081 | static inline int add_counter_to_entry(struct arpt_entry *e, | ||
1082 | const struct xt_counters addme[], | ||
1083 | unsigned int *i) | ||
1084 | { | ||
1085 | |||
1086 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
1087 | |||
1088 | (*i)++; | ||
1089 | return 0; | ||
1090 | } | ||
1091 | |||
1092 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, | 1168 | static int do_add_counters(struct net *net, void __user *user, unsigned int len, |
1093 | int compat) | 1169 | int compat) |
1094 | { | 1170 | { |
@@ -1148,13 +1224,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1148 | goto free; | 1224 | goto free; |
1149 | } | 1225 | } |
1150 | 1226 | ||
1151 | write_lock_bh(&t->lock); | 1227 | mutex_lock(&t->lock); |
1152 | private = t->private; | 1228 | private = t->private; |
1153 | if (private->number != num_counters) { | 1229 | if (private->number != num_counters) { |
1154 | ret = -EINVAL; | 1230 | ret = -EINVAL; |
1155 | goto unlock_up_free; | 1231 | goto unlock_up_free; |
1156 | } | 1232 | } |
1157 | 1233 | ||
1234 | preempt_disable(); | ||
1158 | i = 0; | 1235 | i = 0; |
1159 | /* Choose the copy that is on our node */ | 1236 | /* Choose the copy that is on our node */ |
1160 | loc_cpu_entry = private->entries[smp_processor_id()]; | 1237 | loc_cpu_entry = private->entries[smp_processor_id()]; |
@@ -1163,8 +1240,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, | |||
1163 | add_counter_to_entry, | 1240 | add_counter_to_entry, |
1164 | paddc, | 1241 | paddc, |
1165 | &i); | 1242 | &i); |
1243 | preempt_enable(); | ||
1166 | unlock_up_free: | 1244 | unlock_up_free: |
1167 | write_unlock_bh(&t->lock); | 1245 | mutex_unlock(&t->lock); |
1246 | |||
1168 | xt_table_unlock(t); | 1247 | xt_table_unlock(t); |
1169 | module_put(t->me); | 1248 | module_put(t->me); |
1170 | free: | 1249 | free: |
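The arp_tables.c hunks above replace the byte-at-a-time interface-name loops with ifname_compare(), which XORs the names in 16-bit chunks under the rule's mask and ORs the results, so a single test at the end decides the match. A minimal user-space sketch of the same masked-compare idiom follows; the function name, the memcpy-based loads, and the "eth+" wildcard encoding shown in main() are illustrative assumptions, not code from the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16

/* XOR the two names in 16-bit chunks, keep only the bits selected by
 * the mask, OR the pieces together: the result is 0 exactly when the
 * masked names match. memcpy() keeps this portable; the kernel version
 * casts directly because it controls the buffers' alignment. */
static unsigned long ifname_compare_16(const char *a, const char *b,
                                       const char *mask)
{
	unsigned long ret = 0;

	for (size_t i = 0; i < IFNAMSIZ; i += sizeof(uint16_t)) {
		uint16_t va, vb, vm;
		memcpy(&va, a + i, sizeof(va));
		memcpy(&vb, b + i, sizeof(vb));
		memcpy(&vm, mask + i, sizeof(vm));
		ret |= (va ^ vb) & vm;
	}
	return ret;
}

int main(void)
{
	char dev[IFNAMSIZ]  = "eth0";
	char rule[IFNAMSIZ] = "eth";   /* roughly how a wildcard "eth+" rule is stored */
	char mask[IFNAMSIZ] = { 0 };

	memset(mask, 0xff, strlen(rule));   /* only the prefix is compared */
	printf("%s\n", ifname_compare_16(dev, rule, mask) ? "mismatch" : "match");
	return 0;
}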
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index e091187e864f..6ecfdae7c589 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c | |||
@@ -48,8 +48,6 @@ static struct | |||
48 | static struct xt_table packet_filter = { | 48 | static struct xt_table packet_filter = { |
49 | .name = "filter", | 49 | .name = "filter", |
50 | .valid_hooks = FILTER_VALID_HOOKS, | 50 | .valid_hooks = FILTER_VALID_HOOKS, |
51 | .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), | ||
52 | .private = NULL, | ||
53 | .me = THIS_MODULE, | 51 | .me = THIS_MODULE, |
54 | .af = NFPROTO_ARP, | 52 | .af = NFPROTO_ARP, |
55 | }; | 53 | }; |
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 432ce9d1c11c..5f22c91c6e15 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
25 | #include <linux/seq_file.h> | 25 | #include <linux/seq_file.h> |
26 | #include <linux/security.h> | 26 | #include <linux/security.h> |
27 | #include <linux/net.h> | ||
27 | #include <linux/mutex.h> | 28 | #include <linux/mutex.h> |
28 | #include <net/net_namespace.h> | 29 | #include <net/net_namespace.h> |
29 | #include <net/sock.h> | 30 | #include <net/sock.h> |
@@ -640,6 +641,7 @@ static void __exit ip_queue_fini(void) | |||
640 | MODULE_DESCRIPTION("IPv4 packet queue handler"); | 641 | MODULE_DESCRIPTION("IPv4 packet queue handler"); |
641 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); | 642 | MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); |
642 | MODULE_LICENSE("GPL"); | 643 | MODULE_LICENSE("GPL"); |
644 | MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); | ||
643 | 645 | ||
644 | module_init(ip_queue_init); | 646 | module_init(ip_queue_init); |
645 | module_exit(ip_queue_fini); | 647 | module_exit(ip_queue_fini); |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef8b6ca068b2..82ee7c9049ff 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
@@ -83,7 +83,6 @@ ip_packet_match(const struct iphdr *ip, | |||
83 | const struct ipt_ip *ipinfo, | 83 | const struct ipt_ip *ipinfo, |
84 | int isfrag) | 84 | int isfrag) |
85 | { | 85 | { |
86 | size_t i; | ||
87 | unsigned long ret; | 86 | unsigned long ret; |
88 | 87 | ||
89 | #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) | 88 | #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) |
@@ -103,12 +102,7 @@ ip_packet_match(const struct iphdr *ip, | |||
103 | return false; | 102 | return false; |
104 | } | 103 | } |
105 | 104 | ||
106 | /* Look for ifname matches; this should unroll nicely. */ | 105 | ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); |
107 | for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { | ||
108 | ret |= (((const unsigned long *)indev)[i] | ||
109 | ^ ((const unsigned long *)ipinfo->iniface)[i]) | ||
110 | & ((const unsigned long *)ipinfo->iniface_mask)[i]; | ||
111 | } | ||
112 | 106 | ||
113 | if (FWINV(ret != 0, IPT_INV_VIA_IN)) { | 107 | if (FWINV(ret != 0, IPT_INV_VIA_IN)) { |
114 | dprintf("VIA in mismatch (%s vs %s).%s\n", | 108 | dprintf("VIA in mismatch (%s vs %s).%s\n", |
@@ -117,11 +111,7 @@ ip_packet_match(const struct iphdr *ip, | |||
117 | return false; | 111 | return false; |
118 | } | 112 | } |
119 | 113 | ||
120 | for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { | 114 | ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); |
121 | ret |= (((const unsigned long *)outdev)[i] | ||
122 | ^ ((const unsigned long *)ipinfo->outiface)[i]) | ||
123 | & ((const unsigned long *)ipinfo->outiface_mask)[i]; | ||
124 | } | ||
125 | 115 | ||
126 | if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { | 116 | if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { |
127 | dprintf("VIA out mismatch (%s vs %s).%s\n", | 117 | dprintf("VIA out mismatch (%s vs %s).%s\n", |
@@ -347,10 +337,12 @@ ipt_do_table(struct sk_buff *skb, | |||
347 | mtpar.family = tgpar.family = NFPROTO_IPV4; | 337 | mtpar.family = tgpar.family = NFPROTO_IPV4; |
348 | tgpar.hooknum = hook; | 338 | tgpar.hooknum = hook; |
349 | 339 | ||
350 | read_lock_bh(&table->lock); | ||
351 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); | 340 | IP_NF_ASSERT(table->valid_hooks & (1 << hook)); |
352 | private = table->private; | 341 | |
353 | table_base = (void *)private->entries[smp_processor_id()]; | 342 | rcu_read_lock(); |
343 | private = rcu_dereference(table->private); | ||
344 | table_base = rcu_dereference(private->entries[smp_processor_id()]); | ||
345 | |||
354 | e = get_entry(table_base, private->hook_entry[hook]); | 346 | e = get_entry(table_base, private->hook_entry[hook]); |
355 | 347 | ||
356 | /* For return from builtin chain */ | 348 | /* For return from builtin chain */ |
@@ -445,7 +437,7 @@ ipt_do_table(struct sk_buff *skb, | |||
445 | } | 437 | } |
446 | } while (!hotdrop); | 438 | } while (!hotdrop); |
447 | 439 | ||
448 | read_unlock_bh(&table->lock); | 440 | rcu_read_unlock(); |
449 | 441 | ||
450 | #ifdef DEBUG_ALLOW_ALL | 442 | #ifdef DEBUG_ALLOW_ALL |
451 | return NF_ACCEPT; | 443 | return NF_ACCEPT; |
@@ -496,7 +488,9 @@ mark_source_chains(struct xt_table_info *newinfo, | |||
496 | && unconditional(&e->ip)) || visited) { | 488 | && unconditional(&e->ip)) || visited) { |
497 | unsigned int oldpos, size; | 489 | unsigned int oldpos, size; |
498 | 490 | ||
499 | if (t->verdict < -NF_MAX_VERDICT - 1) { | 491 | if ((strcmp(t->target.u.user.name, |
492 | IPT_STANDARD_TARGET) == 0) && | ||
493 | t->verdict < -NF_MAX_VERDICT - 1) { | ||
500 | duprintf("mark_source_chains: bad " | 494 | duprintf("mark_source_chains: bad " |
501 | "negative verdict (%i)\n", | 495 | "negative verdict (%i)\n", |
502 | t->verdict); | 496 | t->verdict); |
@@ -924,13 +918,68 @@ get_counters(const struct xt_table_info *t, | |||
924 | counters, | 918 | counters, |
925 | &i); | 919 | &i); |
926 | } | 920 | } |
921 | |||
922 | } | ||
923 | |||
924 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
925 | * and everything is OK. */ | ||
926 | static int | ||
927 | add_counter_to_entry(struct ipt_entry *e, | ||
928 | const struct xt_counters addme[], | ||
929 | unsigned int *i) | ||
930 | { | ||
931 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
932 | |||
933 | (*i)++; | ||
934 | return 0; | ||
935 | } | ||
936 | |||
937 | /* Take values from counters and add them back onto the current cpu */ | ||
938 | static void put_counters(struct xt_table_info *t, | ||
939 | const struct xt_counters counters[]) | ||
940 | { | ||
941 | unsigned int i, cpu; | ||
942 | |||
943 | local_bh_disable(); | ||
944 | cpu = smp_processor_id(); | ||
945 | i = 0; | ||
946 | IPT_ENTRY_ITERATE(t->entries[cpu], | ||
947 | t->size, | ||
948 | add_counter_to_entry, | ||
949 | counters, | ||
950 | &i); | ||
951 | local_bh_enable(); | ||
952 | } | ||
953 | |||
954 | |||
955 | static inline int | ||
956 | zero_entry_counter(struct ipt_entry *e, void *arg) | ||
957 | { | ||
958 | e->counters.bcnt = 0; | ||
959 | e->counters.pcnt = 0; | ||
960 | return 0; | ||
961 | } | ||
962 | |||
963 | static void | ||
964 | clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) | ||
965 | { | ||
966 | unsigned int cpu; | ||
967 | const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; | ||
968 | |||
969 | memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); | ||
970 | for_each_possible_cpu(cpu) { | ||
971 | memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); | ||
972 | IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, | ||
973 | zero_entry_counter, NULL); | ||
974 | } | ||
927 | } | 975 | } |
928 | 976 | ||
929 | static struct xt_counters * alloc_counters(struct xt_table *table) | 977 | static struct xt_counters * alloc_counters(struct xt_table *table) |
930 | { | 978 | { |
931 | unsigned int countersize; | 979 | unsigned int countersize; |
932 | struct xt_counters *counters; | 980 | struct xt_counters *counters; |
933 | const struct xt_table_info *private = table->private; | 981 | struct xt_table_info *private = table->private; |
982 | struct xt_table_info *info; | ||
934 | 983 | ||
935 | /* We need atomic snapshot of counters: rest doesn't change | 984 | /* We need atomic snapshot of counters: rest doesn't change |
936 | (other than comefrom, which userspace doesn't care | 985 | (other than comefrom, which userspace doesn't care |
@@ -939,14 +988,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table) | |||
939 | counters = vmalloc_node(countersize, numa_node_id()); | 988 | counters = vmalloc_node(countersize, numa_node_id()); |
940 | 989 | ||
941 | if (counters == NULL) | 990 | if (counters == NULL) |
942 | return ERR_PTR(-ENOMEM); | 991 | goto nomem; |
992 | |||
993 | info = xt_alloc_table_info(private->size); | ||
994 | if (!info) | ||
995 | goto free_counters; | ||
996 | |||
997 | clone_counters(info, private); | ||
943 | 998 | ||
944 | /* First, sum counters... */ | 999 | mutex_lock(&table->lock); |
945 | write_lock_bh(&table->lock); | 1000 | xt_table_entry_swap_rcu(private, info); |
946 | get_counters(private, counters); | 1001 | synchronize_net(); /* Wait until smoke has cleared */ |
947 | write_unlock_bh(&table->lock); | 1002 | |
1003 | get_counters(info, counters); | ||
1004 | put_counters(private, counters); | ||
1005 | mutex_unlock(&table->lock); | ||
1006 | |||
1007 | xt_free_table_info(info); | ||
948 | 1008 | ||
949 | return counters; | 1009 | return counters; |
1010 | |||
1011 | free_counters: | ||
1012 | vfree(counters); | ||
1013 | nomem: | ||
1014 | return ERR_PTR(-ENOMEM); | ||
950 | } | 1015 | } |
951 | 1016 | ||
952 | static int | 1017 | static int |
@@ -1312,27 +1377,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) | |||
1312 | return ret; | 1377 | return ret; |
1313 | } | 1378 | } |
1314 | 1379 | ||
1315 | /* We're lazy, and add to the first CPU; overflow works its fey magic | ||
1316 | * and everything is OK. */ | ||
1317 | static int | ||
1318 | add_counter_to_entry(struct ipt_entry *e, | ||
1319 | const struct xt_counters addme[], | ||
1320 | unsigned int *i) | ||
1321 | { | ||
1322 | #if 0 | ||
1323 | duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", | ||
1324 | *i, | ||
1325 | (long unsigned int)e->counters.pcnt, | ||
1326 | (long unsigned int)e->counters.bcnt, | ||
1327 | (long unsigned int)addme[*i].pcnt, | ||
1328 | (long unsigned int)addme[*i].bcnt); | ||
1329 | #endif | ||
1330 | |||
1331 | ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); | ||
1332 | |||
1333 | (*i)++; | ||
1334 | return 0; | ||
1335 | } | ||
1336 | 1380 | ||
1337 | static int | 1381 | static int |
1338 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) | 1382 | do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) |
@@ -1393,13 +1437,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
1393 | goto free; | 1437 | goto free; |
1394 | } | 1438 | } |
1395 | 1439 | ||
1396 | write_lock_bh(&t->lock); | 1440 | mutex_lock(&t->lock); |
1397 | private = t->private; | 1441 | private = t->private; |
1398 | if (private->number != num_counters) { | 1442 | if (private->number != num_counters) { |
1399 | ret = -EINVAL; | 1443 | ret = -EINVAL; |
1400 | goto unlock_up_free; | 1444 | goto unlock_up_free; |
1401 | } | 1445 | } |
1402 | 1446 | ||
1447 | preempt_disable(); | ||
1403 | i = 0; | 1448 | i = 0; |
1404 | /* Choose the copy that is on our node */ | 1449 | /* Choose the copy that is on our node */ |
1405 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; | 1450 | loc_cpu_entry = private->entries[raw_smp_processor_id()]; |
@@ -1408,8 +1453,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat | |||
1408 | add_counter_to_entry, | 1453 | add_counter_to_entry, |
1409 | paddc, | 1454 | paddc, |
1410 | &i); | 1455 | &i); |
1456 | preempt_enable(); | ||
1411 | unlock_up_free: | 1457 | unlock_up_free: |
1412 | write_unlock_bh(&t->lock); | 1458 | mutex_unlock(&t->lock); |
1413 | xt_table_unlock(t); | 1459 | xt_table_unlock(t); |
1414 | module_put(t->me); | 1460 | module_put(t->me); |
1415 | free: | 1461 | free: |
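The alloc_counters() rewrite above (and its arp_tables.c twin) takes the counter snapshot without a write lock: it swaps zeroed per-CPU entry copies in under RCU, sums the old copies, then folds the sum back onto the current CPU's copy so the running totals survive. Below is a small user-space sketch of just that sum-then-add-back bookkeeping; the types, array sizes, and helper names are made up for the example and only stand in for get_counters()/put_counters().

#include <stdio.h>
#include <string.h>

#define NR_CPUS    4
#define NR_ENTRIES 3

struct cnt { unsigned long pcnt, bcnt; };

/* One set of rule counters per CPU, as in xt_table_info->entries[]. */
static struct cnt percpu[NR_CPUS][NR_ENTRIES];

/* Roughly what get_counters() does: sum every CPU's copy into 'snap'. */
static void sum_counters(struct cnt snap[NR_ENTRIES])
{
	for (int e = 0; e < NR_ENTRIES; e++) {
		snap[e].pcnt = snap[e].bcnt = 0;
		for (int cpu = 0; cpu < NR_CPUS; cpu++) {
			snap[e].pcnt += percpu[cpu][e].pcnt;
			snap[e].bcnt += percpu[cpu][e].bcnt;
		}
	}
}

/* Roughly what put_counters()/add_counter_to_entry() do: fold the
 * snapshot back onto a single CPU's (now zeroed) copy. */
static void add_back(int cpu, const struct cnt snap[NR_ENTRIES])
{
	for (int e = 0; e < NR_ENTRIES; e++) {
		percpu[cpu][e].pcnt += snap[e].pcnt;
		percpu[cpu][e].bcnt += snap[e].bcnt;
	}
}

int main(void)
{
	struct cnt snap[NR_ENTRIES];

	percpu[0][1].pcnt = 5;              /* rule 1 hit 5 times on CPU 0 */
	percpu[3][1].pcnt = 7;              /* ...and 7 times on CPU 3     */

	sum_counters(snap);
	memset(percpu, 0, sizeof(percpu));  /* stand-in for the RCU table swap */
	add_back(0, snap);

	printf("rule 1: %lu packets\n", percpu[0][1].pcnt);  /* prints 12 */
	return 0;
}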
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 27a78fbbd92b..acc44c69eb68 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c | |||
@@ -464,7 +464,7 @@ static struct xt_target log_tg_reg __read_mostly = { | |||
464 | .me = THIS_MODULE, | 464 | .me = THIS_MODULE, |
465 | }; | 465 | }; |
466 | 466 | ||
467 | static const struct nf_logger ipt_log_logger ={ | 467 | static struct nf_logger ipt_log_logger __read_mostly = { |
468 | .name = "ipt_LOG", | 468 | .name = "ipt_LOG", |
469 | .logfn = &ipt_log_packet, | 469 | .logfn = &ipt_log_packet, |
470 | .me = THIS_MODULE, | 470 | .me = THIS_MODULE, |
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c deleted file mode 100644 index 6d76aae90cc0..000000000000 --- a/net/ipv4/netfilter/ipt_TTL.c +++ /dev/null | |||
@@ -1,97 +0,0 @@ | |||
1 | /* TTL modification target for IP tables | ||
2 | * (C) 2000,2005 by Harald Welte <laforge@netfilter.org> | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License version 2 as | ||
6 | * published by the Free Software Foundation. | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/skbuff.h> | ||
12 | #include <linux/ip.h> | ||
13 | #include <net/checksum.h> | ||
14 | |||
15 | #include <linux/netfilter/x_tables.h> | ||
16 | #include <linux/netfilter_ipv4/ipt_TTL.h> | ||
17 | |||
18 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
19 | MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); | ||
20 | MODULE_LICENSE("GPL"); | ||
21 | |||
22 | static unsigned int | ||
23 | ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) | ||
24 | { | ||
25 | struct iphdr *iph; | ||
26 | const struct ipt_TTL_info *info = par->targinfo; | ||
27 | int new_ttl; | ||
28 | |||
29 | if (!skb_make_writable(skb, skb->len)) | ||
30 | return NF_DROP; | ||
31 | |||
32 | iph = ip_hdr(skb); | ||
33 | |||
34 | switch (info->mode) { | ||
35 | case IPT_TTL_SET: | ||
36 | new_ttl = info->ttl; | ||
37 | break; | ||
38 | case IPT_TTL_INC: | ||
39 | new_ttl = iph->ttl + info->ttl; | ||
40 | if (new_ttl > 255) | ||
41 | new_ttl = 255; | ||
42 | break; | ||
43 | case IPT_TTL_DEC: | ||
44 | new_ttl = iph->ttl - info->ttl; | ||
45 | if (new_ttl < 0) | ||
46 | new_ttl = 0; | ||
47 | break; | ||
48 | default: | ||
49 | new_ttl = iph->ttl; | ||
50 | break; | ||
51 | } | ||
52 | |||
53 | if (new_ttl != iph->ttl) { | ||
54 | csum_replace2(&iph->check, htons(iph->ttl << 8), | ||
55 | htons(new_ttl << 8)); | ||
56 | iph->ttl = new_ttl; | ||
57 | } | ||
58 | |||
59 | return XT_CONTINUE; | ||
60 | } | ||
61 | |||
62 | static bool ttl_tg_check(const struct xt_tgchk_param *par) | ||
63 | { | ||
64 | const struct ipt_TTL_info *info = par->targinfo; | ||
65 | |||
66 | if (info->mode > IPT_TTL_MAXMODE) { | ||
67 | printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", | ||
68 | info->mode); | ||
69 | return false; | ||
70 | } | ||
71 | if (info->mode != IPT_TTL_SET && info->ttl == 0) | ||
72 | return false; | ||
73 | return true; | ||
74 | } | ||
75 | |||
76 | static struct xt_target ttl_tg_reg __read_mostly = { | ||
77 | .name = "TTL", | ||
78 | .family = NFPROTO_IPV4, | ||
79 | .target = ttl_tg, | ||
80 | .targetsize = sizeof(struct ipt_TTL_info), | ||
81 | .table = "mangle", | ||
82 | .checkentry = ttl_tg_check, | ||
83 | .me = THIS_MODULE, | ||
84 | }; | ||
85 | |||
86 | static int __init ttl_tg_init(void) | ||
87 | { | ||
88 | return xt_register_target(&ttl_tg_reg); | ||
89 | } | ||
90 | |||
91 | static void __exit ttl_tg_exit(void) | ||
92 | { | ||
93 | xt_unregister_target(&ttl_tg_reg); | ||
94 | } | ||
95 | |||
96 | module_init(ttl_tg_init); | ||
97 | module_exit(ttl_tg_exit); | ||
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 18a2826b57c6..d32cc4bb328a 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c | |||
@@ -379,7 +379,7 @@ static struct xt_target ulog_tg_reg __read_mostly = { | |||
379 | .me = THIS_MODULE, | 379 | .me = THIS_MODULE, |
380 | }; | 380 | }; |
381 | 381 | ||
382 | static struct nf_logger ipt_ulog_logger = { | 382 | static struct nf_logger ipt_ulog_logger __read_mostly = { |
383 | .name = "ipt_ULOG", | 383 | .name = "ipt_ULOG", |
384 | .logfn = ipt_logfn, | 384 | .logfn = ipt_logfn, |
385 | .me = THIS_MODULE, | 385 | .me = THIS_MODULE, |
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c deleted file mode 100644 index 297f1cbf4ff5..000000000000 --- a/net/ipv4/netfilter/ipt_ttl.c +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | /* IP tables module for matching the value of the TTL | ||
2 | * | ||
3 | * (C) 2000,2001 by Harald Welte <laforge@netfilter.org> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify | ||
6 | * it under the terms of the GNU General Public License version 2 as | ||
7 | * published by the Free Software Foundation. | ||
8 | */ | ||
9 | |||
10 | #include <linux/ip.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/skbuff.h> | ||
13 | |||
14 | #include <linux/netfilter_ipv4/ipt_ttl.h> | ||
15 | #include <linux/netfilter/x_tables.h> | ||
16 | |||
17 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | ||
18 | MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); | ||
19 | MODULE_LICENSE("GPL"); | ||
20 | |||
21 | static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) | ||
22 | { | ||
23 | const struct ipt_ttl_info *info = par->matchinfo; | ||
24 | const u8 ttl = ip_hdr(skb)->ttl; | ||
25 | |||
26 | switch (info->mode) { | ||
27 | case IPT_TTL_EQ: | ||
28 | return ttl == info->ttl; | ||
29 | case IPT_TTL_NE: | ||
30 | return ttl != info->ttl; | ||
31 | case IPT_TTL_LT: | ||
32 | return ttl < info->ttl; | ||
33 | case IPT_TTL_GT: | ||
34 | return ttl > info->ttl; | ||
35 | default: | ||
36 | printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", | ||
37 | info->mode); | ||
38 | return false; | ||
39 | } | ||
40 | |||
41 | return false; | ||
42 | } | ||
43 | |||
44 | static struct xt_match ttl_mt_reg __read_mostly = { | ||
45 | .name = "ttl", | ||
46 | .family = NFPROTO_IPV4, | ||
47 | .match = ttl_mt, | ||
48 | .matchsize = sizeof(struct ipt_ttl_info), | ||
49 | .me = THIS_MODULE, | ||
50 | }; | ||
51 | |||
52 | static int __init ttl_mt_init(void) | ||
53 | { | ||
54 | return xt_register_match(&ttl_mt_reg); | ||
55 | } | ||
56 | |||
57 | static void __exit ttl_mt_exit(void) | ||
58 | { | ||
59 | xt_unregister_match(&ttl_mt_reg); | ||
60 | } | ||
61 | |||
62 | module_init(ttl_mt_init); | ||
63 | module_exit(ttl_mt_exit); | ||
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 52cb6939d093..c30a969724f8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c | |||
@@ -56,7 +56,6 @@ static struct | |||
56 | static struct xt_table packet_filter = { | 56 | static struct xt_table packet_filter = { |
57 | .name = "filter", | 57 | .name = "filter", |
58 | .valid_hooks = FILTER_VALID_HOOKS, | 58 | .valid_hooks = FILTER_VALID_HOOKS, |
59 | .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), | ||
60 | .me = THIS_MODULE, | 59 | .me = THIS_MODULE, |
61 | .af = AF_INET, | 60 | .af = AF_INET, |
62 | }; | 61 | }; |
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3929d20b9e45..4087614d9519 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c | |||
@@ -67,7 +67,6 @@ static struct | |||
67 | static struct xt_table packet_mangler = { | 67 | static struct xt_table packet_mangler = { |
68 | .name = "mangle", | 68 | .name = "mangle", |
69 | .valid_hooks = MANGLE_VALID_HOOKS, | 69 | .valid_hooks = MANGLE_VALID_HOOKS, |
70 | .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), | ||
71 | .me = THIS_MODULE, | 70 | .me = THIS_MODULE, |
72 | .af = AF_INET, | 71 | .af = AF_INET, |
73 | }; | 72 | }; |
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7f65d18333e3..e5356da1fb54 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c | |||
@@ -39,7 +39,6 @@ static struct | |||
39 | static struct xt_table packet_raw = { | 39 | static struct xt_table packet_raw = { |
40 | .name = "raw", | 40 | .name = "raw", |
41 | .valid_hooks = RAW_VALID_HOOKS, | 41 | .valid_hooks = RAW_VALID_HOOKS, |
42 | .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), | ||
43 | .me = THIS_MODULE, | 42 | .me = THIS_MODULE, |
44 | .af = AF_INET, | 43 | .af = AF_INET, |
45 | }; | 44 | }; |
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index a52a35f4a584..29ab630f240a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c | |||
@@ -60,7 +60,6 @@ static struct | |||
60 | static struct xt_table security_table = { | 60 | static struct xt_table security_table = { |
61 | .name = "security", | 61 | .name = "security", |
62 | .valid_hooks = SECURITY_VALID_HOOKS, | 62 | .valid_hooks = SECURITY_VALID_HOOKS, |
63 | .lock = __RW_LOCK_UNLOCKED(security_table.lock), | ||
64 | .me = THIS_MODULE, | 63 | .me = THIS_MODULE, |
65 | .af = AF_INET, | 64 | .af = AF_INET, |
66 | }; | 65 | }; |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4beb04fac588..7d2ead7228ac 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | |||
@@ -120,8 +120,10 @@ static unsigned int ipv4_confirm(unsigned int hooknum, | |||
120 | typeof(nf_nat_seq_adjust_hook) seq_adjust; | 120 | typeof(nf_nat_seq_adjust_hook) seq_adjust; |
121 | 121 | ||
122 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); | 122 | seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); |
123 | if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) | 123 | if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { |
124 | NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); | ||
124 | return NF_DROP; | 125 | return NF_DROP; |
126 | } | ||
125 | } | 127 | } |
126 | out: | 128 | out: |
127 | /* We've seen it coming out the other side: confirm it */ | 129 | /* We've seen it coming out the other side: confirm it */ |
@@ -326,6 +328,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[], | |||
326 | 328 | ||
327 | return 0; | 329 | return 0; |
328 | } | 330 | } |
331 | |||
332 | static int ipv4_nlattr_tuple_size(void) | ||
333 | { | ||
334 | return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); | ||
335 | } | ||
329 | #endif | 336 | #endif |
330 | 337 | ||
331 | static struct nf_sockopt_ops so_getorigdst = { | 338 | static struct nf_sockopt_ops so_getorigdst = { |
@@ -345,6 +352,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { | |||
345 | .get_l4proto = ipv4_get_l4proto, | 352 | .get_l4proto = ipv4_get_l4proto, |
346 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 353 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
347 | .tuple_to_nlattr = ipv4_tuple_to_nlattr, | 354 | .tuple_to_nlattr = ipv4_tuple_to_nlattr, |
355 | .nlattr_tuple_size = ipv4_nlattr_tuple_size, | ||
348 | .nlattr_to_tuple = ipv4_nlattr_to_tuple, | 356 | .nlattr_to_tuple = ipv4_nlattr_to_tuple, |
349 | .nla_policy = ipv4_nla_policy, | 357 | .nla_policy = ipv4_nla_policy, |
350 | #endif | 358 | #endif |
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 6ba5c557690c..8668a3defda6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | |||
@@ -25,40 +25,42 @@ struct ct_iter_state { | |||
25 | unsigned int bucket; | 25 | unsigned int bucket; |
26 | }; | 26 | }; |
27 | 27 | ||
28 | static struct hlist_node *ct_get_first(struct seq_file *seq) | 28 | static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) |
29 | { | 29 | { |
30 | struct net *net = seq_file_net(seq); | 30 | struct net *net = seq_file_net(seq); |
31 | struct ct_iter_state *st = seq->private; | 31 | struct ct_iter_state *st = seq->private; |
32 | struct hlist_node *n; | 32 | struct hlist_nulls_node *n; |
33 | 33 | ||
34 | for (st->bucket = 0; | 34 | for (st->bucket = 0; |
35 | st->bucket < nf_conntrack_htable_size; | 35 | st->bucket < nf_conntrack_htable_size; |
36 | st->bucket++) { | 36 | st->bucket++) { |
37 | n = rcu_dereference(net->ct.hash[st->bucket].first); | 37 | n = rcu_dereference(net->ct.hash[st->bucket].first); |
38 | if (n) | 38 | if (!is_a_nulls(n)) |
39 | return n; | 39 | return n; |
40 | } | 40 | } |
41 | return NULL; | 41 | return NULL; |
42 | } | 42 | } |
43 | 43 | ||
44 | static struct hlist_node *ct_get_next(struct seq_file *seq, | 44 | static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, |
45 | struct hlist_node *head) | 45 | struct hlist_nulls_node *head) |
46 | { | 46 | { |
47 | struct net *net = seq_file_net(seq); | 47 | struct net *net = seq_file_net(seq); |
48 | struct ct_iter_state *st = seq->private; | 48 | struct ct_iter_state *st = seq->private; |
49 | 49 | ||
50 | head = rcu_dereference(head->next); | 50 | head = rcu_dereference(head->next); |
51 | while (head == NULL) { | 51 | while (is_a_nulls(head)) { |
52 | if (++st->bucket >= nf_conntrack_htable_size) | 52 | if (likely(get_nulls_value(head) == st->bucket)) { |
53 | return NULL; | 53 | if (++st->bucket >= nf_conntrack_htable_size) |
54 | return NULL; | ||
55 | } | ||
54 | head = rcu_dereference(net->ct.hash[st->bucket].first); | 56 | head = rcu_dereference(net->ct.hash[st->bucket].first); |
55 | } | 57 | } |
56 | return head; | 58 | return head; |
57 | } | 59 | } |
58 | 60 | ||
59 | static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) | 61 | static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) |
60 | { | 62 | { |
61 | struct hlist_node *head = ct_get_first(seq); | 63 | struct hlist_nulls_node *head = ct_get_first(seq); |
62 | 64 | ||
63 | if (head) | 65 | if (head) |
64 | while (pos && (head = ct_get_next(seq, head))) | 66 | while (pos && (head = ct_get_next(seq, head))) |
@@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v) | |||
87 | 89 | ||
88 | static int ct_seq_show(struct seq_file *s, void *v) | 90 | static int ct_seq_show(struct seq_file *s, void *v) |
89 | { | 91 | { |
90 | const struct nf_conntrack_tuple_hash *hash = v; | 92 | struct nf_conntrack_tuple_hash *hash = v; |
91 | const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); | 93 | struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); |
92 | const struct nf_conntrack_l3proto *l3proto; | 94 | const struct nf_conntrack_l3proto *l3proto; |
93 | const struct nf_conntrack_l4proto *l4proto; | 95 | const struct nf_conntrack_l4proto *l4proto; |
96 | int ret = 0; | ||
94 | 97 | ||
95 | NF_CT_ASSERT(ct); | 98 | NF_CT_ASSERT(ct); |
99 | if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) | ||
100 | return 0; | ||
101 | |||
96 | 102 | ||
97 | /* we only want to print DIR_ORIGINAL */ | 103 | /* we only want to print DIR_ORIGINAL */ |
98 | if (NF_CT_DIRECTION(hash)) | 104 | if (NF_CT_DIRECTION(hash)) |
99 | return 0; | 105 | goto release; |
100 | if (nf_ct_l3num(ct) != AF_INET) | 106 | if (nf_ct_l3num(ct) != AF_INET) |
101 | return 0; | 107 | goto release; |
102 | 108 | ||
103 | l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); | 109 | l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); |
104 | NF_CT_ASSERT(l3proto); | 110 | NF_CT_ASSERT(l3proto); |
105 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); | 111 | l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); |
106 | NF_CT_ASSERT(l4proto); | 112 | NF_CT_ASSERT(l4proto); |
107 | 113 | ||
114 | ret = -ENOSPC; | ||
108 | if (seq_printf(s, "%-8s %u %ld ", | 115 | if (seq_printf(s, "%-8s %u %ld ", |
109 | l4proto->name, nf_ct_protonum(ct), | 116 | l4proto->name, nf_ct_protonum(ct), |
110 | timer_pending(&ct->timeout) | 117 | timer_pending(&ct->timeout) |
111 | ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) | 118 | ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) |
112 | return -ENOSPC; | 119 | goto release; |
113 | 120 | ||
114 | if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) | 121 | if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) |
115 | return -ENOSPC; | 122 | goto release; |
116 | 123 | ||
117 | if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, | 124 | if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, |
118 | l3proto, l4proto)) | 125 | l3proto, l4proto)) |
119 | return -ENOSPC; | 126 | goto release; |
120 | 127 | ||
121 | if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) | 128 | if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) |
122 | return -ENOSPC; | 129 | goto release; |
123 | 130 | ||
124 | if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) | 131 | if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) |
125 | if (seq_printf(s, "[UNREPLIED] ")) | 132 | if (seq_printf(s, "[UNREPLIED] ")) |
126 | return -ENOSPC; | 133 | goto release; |
127 | 134 | ||
128 | if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, | 135 | if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, |
129 | l3proto, l4proto)) | 136 | l3proto, l4proto)) |
130 | return -ENOSPC; | 137 | goto release; |
131 | 138 | ||
132 | if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) | 139 | if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) |
133 | return -ENOSPC; | 140 | goto release; |
134 | 141 | ||
135 | if (test_bit(IPS_ASSURED_BIT, &ct->status)) | 142 | if (test_bit(IPS_ASSURED_BIT, &ct->status)) |
136 | if (seq_printf(s, "[ASSURED] ")) | 143 | if (seq_printf(s, "[ASSURED] ")) |
137 | return -ENOSPC; | 144 | goto release; |
138 | 145 | ||
139 | #ifdef CONFIG_NF_CONNTRACK_MARK | 146 | #ifdef CONFIG_NF_CONNTRACK_MARK |
140 | if (seq_printf(s, "mark=%u ", ct->mark)) | 147 | if (seq_printf(s, "mark=%u ", ct->mark)) |
141 | return -ENOSPC; | 148 | goto release; |
142 | #endif | 149 | #endif |
143 | 150 | ||
144 | #ifdef CONFIG_NF_CONNTRACK_SECMARK | 151 | #ifdef CONFIG_NF_CONNTRACK_SECMARK |
145 | if (seq_printf(s, "secmark=%u ", ct->secmark)) | 152 | if (seq_printf(s, "secmark=%u ", ct->secmark)) |
146 | return -ENOSPC; | 153 | goto release; |
147 | #endif | 154 | #endif |
148 | 155 | ||
149 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) | 156 | if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) |
150 | return -ENOSPC; | 157 | goto release; |
151 | 158 | ret = 0; | |
152 | return 0; | 159 | release: |
160 | nf_ct_put(ct); | ||
161 | return ret; | ||
153 | } | 162 | } |
154 | 163 | ||
155 | static const struct seq_operations ct_seq_ops = { | 164 | static const struct seq_operations ct_seq_ops = { |
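The ct_get_first()/ct_get_next() changes above move the /proc iterator to hlist_nulls lists: the end-of-chain marker is not NULL but an odd "nulls" value that encodes the bucket it belongs to, so a lockless reader that reaches a terminator can tell whether it is still on the chain it started in, and rescan the bucket if the entry was moved underneath it. A standalone sketch of that terminator trick follows; the MAKE_NULLS/IS_NULLS/NULLS_VALUE macros and the node type are illustrative assumptions, not the kernel's hlist_nulls API.

#include <stdint.h>
#include <stdio.h>

/* Encode the bucket id into an odd, never-dereferenced "pointer". */
#define MAKE_NULLS(bucket)  ((void *)(((uintptr_t)(bucket) << 1) | 1))
#define IS_NULLS(p)         ((uintptr_t)(p) & 1)
#define NULLS_VALUE(p)      ((uintptr_t)(p) >> 1)

struct node { struct node *next; int data; };

int main(void)
{
	/* A two-element chain that terminates in the nulls value for bucket 3. */
	struct node n2 = { .next = MAKE_NULLS(3), .data = 20 };
	struct node n1 = { .next = &n2,           .data = 10 };
	unsigned int my_bucket = 3;

	for (struct node *p = &n1; ; p = p->next) {
		if (IS_NULLS(p)) {
			if (NULLS_VALUE(p) == my_bucket)
				puts("end of my chain - done");
			else
				puts("landed on another chain - restart the bucket scan");
			break;
		}
		printf("visited %d\n", p->data);
	}
	return 0;
}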
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 2a8bee26f43d..23b2c2ee869a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c | |||
@@ -262,6 +262,11 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[], | |||
262 | 262 | ||
263 | return 0; | 263 | return 0; |
264 | } | 264 | } |
265 | |||
266 | static int icmp_nlattr_tuple_size(void) | ||
267 | { | ||
268 | return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); | ||
269 | } | ||
265 | #endif | 270 | #endif |
266 | 271 | ||
267 | #ifdef CONFIG_SYSCTL | 272 | #ifdef CONFIG_SYSCTL |
@@ -309,6 +314,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = | |||
309 | .me = NULL, | 314 | .me = NULL, |
310 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) | 315 | #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) |
311 | .tuple_to_nlattr = icmp_tuple_to_nlattr, | 316 | .tuple_to_nlattr = icmp_tuple_to_nlattr, |
317 | .nlattr_tuple_size = icmp_nlattr_tuple_size, | ||
312 | .nlattr_to_tuple = icmp_nlattr_to_tuple, | 318 | .nlattr_to_tuple = icmp_nlattr_to_tuple, |
313 | .nla_policy = icmp_nla_policy, | 319 | .nla_policy = icmp_nla_policy, |
314 | #endif | 320 | #endif |
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index a65cf692359f..fe65187810f0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c | |||
@@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, | |||
679 | static int __net_init nf_nat_net_init(struct net *net) | 679 | static int __net_init nf_nat_net_init(struct net *net) |
680 | { | 680 | { |
681 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, | 681 | net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, |
682 | &net->ipv4.nat_vmalloced); | 682 | &net->ipv4.nat_vmalloced, 0); |
683 | if (!net->ipv4.nat_bysource) | 683 | if (!net->ipv4.nat_bysource) |
684 | return -ENOMEM; | 684 | return -ENOMEM; |
685 | return 0; | 685 | return 0; |
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index a7eb04719044..6348a793936e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c | |||
@@ -61,7 +61,6 @@ static struct | |||
61 | static struct xt_table nat_table = { | 61 | static struct xt_table nat_table = { |
62 | .name = "nat", | 62 | .name = "nat", |
63 | .valid_hooks = NAT_VALID_HOOKS, | 63 | .valid_hooks = NAT_VALID_HOOKS, |
64 | .lock = __RW_LOCK_UNLOCKED(nat_table.lock), | ||
65 | .me = THIS_MODULE, | 64 | .me = THIS_MODULE, |
66 | .af = AF_INET, | 65 | .af = AF_INET, |
67 | }; | 66 | }; |
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 182f845de92f..d9521f6f9ed0 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c | |||
@@ -1292,7 +1292,7 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = { | |||
1292 | .expect_policy = &snmp_exp_policy, | 1292 | .expect_policy = &snmp_exp_policy, |
1293 | .name = "snmp", | 1293 | .name = "snmp", |
1294 | .tuple.src.l3num = AF_INET, | 1294 | .tuple.src.l3num = AF_INET, |
1295 | .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), | 1295 | .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), |
1296 | .tuple.dst.protonum = IPPROTO_UDP, | 1296 | .tuple.dst.protonum = IPPROTO_UDP, |
1297 | }; | 1297 | }; |
1298 | 1298 | ||
@@ -1302,7 +1302,7 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { | |||
1302 | .expect_policy = &snmp_exp_policy, | 1302 | .expect_policy = &snmp_exp_policy, |
1303 | .name = "snmp_trap", | 1303 | .name = "snmp_trap", |
1304 | .tuple.src.l3num = AF_INET, | 1304 | .tuple.src.l3num = AF_INET, |
1305 | .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), | 1305 | .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), |
1306 | .tuple.dst.protonum = IPPROTO_UDP, | 1306 | .tuple.dst.protonum = IPPROTO_UDP, |
1307 | }; | 1307 | }; |
1308 | 1308 | ||
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index eb62e58bff79..cf0cdeeb1db0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -54,8 +54,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
54 | int orphans, sockets; | 54 | int orphans, sockets; |
55 | 55 | ||
56 | local_bh_disable(); | 56 | local_bh_disable(); |
57 | orphans = percpu_counter_sum_positive(&tcp_orphan_count), | 57 | orphans = percpu_counter_sum_positive(&tcp_orphan_count); |
58 | sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), | 58 | sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); |
59 | local_bh_enable(); | 59 | local_bh_enable(); |
60 | 60 | ||
61 | socket_seq_show(seq); | 61 | socket_seq_show(seq); |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dff8bc4e0fac..f774651f0a47 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -493,6 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
493 | 493 | ||
494 | ipc.addr = inet->saddr; | 494 | ipc.addr = inet->saddr; |
495 | ipc.opt = NULL; | 495 | ipc.opt = NULL; |
496 | ipc.shtx.flags = 0; | ||
496 | ipc.oif = sk->sk_bound_dev_if; | 497 | ipc.oif = sk->sk_bound_dev_if; |
497 | 498 | ||
498 | if (msg->msg_controllen) { | 499 | if (msg->msg_controllen) { |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97f71153584f..5caee609be06 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -151,7 +151,7 @@ static void rt_emergency_hash_rebuild(struct net *net); | |||
151 | 151 | ||
152 | static struct dst_ops ipv4_dst_ops = { | 152 | static struct dst_ops ipv4_dst_ops = { |
153 | .family = AF_INET, | 153 | .family = AF_INET, |
154 | .protocol = __constant_htons(ETH_P_IP), | 154 | .protocol = cpu_to_be16(ETH_P_IP), |
155 | .gc = rt_garbage_collect, | 155 | .gc = rt_garbage_collect, |
156 | .check = ipv4_dst_check, | 156 | .check = ipv4_dst_check, |
157 | .destroy = ipv4_dst_destroy, | 157 | .destroy = ipv4_dst_destroy, |
@@ -2696,7 +2696,7 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) | |||
2696 | 2696 | ||
2697 | static struct dst_ops ipv4_dst_blackhole_ops = { | 2697 | static struct dst_ops ipv4_dst_blackhole_ops = { |
2698 | .family = AF_INET, | 2698 | .family = AF_INET, |
2699 | .protocol = __constant_htons(ETH_P_IP), | 2699 | .protocol = cpu_to_be16(ETH_P_IP), |
2700 | .destroy = ipv4_dst_destroy, | 2700 | .destroy = ipv4_dst_destroy, |
2701 | .check = ipv4_dst_check, | 2701 | .check = ipv4_dst_check, |
2702 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, | 2702 | .update_pmtu = ipv4_rt_blackhole_update_pmtu, |
@@ -2779,7 +2779,8 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) | |||
2779 | return ip_route_output_flow(net, rp, flp, NULL, 0); | 2779 | return ip_route_output_flow(net, rp, flp, NULL, 0); |
2780 | } | 2780 | } |
2781 | 2781 | ||
2782 | static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | 2782 | static int rt_fill_info(struct net *net, |
2783 | struct sk_buff *skb, u32 pid, u32 seq, int event, | ||
2783 | int nowait, unsigned int flags) | 2784 | int nowait, unsigned int flags) |
2784 | { | 2785 | { |
2785 | struct rtable *rt = skb->rtable; | 2786 | struct rtable *rt = skb->rtable; |
@@ -2844,8 +2845,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, | |||
2844 | __be32 dst = rt->rt_dst; | 2845 | __be32 dst = rt->rt_dst; |
2845 | 2846 | ||
2846 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && | 2847 | if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && |
2847 | IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { | 2848 | IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { |
2848 | int err = ipmr_get_route(skb, r, nowait); | 2849 | int err = ipmr_get_route(net, skb, r, nowait); |
2849 | if (err <= 0) { | 2850 | if (err <= 0) { |
2850 | if (!nowait) { | 2851 | if (!nowait) { |
2851 | if (err == 0) | 2852 | if (err == 0) |
@@ -2950,7 +2951,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void | |||
2950 | if (rtm->rtm_flags & RTM_F_NOTIFY) | 2951 | if (rtm->rtm_flags & RTM_F_NOTIFY) |
2951 | rt->rt_flags |= RTCF_NOTIFY; | 2952 | rt->rt_flags |= RTCF_NOTIFY; |
2952 | 2953 | ||
2953 | err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, | 2954 | err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, |
2954 | RTM_NEWROUTE, 0, 0); | 2955 | RTM_NEWROUTE, 0, 0); |
2955 | if (err <= 0) | 2956 | if (err <= 0) |
2956 | goto errout_free; | 2957 | goto errout_free; |
@@ -2988,7 +2989,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2988 | if (rt_is_expired(rt)) | 2989 | if (rt_is_expired(rt)) |
2989 | continue; | 2990 | continue; |
2990 | skb->dst = dst_clone(&rt->u.dst); | 2991 | skb->dst = dst_clone(&rt->u.dst); |
2991 | if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, | 2992 | if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, |
2992 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, | 2993 | cb->nlh->nlmsg_seq, RTM_NEWROUTE, |
2993 | 1, NLM_F_MULTI) <= 0) { | 2994 | 1, NLM_F_MULTI) <= 0) { |
2994 | dst_release(xchg(&skb->dst, NULL)); | 2995 | dst_release(xchg(&skb->dst, NULL)); |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 76b148bcb0dc..2451aeb5ac23 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -661,6 +661,47 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | |||
661 | return NULL; | 661 | return NULL; |
662 | } | 662 | } |
663 | 663 | ||
664 | static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, | ||
665 | int large_allowed) | ||
666 | { | ||
667 | struct tcp_sock *tp = tcp_sk(sk); | ||
668 | u32 xmit_size_goal, old_size_goal; | ||
669 | |||
670 | xmit_size_goal = mss_now; | ||
671 | |||
672 | if (large_allowed && sk_can_gso(sk)) { | ||
673 | xmit_size_goal = ((sk->sk_gso_max_size - 1) - | ||
674 | inet_csk(sk)->icsk_af_ops->net_header_len - | ||
675 | inet_csk(sk)->icsk_ext_hdr_len - | ||
676 | tp->tcp_header_len); | ||
677 | |||
678 | xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); | ||
679 | |||
680 | /* We try hard to avoid divides here */ | ||
681 | old_size_goal = tp->xmit_size_goal_segs * mss_now; | ||
682 | |||
683 | if (likely(old_size_goal <= xmit_size_goal && | ||
684 | old_size_goal + mss_now > xmit_size_goal)) { | ||
685 | xmit_size_goal = old_size_goal; | ||
686 | } else { | ||
687 | tp->xmit_size_goal_segs = xmit_size_goal / mss_now; | ||
688 | xmit_size_goal = tp->xmit_size_goal_segs * mss_now; | ||
689 | } | ||
690 | } | ||
691 | |||
692 | return max(xmit_size_goal, mss_now); | ||
693 | } | ||
694 | |||
695 | static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) | ||
696 | { | ||
697 | int mss_now; | ||
698 | |||
699 | mss_now = tcp_current_mss(sk); | ||
700 | *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); | ||
701 | |||
702 | return mss_now; | ||
703 | } | ||
704 | |||
664 | static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, | 705 | static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, |
665 | size_t psize, int flags) | 706 | size_t psize, int flags) |
666 | { | 707 | { |
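The new tcp_xmit_size_goal() above caches the send size goal as a whole number of segments (tp->xmit_size_goal_segs) and only re-divides when the cached goal falls outside the window [bound - mss + 1, bound], which is what the "We try hard to avoid divides here" comment refers to. A small arithmetic sketch of that caching rule, with illustrative names rather than the kernel's:

#include <stdio.h>

/* Keep the old goal (a multiple of mss) while it still lies within one
 * mss below the freshly computed bound; otherwise re-derive the segment
 * count with a divide. */
static unsigned int size_goal(unsigned int mss, unsigned int bound,
                              unsigned int *cached_segs)
{
	unsigned int old = *cached_segs * mss;

	if (old <= bound && old + mss > bound)
		return old;                 /* common case: no divide */

	*cached_segs = bound / mss;         /* slow path */
	return *cached_segs * mss;
}

int main(void)
{
	unsigned int segs = 0;

	printf("%u\n", size_goal(1448, 64000, &segs)); /* 63712 = 44 * 1448 */
	printf("%u\n", size_goal(1448, 64100, &segs)); /* still 63712, cached */
	printf("%u\n", size_goal(1448, 30000, &segs)); /* recomputed: 28960  */
	return 0;
}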
@@ -677,13 +718,12 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
677 | 718 | ||
678 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 719 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
679 | 720 | ||
680 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 721 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
681 | size_goal = tp->xmit_size_goal; | ||
682 | copied = 0; | 722 | copied = 0; |
683 | 723 | ||
684 | err = -EPIPE; | 724 | err = -EPIPE; |
685 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 725 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
686 | goto do_error; | 726 | goto out_err; |
687 | 727 | ||
688 | while (psize > 0) { | 728 | while (psize > 0) { |
689 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 729 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
@@ -761,8 +801,7 @@ wait_for_memory: | |||
761 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 801 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
762 | goto do_error; | 802 | goto do_error; |
763 | 803 | ||
764 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 804 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
765 | size_goal = tp->xmit_size_goal; | ||
766 | } | 805 | } |
767 | 806 | ||
768 | out: | 807 | out: |
@@ -844,8 +883,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
844 | /* This should be in poll */ | 883 | /* This should be in poll */ |
845 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 884 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
846 | 885 | ||
847 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 886 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
848 | size_goal = tp->xmit_size_goal; | ||
849 | 887 | ||
850 | /* Ok commence sending. */ | 888 | /* Ok commence sending. */ |
851 | iovlen = msg->msg_iovlen; | 889 | iovlen = msg->msg_iovlen; |
@@ -854,7 +892,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
854 | 892 | ||
855 | err = -EPIPE; | 893 | err = -EPIPE; |
856 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 894 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
857 | goto do_error; | 895 | goto out_err; |
858 | 896 | ||
859 | while (--iovlen >= 0) { | 897 | while (--iovlen >= 0) { |
860 | int seglen = iov->iov_len; | 898 | int seglen = iov->iov_len; |
@@ -1007,8 +1045,7 @@ wait_for_memory: | |||
1007 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 1045 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
1008 | goto do_error; | 1046 | goto do_error; |
1009 | 1047 | ||
1010 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 1048 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
1011 | size_goal = tp->xmit_size_goal; | ||
1012 | } | 1049 | } |
1013 | } | 1050 | } |
1014 | 1051 | ||
@@ -1045,8 +1082,7 @@ out_err: | |||
1045 | */ | 1082 | */ |
1046 | 1083 | ||
1047 | static int tcp_recv_urg(struct sock *sk, long timeo, | 1084 | static int tcp_recv_urg(struct sock *sk, long timeo, |
1048 | struct msghdr *msg, int len, int flags, | 1085 | struct msghdr *msg, int len, int flags) |
1049 | int *addr_len) | ||
1050 | { | 1086 | { |
1051 | struct tcp_sock *tp = tcp_sk(sk); | 1087 | struct tcp_sock *tp = tcp_sk(sk); |
1052 | 1088 | ||
@@ -1661,7 +1697,7 @@ out: | |||
1661 | return err; | 1697 | return err; |
1662 | 1698 | ||
1663 | recv_urg: | 1699 | recv_urg: |
1664 | err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); | 1700 | err = tcp_recv_urg(sk, timeo, msg, len, flags); |
1665 | goto out; | 1701 | goto out; |
1666 | } | 1702 | } |
1667 | 1703 | ||
@@ -2478,23 +2514,23 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2478 | struct tcphdr *th2; | 2514 | struct tcphdr *th2; |
2479 | unsigned int thlen; | 2515 | unsigned int thlen; |
2480 | unsigned int flags; | 2516 | unsigned int flags; |
2481 | unsigned int total; | ||
2482 | unsigned int mss = 1; | 2517 | unsigned int mss = 1; |
2483 | int flush = 1; | 2518 | int flush = 1; |
2519 | int i; | ||
2484 | 2520 | ||
2485 | if (!pskb_may_pull(skb, sizeof(*th))) | 2521 | th = skb_gro_header(skb, sizeof(*th)); |
2522 | if (unlikely(!th)) | ||
2486 | goto out; | 2523 | goto out; |
2487 | 2524 | ||
2488 | th = tcp_hdr(skb); | ||
2489 | thlen = th->doff * 4; | 2525 | thlen = th->doff * 4; |
2490 | if (thlen < sizeof(*th)) | 2526 | if (thlen < sizeof(*th)) |
2491 | goto out; | 2527 | goto out; |
2492 | 2528 | ||
2493 | if (!pskb_may_pull(skb, thlen)) | 2529 | th = skb_gro_header(skb, thlen); |
2530 | if (unlikely(!th)) | ||
2494 | goto out; | 2531 | goto out; |
2495 | 2532 | ||
2496 | th = tcp_hdr(skb); | 2533 | skb_gro_pull(skb, thlen); |
2497 | __skb_pull(skb, thlen); | ||
2498 | 2534 | ||
2499 | flags = tcp_flag_word(th); | 2535 | flags = tcp_flag_word(th); |
2500 | 2536 | ||
@@ -2504,7 +2540,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2504 | 2540 | ||
2505 | th2 = tcp_hdr(p); | 2541 | th2 = tcp_hdr(p); |
2506 | 2542 | ||
2507 | if (th->source != th2->source || th->dest != th2->dest) { | 2543 | if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) { |
2508 | NAPI_GRO_CB(p)->same_flow = 0; | 2544 | NAPI_GRO_CB(p)->same_flow = 0; |
2509 | continue; | 2545 | continue; |
2510 | } | 2546 | } |
@@ -2519,14 +2555,15 @@ found: | |||
2519 | flush |= flags & TCP_FLAG_CWR; | 2555 | flush |= flags & TCP_FLAG_CWR; |
2520 | flush |= (flags ^ tcp_flag_word(th2)) & | 2556 | flush |= (flags ^ tcp_flag_word(th2)) & |
2521 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); | 2557 | ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); |
2522 | flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; | 2558 | flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window); |
2523 | flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); | 2559 | for (i = sizeof(*th); !flush && i < thlen; i += 4) |
2560 | flush |= *(u32 *)((u8 *)th + i) ^ | ||
2561 | *(u32 *)((u8 *)th2 + i); | ||
2524 | 2562 | ||
2525 | total = p->len; | ||
2526 | mss = skb_shinfo(p)->gso_size; | 2563 | mss = skb_shinfo(p)->gso_size; |
2527 | 2564 | ||
2528 | flush |= skb->len > mss || skb->len <= 0; | 2565 | flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); |
2529 | flush |= ntohl(th2->seq) + total != ntohl(th->seq); | 2566 | flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); |
2530 | 2567 | ||
2531 | if (flush || skb_gro_receive(head, skb)) { | 2568 | if (flush || skb_gro_receive(head, skb)) { |
2532 | mss = 1; | 2569 | mss = 1; |
@@ -2538,7 +2575,7 @@ found: | |||
2538 | tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); | 2575 | tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); |
2539 | 2576 | ||
2540 | out_check_final: | 2577 | out_check_final: |
2541 | flush = skb->len < mss; | 2578 | flush = skb_gro_len(skb) < mss; |
2542 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | | 2579 | flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | |
2543 | TCP_FLAG_SYN | TCP_FLAG_FIN); | 2580 | TCP_FLAG_SYN | TCP_FLAG_FIN); |
2544 | 2581 | ||
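The tcp_gro_receive() hunks above replace pskb_may_pull()/__skb_pull() with the GRO-aware skb_gro_header()/skb_gro_pull() accessors and turn the per-field != comparisons into XOR terms OR-ed into a single flush word, so the flow-match decision accumulates without extra branches. A minimal standalone sketch of that branchless matching idea (plain C outside the kernel; the struct and its field types are illustrative, the real headers use __be16/__be32 fields):

#include <stdint.h>

/* Simplified TCP header fields relevant to GRO flow matching. */
struct tcp_hdr_min {
        uint16_t source;
        uint16_t dest;
        uint32_t ack_seq;
        uint16_t window;
};

/*
 * Return nonzero if the two headers differ in any compared field.
 * XOR of equal values is 0, so OR-ing the XORs gives one
 * "any difference?" word with no per-field branch.
 */
static uint32_t gro_flow_mismatch(const struct tcp_hdr_min *a,
                                  const struct tcp_hdr_min *b)
{
        uint32_t diff = 0;

        diff |= (uint32_t)(a->source ^ b->source);
        diff |= (uint32_t)(a->dest ^ b->dest);
        diff |= a->ack_seq ^ b->ack_seq;
        diff |= (uint32_t)(a->window ^ b->window);

        return diff;    /* 0 only when every compared field matches */
}

The flush variable above works the same way: it stays zero for same-flow, same-window packets and becomes nonzero as soon as any XOR term differs.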
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 7eb7636db0d0..3b53fd1af23f 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -149,16 +149,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
149 | tcp_slow_start(tp); | 149 | tcp_slow_start(tp); |
150 | else { | 150 | else { |
151 | bictcp_update(ca, tp->snd_cwnd); | 151 | bictcp_update(ca, tp->snd_cwnd); |
152 | 152 | tcp_cong_avoid_ai(tp, ca->cnt); | |
153 | /* In dangerous area, increase slowly. | ||
154 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | ||
155 | */ | ||
156 | if (tp->snd_cwnd_cnt >= ca->cnt) { | ||
157 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
158 | tp->snd_cwnd++; | ||
159 | tp->snd_cwnd_cnt = 0; | ||
160 | } else | ||
161 | tp->snd_cwnd_cnt++; | ||
162 | } | 153 | } |
163 | 154 | ||
164 | } | 155 | } |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4ec5b4e97c4e..e92beb9e55e0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -336,6 +336,19 @@ void tcp_slow_start(struct tcp_sock *tp) | |||
336 | } | 336 | } |
337 | EXPORT_SYMBOL_GPL(tcp_slow_start); | 337 | EXPORT_SYMBOL_GPL(tcp_slow_start); |
338 | 338 | ||
339 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ | ||
340 | void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) | ||
341 | { | ||
342 | if (tp->snd_cwnd_cnt >= w) { | ||
343 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
344 | tp->snd_cwnd++; | ||
345 | tp->snd_cwnd_cnt = 0; | ||
346 | } else { | ||
347 | tp->snd_cwnd_cnt++; | ||
348 | } | ||
349 | } | ||
350 | EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); | ||
351 | |||
339 | /* | 352 | /* |
340 | * TCP Reno congestion control | 353 | * TCP Reno congestion control |
341 | * This is special case used for fallback as well. | 354 | * This is special case used for fallback as well. |
@@ -365,13 +378,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
365 | tp->snd_cwnd++; | 378 | tp->snd_cwnd++; |
366 | } | 379 | } |
367 | } else { | 380 | } else { |
368 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ | 381 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); |
369 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
370 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
371 | tp->snd_cwnd++; | ||
372 | tp->snd_cwnd_cnt = 0; | ||
373 | } else | ||
374 | tp->snd_cwnd_cnt++; | ||
375 | } | 382 | } |
376 | } | 383 | } |
377 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); | 384 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); |
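tcp_cong_avoid_ai(), added above, factors the additive-increase step (grow the congestion window by one segment for every w ACKed segments, capped at the clamp) out of the congestion modules; the BIC, CUBIC, Reno, Scalable, Veno and YeAH hunks in this patch all switch to it. Restated as a self-contained sketch (plain C; the struct below only mirrors the three tcp_sock fields involved):

#include <stdint.h>

struct cwnd_state {
        uint32_t snd_cwnd;              /* congestion window, in segments */
        uint32_t snd_cwnd_cnt;          /* ACKs counted since last increase */
        uint32_t snd_cwnd_clamp;        /* upper bound on snd_cwnd */
};

/* Additive increase: roughly cwnd += 1/w per ACK, i.e. +1 per w ACKs. */
static void cong_avoid_ai(struct cwnd_state *c, uint32_t w)
{
        if (c->snd_cwnd_cnt >= w) {
                if (c->snd_cwnd < c->snd_cwnd_clamp)
                        c->snd_cwnd++;
                c->snd_cwnd_cnt = 0;
        } else {
                c->snd_cwnd_cnt++;
        }
}

Reno passes w = snd_cwnd, so the window grows by one segment per round trip; BIC and CUBIC pass their precomputed ca->cnt, and Scalable passes min(snd_cwnd, TCP_SCALABLE_AI_CNT), as the later hunks show.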
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ee467ec40c4f..71d5f2f29fa6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -294,16 +294,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
294 | tcp_slow_start(tp); | 294 | tcp_slow_start(tp); |
295 | } else { | 295 | } else { |
296 | bictcp_update(ca, tp->snd_cwnd); | 296 | bictcp_update(ca, tp->snd_cwnd); |
297 | 297 | tcp_cong_avoid_ai(tp, ca->cnt); | |
298 | /* In dangerous area, increase slowly. | ||
299 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | ||
300 | */ | ||
301 | if (tp->snd_cwnd_cnt >= ca->cnt) { | ||
302 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
303 | tp->snd_cwnd++; | ||
304 | tp->snd_cwnd_cnt = 0; | ||
305 | } else | ||
306 | tp->snd_cwnd_cnt++; | ||
307 | } | 298 | } |
308 | 299 | ||
309 | } | 300 | } |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 937549b8a921..26d5c7fc7de5 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -115,8 +115,7 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt | |||
115 | return; | 115 | return; |
116 | 116 | ||
117 | /* achieved throughput calculations */ | 117 | /* achieved throughput calculations */ |
118 | if (icsk->icsk_ca_state != TCP_CA_Open && | 118 | if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_Disorder))) { |
119 | icsk->icsk_ca_state != TCP_CA_Disorder) { | ||
120 | ca->packetcount = 0; | 119 | ca->packetcount = 0; |
121 | ca->lasttime = now; | 120 | ca->lasttime = now; |
122 | return; | 121 | return; |
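The H-TCP hunk above collapses the two state comparisons into one bitmask test: TCPF_CA_* are the (1 << TCP_CA_*) flag forms of the congestion-avoidance state enum, so membership in a set of states costs a shift and an AND. The idiom in isolation (plain C; the enum values and names here are illustrative, not the kernel's):

#include <stdio.h>

enum ca_state { CA_OPEN, CA_DISORDER, CA_CWR, CA_RECOVERY, CA_LOSS };

#define CAF_OPEN        (1 << CA_OPEN)
#define CAF_DISORDER    (1 << CA_DISORDER)

/* Set-membership test: one shift and one AND instead of a comparison chain. */
static int in_open_or_disorder(enum ca_state s)
{
        return (1 << s) & (CAF_OPEN | CAF_DISORDER);
}

int main(void)
{
        printf("%d %d\n", !!in_open_or_disorder(CA_OPEN),
               !!in_open_or_disorder(CA_LOSS));         /* prints "1 0" */
        return 0;
}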
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c28976a7e596..2bc8e27a163d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
65 | #include <linux/module.h> | 65 | #include <linux/module.h> |
66 | #include <linux/sysctl.h> | 66 | #include <linux/sysctl.h> |
67 | #include <linux/kernel.h> | ||
67 | #include <net/dst.h> | 68 | #include <net/dst.h> |
68 | #include <net/tcp.h> | 69 | #include <net/tcp.h> |
69 | #include <net/inet_common.h> | 70 | #include <net/inet_common.h> |
@@ -1178,10 +1179,18 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1178 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) | 1179 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) |
1179 | continue; | 1180 | continue; |
1180 | 1181 | ||
1181 | if (after(received_upto, ack_seq) && | 1182 | /* TODO: We would like to get rid of tcp_is_fack(tp) only |
1182 | (tcp_is_fack(tp) || | 1183 | * constraint here (see above) but figuring out that at |
1183 | !before(received_upto, | 1184 | * least tp->reordering SACK blocks reside between ack_seq |
1184 | ack_seq + tp->reordering * tp->mss_cache))) { | 1185 | * and received_upto is not easy task to do cheaply with |
1186 | * the available datastructures. | ||
1187 | * | ||
1188 | * Whether FACK should check here for tp->reordering segs | ||
1189 | * in-between one could argue for either way (it would be | ||
1190 | * rather simple to implement as we could count fack_count | ||
1191 | * during the walk and do tp->fackets_out - fack_count). | ||
1192 | */ | ||
1193 | if (after(received_upto, ack_seq)) { | ||
1185 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1194 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1186 | tp->retrans_out -= tcp_skb_pcount(skb); | 1195 | tp->retrans_out -= tcp_skb_pcount(skb); |
1187 | 1196 | ||
@@ -1794,11 +1803,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1794 | for (i = used_sacks - 1; i > 0; i--) { | 1803 | for (i = used_sacks - 1; i > 0; i--) { |
1795 | for (j = 0; j < i; j++) { | 1804 | for (j = 0; j < i; j++) { |
1796 | if (after(sp[j].start_seq, sp[j + 1].start_seq)) { | 1805 | if (after(sp[j].start_seq, sp[j + 1].start_seq)) { |
1797 | struct tcp_sack_block tmp; | 1806 | swap(sp[j], sp[j + 1]); |
1798 | |||
1799 | tmp = sp[j]; | ||
1800 | sp[j] = sp[j + 1]; | ||
1801 | sp[j + 1] = tmp; | ||
1802 | 1807 | ||
1803 | /* Track where the first SACK block goes to */ | 1808 | /* Track where the first SACK block goes to */ |
1804 | if (j == first_sack_index) | 1809 | if (j == first_sack_index) |
@@ -2453,6 +2458,44 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2453 | return 0; | 2458 | return 0; |
2454 | } | 2459 | } |
2455 | 2460 | ||
2461 | /* New heuristics: it is possible only after we switched to restart timer | ||
2462 | * each time when something is ACKed. Hence, we can detect timed out packets | ||
2463 | * during fast retransmit without falling to slow start. | ||
2464 | * | ||
2465 | * Usefulness of this as is very questionable, since we should know which of | ||
2466 | * the segments is the next to timeout which is relatively expensive to find | ||
2467 | * in general case unless we add some data structure just for that. The | ||
2468 | * current approach certainly won't find the right one too often and when it | ||
2469 | * finally does find _something_ it usually marks large part of the window | ||
2470 | * right away (because a retransmission with a larger timestamp blocks the | ||
2471 | * loop from advancing). -ij | ||
2472 | */ | ||
2473 | static void tcp_timeout_skbs(struct sock *sk) | ||
2474 | { | ||
2475 | struct tcp_sock *tp = tcp_sk(sk); | ||
2476 | struct sk_buff *skb; | ||
2477 | |||
2478 | if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) | ||
2479 | return; | ||
2480 | |||
2481 | skb = tp->scoreboard_skb_hint; | ||
2482 | if (tp->scoreboard_skb_hint == NULL) | ||
2483 | skb = tcp_write_queue_head(sk); | ||
2484 | |||
2485 | tcp_for_write_queue_from(skb, sk) { | ||
2486 | if (skb == tcp_send_head(sk)) | ||
2487 | break; | ||
2488 | if (!tcp_skb_timedout(sk, skb)) | ||
2489 | break; | ||
2490 | |||
2491 | tcp_skb_mark_lost(tp, skb); | ||
2492 | } | ||
2493 | |||
2494 | tp->scoreboard_skb_hint = skb; | ||
2495 | |||
2496 | tcp_verify_left_out(tp); | ||
2497 | } | ||
2498 | |||
2456 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2499 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is |
2457 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2500 | * is against sacked "cnt", otherwise it's against facked "cnt" |
2458 | */ | 2501 | */ |
@@ -2525,30 +2568,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) | |||
2525 | tcp_mark_head_lost(sk, sacked_upto); | 2568 | tcp_mark_head_lost(sk, sacked_upto); |
2526 | } | 2569 | } |
2527 | 2570 | ||
2528 | /* New heuristics: it is possible only after we switched | 2571 | tcp_timeout_skbs(sk); |
2529 | * to restart timer each time when something is ACKed. | ||
2530 | * Hence, we can detect timed out packets during fast | ||
2531 | * retransmit without falling to slow start. | ||
2532 | */ | ||
2533 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) { | ||
2534 | struct sk_buff *skb; | ||
2535 | |||
2536 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint | ||
2537 | : tcp_write_queue_head(sk); | ||
2538 | |||
2539 | tcp_for_write_queue_from(skb, sk) { | ||
2540 | if (skb == tcp_send_head(sk)) | ||
2541 | break; | ||
2542 | if (!tcp_skb_timedout(sk, skb)) | ||
2543 | break; | ||
2544 | |||
2545 | tcp_skb_mark_lost(tp, skb); | ||
2546 | } | ||
2547 | |||
2548 | tp->scoreboard_skb_hint = skb; | ||
2549 | |||
2550 | tcp_verify_left_out(tp); | ||
2551 | } | ||
2552 | } | 2572 | } |
2553 | 2573 | ||
2554 | /* CWND moderation, preventing bursts due to too big ACKs | 2574 | /* CWND moderation, preventing bursts due to too big ACKs |
@@ -2813,7 +2833,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) | |||
2813 | icsk->icsk_mtup.probe_size = 0; | 2833 | icsk->icsk_mtup.probe_size = 0; |
2814 | } | 2834 | } |
2815 | 2835 | ||
2816 | static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) | 2836 | static void tcp_mtup_probe_success(struct sock *sk) |
2817 | { | 2837 | { |
2818 | struct tcp_sock *tp = tcp_sk(sk); | 2838 | struct tcp_sock *tp = tcp_sk(sk); |
2819 | struct inet_connection_sock *icsk = inet_csk(sk); | 2839 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2841,7 +2861,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
2841 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2861 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2842 | struct tcp_sock *tp = tcp_sk(sk); | 2862 | struct tcp_sock *tp = tcp_sk(sk); |
2843 | struct sk_buff *skb; | 2863 | struct sk_buff *skb; |
2844 | unsigned int mss = tcp_current_mss(sk, 0); | 2864 | unsigned int mss = tcp_current_mss(sk); |
2845 | u32 prior_lost = tp->lost_out; | 2865 | u32 prior_lost = tp->lost_out; |
2846 | 2866 | ||
2847 | tcp_for_write_queue(skb, sk) { | 2867 | tcp_for_write_queue(skb, sk) { |
@@ -3178,7 +3198,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3178 | 3198 | ||
3179 | while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { | 3199 | while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { |
3180 | struct tcp_skb_cb *scb = TCP_SKB_CB(skb); | 3200 | struct tcp_skb_cb *scb = TCP_SKB_CB(skb); |
3181 | u32 end_seq; | ||
3182 | u32 acked_pcount; | 3201 | u32 acked_pcount; |
3183 | u8 sacked = scb->sacked; | 3202 | u8 sacked = scb->sacked; |
3184 | 3203 | ||
@@ -3193,16 +3212,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3193 | break; | 3212 | break; |
3194 | 3213 | ||
3195 | fully_acked = 0; | 3214 | fully_acked = 0; |
3196 | end_seq = tp->snd_una; | ||
3197 | } else { | 3215 | } else { |
3198 | acked_pcount = tcp_skb_pcount(skb); | 3216 | acked_pcount = tcp_skb_pcount(skb); |
3199 | end_seq = scb->end_seq; | ||
3200 | } | ||
3201 | |||
3202 | /* MTU probing checks */ | ||
3203 | if (fully_acked && icsk->icsk_mtup.probe_size && | ||
3204 | !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) { | ||
3205 | tcp_mtup_probe_success(sk, skb); | ||
3206 | } | 3217 | } |
3207 | 3218 | ||
3208 | if (sacked & TCPCB_RETRANS) { | 3219 | if (sacked & TCPCB_RETRANS) { |
@@ -3267,24 +3278,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3267 | const struct tcp_congestion_ops *ca_ops | 3278 | const struct tcp_congestion_ops *ca_ops |
3268 | = inet_csk(sk)->icsk_ca_ops; | 3279 | = inet_csk(sk)->icsk_ca_ops; |
3269 | 3280 | ||
3281 | if (unlikely(icsk->icsk_mtup.probe_size && | ||
3282 | !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { | ||
3283 | tcp_mtup_probe_success(sk); | ||
3284 | } | ||
3285 | |||
3270 | tcp_ack_update_rtt(sk, flag, seq_rtt); | 3286 | tcp_ack_update_rtt(sk, flag, seq_rtt); |
3271 | tcp_rearm_rto(sk); | 3287 | tcp_rearm_rto(sk); |
3272 | 3288 | ||
3273 | if (tcp_is_reno(tp)) { | 3289 | if (tcp_is_reno(tp)) { |
3274 | tcp_remove_reno_sacks(sk, pkts_acked); | 3290 | tcp_remove_reno_sacks(sk, pkts_acked); |
3275 | } else { | 3291 | } else { |
3292 | int delta; | ||
3293 | |||
3276 | /* Non-retransmitted hole got filled? That's reordering */ | 3294 | /* Non-retransmitted hole got filled? That's reordering */ |
3277 | if (reord < prior_fackets) | 3295 | if (reord < prior_fackets) |
3278 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); | 3296 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); |
3279 | 3297 | ||
3280 | /* No need to care for underflows here because | 3298 | delta = tcp_is_fack(tp) ? pkts_acked : |
3281 | * the lost_skb_hint gets NULLed if we're past it | 3299 | prior_sacked - tp->sacked_out; |
3282 | * (or something non-trivial happened) | 3300 | tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); |
3283 | */ | ||
3284 | if (tcp_is_fack(tp)) | ||
3285 | tp->lost_cnt_hint -= pkts_acked; | ||
3286 | else | ||
3287 | tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; | ||
3288 | } | 3301 | } |
3289 | 3302 | ||
3290 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 3303 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
@@ -3396,7 +3409,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, | |||
3396 | 3409 | ||
3397 | if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { | 3410 | if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { |
3398 | flag |= FLAG_WIN_UPDATE; | 3411 | flag |= FLAG_WIN_UPDATE; |
3399 | tcp_update_wl(tp, ack, ack_seq); | 3412 | tcp_update_wl(tp, ack_seq); |
3400 | 3413 | ||
3401 | if (tp->snd_wnd != nwin) { | 3414 | if (tp->snd_wnd != nwin) { |
3402 | tp->snd_wnd = nwin; | 3415 | tp->snd_wnd = nwin; |
@@ -3572,15 +3585,18 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3572 | int prior_packets; | 3585 | int prior_packets; |
3573 | int frto_cwnd = 0; | 3586 | int frto_cwnd = 0; |
3574 | 3587 | ||
3575 | /* If the ack is newer than sent or older than previous acks | 3588 | /* If the ack is older than previous acks |
3576 | * then we can probably ignore it. | 3589 | * then we can probably ignore it. |
3577 | */ | 3590 | */ |
3578 | if (after(ack, tp->snd_nxt)) | ||
3579 | goto uninteresting_ack; | ||
3580 | |||
3581 | if (before(ack, prior_snd_una)) | 3591 | if (before(ack, prior_snd_una)) |
3582 | goto old_ack; | 3592 | goto old_ack; |
3583 | 3593 | ||
3594 | /* If the ack includes data we haven't sent yet, discard | ||
3595 | * this segment (RFC793 Section 3.9). | ||
3596 | */ | ||
3597 | if (after(ack, tp->snd_nxt)) | ||
3598 | goto invalid_ack; | ||
3599 | |||
3584 | if (after(ack, prior_snd_una)) | 3600 | if (after(ack, prior_snd_una)) |
3585 | flag |= FLAG_SND_UNA_ADVANCED; | 3601 | flag |= FLAG_SND_UNA_ADVANCED; |
3586 | 3602 | ||
@@ -3601,7 +3617,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3601 | * No more checks are required. | 3617 | * No more checks are required. |
3602 | * Note, we use the fact that SND.UNA>=SND.WL2. | 3618 | * Note, we use the fact that SND.UNA>=SND.WL2. |
3603 | */ | 3619 | */ |
3604 | tcp_update_wl(tp, ack, ack_seq); | 3620 | tcp_update_wl(tp, ack_seq); |
3605 | tp->snd_una = ack; | 3621 | tp->snd_una = ack; |
3606 | flag |= FLAG_WIN_UPDATE; | 3622 | flag |= FLAG_WIN_UPDATE; |
3607 | 3623 | ||
@@ -3670,6 +3686,10 @@ no_queue: | |||
3670 | tcp_ack_probe(sk); | 3686 | tcp_ack_probe(sk); |
3671 | return 1; | 3687 | return 1; |
3672 | 3688 | ||
3689 | invalid_ack: | ||
3690 | SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt); | ||
3691 | return -1; | ||
3692 | |||
3673 | old_ack: | 3693 | old_ack: |
3674 | if (TCP_SKB_CB(skb)->sacked) { | 3694 | if (TCP_SKB_CB(skb)->sacked) { |
3675 | tcp_sacktag_write_queue(sk, skb, prior_snd_una); | 3695 | tcp_sacktag_write_queue(sk, skb, prior_snd_una); |
@@ -3677,8 +3697,7 @@ old_ack: | |||
3677 | tcp_try_keep_open(sk); | 3697 | tcp_try_keep_open(sk); |
3678 | } | 3698 | } |
3679 | 3699 | ||
3680 | uninteresting_ack: | 3700 | SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); |
3681 | SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); | ||
3682 | return 0; | 3701 | return 0; |
3683 | } | 3702 | } |
3684 | 3703 | ||
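The tcp_ack() hunks above reorder the validity checks: an ACK below snd_una is still treated as old (and mined for SACK information), while an ACK above snd_nxt now returns -1 via the new invalid_ack label, citing RFC 793 section 3.9, and the slow-path callers were changed to discard the segment on a negative return. A standalone sketch of the classification, using the usual wraparound-safe sequence comparisons (plain C; before()/after() reimplemented here for self-containment):

#include <stdint.h>

/* Wraparound-safe sequence comparisons, as used throughout TCP code. */
static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

enum ack_class { ACK_OLD, ACK_INVALID, ACK_ACCEPTABLE };

/*
 * Classify an incoming ACK against the send window:
 *   - below snd_una: duplicate/old, still useful for SACK processing
 *   - above snd_nxt: acks data never sent, discard (RFC 793, section 3.9)
 *   - otherwise: acceptable
 */
static enum ack_class classify_ack(uint32_t ack, uint32_t snd_una,
                                   uint32_t snd_nxt)
{
        if (seq_before(ack, snd_una))
                return ACK_OLD;
        if (seq_after(ack, snd_nxt))
                return ACK_INVALID;
        return ACK_ACCEPTABLE;
}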
@@ -3866,8 +3885,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
3866 | * Not only, also it occurs for expired timestamps. | 3885 | * Not only, also it occurs for expired timestamps. |
3867 | */ | 3886 | */ |
3868 | 3887 | ||
3869 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || | 3888 | if (tcp_paws_check(&tp->rx_opt, 0)) |
3870 | get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) | ||
3871 | tcp_store_ts_recent(tp); | 3889 | tcp_store_ts_recent(tp); |
3872 | } | 3890 | } |
3873 | } | 3891 | } |
@@ -3919,9 +3937,9 @@ static inline int tcp_paws_discard(const struct sock *sk, | |||
3919 | const struct sk_buff *skb) | 3937 | const struct sk_buff *skb) |
3920 | { | 3938 | { |
3921 | const struct tcp_sock *tp = tcp_sk(sk); | 3939 | const struct tcp_sock *tp = tcp_sk(sk); |
3922 | return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && | 3940 | |
3923 | get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && | 3941 | return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) && |
3924 | !tcp_disordered_ack(sk, skb)); | 3942 | !tcp_disordered_ack(sk, skb); |
3925 | } | 3943 | } |
3926 | 3944 | ||
3927 | /* Check segment sequence number for validity. | 3945 | /* Check segment sequence number for validity. |
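These hunks, together with the tcp_minisocks.c ones further down, split the old helper into tcp_paws_check(), a pure timestamp-freshness test, and tcp_paws_reject(), the decision used where a segment may actually be dropped. The freshness test itself is the familiar signed 32-bit comparison plus a 24-day staleness escape. A rough standalone sketch (plain C; only the 24-day meaning of the constant is taken from the kernel, the rest of the scaffolding is illustrative):

#include <stdint.h>
#include <time.h>

#define PAWS_24DAYS     (60 * 60 * 24 * 24)     /* seconds */

struct ts_state {
        uint32_t ts_recent;             /* last timestamp accepted */
        time_t   ts_recent_stamp;       /* wall-clock time it was recorded */
};

/*
 * PAWS freshness test: the received tsval passes if it is not older than
 * ts_recent by more than paws_win, or if ts_recent itself is so old
 * (> 24 days) that it can no longer be trusted for comparison.
 */
static int paws_check(const struct ts_state *ts, uint32_t rcv_tsval,
                      int paws_win)
{
        if ((int32_t)(ts->ts_recent - rcv_tsval) <= paws_win)
                return 1;
        if (time(NULL) >= ts->ts_recent_stamp + PAWS_24DAYS)
                return 1;
        return 0;
}

tcp_replace_ts_recent() calls the check with a window of 0, tcp_paws_discard() with TCP_PAWS_WINDOW, as the hunks above show.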
@@ -4079,7 +4097,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) | |||
4079 | tp->rx_opt.dsack = 1; | 4097 | tp->rx_opt.dsack = 1; |
4080 | tp->duplicate_sack[0].start_seq = seq; | 4098 | tp->duplicate_sack[0].start_seq = seq; |
4081 | tp->duplicate_sack[0].end_seq = end_seq; | 4099 | tp->duplicate_sack[0].end_seq = end_seq; |
4082 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1; | ||
4083 | } | 4100 | } |
4084 | } | 4101 | } |
4085 | 4102 | ||
@@ -4134,8 +4151,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) | |||
4134 | * Decrease num_sacks. | 4151 | * Decrease num_sacks. |
4135 | */ | 4152 | */ |
4136 | tp->rx_opt.num_sacks--; | 4153 | tp->rx_opt.num_sacks--; |
4137 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + | ||
4138 | tp->rx_opt.dsack; | ||
4139 | for (i = this_sack; i < tp->rx_opt.num_sacks; i++) | 4154 | for (i = this_sack; i < tp->rx_opt.num_sacks; i++) |
4140 | sp[i] = sp[i + 1]; | 4155 | sp[i] = sp[i + 1]; |
4141 | continue; | 4156 | continue; |
@@ -4144,20 +4159,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) | |||
4144 | } | 4159 | } |
4145 | } | 4160 | } |
4146 | 4161 | ||
4147 | static inline void tcp_sack_swap(struct tcp_sack_block *sack1, | ||
4148 | struct tcp_sack_block *sack2) | ||
4149 | { | ||
4150 | __u32 tmp; | ||
4151 | |||
4152 | tmp = sack1->start_seq; | ||
4153 | sack1->start_seq = sack2->start_seq; | ||
4154 | sack2->start_seq = tmp; | ||
4155 | |||
4156 | tmp = sack1->end_seq; | ||
4157 | sack1->end_seq = sack2->end_seq; | ||
4158 | sack2->end_seq = tmp; | ||
4159 | } | ||
4160 | |||
4161 | static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) | 4162 | static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) |
4162 | { | 4163 | { |
4163 | struct tcp_sock *tp = tcp_sk(sk); | 4164 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -4172,7 +4173,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) | |||
4172 | if (tcp_sack_extend(sp, seq, end_seq)) { | 4173 | if (tcp_sack_extend(sp, seq, end_seq)) { |
4173 | /* Rotate this_sack to the first one. */ | 4174 | /* Rotate this_sack to the first one. */ |
4174 | for (; this_sack > 0; this_sack--, sp--) | 4175 | for (; this_sack > 0; this_sack--, sp--) |
4175 | tcp_sack_swap(sp, sp - 1); | 4176 | swap(*sp, *(sp - 1)); |
4176 | if (cur_sacks > 1) | 4177 | if (cur_sacks > 1) |
4177 | tcp_sack_maybe_coalesce(tp); | 4178 | tcp_sack_maybe_coalesce(tp); |
4178 | return; | 4179 | return; |
@@ -4198,7 +4199,6 @@ new_sack: | |||
4198 | sp->start_seq = seq; | 4199 | sp->start_seq = seq; |
4199 | sp->end_seq = end_seq; | 4200 | sp->end_seq = end_seq; |
4200 | tp->rx_opt.num_sacks++; | 4201 | tp->rx_opt.num_sacks++; |
4201 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | ||
4202 | } | 4202 | } |
4203 | 4203 | ||
4204 | /* RCV.NXT advances, some SACKs should be eaten. */ | 4204 | /* RCV.NXT advances, some SACKs should be eaten. */ |
@@ -4212,7 +4212,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4212 | /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ | 4212 | /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ |
4213 | if (skb_queue_empty(&tp->out_of_order_queue)) { | 4213 | if (skb_queue_empty(&tp->out_of_order_queue)) { |
4214 | tp->rx_opt.num_sacks = 0; | 4214 | tp->rx_opt.num_sacks = 0; |
4215 | tp->rx_opt.eff_sacks = tp->rx_opt.dsack; | ||
4216 | return; | 4215 | return; |
4217 | } | 4216 | } |
4218 | 4217 | ||
@@ -4233,11 +4232,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4233 | this_sack++; | 4232 | this_sack++; |
4234 | sp++; | 4233 | sp++; |
4235 | } | 4234 | } |
4236 | if (num_sacks != tp->rx_opt.num_sacks) { | 4235 | tp->rx_opt.num_sacks = num_sacks; |
4237 | tp->rx_opt.num_sacks = num_sacks; | ||
4238 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + | ||
4239 | tp->rx_opt.dsack; | ||
4240 | } | ||
4241 | } | 4236 | } |
4242 | 4237 | ||
4243 | /* This one checks to see if we can put data from the | 4238 | /* This one checks to see if we can put data from the |
@@ -4313,10 +4308,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
4313 | 4308 | ||
4314 | TCP_ECN_accept_cwr(tp, skb); | 4309 | TCP_ECN_accept_cwr(tp, skb); |
4315 | 4310 | ||
4316 | if (tp->rx_opt.dsack) { | 4311 | tp->rx_opt.dsack = 0; |
4317 | tp->rx_opt.dsack = 0; | ||
4318 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; | ||
4319 | } | ||
4320 | 4312 | ||
4321 | /* Queue data for delivery to the user. | 4313 | /* Queue data for delivery to the user. |
4322 | * Packets in sequence go to the receive queue. | 4314 | * Packets in sequence go to the receive queue. |
@@ -4435,8 +4427,6 @@ drop: | |||
4435 | /* Initial out of order segment, build 1 SACK. */ | 4427 | /* Initial out of order segment, build 1 SACK. */ |
4436 | if (tcp_is_sack(tp)) { | 4428 | if (tcp_is_sack(tp)) { |
4437 | tp->rx_opt.num_sacks = 1; | 4429 | tp->rx_opt.num_sacks = 1; |
4438 | tp->rx_opt.dsack = 0; | ||
4439 | tp->rx_opt.eff_sacks = 1; | ||
4440 | tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; | 4430 | tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; |
4441 | tp->selective_acks[0].end_seq = | 4431 | tp->selective_acks[0].end_seq = |
4442 | TCP_SKB_CB(skb)->end_seq; | 4432 | TCP_SKB_CB(skb)->end_seq; |
@@ -5157,7 +5147,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5157 | */ | 5147 | */ |
5158 | 5148 | ||
5159 | if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && | 5149 | if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && |
5160 | TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { | 5150 | TCP_SKB_CB(skb)->seq == tp->rcv_nxt && |
5151 | !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { | ||
5161 | int tcp_header_len = tp->tcp_header_len; | 5152 | int tcp_header_len = tp->tcp_header_len; |
5162 | 5153 | ||
5163 | /* Timestamp header prediction: tcp_header_len | 5154 | /* Timestamp header prediction: tcp_header_len |
@@ -5310,8 +5301,8 @@ slow_path: | |||
5310 | return -res; | 5301 | return -res; |
5311 | 5302 | ||
5312 | step5: | 5303 | step5: |
5313 | if (th->ack) | 5304 | if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) |
5314 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 5305 | goto discard; |
5315 | 5306 | ||
5316 | tcp_rcv_rtt_measure_ts(sk, skb); | 5307 | tcp_rcv_rtt_measure_ts(sk, skb); |
5317 | 5308 | ||
@@ -5409,7 +5400,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5409 | * never scaled. | 5400 | * never scaled. |
5410 | */ | 5401 | */ |
5411 | tp->snd_wnd = ntohs(th->window); | 5402 | tp->snd_wnd = ntohs(th->window); |
5412 | tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); | 5403 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5413 | 5404 | ||
5414 | if (!tp->rx_opt.wscale_ok) { | 5405 | if (!tp->rx_opt.wscale_ok) { |
5415 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; | 5406 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; |
@@ -5510,7 +5501,7 @@ discard: | |||
5510 | 5501 | ||
5511 | /* PAWS check. */ | 5502 | /* PAWS check. */ |
5512 | if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && | 5503 | if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && |
5513 | tcp_paws_check(&tp->rx_opt, 0)) | 5504 | tcp_paws_reject(&tp->rx_opt, 0)) |
5514 | goto discard_and_undo; | 5505 | goto discard_and_undo; |
5515 | 5506 | ||
5516 | if (th->syn) { | 5507 | if (th->syn) { |
@@ -5648,7 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5648 | 5639 | ||
5649 | /* step 5: check the ACK field */ | 5640 | /* step 5: check the ACK field */ |
5650 | if (th->ack) { | 5641 | if (th->ack) { |
5651 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); | 5642 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; |
5652 | 5643 | ||
5653 | switch (sk->sk_state) { | 5644 | switch (sk->sk_state) { |
5654 | case TCP_SYN_RECV: | 5645 | case TCP_SYN_RECV: |
@@ -5670,8 +5661,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5670 | tp->snd_una = TCP_SKB_CB(skb)->ack_seq; | 5661 | tp->snd_una = TCP_SKB_CB(skb)->ack_seq; |
5671 | tp->snd_wnd = ntohs(th->window) << | 5662 | tp->snd_wnd = ntohs(th->window) << |
5672 | tp->rx_opt.snd_wscale; | 5663 | tp->rx_opt.snd_wscale; |
5673 | tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, | 5664 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5674 | TCP_SKB_CB(skb)->seq); | ||
5675 | 5665 | ||
5676 | /* tcp_ack considers this ACK as duplicate | 5666 | /* tcp_ack considers this ACK as duplicate |
5677 | * and does not calculate rtt. | 5667 | * and does not calculate rtt. |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cf74c416831a..d0a314879d81 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1226,15 +1226,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1226 | if (want_cookie && !tmp_opt.saw_tstamp) | 1226 | if (want_cookie && !tmp_opt.saw_tstamp) |
1227 | tcp_clear_options(&tmp_opt); | 1227 | tcp_clear_options(&tmp_opt); |
1228 | 1228 | ||
1229 | if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { | ||
1230 | /* Some OSes (unknown ones, but I see them on web server, which | ||
1231 | * contains information interesting only for windows' | ||
1232 | * users) do not send their stamp in SYN. It is easy case. | ||
1233 | * We simply do not advertise TS support. | ||
1234 | */ | ||
1235 | tmp_opt.saw_tstamp = 0; | ||
1236 | tmp_opt.tstamp_ok = 0; | ||
1237 | } | ||
1238 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | 1229 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; |
1239 | 1230 | ||
1240 | tcp_openreq_init(req, &tmp_opt, skb); | 1231 | tcp_openreq_init(req, &tmp_opt, skb); |
@@ -2355,7 +2346,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) | |||
2355 | 2346 | ||
2356 | switch (skb->ip_summed) { | 2347 | switch (skb->ip_summed) { |
2357 | case CHECKSUM_COMPLETE: | 2348 | case CHECKSUM_COMPLETE: |
2358 | if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, | 2349 | if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, |
2359 | skb->csum)) { | 2350 | skb->csum)) { |
2360 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 2351 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
2361 | break; | 2352 | break; |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f67effbb102b..43bbba7926ee 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -107,7 +107,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
107 | if (tmp_opt.saw_tstamp) { | 107 | if (tmp_opt.saw_tstamp) { |
108 | tmp_opt.ts_recent = tcptw->tw_ts_recent; | 108 | tmp_opt.ts_recent = tcptw->tw_ts_recent; |
109 | tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; | 109 | tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
110 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); | 110 | paws_reject = tcp_paws_reject(&tmp_opt, th->rst); |
111 | } | 111 | } |
112 | } | 112 | } |
113 | 113 | ||
@@ -399,7 +399,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
399 | 399 | ||
400 | tcp_prequeue_init(newtp); | 400 | tcp_prequeue_init(newtp); |
401 | 401 | ||
402 | tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); | 402 | tcp_init_wl(newtp, treq->rcv_isn); |
403 | 403 | ||
404 | newtp->srtt = 0; | 404 | newtp->srtt = 0; |
405 | newtp->mdev = TCP_TIMEOUT_INIT; | 405 | newtp->mdev = TCP_TIMEOUT_INIT; |
@@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
434 | newtp->rx_opt.saw_tstamp = 0; | 434 | newtp->rx_opt.saw_tstamp = 0; |
435 | 435 | ||
436 | newtp->rx_opt.dsack = 0; | 436 | newtp->rx_opt.dsack = 0; |
437 | newtp->rx_opt.eff_sacks = 0; | ||
438 | |||
439 | newtp->rx_opt.num_sacks = 0; | 437 | newtp->rx_opt.num_sacks = 0; |
438 | |||
440 | newtp->urg_data = 0; | 439 | newtp->urg_data = 0; |
441 | 440 | ||
442 | if (sock_flag(newsk, SOCK_KEEPOPEN)) | 441 | if (sock_flag(newsk, SOCK_KEEPOPEN)) |
@@ -512,7 +511,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
512 | * from another data. | 511 | * from another data. |
513 | */ | 512 | */ |
514 | tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); | 513 | tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); |
515 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); | 514 | paws_reject = tcp_paws_reject(&tmp_opt, th->rst); |
516 | } | 515 | } |
517 | } | 516 | } |
518 | 517 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index da2c3b8794f2..c1f259d2d33b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -441,10 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
441 | *ptr++ = htonl(sp[this_sack].end_seq); | 441 | *ptr++ = htonl(sp[this_sack].end_seq); |
442 | } | 442 | } |
443 | 443 | ||
444 | if (tp->rx_opt.dsack) { | 444 | tp->rx_opt.dsack = 0; |
445 | tp->rx_opt.dsack = 0; | ||
446 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; | ||
447 | } | ||
448 | } | 445 | } |
449 | } | 446 | } |
450 | 447 | ||
@@ -550,6 +547,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
550 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | 547 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; |
551 | struct tcp_sock *tp = tcp_sk(sk); | 548 | struct tcp_sock *tp = tcp_sk(sk); |
552 | unsigned size = 0; | 549 | unsigned size = 0; |
550 | unsigned int eff_sacks; | ||
553 | 551 | ||
554 | #ifdef CONFIG_TCP_MD5SIG | 552 | #ifdef CONFIG_TCP_MD5SIG |
555 | *md5 = tp->af_specific->md5_lookup(sk, sk); | 553 | *md5 = tp->af_specific->md5_lookup(sk, sk); |
@@ -568,10 +566,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
568 | size += TCPOLEN_TSTAMP_ALIGNED; | 566 | size += TCPOLEN_TSTAMP_ALIGNED; |
569 | } | 567 | } |
570 | 568 | ||
571 | if (unlikely(tp->rx_opt.eff_sacks)) { | 569 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; |
570 | if (unlikely(eff_sacks)) { | ||
572 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | 571 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; |
573 | opts->num_sack_blocks = | 572 | opts->num_sack_blocks = |
574 | min_t(unsigned, tp->rx_opt.eff_sacks, | 573 | min_t(unsigned, eff_sacks, |
575 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | 574 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / |
576 | TCPOLEN_SACK_PERBLOCK); | 575 | TCPOLEN_SACK_PERBLOCK); |
577 | size += TCPOLEN_SACK_BASE_ALIGNED + | 576 | size += TCPOLEN_SACK_BASE_ALIGNED + |
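With rx_opt.eff_sacks gone (it was dropped from the receive path in the tcp_input.c hunks earlier), the option writer above recomputes the effective block count as num_sacks + dsack right where the option space is sized. A self-contained sketch of that sizing step (plain C; the two TCPOLEN_* values match their usual kernel definitions of 4 and 8 bytes, but treat them as assumptions here):

#include <stdint.h>

#define TCPOLEN_SACK_BASE_ALIGNED       4       /* kind + len, padded */
#define TCPOLEN_SACK_PERBLOCK           8       /* two 32-bit sequence numbers */

/*
 * How many SACK blocks fit into the remaining TCP option space, given the
 * pending SACK blocks plus an optional D-SACK block.  'remaining' is
 * assumed to be at least TCPOLEN_SACK_BASE_ALIGNED.
 */
static unsigned int sack_blocks_to_send(unsigned int num_sacks,
                                        unsigned int dsack,
                                        unsigned int remaining)
{
        unsigned int eff_sacks = num_sacks + dsack;     /* recomputed on demand */
        unsigned int room = (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
                            TCPOLEN_SACK_PERBLOCK;

        return eff_sacks < room ? eff_sacks : room;
}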
@@ -663,10 +662,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
663 | th->urg_ptr = 0; | 662 | th->urg_ptr = 0; |
664 | 663 | ||
665 | /* The urg_mode check is necessary during a below snd_una win probe */ | 664 | /* The urg_mode check is necessary during a below snd_una win probe */ |
666 | if (unlikely(tcp_urg_mode(tp) && | 665 | if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) { |
667 | between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { | 666 | if (before(tp->snd_up, tcb->seq + 0x10000)) { |
668 | th->urg_ptr = htons(tp->snd_up - tcb->seq); | 667 | th->urg_ptr = htons(tp->snd_up - tcb->seq); |
669 | th->urg = 1; | 668 | th->urg = 1; |
669 | } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { | ||
670 | th->urg_ptr = 0xFFFF; | ||
671 | th->urg = 1; | ||
672 | } | ||
670 | } | 673 | } |
671 | 674 | ||
672 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); | 675 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
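The urgent-pointer hunk above handles the case where snd_up lies more than 64 KB ahead of the segment's starting sequence number: the 16-bit urg_ptr field cannot encode that offset, so instead of silently omitting URG the sender now advertises the maximum value 0xFFFF when that representable range already reaches past everything sent so far. A sketch of the clamping logic (plain C; the tcp_urg_mode() precondition from the hunk is assumed by the caller and omitted here):

#include <stdint.h>

/* Wraparound-safe sequence comparisons. */
static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

/*
 * Compute the 16-bit urgent pointer for a segment starting at 'seq',
 * given the urgent point 'snd_up' and the highest sent sequence 'snd_nxt'.
 * Returns 1 and stores the pointer if URG should be set, 0 otherwise.
 */
static int urg_pointer(uint32_t seq, uint32_t snd_up, uint32_t snd_nxt,
                       uint16_t *urg_ptr)
{
        if (!seq_before(seq, snd_up))
                return 0;               /* urgent point already passed */

        if (seq_before(snd_up, seq + 0x10000)) {
                *urg_ptr = (uint16_t)(snd_up - seq);    /* exact offset fits */
                return 1;
        }
        if (seq_after(seq + 0xFFFF, snd_nxt)) {
                *urg_ptr = 0xFFFF;      /* too far ahead: advertise the maximum */
                return 1;
        }
        return 0;
}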
@@ -763,11 +766,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
763 | struct sk_buff *buff; | 766 | struct sk_buff *buff; |
764 | int nsize, old_factor; | 767 | int nsize, old_factor; |
765 | int nlen; | 768 | int nlen; |
766 | u16 flags; | 769 | u8 flags; |
767 | 770 | ||
768 | BUG_ON(len > skb->len); | 771 | BUG_ON(len > skb->len); |
769 | 772 | ||
770 | tcp_clear_retrans_hints_partial(tp); | ||
771 | nsize = skb_headlen(skb) - len; | 773 | nsize = skb_headlen(skb) - len; |
772 | if (nsize < 0) | 774 | if (nsize < 0) |
773 | nsize = 0; | 775 | nsize = 0; |
@@ -850,6 +852,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
850 | tcp_verify_left_out(tp); | 852 | tcp_verify_left_out(tp); |
851 | } | 853 | } |
852 | tcp_adjust_fackets_out(sk, skb, diff); | 854 | tcp_adjust_fackets_out(sk, skb, diff); |
855 | |||
856 | if (tp->lost_skb_hint && | ||
857 | before(TCP_SKB_CB(skb)->seq, | ||
858 | TCP_SKB_CB(tp->lost_skb_hint)->seq) && | ||
859 | (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) | ||
860 | tp->lost_cnt_hint -= diff; | ||
853 | } | 861 | } |
854 | 862 | ||
855 | /* Link BUFF into the send queue. */ | 863 | /* Link BUFF into the send queue. */ |
@@ -913,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
913 | * factor and mss. | 921 | * factor and mss. |
914 | */ | 922 | */ |
915 | if (tcp_skb_pcount(skb) > 1) | 923 | if (tcp_skb_pcount(skb) > 1) |
916 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); | 924 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); |
917 | 925 | ||
918 | return 0; | 926 | return 0; |
919 | } | 927 | } |
@@ -974,15 +982,6 @@ void tcp_mtup_init(struct sock *sk) | |||
974 | icsk->icsk_mtup.probe_size = 0; | 982 | icsk->icsk_mtup.probe_size = 0; |
975 | } | 983 | } |
976 | 984 | ||
977 | /* Bound MSS / TSO packet size with the half of the window */ | ||
978 | static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) | ||
979 | { | ||
980 | if (tp->max_window && pktsize > (tp->max_window >> 1)) | ||
981 | return max(tp->max_window >> 1, 68U - tp->tcp_header_len); | ||
982 | else | ||
983 | return pktsize; | ||
984 | } | ||
985 | |||
986 | /* This function synchronize snd mss to current pmtu/exthdr set. | 985 | /* This function synchronize snd mss to current pmtu/exthdr set. |
987 | 986 | ||
988 | tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts | 987 | tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts |
@@ -1029,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
1029 | /* Compute the current effective MSS, taking SACKs and IP options, | 1028 | /* Compute the current effective MSS, taking SACKs and IP options, |
1030 | * and even PMTU discovery events into account. | 1029 | * and even PMTU discovery events into account. |
1031 | */ | 1030 | */ |
1032 | unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | 1031 | unsigned int tcp_current_mss(struct sock *sk) |
1033 | { | 1032 | { |
1034 | struct tcp_sock *tp = tcp_sk(sk); | 1033 | struct tcp_sock *tp = tcp_sk(sk); |
1035 | struct dst_entry *dst = __sk_dst_get(sk); | 1034 | struct dst_entry *dst = __sk_dst_get(sk); |
1036 | u32 mss_now; | 1035 | u32 mss_now; |
1037 | u16 xmit_size_goal; | ||
1038 | int doing_tso = 0; | ||
1039 | unsigned header_len; | 1036 | unsigned header_len; |
1040 | struct tcp_out_options opts; | 1037 | struct tcp_out_options opts; |
1041 | struct tcp_md5sig_key *md5; | 1038 | struct tcp_md5sig_key *md5; |
1042 | 1039 | ||
1043 | mss_now = tp->mss_cache; | 1040 | mss_now = tp->mss_cache; |
1044 | 1041 | ||
1045 | if (large_allowed && sk_can_gso(sk)) | ||
1046 | doing_tso = 1; | ||
1047 | |||
1048 | if (dst) { | 1042 | if (dst) { |
1049 | u32 mtu = dst_mtu(dst); | 1043 | u32 mtu = dst_mtu(dst); |
1050 | if (mtu != inet_csk(sk)->icsk_pmtu_cookie) | 1044 | if (mtu != inet_csk(sk)->icsk_pmtu_cookie) |
@@ -1062,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
1062 | mss_now -= delta; | 1056 | mss_now -= delta; |
1063 | } | 1057 | } |
1064 | 1058 | ||
1065 | xmit_size_goal = mss_now; | ||
1066 | |||
1067 | if (doing_tso) { | ||
1068 | xmit_size_goal = ((sk->sk_gso_max_size - 1) - | ||
1069 | inet_csk(sk)->icsk_af_ops->net_header_len - | ||
1070 | inet_csk(sk)->icsk_ext_hdr_len - | ||
1071 | tp->tcp_header_len); | ||
1072 | |||
1073 | xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); | ||
1074 | xmit_size_goal -= (xmit_size_goal % mss_now); | ||
1075 | } | ||
1076 | tp->xmit_size_goal = xmit_size_goal; | ||
1077 | |||
1078 | return mss_now; | 1059 | return mss_now; |
1079 | } | 1060 | } |
1080 | 1061 | ||
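This hunk, paired with the tcp_sendmsg() change near the top of the patch (tcp_send_mss() returning the MSS and filling in a separate size_goal), drops the large_allowed flag and the cached tp->xmit_size_goal: tcp_current_mss() now reports only the MSS, and the TSO chunk goal is derived by the send path. A sketch of the goal computation that used to live here (plain C; gso_max_size, header_len and max_window are plain parameters standing in for the socket fields, and mss_now is assumed nonzero):

#include <stdint.h>

/* Bound a TSO chunk to half of the largest window the peer has advertised. */
static uint32_t bound_to_half_wnd(uint32_t max_window, uint32_t min_pkt,
                                  uint32_t pktsize)
{
        if (max_window && pktsize > (max_window >> 1)) {
                uint32_t half = max_window >> 1;
                return half > min_pkt ? half : min_pkt;
        }
        return pktsize;
}

/*
 * TSO size goal: as many whole MSS-sized segments as fit under the
 * device's GSO limit (minus protocol headers), never more than half
 * of the peer's maximum window.
 */
static uint32_t xmit_size_goal(uint32_t mss_now, uint32_t gso_max_size,
                               uint32_t header_len, uint32_t max_window)
{
        uint32_t goal = (gso_max_size - 1) - header_len;

        goal = bound_to_half_wnd(max_window, mss_now, goal);
        goal -= goal % mss_now;         /* whole segments only */
        return goal;
}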
@@ -1256,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk) | |||
1256 | struct sk_buff *skb = tcp_send_head(sk); | 1237 | struct sk_buff *skb = tcp_send_head(sk); |
1257 | 1238 | ||
1258 | return (skb && | 1239 | return (skb && |
1259 | tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), | 1240 | tcp_snd_test(sk, skb, tcp_current_mss(sk), |
1260 | (tcp_skb_is_last(sk, skb) ? | 1241 | (tcp_skb_is_last(sk, skb) ? |
1261 | tp->nonagle : TCP_NAGLE_PUSH))); | 1242 | tp->nonagle : TCP_NAGLE_PUSH))); |
1262 | } | 1243 | } |
@@ -1273,7 +1254,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1273 | { | 1254 | { |
1274 | struct sk_buff *buff; | 1255 | struct sk_buff *buff; |
1275 | int nlen = skb->len - len; | 1256 | int nlen = skb->len - len; |
1276 | u16 flags; | 1257 | u8 flags; |
1277 | 1258 | ||
1278 | /* All of a TSO frame must be composed of paged data. */ | 1259 | /* All of a TSO frame must be composed of paged data. */ |
1279 | if (skb->len != skb->data_len) | 1260 | if (skb->len != skb->data_len) |
@@ -1352,6 +1333,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1352 | if (limit >= sk->sk_gso_max_size) | 1333 | if (limit >= sk->sk_gso_max_size) |
1353 | goto send_now; | 1334 | goto send_now; |
1354 | 1335 | ||
1336 | /* Middle in queue won't get any more data, full sendable already? */ | ||
1337 | if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) | ||
1338 | goto send_now; | ||
1339 | |||
1355 | if (sysctl_tcp_tso_win_divisor) { | 1340 | if (sysctl_tcp_tso_win_divisor) { |
1356 | u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); | 1341 | u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); |
1357 | 1342 | ||
@@ -1405,11 +1390,11 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1405 | icsk->icsk_mtup.probe_size || | 1390 | icsk->icsk_mtup.probe_size || |
1406 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open || | 1391 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open || |
1407 | tp->snd_cwnd < 11 || | 1392 | tp->snd_cwnd < 11 || |
1408 | tp->rx_opt.eff_sacks) | 1393 | tp->rx_opt.num_sacks || tp->rx_opt.dsack) |
1409 | return -1; | 1394 | return -1; |
1410 | 1395 | ||
1411 | /* Very simple search strategy: just double the MSS. */ | 1396 | /* Very simple search strategy: just double the MSS. */ |
1412 | mss_now = tcp_current_mss(sk, 0); | 1397 | mss_now = tcp_current_mss(sk); |
1413 | probe_size = 2 * tp->mss_cache; | 1398 | probe_size = 2 * tp->mss_cache; |
1414 | size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; | 1399 | size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; |
1415 | if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { | 1400 | if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { |
@@ -1754,11 +1739,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
1754 | struct tcp_sock *tp = tcp_sk(sk); | 1739 | struct tcp_sock *tp = tcp_sk(sk); |
1755 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); | 1740 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); |
1756 | int skb_size, next_skb_size; | 1741 | int skb_size, next_skb_size; |
1757 | u16 flags; | ||
1758 | 1742 | ||
1759 | skb_size = skb->len; | 1743 | skb_size = skb->len; |
1760 | next_skb_size = next_skb->len; | 1744 | next_skb_size = next_skb->len; |
1761 | flags = TCP_SKB_CB(skb)->flags; | ||
1762 | 1745 | ||
1763 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); | 1746 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); |
1764 | 1747 | ||
@@ -1778,9 +1761,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
1778 | /* Update sequence range on original skb. */ | 1761 | /* Update sequence range on original skb. */ |
1779 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; | 1762 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; |
1780 | 1763 | ||
1781 | /* Merge over control information. */ | 1764 | /* Merge over control information. This moves PSH/FIN etc. over */ |
1782 | flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ | 1765 | TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags; |
1783 | TCP_SKB_CB(skb)->flags = flags; | ||
1784 | 1766 | ||
1785 | /* All done, get rid of second SKB and account for it so | 1767 | /* All done, get rid of second SKB and account for it so |
1786 | * packet counting does not break. | 1768 | * packet counting does not break. |
@@ -1894,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1894 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) | 1876 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) |
1895 | return -EHOSTUNREACH; /* Routing failure or similar. */ | 1877 | return -EHOSTUNREACH; /* Routing failure or similar. */ |
1896 | 1878 | ||
1897 | cur_mss = tcp_current_mss(sk, 0); | 1879 | cur_mss = tcp_current_mss(sk); |
1898 | 1880 | ||
1899 | /* If receiver has shrunk his window, and skb is out of | 1881 | /* If receiver has shrunk his window, and skb is out of |
1900 | * new window, do not retransmit it. The exception is the | 1882 | * new window, do not retransmit it. The exception is the |
@@ -1908,6 +1890,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1908 | if (skb->len > cur_mss) { | 1890 | if (skb->len > cur_mss) { |
1909 | if (tcp_fragment(sk, skb, cur_mss, cur_mss)) | 1891 | if (tcp_fragment(sk, skb, cur_mss, cur_mss)) |
1910 | return -ENOMEM; /* We'll try again later. */ | 1892 | return -ENOMEM; /* We'll try again later. */ |
1893 | } else { | ||
1894 | tcp_init_tso_segs(sk, skb, cur_mss); | ||
1911 | } | 1895 | } |
1912 | 1896 | ||
1913 | tcp_retrans_try_collapse(sk, skb, cur_mss); | 1897 | tcp_retrans_try_collapse(sk, skb, cur_mss); |
@@ -2061,7 +2045,7 @@ begin_fwd: | |||
2061 | goto begin_fwd; | 2045 | goto begin_fwd; |
2062 | 2046 | ||
2063 | } else if (!(sacked & TCPCB_LOST)) { | 2047 | } else if (!(sacked & TCPCB_LOST)) { |
2064 | if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) | 2048 | if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) |
2065 | hole = skb; | 2049 | hole = skb; |
2066 | continue; | 2050 | continue; |
2067 | 2051 | ||
@@ -2100,7 +2084,7 @@ void tcp_send_fin(struct sock *sk) | |||
2100 | * unsent frames. But be careful about outgoing SACKS | 2084 | * unsent frames. But be careful about outgoing SACKS |
2101 | * and IP options. | 2085 | * and IP options. |
2102 | */ | 2086 | */ |
2103 | mss_now = tcp_current_mss(sk, 1); | 2087 | mss_now = tcp_current_mss(sk); |
2104 | 2088 | ||
2105 | if (tcp_send_head(sk) != NULL) { | 2089 | if (tcp_send_head(sk) != NULL) { |
2106 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; | 2090 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; |
@@ -2325,7 +2309,7 @@ static void tcp_connect_init(struct sock *sk) | |||
2325 | sk->sk_err = 0; | 2309 | sk->sk_err = 0; |
2326 | sock_reset_flag(sk, SOCK_DONE); | 2310 | sock_reset_flag(sk, SOCK_DONE); |
2327 | tp->snd_wnd = 0; | 2311 | tp->snd_wnd = 0; |
2328 | tcp_init_wl(tp, tp->write_seq, 0); | 2312 | tcp_init_wl(tp, 0); |
2329 | tp->snd_una = tp->write_seq; | 2313 | tp->snd_una = tp->write_seq; |
2330 | tp->snd_sml = tp->write_seq; | 2314 | tp->snd_sml = tp->write_seq; |
2331 | tp->snd_up = tp->write_seq; | 2315 | tp->snd_up = tp->write_seq; |
@@ -2512,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2512 | if ((skb = tcp_send_head(sk)) != NULL && | 2496 | if ((skb = tcp_send_head(sk)) != NULL && |
2513 | before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { | 2497 | before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { |
2514 | int err; | 2498 | int err; |
2515 | unsigned int mss = tcp_current_mss(sk, 0); | 2499 | unsigned int mss = tcp_current_mss(sk); |
2516 | unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; | 2500 | unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
2517 | 2501 | ||
2518 | if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) | 2502 | if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 25524d4e372a..59f5b5e7c566 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -165,9 +165,10 @@ static int tcpprobe_sprint(char *tbuf, int n) | |||
165 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, | 165 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, |
166 | size_t len, loff_t *ppos) | 166 | size_t len, loff_t *ppos) |
167 | { | 167 | { |
168 | int error = 0, cnt = 0; | 168 | int error = 0; |
169 | size_t cnt = 0; | ||
169 | 170 | ||
170 | if (!buf || len < 0) | 171 | if (!buf) |
171 | return -EINVAL; | 172 | return -EINVAL; |
172 | 173 | ||
173 | while (cnt < len) { | 174 | while (cnt < len) { |
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 4660b088a8ce..a76513779e2b 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
@@ -24,14 +24,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
24 | 24 | ||
25 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 25 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
26 | tcp_slow_start(tp); | 26 | tcp_slow_start(tp); |
27 | else { | 27 | else |
28 | tp->snd_cwnd_cnt++; | 28 | tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); |
29 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ | ||
30 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
31 | tp->snd_cwnd++; | ||
32 | tp->snd_cwnd_cnt = 0; | ||
33 | } | ||
34 | } | ||
35 | } | 29 | } |
36 | 30 | ||
37 | static u32 tcp_scalable_ssthresh(struct sock *sk) | 31 | static u32 tcp_scalable_ssthresh(struct sock *sk) |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0170e914f1b0..b144a26359bc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -328,19 +328,16 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
328 | if (icsk->icsk_retransmits == 0) { | 328 | if (icsk->icsk_retransmits == 0) { |
329 | int mib_idx; | 329 | int mib_idx; |
330 | 330 | ||
331 | if (icsk->icsk_ca_state == TCP_CA_Disorder || | 331 | if (icsk->icsk_ca_state == TCP_CA_Disorder) { |
332 | icsk->icsk_ca_state == TCP_CA_Recovery) { | 332 | if (tcp_is_sack(tp)) |
333 | if (tcp_is_sack(tp)) { | 333 | mib_idx = LINUX_MIB_TCPSACKFAILURES; |
334 | if (icsk->icsk_ca_state == TCP_CA_Recovery) | 334 | else |
335 | mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; | 335 | mib_idx = LINUX_MIB_TCPRENOFAILURES; |
336 | else | 336 | } else if (icsk->icsk_ca_state == TCP_CA_Recovery) { |
337 | mib_idx = LINUX_MIB_TCPSACKFAILURES; | 337 | if (tcp_is_sack(tp)) |
338 | } else { | 338 | mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; |
339 | if (icsk->icsk_ca_state == TCP_CA_Recovery) | 339 | else |
340 | mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; | 340 | mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; |
341 | else | ||
342 | mib_idx = LINUX_MIB_TCPRENOFAILURES; | ||
343 | } | ||
344 | } else if (icsk->icsk_ca_state == TCP_CA_Loss) { | 341 | } else if (icsk->icsk_ca_state == TCP_CA_Loss) { |
345 | mib_idx = LINUX_MIB_TCPLOSSFAILURES; | 342 | mib_idx = LINUX_MIB_TCPLOSSFAILURES; |
346 | } else { | 343 | } else { |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index d08b2e855c22..e9bbff746488 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
@@ -159,12 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
159 | /* In the "non-congestive state", increase cwnd | 159 | /* In the "non-congestive state", increase cwnd |
160 | * every rtt. | 160 | * every rtt. |
161 | */ | 161 | */ |
162 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | 162 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); |
163 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
164 | tp->snd_cwnd++; | ||
165 | tp->snd_cwnd_cnt = 0; | ||
166 | } else | ||
167 | tp->snd_cwnd_cnt++; | ||
168 | } else { | 163 | } else { |
169 | /* In the "congestive state", increase cwnd | 164 | /* In the "congestive state", increase cwnd |
170 | * every other rtt. | 165 | * every other rtt. |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 9ec843a9bbb2..66b6821b984e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
@@ -94,14 +94,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
94 | 94 | ||
95 | } else { | 95 | } else { |
96 | /* Reno */ | 96 | /* Reno */ |
97 | 97 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); | |
98 | if (tp->snd_cwnd_cnt < tp->snd_cwnd) | ||
99 | tp->snd_cwnd_cnt++; | ||
100 | |||
101 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
102 | tp->snd_cwnd++; | ||
103 | tp->snd_cwnd_cnt = 0; | ||
104 | } | ||
105 | } | 98 | } |
106 | 99 | ||
107 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. | 100 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c47c989cb1fb..bda08a09357d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -222,7 +222,7 @@ fail: | |||
222 | return error; | 222 | return error; |
223 | } | 223 | } |
224 | 224 | ||
225 | static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) | 225 | int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) |
226 | { | 226 | { |
227 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); | 227 | struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); |
228 | 228 | ||
@@ -596,6 +596,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
596 | return -EOPNOTSUPP; | 596 | return -EOPNOTSUPP; |
597 | 597 | ||
598 | ipc.opt = NULL; | 598 | ipc.opt = NULL; |
599 | ipc.shtx.flags = 0; | ||
599 | 600 | ||
600 | if (up->pending) { | 601 | if (up->pending) { |
601 | /* | 602 | /* |
@@ -643,6 +644,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, | |||
643 | ipc.addr = inet->saddr; | 644 | ipc.addr = inet->saddr; |
644 | 645 | ||
645 | ipc.oif = sk->sk_bound_dev_if; | 646 | ipc.oif = sk->sk_bound_dev_if; |
647 | err = sock_tx_timestamp(msg, sk, &ipc.shtx); | ||
648 | if (err) | ||
649 | return err; | ||
646 | if (msg->msg_controllen) { | 650 | if (msg->msg_controllen) { |
647 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); | 651 | err = ip_cmsg_send(sock_net(sk), msg, &ipc); |
648 | if (err) | 652 | if (err) |
@@ -1180,7 +1184,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1180 | sk = sknext; | 1184 | sk = sknext; |
1181 | } while (sknext); | 1185 | } while (sknext); |
1182 | } else | 1186 | } else |
1183 | kfree_skb(skb); | 1187 | consume_skb(skb); |
1184 | spin_unlock(&hslot->lock); | 1188 | spin_unlock(&hslot->lock); |
1185 | return 0; | 1189 | return 0; |
1186 | } | 1190 | } |
@@ -1614,7 +1618,8 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) | |||
1614 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); | 1618 | } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); |
1615 | 1619 | ||
1616 | if (!sk) { | 1620 | if (!sk) { |
1617 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | 1621 | if (state->bucket < UDP_HTABLE_SIZE) |
1622 | spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); | ||
1618 | return udp_get_first(seq, state->bucket + 1); | 1623 | return udp_get_first(seq, state->bucket + 1); |
1619 | } | 1624 | } |
1620 | return sk; | 1625 | return sk; |
@@ -1632,6 +1637,9 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) | |||
1632 | 1637 | ||
1633 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) | 1638 | static void *udp_seq_start(struct seq_file *seq, loff_t *pos) |
1634 | { | 1639 | { |
1640 | struct udp_iter_state *state = seq->private; | ||
1641 | state->bucket = UDP_HTABLE_SIZE; | ||
1642 | |||
1635 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; | 1643 | return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; |
1636 | } | 1644 | } |
1637 | 1645 | ||
@@ -1815,6 +1823,7 @@ EXPORT_SYMBOL(udp_lib_getsockopt); | |||
1815 | EXPORT_SYMBOL(udp_lib_setsockopt); | 1823 | EXPORT_SYMBOL(udp_lib_setsockopt); |
1816 | EXPORT_SYMBOL(udp_poll); | 1824 | EXPORT_SYMBOL(udp_poll); |
1817 | EXPORT_SYMBOL(udp_lib_get_port); | 1825 | EXPORT_SYMBOL(udp_lib_get_port); |
1826 | EXPORT_SYMBOL(ipv4_rcv_saddr_equal); | ||
1818 | 1827 | ||
1819 | #ifdef CONFIG_PROC_FS | 1828 | #ifdef CONFIG_PROC_FS |
1820 | EXPORT_SYMBOL(udp_proc_register); | 1829 | EXPORT_SYMBOL(udp_proc_register); |
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2ad24ba31f9d..60d918c96a4f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c | |||
@@ -241,7 +241,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, | |||
241 | 241 | ||
242 | static struct dst_ops xfrm4_dst_ops = { | 242 | static struct dst_ops xfrm4_dst_ops = { |
243 | .family = AF_INET, | 243 | .family = AF_INET, |
244 | .protocol = __constant_htons(ETH_P_IP), | 244 | .protocol = cpu_to_be16(ETH_P_IP), |
245 | .gc = xfrm4_garbage_collect, | 245 | .gc = xfrm4_garbage_collect, |
246 | .update_pmtu = xfrm4_update_pmtu, | 246 | .update_pmtu = xfrm4_update_pmtu, |
247 | .destroy = xfrm4_dst_destroy, | 247 | .destroy = xfrm4_dst_destroy, |